Usuari:TronaBot/Python/ranking.py
Aparença
< Usuari:TronaBot | Python
#!/usr/bin/env python
#-*- encoding: utf8 -*-
# Copyleft (!C) 2013 w:ca:User:Coet
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse, codecs, time, os, sys, re
from datetime import datetime
try:
import MySQLdb
sql_available = True
except:
#we aren't on toolserver, api will be required
sql_available = False
#loading superfolder (pywikipedia)
sys.path.append(os.path.split(os.getcwd())[0])
import wikipedia as pywikilib
import query as api
#user modules
sys.path.append("/home/pasqual/public_html/pyuserlib")
import common
from dateutil.relativedelta import relativedelta as time_delta
from func import format_number
def last_hours_api_ts(h):
date = datetime.now()
date = date-timedelta(hours=h)
return date.strftime("%Y-%m-%dT%H:%M:%SZ")
class API(object):
def __init__(self, site):
self.site=site
def query(self, params):
return api.GetData(params, self.site)
def allusers(self):
#action=query&list=allusers&auprop=groups&aulimit=max&auwitheditsonly&auactiveusers&auexcludegroup=bot&auprop=editcount
aufrom=True
users=[]
params = {
"action":"query",
"list": "allusers",
"aulimit":"max",
"auwitheditsonly":"",
#"auactiveusers":"",
"auprop":["editcount","groups"],
"auexcludegroup":["bot"],
}
while aufrom:
if isinstance(aufrom, basestring):
params['aufrom'] = aufrom
data = self.query(params)
users += data['query']['allusers']
print time.strftime("[%H:%M:%S]"), len(users)
aufrom = data['query-continue']['allusers']['aufrom'] if data.has_key("query-continue") else None
return users
def last_edit(self, user):
#action=query&list=usercontribs&ucuser=Coet&ucprop=title|timestamp&uclimit=1
params = {
"action":"query",
"list": "usercontribs",
"ucuser": user,
"uclimit": 1,
"ucprop": "timestamp"
}
data = self.query(params)
return data['query']['usercontribs'][0]['timestamp']
class SQL(object):
def __init__(self, lang="ca", host=None, db=None, fname=None, cfg='~/.my.cnf', ):
self.host = host or "%swiki-p.db.toolserver.org" % lang
self.db = db or '%swiki_p' % lang
self.cfg = cfg
path= "/home/pasqual/temp/"
self.fname = fname and "%s%s.log" % (path,fname) or "%ssql-%s.log"% (path, time.strftime("%y%m%d_%H%M%S"))
def start(self):
self.conn = MySQLdb.connect(host=self.host, db=self.db, read_default_file=self.cfg, use_unicode=True)
self.cursor = self.conn.cursor()
def stop(self):
self.cursor.close()
self.conn.close()
def query(self, sql, **kwargs):
self.start()
if kwargs:
self.cursor.execute(sql.format(**kwargs))
else:
self.cursor.execute(sql)
result = self.cursor.fetchall()
self.stop()
return result
def output(self, sql, **kwargs):
new_sql = """mysql -h {host} -e "USE {db}; {sql}" > {fname}"""
if kwargs:
sql = sql.format(**kwargs)
sql = new_sql.format(
host=self.host,
db=self.db,
sql=sql,
fname = self.fname
)
print sql
os.system(sql)
def by_api(fname):
query = API(site)
users = query.allusers()
users_by_num_of_edits = {}
for user in users:
if user['name'] in skip:continue
if not users_by_num_of_edits.has_key(user['editcount']):
users_by_num_of_edits[user['editcount']]=[]
users_by_num_of_edits[user['editcount']].append((user['name'], user['groups']))
counter=1
goods=[]
break_loop=False
i=0
for num_of_edits in reversed(sorted(users_by_num_of_edits.keys())):
goods.append((num_of_edits,[]))
for user in users_by_num_of_edits[num_of_edits]:
goods[i][1].append((user[0], query.last_edit(user[0]), "sysop" in user[1]))
counter+=1
if counter>500:
break_loop=True
break
i+=1
if break_loop:break
f=codecs.open(fname, "w","utf-8")
pywikilib.json.dump(goods, f, indent=4, encoding="utf8")
f.close()
#os.startfile(fname)
return goods
def by_sql(fname):
sql = SQL(fname="ranking")
query = (
#"SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; "
"SELECT user_name, user_editcount, ug_group "
"FROM user "
"LEFT JOIN user_groups "
"ON user_id = ug_user "
"WHERE user_editcount >= {min} "
"ORDER BY user_editcount DESC LIMIT {limit};"
)
min=25
users = sql.query(query, min=min, limit=5000)
users_by_num_of_edits=set()
for row in users:
username = unicode(row[0], "utf-8")
if username in skip:continue
if len(row)==1:
users_by_num_of_edits.add((username, row[1], row[2]))
pywikilib.output(u"%s usuaris amb més de %i edicions." % (len(users_by_num_of_edits), min))
goods_dict={}
api = API(site)
for user, editcount, group in users_by_num_of_edits:
if goods.has_key(editcount):
goods[editcount].append((user, api.last_edit(user), group))
else:
goods[editcount]=[(user, api.last_edit(user), group)]
goods=[]
for ec in goods:
goods=(ec, [])
for user, editcount, group in goods_dict[ec]:
goods.append((user, last_edit, group))
f=codecs.open(fname, "w","utf-8")
pywikilib.json.dump(goods, f, indent=4, encoding="utf8")
f.close()
#os.startfile(fname)
return goods
def by_cache(fname):
f=codecs.open(fname, "r", "utf-8")
users_by_num_of_edits = pywikilib.json.load(f, encoding="utf8")
f.close()
return users_by_num_of_edits
def test_sql():
sql = SQL(fname="ranking")
query = (
#"SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; "
"SELECT user_name, user_editcount, ug_group " #, rev_timestamp "
"FROM user "
"LEFT JOIN user_groups "
"ON user_id = ug_user "
#"RIGHT JOIN revision "
#"ON user_id = rev_user "
"WHERE user_editcount >= {min} "
"ORDER BY user_editcount DESC LIMIT {limit};"
)
sql.output(query, limit=5000, min=25)
def main():
fname="logs/ranking_500_cawiki.log"
if args.api:
users = by_api(fname)
elif sql_available and args.sql:
users = by_sql(fname)
elif args.cache:
users = by_cache(fname)
else:
users ={}
if args.testsql:
test_sql()
return
text=[u"{{/begin|500}}"]
i=0;j=1
for noe in users:
for user, ts, groups in users[i][1]:
#user = user[0]; ts=user[1
ts=common.Date(ts)
td = time_delta(datetime.now(),ts.to_datetime())
fmt_usr = (td.years*365)+(td.months*30)+td.days >= 30 and u'<span style="color:gray">%s</span>' % user or user
text.append(
u"|-\n| {idx} || [[Usuari:{usr}|{fmt_usr}]]{grp} || [[Especial:Contributions/{usr}|{noe}]] || {lc}". format(
idx = j,
fmt_usr = fmt_usr,
grp = groups and " (Admin)" or "",
noe=format_number(noe[0],0),
usr=user,
lc=ts.to_cest().strftime("%Y-%m-%d %H:%M")
)
)
j+=1
i+=1
if i>len(users):break
print time.strftime("[%H:%M:%S]"), j-1
text+=["{{/end}}","{{nobots|allow=TronaBot}}"]
text="\n".join(text)
fname=fname.replace(".log","_text.log")
f=codecs.open(fname, "w","utf-8")
f.write(text)
f.close()
if args.openfiles:os.startfile(fname)
if args.edit:page.put(text, u"Bot: actualització")
if __name__ == '__main__':
#python2.7 ranking_usuaris.py -AE
parser = argparse.ArgumentParser()
parser.add_argument("--sql", "-S",action="store_true", default=False)
parser.add_argument("--api", "-A",action="store_true", default=False)
parser.add_argument("--cache", "-C",action="store_true", default=False)
parser.add_argument("--edit", "-E",action="store_true", default=False)
parser.add_argument("--openfiles", "-O",action="store_true", default=False)
parser.add_argument("--testsql", "-t",action="store_true", default=False)
args = parser.parse_args()
site = pywikilib.getSite()
title = u"Viquipèdia:Llista de viquipedistes per nombre d'edicions"
page = pywikilib.Page(site, title)
bot_page = pywikilib.Page(site, u"%s/unflaggedbots" % title)
exception_page = pywikilib.Page(site, u"%s/exclude" % title)
re_link = re.compile(ur"\* *\[\[Us(?:er|uari|ària) *:([^\]|]+?)(?:\|[^\]]*)?\]\]")
bots = re_link.findall(bot_page.get())
users = re_link.findall(exception_page.get())
skip = bots + users
pywikilib.output("S'ha inclós %i excepcions. (%i usuaris i %i bots.)" % (len(skip), len(users), len(bots)))
main()
pywikilib.stopme()