Skip to content

Commit 1f3fd79

Browse files
committed
Adding scripts for getting gadgets stats
* Script for getting default gadgets from all wikis * Script to query all DBs and combine number of users of each gadget
1 parent c50edd1 commit 1f3fd79

File tree

2 files changed

+169
-0
lines changed

2 files changed

+169
-0
lines changed

default_gadgets.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
# (C) eranroz
4+
#
5+
# Distributed under the terms of the MIT license.
6+
7+
import re
8+
import pywikibot
9+
from pywikibot.exceptions import NoPage
10+
import time
11+
12+
def extract_defaults(wikitext):
    """Return the names of gadgets marked as default in a gadgets definition.

    Every line of *wikitext* is matched from its first character against
    ``* <name>`` followed by ``|`` or ``[``, later the word ``default`` and
    then (after optional whitespace) ``|`` or ``]``.  The text captured
    between the bullet and the first ``|``/``[`` is the gadget name; it is
    returned exactly as captured (no stripping).

    :param wikitext: text of a gadgets definition page, one entry per line.
    :return: list of captured gadget names, in order of appearance.
    """
    # Raw string: '\*' and '\s' are invalid escape sequences in a plain
    # literal and trigger warnings on current Python versions.
    default_re = re.compile(r'\*\s*([^\|]+?)[\|\[].*default\s*[\|\]]')
    matches = (default_re.match(line) for line in wikitext.split('\n'))
    return [m.group(1) for m in matches if m]
16+
17+
def getGadgets():
    """Print the default gadgets of the English Wikipedia.

    Loads ``MediaWiki:Gadgets-definition`` from the ``en``/``wikipedia``
    site and prints the list produced by ``extract_defaults`` for its text.
    """
    site = pywikibot.getSite('en', 'wikipedia')
    gadgets_def = pywikibot.Page(site, 'MediaWiki:Gadgets-definition')
    print(extract_defaults(gadgets_def.get()))
23+
24+
def family_default_gadgets(family='wikipedia'):
    """Build and publish a table of default gadgets for one project family.

    For every language reported by the ``en`` site of *family*, the page
    ``MediaWiki:Gadgets-definition`` is read and its default gadgets are
    collected.  The result is rendered as a wikitable (one row per gadget,
    most widely used first), printed, and saved to the page
    ``Gadgets/<family>/default`` on the ``en``/``meta`` site.

    Wikis whose definition page raises ``NoPage`` are skipped.

    :param family: project family name, e.g. ``'wikipedia'``.
    """
    site = pywikibot.getSite('en', family)
    family_sites = [pywikibot.getSite(lang, family)
                    for lang in site.languages()]
    gadgets_dict = dict()  # key - gadget name, value - list of wiki links
    print('going over %i wikies' % len(family_sites))
    for wiki_i, wiki in enumerate(family_sites):
        if wiki_i % 25 == 0:
            time.sleep(1)  # short pause every 25 wikis
            # NOTE(review): the progress line is assumed to belong to the
            # every-25 branch together with the sleep - confirm.
            print('%i: %s' % (wiki_i, wiki.code))
        gadgets_def = pywikibot.Page(wiki, 'MediaWiki:Gadgets-definition')
        # Only the page fetch is guarded, so NoPage cannot hide other errors.
        try:
            wikitext = gadgets_def.get()
        except NoPage:
            continue
        for gadget in extract_defaults(wikitext):
            # Drop HTML comment delimiters captured with the name.
            gadget = gadget.replace('<!--', '').replace('-->', '')
            gadget_item = '[[:%s:%s:MediaWiki:Gadget-%s|%s]]' % (
                family, wiki.code, gadget, wiki.code)
            gadgets_dict.setdefault(gadget, []).append(gadget_item)

    # Most widely enabled gadgets first; ties keep their insertion order.
    by_popularity = sorted(gadgets_dict.items(),
                           key=lambda item: len(item[1]), reverse=True)
    rows = ['| [[Gadgets/%s|%s]] || %s || %i'
            % (name, name, ', '.join(links), len(links))
            for name, links in by_popularity]
    output = """Default gadgets in project %s.
{| class="wikitable sortable plainlinks"
! Gadget !! languages !! #
|-
%s
|}
""" % (family, '\n|-\n'.join(rows))
    print(output)
    meta_wiki = pywikibot.getSite('en', 'meta')
    meta_page = pywikibot.Page(meta_wiki, 'Gadgets/%s/default' % (family))
    meta_page.put(output, 'Default gadgets in %s' % family)
59+
60+
if __name__ == '__main__':
    # Publish the default-gadgets table for each project family in turn.
    families = ('wikipedia', 'wikibooks', 'wiktionary', 'wikiquote',
                'wikinews', 'wikisource', 'wikivoyage', 'wikiversity')
    for fam in families:
        family_default_gadgets(fam)

gadgets_popular.py

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
# (C) eranroz
4+
#
5+
# Distributed under the terms of the MIT license.
6+
7+
import datetime
import io

import MySQLdb
import pywikibot
10+
11+
# Accumulated statistics: family -> {gadget -> [(lang, users)]}
familyGadgets = {}
12+
13+
def _fetchGadgetCounts(cursor, db):
    """Return ``(up_property, count)`` rows of enabled gadgets for *db*.

    A database error is reported on stdout and yields an empty list, so one
    unavailable wiki does not abort the whole run.
    """
    try:
        cursor.execute('USE `%s_p`' % db)
        # The database name is an identifier and cannot be bound as a query
        # parameter; callers are expected to pass trusted names only.
        cursor.execute('''
/* gadgets_popular.py SLOW_OK */
SELECT
up_property,
COUNT(*)
FROM %s_p.user_properties_anon
WHERE up_property LIKE 'gadget-%%'
AND up_value = 1
GROUP BY up_property;
''' % db)
    except MySQLdb.Error as err:
        print('Skipping %s: %s' % (db, err))
        return []
    return cursor.fetchall()


def fillStatsForCluster(host, dbList):
    """Collect per-wiki gadget user counts from one database host.

    For each ``(db, lang, family)`` entry the number of rows per
    ``gadget-*`` preference with ``up_value = 1`` is read from
    ``<db>_p.user_properties_anon`` and appended to the module-level
    ``familyGadgets[family][gadget]`` list as ``(wiki link, count)``.

    :param host: host name passed to ``MySQLdb.connect``.
    :param dbList: iterable of ``(db, lang, family)`` tuples.
    """
    conn = MySQLdb.connect(host=host,
                           read_default_file='~/replica.my.cnf')
    try:
        cursor = conn.cursor()
        try:
            for db, lang, family in dbList:
                print('Querying  %s' % db)
                # The family entry is created even when the query fails.
                gadgetsDict = familyGadgets.setdefault(family, {})
                for row in _fetchGadgetCounts(cursor, db):
                    prop = row[0]
                    # NOTE(review): the driver may hand the property back
                    # as bytes; UTF-8 is assumed - confirm against schema.
                    if isinstance(prop, bytes):
                        prop = prop.decode('utf-8', 'replace')
                    gadgetName = prop.split('gadget-', 1)[1]
                    langLink = '[[:%s:%s:MediaWiki:%s|%s]]' % (
                        family, lang, prop, lang)
                    gadgetsDict.setdefault(gadgetName, []).append(
                        (langLink, row[1]))
        finally:
            cursor.close()
    finally:
        # Release the connection even when a query or row raises.
        conn.close()
46+
47+
48+
# Heading placed at the top of the combined, cross-family report text.
report_template = u'''\
Cross-project gadgets preferences statistics.

'''

# Per-family report.  Placeholders, in order: family name, date text and the
# already-joined table rows.  Literal percent signs are doubled because the
# text is filled in with the % operator.
report_family_template = u'''
Gadgets statistics for %s projects as of %s.
----
{| class="wikitable sortable plainlinks" style="width:85%%; margin:auto;"
|- style="white-space:nowrap;"
! Gadget
! wikis (number of users)
! total number of users
|-
%s
|}
'''
64+
65+
# Read the list of open wikis (cluster host, db name, language, family).
# The metadata connection is released as soon as the rows are fetched.
conn = MySQLdb.connect(host='enwiki.labsdb',
                       db='meta_p',
                       read_default_file='~/replica.my.cnf')
try:
    cursor = conn.cursor()
    try:
        cursor.execute('''
select slice,dbname,lang,family from meta_p.wiki
where is_closed=0
and family in ('wikibooks','wikipedia','wiktionary','wikiquote','wikisource','wikinews','wikiversity','wikivoyage')
and dbname not like 'test%'
''')
        wikiRows = cursor.fetchall()
    finally:
        cursor.close()
finally:
    conn.close()

# Group the wikis by the cluster that hosts them; an empty result simply
# leaves the mapping empty.
nameToCluster = dict()
for clus, db, lang, family in wikiRows:
    nameToCluster.setdefault(clus, []).append((db, lang, family))

for cluster, wikisMetaData in nameToCluster.items():
    print('Filling data from cluster  %s' % cluster)
    fillStatsForCluster(cluster, wikisMetaData)

# Render one table per family, save it to its page on the meta site and
# append it to the combined report.
report_text = report_template
meta_wiki = pywikibot.getSite('meta', 'meta')
report_date = datetime.datetime.now().strftime('%B %Y')
for family, gadgets in familyGadgets.items():
    # (gadget name, "link (count), ..." text, total users over all wikis)
    gadgetsDetails = [
        (gadgetName,
         u', '.join(u'%s (%s)' % (link, count) for link, count in langData),
         sum(count for link, count in langData))
        for gadgetName, langData in gadgets.items()]
    gadgetsDetails.sort(key=lambda x: x[2], reverse=True)

    gadgetsInfo = [u'| [[Gadgets/%s]] || %s || %i'
                   % (gadgetName, langData, totalCount)
                   for gadgetName, langData, totalCount in gadgetsDetails]
    family_report = report_family_template % (
        family, report_date, '\n|-\n'.join(gadgetsInfo))
    meta_page = pywikibot.Page(meta_wiki, 'Gadgets/%s' % (family))
    meta_page.put(family_report, 'Update')
    report_text = report_text + '\n' + family_report

# Keep a local copy of the combined report; a write failure is reported but
# does not stop the report from being printed.
try:
    with io.open('gadgetsData.wikitext', 'w', encoding='utf-8') as resFile:
        resFile.write(report_text + u'\n')
except (IOError, OSError) as err:
    print('Could not write gadgetsData.wikitext: %s' % err)
print(report_text)
106+

0 commit comments

Comments
 (0)