-
Notifications
You must be signed in to change notification settings - Fork 0
/
commons1.py
134 lines (112 loc) · 4.07 KB
/
commons1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/python
# -*- coding: utf-8 -*-
#$ -l h_rt=0:40:00
#$ -l s_rt=0:35:00
#$ -l virtual_free=100M
#$ -j y
#$ -o $HOME/commons_deletion.out
'''
List images that are pending deletion on Commons and are still used on the local wiki.
'''
import sys, os
import pywikibot
import MySQLdb
from time import strftime
# "constants"
# MediaWiki namespace number of category pages; getSubCats() filters
# page_namespace on this value so that only subcategories are returned
WP_CATEGORY_NS = 14
# when True, the query helpers print the SQL statement they just executed
DEBUG = False
def connectWikiDatabase(lang):
    '''
    Connect to the Commons replica database.

    lang -- wiki language code supplied by the caller. It is only checked
            for being non-empty: the host and database names below are
            fixed to commonswiki and do not depend on it.

    Returns a (connection, cursor) tuple.
    Raises ValueError when lang is empty, instead of silently returning
    None (which made the caller fail later while unpacking the result).
    '''
    if not lang:
        raise ValueError("lang must be a non-empty wiki language code")
    conn = MySQLdb.connect(
        host="commonswiki.analytics.db.svc.eqiad.wmflabs",
        db="commonswiki_p",
        read_default_file="~/replica.my.cnf",
    )
    return (conn, conn.cursor())
def getSubCats(cursor, sourceCat):
    '''
    Return the titles of the direct subcategories of a Commons category.

    cursor    -- open database cursor used to run the query
    sourceCat -- category name without namespace prefix; spaces are
                 replaced by underscores before the lookup

    Returns a list holding the page_title value of every matching row, in
    the order the database returned them. The values are passed through
    exactly as the driver delivers them (no decoding is done here).
    '''
    sourceCat = sourceCat.replace(u' ', u'_')
    query = (
        "SELECT page_title\n"
        "FROM commonswiki_p.categorylinks\n"
        "LEFT JOIN commonswiki_p.page ON page_id = cl_from\n"
        "WHERE cl_to = %s\n"
        "AND page_namespace = %s"
    )
    cursor.execute(query, (sourceCat, WP_CATEGORY_NS))
    if DEBUG:
        # parenthesised single argument prints the same on Python 2 and 3
        print(cursor._executed)
    # Iterate the rows directly rather than using a TypeError raised by
    # unpacking None as the end-of-results signal, which would also hide
    # any unrelated TypeError.
    return [subCat for (subCat,) in cursor.fetchall()]
def getImages(cursor, imgCat):
    '''
    Return the names of images in a Commons category that lvwiki uses.

    An image is listed when it appears in lvwiki's imagelinks, exists in
    the Commons image table, its Commons page is in category imgCat, and no
    image of the same name exists in lvwiki's own image table.

    cursor -- open database cursor used to run the query
    imgCat -- Commons category name (underscores, no namespace prefix)

    Returns a list of image names as text, ordered by name.
    '''
    # NOTE(review): the query joins lvwiki_p and commonswiki_p tables on one
    # connection - confirm the replica host still serves both databases.
    query = (
        "SELECT DISTINCT lvwiki_p.imagelinks.il_to\n"
        "FROM lvwiki_p.imagelinks, commonswiki_p.image, commonswiki_p.categorylinks, commonswiki_p.page\n"
        "WHERE lvwiki_p.imagelinks.il_to = commonswiki_p.image.img_name\n"
        "AND commonswiki_p.image.img_name = commonswiki_p.page.page_title\n"
        "AND commonswiki_p.categorylinks.cl_from = commonswiki_p.page.page_id\n"
        "AND commonswiki_p.categorylinks.cl_to = %s\n"
        "AND NOT EXISTS(\n"
        "SELECT 1\n"
        "FROM lvwiki_p.image\n"
        "WHERE lvwiki_p.image.img_name = lvwiki_p.imagelinks.il_to\n"
        ")\n"
        "ORDER BY lvwiki_p.imagelinks.il_to"
    )
    cursor.execute(query, (imgCat,))
    if DEBUG:
        # parenthesised single argument prints the same on Python 2 and 3
        print(cursor._executed)

    outImages = []
    for (imgName,) in cursor.fetchall():
        # Decode only byte strings. Previously the decode ran inside a
        # try/except TypeError that doubled as the end-of-rows check, so a
        # value that was already text ended the loop and dropped all
        # remaining rows without any error.
        if isinstance(imgName, bytes):
            imgName = imgName.decode("utf-8")
        outImages.append(imgName)
    return outImages
def _decodeTitle(title):
    '''Return title as text, decoding UTF-8 byte strings from the database.'''
    if isinstance(title, bytes):
        return title.decode('utf-8')
    return title


def main():
    '''
    Build the gallery page of endangered Commons images and save it.

    Collects a fixed set of Commons deletion/maintenance categories (some
    of them expanded to their direct subcategories), looks up the images in
    each one that are used on lvwiki, and writes one <gallery> section per
    non-empty category to the page User:Edgars2007/Commons delete. The edit
    summary states the total number of listed images.
    '''
    targetWiki = 'lv'
    galPageName = u'User:Edgars2007/Commons delete'
    # pywikibot.Site is the supported factory; getSite was a deprecated alias.
    wikiSite = pywikibot.Site(targetWiki, u'wikipedia')

    (conn, cursor) = connectWikiDatabase(targetWiki)
    try:
        imgCats = getSubCats(cursor, 'Deletion_requests')
        imgCats.extend(getSubCats(cursor, 'Media_without_a_source'))
        imgCats.extend(getSubCats(cursor, 'Media_without_a_license'))
        imgCats.extend(getSubCats(cursor, 'Media_missing_permission'))
        imgCats.extend(getSubCats(cursor, 'Media_uploaded_without_a_license'))
        imgCats.append('Other_speedy_deletions')
        imgCats.append('Copyright_violations')
        imgCats.append('Items_with_disputed_copyright_information')
        imgCats.append('Pending_fair_use_deletes')
        imgCats.extend(getSubCats(cursor, 'Pending_fair_use_deletes'))
        imgCats.append('Possibly_out_of_scope')

        totalImageCount = 0
        parts = []  # text fragments, joined once at the end
        for imgCat in imgCats:
            delImages = getImages(cursor, imgCat)
            if not delImages:
                continue
            totalImageCount += len(delImages)
            # Category names fetched from the database may be byte strings;
            # decode them explicitly so non-ASCII names do not break the
            # concatenation with the surrounding text.
            parts.append(u'== [[commons:Category:' + _decodeTitle(imgCat) + u"]] ==\n")
            parts.append(u"<gallery>\n")
            for delImage in delImages:
                parts.append(delImage + u"| \n")
            parts.append(u"</gallery>\n")
            parts.append(u"\n")
    finally:
        # release the database resources even if a query fails
        cursor.close()
        conn.close()

    localtime = strftime("%Y-%m-%d %H:%M:%S")
    outText = u"Updated: " + localtime + u" \n\n" + u''.join(parts)
    commentText = u'%d images' % totalImageCount

    galleryPage = pywikibot.Page(wikiSite, galPageName)
    galleryPage.text = outText
    galleryPage.save(summary=commentText, botflag=False, minor=False)
if __name__ == "__main__":
    # Run the update only when executed as a script; pywikibot.stopme() is
    # called in all cases, including when main() raises.
    try:
        main()
    finally:
        pywikibot.stopme()