-
Notifications
You must be signed in to change notification settings - Fork 0
/
addVKPAIimages.py
83 lines (61 loc) · 1.98 KB
/
addVKPAIimages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import toolforge
import pywikibot, requests, re, os
import pywikibot
from customFuncs import basic_sparql
# Replica-DB connection to the Commons wiki database (analytics cluster).
conn = toolforge.connect('commonswiki_p','analytics')
# Wikidata site + its data repository: target for the new P18 claims.
site = pywikibot.Site('wikidata', 'wikidata')
# Commons site: source of the image file pages.
commonssite = pywikibot.Site('commons', 'commons')
repo = site.data_repository()
# Property being added: P18 = "image".
prop = 'P18'
#os.chdir(r'projects/vkpai3')
# NOTE(review): presumably defined so eval() of a SPARQL JSON dump containing
# `null` does not raise (see the commented-out eval below) — confirm.
null = ''
def encode_if_necessary(b):
    """Return *b* decoded as UTF-8 when it is a bytes value, else unchanged.

    The replica DB driver may return column values as bytes; callers pass
    every row field through this to normalise them to str.
    """
    # isinstance is the idiomatic type check and also accepts bytes subclasses.
    if isinstance(b, bytes):
        return b.decode('utf8')
    return b
def run_query(query):
    """Execute *query* on the replica-DB connection and return all rows.

    Returns the full result set as a tuple of rows.  Ctrl-C during a long
    query aborts the whole script.  Any other database error propagates.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        rows = cursor.fetchall()
    except KeyboardInterrupt:
        # Original called sys.exit(), but `sys` was never imported (NameError);
        # raising SystemExit directly has the same effect without the import.
        raise SystemExit
    finally:
        # Always release the cursor — the original leaked it.
        cursor.close()
    return rows
#
# Items with a Latvian cultural-heritage monument id (P2494) that do not yet
# have an image (P18).  (NOTE(review): the filter variable is named ?coords
# but it binds wdt:P18 values — the name is just misleading, not a bug.)
sparqlquery = """select ?id ?item {
?item wdt:P2494 ?id .
filter not exists {?item wdt:P18 ?coords .}
}"""
file2 = basic_sparql(sparqlquery)
print(len(file2))
#file2 = eval(open("query(5).json", "r", encoding='utf-8').read())['results']['bindings']
# Map: monument id (as str) -> bare Q-id of the Wikidata item.
file2 = {str(entry['id']['value']):entry['item']['value'].replace('http://www.wikidata.org/entity/','') for entry in file2}
# Commons File: pages (namespace 6) whose external links point at the
# Latvian monument register — these files document specific monuments.
SQL = '''SELECT page_title, el_to
FROM externallinks
join page on page_id=el_from and page_namespace=6
where el_to like "http://saraksts.mantojums.lv/lv/piemineklu-saraksts%"'''
sqlRES = run_query(SQL)
print(len(sqlRES))
def doUPL(currItem, image):
    """Add *image* (a Commons file title) as a P18 claim on item *currItem*.

    Does nothing when the item already carries the property.
    """
    target = pywikibot.ItemPage(repo, currItem)
    target.get(get_redirect=True)
    #print(entry)
    # Never overwrite or duplicate an existing image claim.
    if target.claims and prop in target.claims:
        return
    file_link = pywikibot.Link(image.replace('_', ' '), source=commonssite, defaultNamespace=6)
    file_page = pywikibot.FilePage(file_link)
    claim = pywikibot.Claim(repo, prop)
    claim.setTarget(file_page)
    target.addClaim(claim)
# For every (file title, register URL) pair: pull the monument id out of the
# URL and, when that id maps to an image-less Wikidata item, attach the file.
# Raw-string pattern fixes the invalid '\/' escape of the original (a
# SyntaxWarning on modern Python); compiling hoists it out of the loop.
monument_id_re = re.compile(r'piemineklu-saraksts/(\d+)')
for entry in sqlRES:
    image, url = entry
    image = encode_if_necessary(image)
    url = encode_if_necessary(url)
    urlMatch = monument_id_re.search(url)
    if not urlMatch:
        continue
    # group(1) is already a str; the original's str() wrapper was redundant.
    url = urlMatch.group(1)
    if url not in file2:
        continue
    currItem = file2[url]
    doUPL(currItem, image)
#