-
Notifications
You must be signed in to change notification settings - Fork 0
/
request_handler.py
167 lines (125 loc) · 4.3 KB
/
request_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import pymysql
import json
import hashlib
from re import IGNORECASE, sub
import cache_handler
import wiki_api
import tool_db
import wiki_db
from time import time, sleep
# TODO: a rate limiter is still needed
class RequestHandler:
    """Handle one tool request: validate languages, normalise filters, fetch data.

    ``main`` is the entry point. It checks the requested languages against
    the list returned by ``wiki_api.WikiAPI``, normalises the filter titles,
    hashes the request, and then serves the result either from
    ``cache_handler.CacheHandler`` or from ``wiki_db.WikiDB``.
    """

    # Collaborators are created lazily: ``cache`` inside ``main`` and
    # ``apiInst`` inside ``setAPIInst``.
    cache = None
    apiInst = None

    def setAPIInst(self):
        """Create the ``wiki_api.WikiAPI`` client on first use; reuse it afterwards."""
        if self.apiInst is None:
            self.apiInst = wiki_api.WikiAPI()

    def cleanFilterParams(self, lang, data):
        """Normalise the ``title`` of every non-petscan filter.

        For each filter the title is stripped of surrounding whitespace, a
        leading ``<namespace>:`` prefix is removed (matched case-insensitively
        against the namespace names the API reports for that filter type),
        and the first character is upper-cased. Filters of type ``'petscan'``
        are passed through untouched.

        Args:
            lang: Language code handed to ``getWikipediaNamespaces``.
            data: Iterable of filter dicts, each with a ``'type'`` key and,
                for non-petscan filters, a ``'specific'`` dict holding
                ``'title'``.

        Returns:
            A new list holding the same filter dicts in the same order.
            NOTE: the ``'specific'`` dicts are updated in place, so the
            caller's objects are modified as well.

        Raises:
            KeyError: If a filter lacks ``'type'``/``'specific'``/``'title'``
                or its type is missing from the namespace data.
        """
        # Local import: the file-level import only brings in ``sub`` and
        # ``IGNORECASE`` from ``re``.
        from re import escape

        self.setAPIInst()
        nsData = self.apiInst.getWikipediaNamespaces(lang)

        cleaned = []
        for filterItem in data:
            filterType = filterItem['type']
            if filterType == 'petscan':
                cleaned.append(filterItem)
                continue

            title = filterItem['specific']['title'].strip()
            namespaces = nsData[filterType]
            # Only build a pattern when every namespace name is non-empty;
            # an empty alternative would match any title starting with ':'.
            if namespaces and all(namespaces):
                # Names are escaped so that regex metacharacters in a
                # namespace name are matched literally.
                prefixes = '|'.join(escape(ns) for ns in namespaces)
                pattern = r"^(" + prefixes + r"):(.*)"
                # ``flags`` must be passed by keyword: the fourth positional
                # parameter of ``re.sub`` is ``count``, not ``flags``.
                title = sub(pattern, r'\2', title, flags=IGNORECASE)

            title = title[:1].upper() + title[1:]
            filterItem['specific'].update({'title': title})
            cleaned.append(filterItem)
        return cleaned

    def checkIfValidLanguages(self, fromLang, toLang):
        """Return ``True`` only if both codes are in the API's language list."""
        self.setAPIInst()
        allWikiLanguages = self.apiInst.getWikipediaLanguages()
        return fromLang in allWikiLanguages and toLang in allWikiLanguages

    def generateHashForInputData(self, data):
        """Return an MD5 hex digest identifying the request parameters.

        The ``'ignoreCache'`` key is excluded so that a forced refresh hashes
        to the same value as the ordinary request. Keys are sorted before
        serialisation, so the digest does not depend on dict ordering.
        ``data`` itself is not modified.
        """
        # https://stackoverflow.com/questions/5884066/hashing-a-dictionary
        dataForHash = {key: value for key, value in data.items() if key != 'ignoreCache'}
        requestInputString = json.dumps(dataForHash, sort_keys=True)
        return hashlib.md5(requestInputString.encode()).hexdigest()

    def main(self, inputParams):
        """Process a request and return the response dict.

        Args:
            inputParams: Dict with ``'from'``, ``'to'`` and ``'filters'``
                keys and an optional ``'ignoreCache'`` flag. NOTE:
                ``inputParams['filters']`` is replaced with the normalised
                filter list, so the caller's dict is modified.

        Returns:
            ``{'success': False, 'meta': {'message': ...}}`` when a language
            is not recognised; otherwise ``{'data': ..., 'success': True,
            'meta': {...}}`` where ``meta`` carries the timing, request id
            and cache information.

        Raises:
            KeyError: If ``'from'``, ``'to'`` or ``'filters'`` is missing.
        """
        startTime = time()
        fromLang = inputParams['from']
        toLang = inputParams['to']

        if not self.checkIfValidLanguages(fromLang, toLang):
            return {
                'success': False,
                'meta': {
                    'message': 'Languages not recognised'
                }
            }

        inputParams['filters'] = self.cleanFilterParams(fromLang, inputParams['filters'])
        reqHash = self.generateHashForInputData(inputParams)

        toolDB = tool_db.ToolDB()
        reqID = toolDB.getRequestID(reqHash)

        # TODO: also check here whether the result is already stored in the DB
        self.cache = cache_handler.CacheHandler()
        cacheResult = self.cache.get(reqHash)
        haveToIgnoreCache = inputParams.get('ignoreCache', False)

        cacheAge = None
        if cacheResult and not haveToIgnoreCache:
            requestData = cacheResult
            notifyAboutIncompleteresults = False
            isCached = True
            cacheAge = self.cache.ttl(reqHash)
        else:
            # Either the caller forced a refresh or nothing was cached:
            # query the database and (re)populate the cache.
            db_inst = wiki_db.WikiDB(fromLang, toLang)
            dbRes = db_inst.main(inputParams['filters'])
            requestData = dbRes['data']
            notifyAboutIncompleteresults = dbRes['wasMaxStatementTime']
            isCached = False
            self.cache.setData(reqHash, requestData)

        if not reqID:
            reqID = toolDB.saveRequestData(fromLang, toLang, inputParams, reqHash)

        reqTime = time() - startTime

        return {
            'data': requestData,
            'success': True,
            'meta': {
                # True whenever the data came from a fresh DB query.
                'debugLine': not isCached,
                'time': "{0:.2f}".format(reqTime),
                'id': reqID,
                'reachedMaxStatementTime': notifyAboutIncompleteresults,
                'cached': isCached,
                # NOTE(review): presumably ``ttl`` is the remaining lifetime,
                # making this the entry's age - confirm against CacheHandler.
                'cache_age': None if not isCached else (self.cache.expireTime - cacheAge)
            }
        }
if __name__ == '__main__':
    # Manual smoke test: run one en -> lv request that bypasses the cache and
    # print the raw response. The sample filters cover a bare category title,
    # a namespace-prefixed one, a lower-case template title and a petscan entry.
    # To inspect title normalisation on its own, call
    # ``cleanFilterParams('en', sampleFilters)`` on the handler instead.
    handler = RequestHandler()
    sampleFilters = [
        {'type': 'category', 'specific': {'title': '1957 births', 'depth': 5, 'talk': False}},
        {'type': 'category', 'specific': {'title': 'Category:1957 births', 'depth': 5, 'talk': False}},
        {'type': 'template', 'specific': {'talk': False, 'title': 'infobox park'}},
        {'type': 'petscan', 'specific': {'id': ''}},
    ]
    sampleRequest = {
        'from': 'en',
        'to': 'lv',
        'ignoreCache': True,
        'filters': sampleFilters,
    }
    print(handler.main(sampleRequest))