forked from EL-BID/SmartReader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsummary_processor.py
67 lines (56 loc) · 1.81 KB
/
summary_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
import os
import time
from src.database_connectivity import *
from datetime import datetime
from src.process import *
from src.convert_dataset import *
def getJob():
    """Query for the next queued summary job.

    Returns:
        The result of searching ``summary_collection`` for documents whose
        ``status`` is ``"Queued"``, limited to one document.
    """
    queued_filter = {'status': "Queued"}
    return summary_collection.find(queued_filter).limit(1)
def updateJobStatus(jobid, status):
    """Set the ``status`` field of the job document whose ``_id`` is ``jobid``.

    Args:
        jobid: Value of the job document's ``_id`` field.
        status: New status value to store (the callers in this file use
            "Processing", "Done" and "Error").
    """
    # NOTE(review): assumes summary_collection is a PyMongo collection.
    # Collection.update() is deprecated since PyMongo 3.0 and removed in 4.0;
    # update_one() is its single-document replacement and behaves the same
    # for a "$set" update without multi=True.
    summary_collection.update_one(
        {"_id": jobid},
        {"$set": {"status": status}},
    )
def run_job(job):
    """Process every job document yielded by ``job``.

    For each document the job is marked "Processing", a summary is built with
    ``create_summary`` and passed to ``convert_txt_html``, the summary is
    written as JSON to ``Summaries/<summary_filename>``, and the job is marked
    "Done". If anything raises, the exception is printed and the job is marked
    "Error" instead; processing then continues with the next document.

    A document is an object of this form:
        {'_id': ObjectId('5c9b842ea1c0a02dea101856'),
         'file_path': '/app/AIResearchHelper/SmartReader/Data/text_files_2019-03-27_10-09-50',
         'summary_filename': 'summary_json_2019-03-27_10-09-50.json',
         'model_name': 'test_1',
         'model_file_name': 'model_Artificial_Intelligence_2019-03-25_14_38_33.pkl',
         'status': 'Queued',
         'timestamp': datetime.datetime(2019, 3, 27, 10, 9, 50, 24000)}

    Args:
        job: Iterable of job documents.
    """
    for document in job:
        # Reset per document so a failure before "_id" is read can never
        # flag a previously processed job as "Error".
        jobid = None
        try:
            jobid = document["_id"]
            updateJobStatus(jobid, "Processing")
            output_json = create_summary(document["file_path"], document["model_file_name"])
            convert_txt_html(output_json)
            # exist_ok avoids the check-then-create race of isdir() + mkdir().
            os.makedirs('Summaries', exist_ok=True)
            # The context manager guarantees the file is flushed and closed
            # before the job is reported as "Done".
            with open("Summaries/" + document["summary_filename"], "w") as summary_file:
                json.dump(output_json, summary_file)
            updateJobStatus(jobid, "Done")
        except Exception as e:
            print(e)
            # Without an id there is no job record to update.
            if jobid is not None:
                updateJobStatus(jobid, "Error")
def processNextJob():
    """Fetch the next queued job and run it, if there is one.

    Returns:
        int: Number of job documents fetched and handed to ``run_job``;
        0 when nothing was queued.
    """
    print('fetching job')
    # Materialise the query result once instead of calling Cursor.count():
    # count() is deprecated in PyMongo 3.7 and removed in 4.0, ignores the
    # limit applied by getJob() by default, and costs an extra round trip.
    jobs = list(getJob())
    if not jobs:
        print('no more jobs to process')
        return 0
    run_job(jobs)
    return len(jobs)
# Worker loop: process jobs back to back; when a poll finds no job,
# wait 10 seconds before polling again.
while True:
    if processNextJob() == 0:
        time.sleep(10)