-
Notifications
You must be signed in to change notification settings - Fork 1
/
my_ml.py
128 lines (90 loc) · 2.98 KB
/
my_ml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from flask import Flask
from flask import request
import json
import requests
from textgenrnn import textgenrnn
import random
# Flask application that exposes the tagtog webhook endpoint defined below.
app = Flask(__name__)
# Base URL of the tagtog documents API; the webhook handler issues both its
# GET (download) and PUT (upload) requests against it.
tagtog_docs_api_url = "http://localhost:9000/-api/documents/v1"
# HTTP Basic credentials sent with every tagtog API request.
# NOTE(review): "YOUR_PASSWORD" looks like a placeholder to be replaced before
# running -- confirm, and avoid committing a real password here.
auth = requests.auth.HTTPBasicAuth(username="demo", password="YOUR_PASSWORD")
# Hand-written seed texts that are served as unlabeled samples.
corpus = [
"It's f*cking good!",
"NIcceeee!",
"I was rather disappointed",
"Meh",
"DISGUSTING :-("
]
# NOTE(review): presumably this loads textgenrnn's default model and produces
# 10 generated texts as a list -- confirm against the textgenrnn documentation.
# Both calls run at import time.
textgen = textgenrnn()
random_samples = textgen.generate(10, return_as_list=True)
# Rebind `corpus` to a one-shot iterator over the seed texts followed by the
# generated ones. collect_unlabeled_sample() pulls from it with next(), so
# once every sample has been handed out next(corpus) raises StopIteration.
corpus = iter(corpus + random_samples)
@app.route("/tagtog_webhook", methods=['PUT', 'POST'])
def tagtog_webhook():
    """Train on a newly saved tagtog document, then upload a fresh prediction.

    The request body must be a JSON object with the keys ``tagtogID``,
    ``owner`` and ``project``. The handler downloads the document's ann.json;
    if its annotations are confirmed it also downloads the plain text, passes
    ``(text, label)`` to ``train``, and finally uploads the next unlabeled
    sample together with its predicted label.

    Returns:
        ``""`` when the saved annotations are not confirmed yet; the
        ``tagtogID`` of the processed document otherwise; or an error message
        with HTTP status 400 when the payload lacks the required keys.

    Raises:
        requests.HTTPError: if tagtog answers a download with an error status.
        requests.Timeout: if tagtog does not answer within the timeout.
    """
    # requests waits forever unless a timeout is given; a stalled tagtog
    # instance would otherwise block this worker indefinitely.
    timeout_seconds = 10

    body = request.get_json()
    required_keys = ("tagtogID", "owner", "project")
    if not isinstance(body, dict) or any(key not in body for key in required_keys):
        # Answer with a client error instead of an opaque 500 (TypeError/KeyError).
        return "Expected a JSON object with the keys: tagtogID, owner, project", 400

    tagtogID = body["tagtogID"]
    params = {"owner": body["owner"], "project": body["project"], "ids": tagtogID}

    # Train with newly saved document
    params["output"] = "ann.json"
    ann_response = requests.get(
        tagtog_docs_api_url, params=params, auth=auth, timeout=timeout_seconds
    )
    # Fail with the real HTTP error rather than a confusing JSON/KeyError later.
    ann_response.raise_for_status()
    annjson = ann_response.json()

    if not annjson["anncomplete"]:
        print("The annotations were changed, but they are not confirmed: {}".format(tagtogID))
        return ""

    label = parse_label(annjson)

    params["output"] = "text"
    text_response = requests.get(
        tagtog_docs_api_url, params=params, auth=auth, timeout=timeout_seconds
    )
    text_response.raise_for_status()
    train((text_response.text, label))

    # Upload newly predicted document
    try:
        unseen_unlabeled_sample = collect_unlabeled_sample()
    except StopIteration:
        # The sample iterator is finite; training already succeeded, so report
        # success instead of failing the webhook once the samples run out.
        print("No unlabeled samples left to upload")
        return tagtogID

    (label, probability, who) = predict(unseen_unlabeled_sample)
    predicted_annjson = format_label_as_annjson(label, probability, who)

    files = [
        ('file', ('text.txt', unseen_unlabeled_sample)),
        ('file', ('text.ann.json', predicted_annjson)),
    ]
    print(unseen_unlabeled_sample, predicted_annjson)

    # The upload reuses owner/project/ids and only switches the format/output.
    params['format'] = 'default-plus-annjson'
    params['output'] = 'weburl'
    response = requests.put(
        tagtog_docs_api_url, params=params, auth=auth, files=files, timeout=timeout_seconds
    )
    # Best effort: the upload result is only logged, never checked.
    print(response.text)

    return tagtogID
def parse_label(annjson):
    """Return the value of the first document-level meta in an ann.json dict.

    Args:
        annjson: Parsed ann.json document; must contain a ``"metas"`` mapping
            whose entries each carry a ``"value"`` key.

    Returns:
        The ``"value"`` of the first entry in ``annjson["metas"]``, or ``None``
        when the document has no metas at all.

    Raises:
        KeyError: if ``"metas"`` is missing, or the first meta has no
            ``"value"``.
    """
    # Supplying a default keeps an empty "metas" from leaking a bare
    # StopIteration out of next().
    first_meta = next(iter(annjson["metas"].values()), None)
    if first_meta is None:
        return None
    return first_meta["value"]
def format_label_as_annjson(label, probability, who, meta_id="m_1"):
    """Serialize a predicted document label as an ann.json string.

    Args:
        label: Predicted value stored under the meta's ``"value"`` key.
        probability: Confidence stored under ``"confidence" -> "prob"``.
        who: Identifier of the predictor; stored as the only ``"who"`` entry.
        meta_id: Key under ``"metas"`` that receives the prediction. Defaults
            to ``"m_1"``, the id that was previously hard-coded.

    Returns:
        A JSON string with ``"anncomplete"`` set to false, empty
        parts/sources/entities/relations, and the single pre-added meta.
        Non-ASCII characters are emitted verbatim (``ensure_ascii=False``).
    """
    # Named `annjson_doc` rather than `format` so the builtin is not shadowed.
    annjson_doc = {
        "annotatable": {
            "parts": [],
        },
        "anncomplete": False,
        "sources": [],
        "metas": {
            meta_id: {
                "value": label,
                "confidence": {
                    "state": "pre-added",
                    "who": [who],
                    "prob": probability,
                },
            },
        },
        "entities": [],
        "relations": [],
    }
    return json.dumps(annjson_doc, ensure_ascii=False)
def train(new_labeled_sample):
    """Placeholder training step: only reports the sample it was given.

    Args:
        new_labeled_sample: The newly labeled sample; the webhook passes a
            ``(text, label)`` tuple.
    """
    # Do my ML magic
    # ...
    message = "Train with: {}".format(new_labeled_sample)
    print(message)
def collect_unlabeled_sample():
    """Return the next sample from the module-level ``corpus`` iterator.

    Raises:
        StopIteration: once ``corpus`` has no samples left.
    """
    # Where is your data?
    # Active Learning -- select samples that are INTERESTING
    return next(corpus)
def predict(text):
    """Placeholder predictor: pick one of the known labels at random.

    Args:
        text: The sample to classify; this stub does not read it.

    Returns:
        A ``(prediction, probability, who)`` tuple: the randomly chosen label,
        a uniform probability of one over the number of labels, and the
        predictor id ``"ml:my_ml"``.
    """
    # Call your ML
    # ...
    candidates = ["❤️", "😐", "😢"]
    chosen = random.choice(candidates)
    return (chosen, 1 / len(candidates), "ml:my_ml")