-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.py
416 lines (377 loc) · 16.6 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
import json
import os
import shutil
import requests
def get_api_data(api_url):
"""
Get the data from the api and return it as a json
return: the data from the api as a json or the related error message
"""
try:
response = requests.get(api_url)
response.raise_for_status() # Raise an exception for non-200 status codes
return response.json()
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
def setup_topics_folder():
"""
Prepare the topics folder for the training material and the eosc jsons
return: None
"""
if os.path.exists("./old_topics"):
shutil.rmtree("./old_topics")
if os.path.exists("./topics"):
shutil.move("./topics", "./old_topics")
os.mkdir("./topics")
if os.path.exists("./old_upload_failures"):
shutil.rmtree("./old_upload_failures")
if os.path.exists("./upload_failures"):
shutil.move("./upload_failures", "./old_upload_failures")
os.mkdir("./upload_failures")
os.mkdir("./upload_failures/new_trainings")
os.mkdir("./upload_failures/updated_trainings")
if os.path.exists("./validated_eosc_jsons"):
shutil.rmtree("./validated_eosc_jsons")
os.mkdir("./validated_eosc_jsons")
os.mkdir("./validated_eosc_jsons/updated_trainings")
os.mkdir("./validated_eosc_jsons/new_trainings")
def fetch_trainings():
"""
Fetch the training material for each topic and save it as a json file in the topics folder
return: None
"""
topics_url = "https://training.galaxyproject.org/training-material/api/topics.json"
topics = get_api_data(topics_url)
for topic in topics:
topic_folder = f"./topics/{topic}"
os.mkdir(topic_folder)
topic_url = f"https://training.galaxyproject.org/training-material/api/topics/{topic}.json"
topic_training_material = get_api_data(topic_url)["materials"]
for training in topic_training_material:
id = f"{training['topic_name']}_{training['tutorial_name']}"
training_json = f"{topic_folder}/{id}.json"
with open(training_json, "w") as f:
json.dump(training, f, sort_keys=True, indent=4)
def compare_training_material():
"""
Compare the old and new training material and return the new and updated training material
return: a dict containing the new and updated training material
"""
new_trainings = []
updated_trainings = []
for topic in os.listdir("./topics"):
for training in os.listdir(f"./topics/{topic}"):
# if the training is in the old topics, compare the trainings
if os.path.exists(f"./old_topics/{topic}/{training}") and os.path.isfile(
f"./old_topics/{topic}/{training}"
):
# compare the trainings and if there is a change, add it to the updated_trainings
with open(f"./topics/{topic}/{training}") as f:
new_training = json.load(f)
with open(f"./old_topics/{topic}/{training}") as f:
old_training = json.load(f)
if new_training != old_training:
updated_trainings.append(f"./topics/{topic}/{training}")
# if the training is not in the old topics, add it to the new_trainings
else:
new_trainings.append(f"./topics/{topic}/{training}")
# if the training failed the validation in the previous run
# add it to the corresponding training list
if os.path.exists(
f"./old_upload_failures/new_trainings/{training}"
) and os.path.isfile(f"./old_upload_failures/new_trainings/{training}"):
new_trainings.append(f"./topics/{topic}/{training}")
if os.path.exists(
f"./old_upload_failures/updated_trainings/{training}"
) and os.path.isfile(f"./old_upload_failures/updated_trainings/{training}"):
updated_trainings.append(f"./topics/{topic}/{training}")
return {"new_trainings": new_trainings, "updated_trainings": updated_trainings}
def translate_level(level: str):
"""
Translate the level of the training material to the eosc format
return: the expertise level of the training material in the eosc format
"""
level_mapping = {
"Introductory": "tr_expertise_level-beginner",
"Any": "tr_expertise_level-all",
"Intermediate": "tr_expertise_level-intermediate",
"Advanced": "tr_expertise_level-advanced",
}
return (
level_mapping.get(level, "tr_expertise_level-advanced")
or "tr_expertise_level-all"
)
def training_to_eosc_json(training_json: str):
"""
Convert the training json to a format that the eosc training material accepts
See https://wiki.eoscfuture.eu/display/PUBLIC/F.+EOSC+Training+Resource+Profile for reference
return: a tuple with the converted json/error message and the training name
"""
with open(training_json) as f:
training = json.load(f)
try:
mandatory_fields = []
# --- Basic Information ---
id = training.get("id")
title = training.get("tutorial_name")
provider_id = "uni-freiburg"
# resource_providers = ...
contributors = training.get("contributors")
authors = []
if training.get("contributions") and "authorship" in training.get(
"contributions"
):
author_ids = training.get("contributions").get("authorship")
for contributor in contributors:
if contributor.get("id") in author_ids:
authors.append(contributor.get("name"))
else:
authors = [contributor.get("name") for contributor in contributors]
authors = list(dict.fromkeys(authors))
webpage = "https://training.galaxyproject.org/training-material" + training.get(
"url"
)
# url_type = ...
# eoscRelatedServices = ...
# alternative_identifiers = ...
# --- Detailed & access Information ---
# description = ...
# keywords = ...
# TODO - Think about how to set license dynamically
license = "https://spdx.org/licenses/CC-BY-4.0"
# TODO - Are the access rights always the same?
access_rights = "tr_access_right-open_access"
# TODO - Think about how to set the version data dynamically
version_date = "2024-01-01"
# --- Learning Information ---
targetGroups = ["target_user-researchers", "target_user-students"]
objectives = training.get("objectives")
if objectives:
learningOutcomes = [objective for objective in objectives]
else:
learningOutcomes = ["No learning outcomes available"]
# learningResourceTypes = ...
expertiseLevel = translate_level(training.get("level"))
# contentResourceTypes = ...
# qualifications = ...
duration = training.get("time_estimation")
# --- Geographical and Language Availability Information ---
languages = ["en"]
geographicalAvailabilities = ["WW"]
# --- Classification Information ---
scientificDomains = [
{
"scientificDomain": "scientific_domain-generic",
"scientificSubdomain": "scientific_subdomain-generic-generic",
}
]
# --- Contact Information ---
main_contributor = contributors[0]
main_name = main_contributor.get("name").split(" ")
# TODO get the email of the first person of the board of the topic and replace placeholder email
main_email = main_contributor.get("email") or "place_holder@uni-freiburg.com"
contact = {
"firstName": main_name[0],
"lastName": main_name[-1],
"email": main_email,
# "phone": "string",
# "position": "string",
# "organisation": "string"
}
# catalogueId = ...
# TODO - populate the optional fields above
# Append variables to mandatory_fields
mandatory_fields.append(("id", id))
mandatory_fields.append(("title", title))
mandatory_fields.append(("resourceOrganisation", provider_id))
mandatory_fields.append(("authors", authors))
mandatory_fields.append(("url", webpage))
mandatory_fields.append(("license", license))
mandatory_fields.append(("accessRights", access_rights))
mandatory_fields.append(("versionDate", version_date))
mandatory_fields.append(("targetGroups", targetGroups))
mandatory_fields.append(("learningOutcomes", learningOutcomes))
mandatory_fields.append(("expertiseLevel", expertiseLevel))
mandatory_fields.append(("languages", languages))
mandatory_fields.append(
("geographicalAvailabilities", geographicalAvailabilities)
)
mandatory_fields.append(("scientificDomains", scientificDomains))
mandatory_fields.append(("contact", contact))
# Check if all mandatory fields are set
for field in mandatory_fields:
if not field[1]:
return (
f"For {training_json} the mandatory field {field[0]} is not set",
training_json,
)
if field[1] == []:
return (
f"For {training_json} the mandatory field {field[0]} is empty",
training_json,
)
if type(field[1]) == dict:
for key, value in field[1].items():
if not value:
return (
f"For {training_json} the mandatory field {key} is not set",
training_json,
)
# catch all exceptions
except Exception as e:
return (f"For {training_json} there was an error: {e}", training_json)
data = {
"id": id,
"title": title,
"resourceOrganisation": provider_id,
# "resourceProviders": [
# "string"
# ],
"authors": authors,
"url": webpage,
# "urlType": "string",
# "eoscRelatedServices": ["string"],
# "alternativeIdentifiers": [
# {
# "type": "string",
# "value": "string"
# }
# ],
# "description": description, # recommended
# "keywords": ["string"], # recommended
"license": license,
"accessRights": access_rights,
"versionDate": version_date,
"targetGroups": targetGroups,
# "learningResourceTypes": ["string"], # recommended
"learningOutcomes": learningOutcomes,
"expertiseLevel": expertiseLevel,
# "contentResourceTypes": ["string"], # recommended
# "qualifications": ["string"], # recommended
"duration": duration, # recommended
"languages": languages,
"geographicalAvailabilities": geographicalAvailabilities,
"scientificDomains": scientificDomains,
"contact": contact,
# "catalogueId": "string"
}
return (data, training_json)
def validate_eosc_json(eosc_json):
"""
Validate the eosc json by checking the payload for errors that arose during the conversion.
If there are no errors, send the json to the eosc training material api for validation.
return: a tuple with the validation response and the training name
"""
payload = eosc_json[0]
training = eosc_json[1]
if type(payload) != dict:
return (f"Json conversion error: \n{payload}", training)
url = "https://api.eosc-portal.eu/trainingResource/validate"
headers = {"Content-Type": "application/json"}
response = requests.post(url, headers=headers, json=payload)
if response.status_code != 200:
return f"API error: \n{response.text}", training, payload
return response.json(), training, payload
def process_validated_trainings(validated_new_trainings, validated_updated_trainings):
"""
Divide the validated trainings into valid and invalid responses and move/copy them to the corresponding folders
return: None
"""
for training in validated_new_trainings:
if str(training[0]) == "True":
# write the converted training to a file in validated_eosc_jsons/new_trainings
training_name = training[1].split("/")[-1]
with open(
f"./validated_eosc_jsons/new_trainings/{training_name}", "w"
) as f:
json.dump(training[2], f, sort_keys=True, indent=4)
else:
# copy the failed training to upload_failures/new_trainings
# and write the error message to a file
shutil.copy(training[1], "./upload_failures/new_trainings")
with open("./upload_failures.txt", "a") as f:
f.write(f"{training}\n")
for training in validated_updated_trainings:
if str(training[0]) == "True":
# write the converted training to a file in validated_eosc_jsons/updated_trainings
training_name = training[1].split("/")[-1]
with open(
f"./validated_eosc_jsons/updated_trainings/{training_name}", "w"
) as f:
json.dump(training[2], f, sort_keys=True, indent=4)
else:
# copy the failed training to upload_failures/updated_trainings
# and write the error message to a file
shutil.copy(training[1], "./upload_failures/updated_trainings")
with open("./upload_failures.txt", "a") as f:
f.write(f"{training}\n")
def upload_training_files(url, headers, authentifier):
"""
Upload the validated training files to the eosc training material api
return: a dict containing the number of successful/failed creations/updates and a list of failure messages
"""
# TODO - handle the responses - failures
successful_creations = 0
failed_creations = 0
successful_updates = 0
failed_updates = 0
create_responses_failures = []
update_responses_failures = []
for training in os.listdir("./validated_eosc_jsons/new_trainings"):
with open(f"./validated_eosc_jsons/{training}") as f:
response = requests.post(url, headers=headers, auth=authentifier, json=f)
if response.status_code != 200:
# append the error message to the responses
create_responses_failures.append(
f"An error occurred: {response.text}", training
)
failed_creations += 1
else:
successful_creations += 1
for training in os.listdir("./validated_eosc_jsons/updated_trainings"):
with open(f"./validated_eosc_jsons/{training}") as f:
response = requests.put(url, headers=headers, auth=authentifier, json=f)
if response.status_code != 200:
# append the error message to the responses
update_responses_failures.append(
f"An error occurred: {response.text}", training
)
failed_updates += 1
else:
successful_updates += 1
response = {
"successful_creations": successful_creations,
"failed_creations": failed_creations,
"create_responses_failures": create_responses_failures,
"successful_updates": successful_updates,
"failed_updates": failed_updates,
"update_responses_failures": update_responses_failures,
}
return response
if __name__ == "__main__":
# get the current training material from the galaxy training material api
setup_topics_folder()
fetch_trainings()
# check if there are new or updated trainings and if so convert them to eosc jsons
training_jsons = compare_training_material()
new_training_eosc_jsons = [
training_to_eosc_json(training_json)
for training_json in training_jsons["new_trainings"]
]
update_training_eosc_jsons = [
training_to_eosc_json(training_json)
for training_json in training_jsons["updated_trainings"]
]
# validate the converted jsons
validated_new_trainings = [
validate_eosc_json(training) for training in new_training_eosc_jsons
]
validated_updated_trainings = [
validate_eosc_json(training) for training in update_training_eosc_jsons
]
# process the validated trainings
process_validated_trainings(validated_new_trainings, validated_updated_trainings)
# TODO - send the valid eosc jsons to the eosc training material api
# headers = {"Content-Type": "application/json"}
# authentifier = ("", "")
# upload_response = upload_training_files(url, headers, authentifier)