-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparsing.py
132 lines (116 loc) · 5.3 KB
/
parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import json
import cv2
import numpy as np
import re
import warnings
from docx2python import docx2python
import win32com.client as win32
from win32com.client import constants
from datetime import datetime
# Install a global "ignore" filter: from this point on every warning raised in
# the process (including deprecation notices from imported libraries) is hidden.
warnings.filterwarnings("ignore")
# Date used wherever the report yields none. NOTE(review): 31 November is not a
# real calendar date; the literal is kept unchanged because consumers of the
# JSON may already match on it.
_DEFAULT_DATE = "2022-11-31"

# Placeholder image locations used when the caller does not supply any.
_DEFAULT_IMAGE_PATHS = ("path/to/image1.png", "path/to/image2.png")

_CIN_GRADES = ("CIN1", "CIN2", "CIN3")
_BIOPSY_LABELS = ("Cancer", "Normale", "Dystrophie", "Adénocarcinome")
_BIOPSY_NORMAL_SYNONYMS = ("MMI", "ECTROPION")
_GENOTYPES = ("16", "18", "31", "33", "45", "52", "58")
_FROTTIS_CLASSES = ("Normale", "ASC-US", "L-SIL", "H-SIL", "ASC-H", "AGC", "Cancer")
# Order matters: longer / more specific labels must precede their prefixes
# ("Positif ARNM" before "Positif ARN", bare "Positif" last, ...).
_HPV_LABELS = (
    'Positif IHC', 'Positif ARNM', 'Positif HC', 'Positif ARN', 'Positif PERSISTANT',
    'Positif HIS', 'Positif', 'Négatif IHC NEG', 'Négatif IIHC', 'Négatif IHC', 'Négatif',
)


def treat_value_biopsie(value):
    """Normalize a free-text biopsy result to a canonical upper-case label.

    Matching is a case-insensitive substring search, tried in this order:

    1. CIN grades: every grade among CIN1/CIN2/CIN3 found in *value* is
       returned, space separated, in ascending order (e.g. ``"CIN1 CIN3"``).
    2. The first of Cancer, Normale, Dystrophie, Adénocarcinome found in
       *value*, upper-cased.
    3. ``"NORMALE"`` when *value* mentions MMI or ECTROPION.

    Returns an empty string when nothing matches.
    """
    lowered = value.lower()
    grades = [grade for grade in _CIN_GRADES if grade.lower() in lowered]
    if grades:
        return " ".join(grades)
    for label in _BIOPSY_LABELS:
        if label.lower() in lowered:
            return label.upper()
    if any(synonym.lower() in lowered for synonym in _BIOPSY_NORMAL_SYNONYMS):
        return "NORMALE"
    return ""


def treat_geno_str(string_0):
    """Normalize a free-text HPV genotype description to a dotted code.

    Returns ``"NON.16.18.45"`` or ``"NON.16.18"`` when the text contains
    "NON" (case-insensitive) together with those numbers; otherwise the
    known genotype numbers present in the text, joined with dots in the
    fixed order 16, 18, 31, 33, 45, 52, 58 (empty string if none).
    """
    upper = string_0.upper()
    if all(token in upper for token in ("NON", "16", "18", "45")):
        return "NON.16.18.45"
    if all(token in upper for token in ("NON", "16", "18")):
        return "NON.16.18"
    return ".".join(number for number in _GENOTYPES if number in string_0)


def preprocessing_frottis(frotti):
    """Return every smear class mentioned in *frotti*, upper-cased.

    Classes are matched as case-insensitive substrings and returned space
    separated in the fixed order Normale, ASC-US, L-SIL, H-SIL, ASC-H, AGC,
    Cancer. Returns an empty string when none is present.
    """
    upper = frotti.upper()
    return " ".join(
        label.upper() for label in _FROTTIS_CLASSES if label.upper() in upper
    )


def preprocessing_HPV(HPV):
    """Normalize a free-text HPV result to a canonical upper-case label.

    The exact text "Positif ARN M" (any case) maps to ``"POSITIF ARNM"``.
    Otherwise the first known label contained in *HPV* (case-insensitive
    substring, most specific labels first) is returned upper-cased.
    Returns an empty string when nothing matches.
    """
    upper = HPV.upper()
    if upper == 'Positif ARN M'.upper():
        # Upper-cased like every other return value of this function.
        return 'Positif ARNM'.upper()
    for label in _HPV_LABELS:
        if label.upper() in upper:
            return label.upper()
    return ''


def save_as_docx(path, target_path):
    """Re-save the document at *path* as a .docx file at *target_path*.

    Drives Microsoft Word through COM automation. The document is closed
    without saving changes and the Word application is quit even when
    opening or saving fails, so no hidden Word process is left behind.
    """
    word = win32.gencache.EnsureDispatch('Word.Application')
    try:
        # Word resolves relative paths against its own working directory,
        # not the Python process's, so hand it absolute paths.
        doc = word.Documents.Open(os.path.abspath(path))
        try:
            doc.Activate()
            word.ActiveDocument.SaveAs(
                os.path.abspath(target_path),
                FileFormat=constants.wdFormatXMLDocument,
            )
        finally:
            doc.Close(False)
    finally:
        word.Quit()


def treat_date(date):
    """Convert a ``/``-separated date to ``-``-separated, reversed order.

    ``"DD/MM/YYYY"`` becomes ``"YYYY-MM-DD"``, ``"MM/YYYY"`` becomes
    ``"YYYY-MM"``, and text without a slash is returned unchanged. Text
    with more than two slashes yields an empty string. Parts are not
    validated or stripped.
    """
    parts = date.split("/")
    if len(parts) > 3:
        return ""
    return "-".join(reversed(parts))


def extract_json(file_path, output_json_path, output_images_dir, *, image_paths=None):
    """Extract clinical findings from a Word report into JSON and copy images.

    The report is first re-saved as ``temp/temp.docx`` next to this module
    (via Word), then read with docx2python. Every paragraph string of the
    document body is scanned:

    * a paragraph containing "GARDASIL" marks the patient as vaccinated;
    * each paragraph is run through the smear, HPV and biopsy normalizers
      and any non-empty result is stored under the matching key, indexed
      by date.

    Nothing in this function populates a "Date" or "AGE" entry, so the
    placeholder date and the default age "40" are currently always used,
    and a later match for the same key overwrites an earlier one.

    Args:
        file_path: Path of the Word document to parse.
        output_json_path: Where the resulting JSON is written.
        output_images_dir: Directory receiving the copied images (created
            if needed once there is an image to write).
        image_paths: Optional iterable of image files to copy. Defaults to
            the two placeholder paths used by earlier versions.

    Returns:
        A ``(output, saved_image_paths)`` tuple: the dictionary that was
        written as JSON and the list of image files actually written.
    """
    # Load the document and convert it to a temporary DOCX file.
    directory = os.path.dirname(__file__)
    target_path = os.path.join(directory, "temp", "temp.docx")
    os.makedirs(os.path.dirname(target_path), exist_ok=True)
    save_as_docx(file_path, target_path)
    doc_result = docx2python(target_path)

    historical_information = {"Vaccin": False}
    output = {
        "Frottis": {}, "HPV": {}, "Biopsie": {}, "Erad": {}, "Vaccin": "",
        "Age": "40", "date_colposcopie": _DEFAULT_DATE,
    }
    normalizers = (
        ("Frottis", preprocessing_frottis),
        ("HPV", preprocessing_HPV),
        ("Biopsie", treat_value_biopsie),
    )

    # Extract information. Assumes docx2python's body nesting of
    # table > row > cell > paragraph string -- TODO confirm for the
    # installed docx2python version.
    for table in doc_result.body:
        for row in table:
            for cell in row:
                for paragraph in cell:
                    if "GARDASIL" in paragraph:
                        historical_information["Vaccin"] = True
                    for key, normalize in normalizers:
                        processed_value = normalize(paragraph)
                        if processed_value:
                            date_key = treat_date(
                                historical_information.get("Date", _DEFAULT_DATE)
                            )
                            output[key][date_key] = processed_value

    # Set additional fields for the output JSON.
    output["Vaccin"] = "oui" if historical_information["Vaccin"] else "non"
    if "AGE" in historical_information:
        output["Age"] = historical_information["AGE"]
    if "Date" in historical_information:
        output["date_colposcopie"] = treat_date(historical_information["Date"])

    # Save JSON data.
    with open(output_json_path, 'w', encoding="utf-8") as json_file:
        json.dump(output, json_file)

    # Copy the images, numbering them by their position in image_paths.
    if image_paths is None:
        image_paths = _DEFAULT_IMAGE_PATHS
    saved_image_paths = []
    for index, image_path in enumerate(image_paths, start=1):
        image = cv2.imread(image_path)
        if image is None:
            # Unreadable or missing file: skip it, but keep its number reserved.
            continue
        if output_images_dir:
            os.makedirs(output_images_dir, exist_ok=True)
        save_image_path = os.path.join(output_images_dir, f"image_{index}.png")
        # imwrite reports failure through its return value; only list files
        # that were really written.
        if cv2.imwrite(save_image_path, image):
            saved_image_paths.append(save_image_path)
    return output, saved_image_paths