-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
85 lines (64 loc) · 2.63 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import argparse
from utils.message_retriever import retrieve_messages_list
from utils.json_exporter import export_to_json
from utils.extract_messages_infos import extract_messages_info
from utils.anonymizer import anonymize_messages_object
from utils.thread_ids_importer import import_thread_ids
from utils.messages_exporter import export_messages
from dotenv import load_dotenv
# Populate the process environment via python-dotenv (presumably from a local
# .env file) as a module-level side effect, before main() is invoked.
# NOTE(review): this runs after the `utils` imports above, so any setting those
# modules read at import time would not see values loaded here — confirm.
load_dotenv()
def main(model):
    """Retrieve, anonymize and export the messages of every stored thread.

    Thread ids are obtained from ``import_thread_ids()``. For each id the
    messages are fetched with ``retrieve_messages_list``, reduced with
    ``extract_messages_info`` and anonymized with
    ``anonymize_messages_object``. Non-empty results are written to
    ``raw.json`` and ``anonymized.json``; the anonymized threads are also
    handed to ``export_messages``.

    Args:
        model: Anonymization method, forwarded unchanged as the ``method``
            keyword of ``anonymize_messages_object``.
    """
    # Thread ids come from a stored db.
    stored_ids = import_thread_ids()

    extracted_threads = []  # pre-anonymization message info, one entry per thread
    scrubbed_threads = []   # anonymized counterpart of the list above

    if not stored_ids:
        print("No thread IDs found.")
    else:
        print("Thread IDs loaded successfully.")
        for thread_id in stored_ids:
            print(f"Thread ID: {thread_id}")

            # Raw messages object from OpenAI for this thread.
            fetched = retrieve_messages_list(thread_id)
            if not fetched:
                print("No messages found or an error occurred.")
                continue

            # Keep only the relevant information from the messages object.
            thread_info = extract_messages_info(fetched)
            extracted_threads.append(thread_info)

            # Anonymize with the method selected by the caller.
            scrubbed_threads.append(
                anonymize_messages_object(thread_info, method=model)
            )

    # TODO: the raw export needs to be removed later, as this info should not
    # be stored.
    if extracted_threads:
        export_to_json(extracted_threads, "raw.json")
    else:
        print("No raw threads to export.")

    if not scrubbed_threads:
        print("No anonymized threads to export.")
        return

    export_to_json(scrubbed_threads, "anonymized.json")
    # Also write the anonymized data to the database.
    export_messages(scrubbed_threads)
if __name__ == "__main__":
    # Command-line interface: one mandatory --model option selects the
    # anonymization backend that is passed on to main().
    cli = argparse.ArgumentParser(
        description="Anonymize messages using a specified model."
    )
    cli.add_argument(
        "--model",
        required=True,
        type=str,
        choices=["huggingface", "spacy"],
        help="Specify the anonymization model to use ('huggingface' or 'spacy').",
    )

    # Parse the arguments and run the pipeline with the chosen model.
    main(cli.parse_args().model)