-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_text.py
27 lines (19 loc) · 997 Bytes
/
preprocess_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import argparse
import numpy as np
from preprocessor.preprocessor import Preprocessor
def _build_parser():
    """Create the command-line parser for this script.

    Registers three on/off switches (``--use_tfidf``, ``--use_bert`` and
    ``--use_doc2vec``), each of which is False unless given, and the
    integer option ``--representation_size`` (default 256).
    """
    cli = argparse.ArgumentParser(
        description='Extract all features from text and save them.')

    # One ``store_true`` switch per embedding type, registered in this order.
    embedding_switches = (
        ('--use_tfidf', 'if set, tf-idf embeddings will be calculated'),
        ('--use_bert', 'if set, bert embeddings will be calculated'),
        ('--use_doc2vec', 'if set, doc2vec embeddings will be calculated'),
    )
    for flag, help_text in embedding_switches:
        cli.add_argument(flag, action='store_true', help=help_text)

    cli.add_argument(
        '--representation_size',
        type=int,
        default=256,
        help='the dimension of the embedding vectors',
    )
    return cli


# Built at import time so the module-level name ``parser`` stays available.
parser = _build_parser()
if __name__ == "__main__":
    # Script entry point: read the command-line options, hand them to a
    # Preprocessor as keyword arguments, and run its preprocess() step.
    cli_options = parser.parse_args()

    # Each parsed option maps onto the Preprocessor keyword of the same name.
    preprocessor_settings = {
        'representation_size': cli_options.representation_size,
        'use_tfidf': cli_options.use_tfidf,
        'use_bert': cli_options.use_bert,
        'use_doc2vec': cli_options.use_doc2vec,
    }
    feature_extractor = Preprocessor(**preprocessor_settings)
    feature_extractor.preprocess()