-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvector.py
47 lines (39 loc) · 1.15 KB
/
vector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
import numpy as np
class Vector():
    '''
    Bag-of-words view of one tokenized text from the dataset.

    Stores the text's tokens together with a vocabulary object and provides
    ``tf`` to encode the tokens as a normalized frequency array.
    '''

    def __init__(self, tokens, vocabulary):
        '''
        tokens: sized collection of hashable tokens; it is measured with
            len() and iterated more than once, so a one-shot iterator
            will not work.
        vocabulary: object that supports len(), exposes an iterable
            ``words`` attribute and a ``pos(token)`` method whose result
            is used by ``tf`` as an index into the frequency array.
        '''
        self.tokens = tokens
        self.vocabulary = vocabulary
        # Number of tokens (duplicates included), fixed at construction time.
        self.size = len(tokens)

    def __str__(self):
        '''
        short human-readable description, e.g. "<Vector size=4>"
        '''
        return f"<Vector size={self.size}>"

    def __len__(self):
        '''
        return the size of vector: len(Vector)
        '''
        return self.size

    def tf(self):
        '''
        encode tokens by frequency

        Every token that also appears in ``vocabulary.words`` adds one to
        the slot ``vocabulary.pos(token)``; tokens unknown to the vocabulary
        are ignored. The counts are then divided by
        (number of distinct known tokens + 1), so the divisor is never zero.

        return a float frequency array of length len(vocabulary)
        '''
        # Count in int64: the previous int16 buffer silently wrapped to a
        # negative value once a single token occurred more than 32767 times.
        counts = np.zeros([len(self.vocabulary)], dtype='int64')
        # Distinct tokens of this text that the vocabulary knows about.
        known = set(self.tokens) & set(self.vocabulary.words)
        for token in self.tokens:
            if token in known:
                counts[self.vocabulary.pos(token)] += 1
        return counts / (len(known) + 1)
def test():
    '''
    Placeholder self-test for the Vector class.

    No checks are implemented yet; the function does nothing and
    returns None.
    '''
    return None
if __name__ == "__main__":
    # Executed as a script (not imported): run the module's self-test.
    test()