-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlemmatizer.py
43 lines (34 loc) · 956 Bytes
/
lemmatizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import nltk
# Make sure the WordNet corpus data is available, downloading it if needed.
# NLTK corpus objects are lazy loaders: importing `wordnet` succeeds even when
# the data files are missing, and the LookupError only surfaces the first time
# the corpus is actually accessed. The check therefore has to touch the corpus
# (which loads it eagerly here) instead of relying on the import to fail.
from nltk.corpus import wordnet

try:
    wordnet.ensure_loaded()
except LookupError:
    # Corpus data not found on any NLTK data path; fetch it once. The lazy
    # loader retries the lookup on the next access, so no re-import is needed.
    nltk.download('wordnet')
class Lemmatizer:
    '''
    Thin wrapper around the WordNet corpus that reduces a word to its lemma.
    '''

    def __init__(self):
        '''Nothing to set up; the lemmatizer holds no per-instance state.'''

    def lemmatize(self, word, pos=None):
        '''
        Look up the base form of ``word`` via ``wordnet.morphy(word, pos)``.

        ``pos`` is handed to ``morphy`` unchanged. When ``morphy`` gives back
        a falsy result, the input ``word`` itself is returned instead.
        '''
        return wordnet.morphy(word, pos) or word
def test():
    '''
    Smoke test: print each sample word next to its lemmatized form.
    '''
    stemmer = Lemmatizer()
    for sample in ['cats', 'running']:
        lemma = stemmer.lemmatize(sample)
        print(f'{sample} : {lemma}')
# Run the smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test()