-
Notifications
You must be signed in to change notification settings - Fork 0
/
features.py
108 lines (97 loc) · 2.18 KB
/
features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# coding: utf-8
# Candidate tags per grammatical category, in lookup order.  The order is
# significant because tags are detected with ``in``: when ``gr`` is a plain
# string, 'pf' is a substring of 'ipf', so 'ipf' has to be tried first.
_ASPECT_TAGS = ('ipf', 'pf')
_TENSE_TAGS = ('praes', 'praet', 'fut')
_PERSON_TAGS = ('1p', '2p', '3p')
_NUMBER_TAGS = ('sg', 'pl')
_TRANSITIVITY_TAGS = ('tran', 'intr')
_VOICE_TAGS = ('act', 'pass', 'med')
_MOOD_TAGS = ('indic', 'imper')

# Positions in the feature list that are rewritten when the previous
# lexeme is 'быть'.
_TENSE_SLOT, _PERSON_SLOT, _NUMBER_SLOT = 1, 2, 3


def _first_tag(gr, candidates):
    """Return the first tag from *candidates* found in *gr*, else None."""
    for tag in candidates:
        if tag in gr:
            return tag
    return None


def _person(gr):
    """Return the person digit ('1', '2' or '3') found in *gr*, else None."""
    tag = _first_tag(gr, _PERSON_TAGS)
    # Tags look like '1p'; only the digit is kept as the feature value.
    return None if tag is None else tag[:-1]


def get_features(info, info_prev):
    """Build the list of verb features for a word.

    The result has eight slots, in this order: aspect, tense, person,
    number, transitivity, voice, form, mood.  Every slot is a string; a
    category that could not be determined is reported as the string
    ``'None'`` so that no cell is left empty downstream.

    Args:
        info: mapping describing the current word.  Only ``info.get('gr')``
            is read; each tag is looked up in it with ``in``, so it may be
            a string or a collection of tags (presumably an annotation
            string such as ``'V,ipf,intr,act=sg,praes,3p,indic'`` --
            confirm against the caller).  A missing or ``None`` value is
            treated as "no tags".
        info_prev: mapping describing the previous word, or ``None``.  Its
            ``'lex'`` and ``'gr'`` entries are read.

    Returns:
        A list of eight strings as described above.
    """
    # Fall back to an empty tag set so a missing 'gr' yields 'None'
    # features instead of a TypeError on the first membership test.
    gr = info.get('gr') or ''
    lex_prev = info_prev.get('lex') if info_prev is not None else None

    features = [
        _first_tag(gr, _ASPECT_TAGS),
        _first_tag(gr, _TENSE_TAGS),
        _person(gr),
        _first_tag(gr, _NUMBER_TAGS),
        _first_tag(gr, _TRANSITIVITY_TAGS),
        _first_tag(gr, _VOICE_TAGS),
    ]

    # form
    if lex_prev == 'быть':
        # The word follows a form of 'быть': it is reported as a finite
        # future form, and number/person are taken from the previous word
        # whenever that word carries them.
        # NOTE(review): this fires for any word after 'быть' (not only
        # infinitives) and regardless of the tense of 'быть' -- confirm
        # that this is intended.
        gr_prev = info_prev.get('gr') or ''
        features[_TENSE_SLOT] = 'fut'
        number_prev = _first_tag(gr_prev, _NUMBER_TAGS)
        if number_prev is not None:
            features[_NUMBER_SLOT] = number_prev
        person_prev = _person(gr_prev)
        if person_prev is not None:
            features[_PERSON_SLOT] = person_prev
        form = 'fin'
    elif 'inf' in gr:
        form = 'inf'
    elif 'partcp' in gr:
        form = 'partcp'
    elif 'ger' in gr:
        form = 'ger'
    else:
        form = 'fin'
    features.append(form)

    # mood
    features.append(_first_tag(gr, _MOOD_TAGS))

    # Stringify so that undetermined categories show up as 'None' rather
    # than producing empty cells.
    return [str(value) for value in features]