-
Notifications
You must be signed in to change notification settings - Fork 1
/
names.py
68 lines (57 loc) · 1.76 KB
/
names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import tsv
import codecs
# Languages recognised by this module.  The position of a language in this
# list is the value language_index() returns for it; anything not listed
# ranks after every entry here.
languages = [
    'Sindarin', 'Quenya', 'Khuzdul',
    'English', 'German', 'French', 'Polish',
]
def language_index(language):
    """Return the position of *language* in ``languages``.

    A value that is not in the list gets ``len(languages)``, i.e. it ranks
    after every known language when this function is used as a sort key.
    """
    if language in languages:
        return languages.index(language)
    return len(languages)
# canonical -> sorted list of all name dicts
def names():
    """Load ``names.tsv`` and group its rows by canonical name.

    The file is opened relative to the current working directory and decoded
    as UTF-8.  Every row is copied into a plain dict and appended to the list
    kept under the row's ``Canonical`` column.

    Returns a dict mapping each canonical name to the list of its row dicts.
    """
    entries = {}
    # The context manager closes the file once all rows are consumed; the
    # reader is fully drained inside the ``with`` block.
    with codecs.open("names.tsv", "r", "utf-8") as source:
        for row in tsv.DictReader(source):
            entries.setdefault(row[u"Canonical"], []).append(dict(row))
    # NOTE(review): language_index() receives whole row dicts here.  A dict
    # never equals a language string, so the key is len(languages) for every
    # row and the stable sort keeps file order.  Presumably ordering by
    # language was intended -- confirm before changing the key.
    return dict(
        (canonical, sorted(rows, key=language_index))
        for canonical, rows in entries.items()
    )
# canonical -> language -> name as known in language
def language_names(debug=False):
    """Map each canonical name to its name entry per language.

    For every entry returned by ``names()`` and every language in
    ``languages``, the entry is stored under that language when the entry has
    a truthy value in the column named after the language.  When several
    entries of one canonical qualify for the same language, the last one
    wins.

    debug: when true, print one line for each entry that replaces an earlier
        entry of the same canonical and language.

    Returns a dict ``canonical -> language -> name dict``.
    """
    entries = {}
    for canonical, _names in names().items():
        subentries = entries.setdefault(canonical, {})
        for name in _names:
            for language in languages:
                # Missing column and empty value are treated alike.
                if not name.get(language):
                    continue
                if debug and language in subentries:
                    # A single pre-formatted argument keeps this line valid
                    # and identical on both Python 2 and Python 3; the bare
                    # print statement was a SyntaxError on Python 3.
                    print('duplicate %s name for %s %r %r' % (
                        language,
                        canonical,
                        subentries[language]["Name"],
                        name["Name"],
                    ))
                subentries[language] = name
    return entries
def language_names_debug():
    """Run ``language_names`` in debug mode and discard the mapping it builds."""
    # Deliberately drop the result: main() pretty-prints only non-None
    # results, so this command emits just the duplicate reports.
    language_names(debug=True)
    return None
def names_to_canonicals():
    """Return a dict mapping every ``Name`` value to its canonical name.

    If the same name is yielded more than once, the last pair wins.
    """
    mapping = {}
    for known_name, canonical in names_to_canonicals_iter():
        mapping[known_name] = canonical
    return mapping
def names_to_canonicals_iter():
    """Yield ``(name, canonical)`` pairs for every entry from ``names()``."""
    grouped = names()
    for canonical in grouped:
        for entry in grouped[canonical]:
            yield (entry["Name"], canonical)
def main():
    """Command-line entry point: run a module-level function named by argv.

    ``sys.argv[1]`` names the function, with ``-`` accepted in place of
    ``_``; any further arguments are passed to it positionally as strings.
    A result other than None is pretty-printed.

    Exits with an error message when no command is given or when the command
    does not name a callable in this module.
    """
    import sys
    from pprint import pprint

    if len(sys.argv) < 2:
        # Previously this surfaced as a bare IndexError traceback.
        sys.exit("usage: %s COMMAND [ARG...]" % sys.argv[0])

    command = sys.argv[1].replace('-', '_')
    handler = globals().get(command)
    if not callable(handler):
        # Covers both unknown names (formerly KeyError) and module-level
        # data such as lists (formerly TypeError).
        sys.exit("unknown command: %s" % sys.argv[1])

    result = handler(*sys.argv[2:])
    if result is not None:
        pprint(result)
# Run the command-line dispatcher only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()