-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.py
74 lines (61 loc) · 1.56 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import spacy
import re
import csv
import pandas as pd
import improver
# Column names of the report, in output order. Every field starts out as an
# empty string; the script below fills in the ones recognised for an address.
_LABEL_NAMES = (
    'Raw',
    'Country',
    'RegionType', 'Region',
    'CountyType', 'County',
    'Included',
    'SubLocalityType', 'SubLocality',
    'LocalityType', 'Locality',
    'StreetType', 'Street',
    'HousingType', 'Housing',
    'HostelType', 'Hostel',
    'HouseNumberType', 'HouseNumber', 'HouseNumberAdditionally',
    'SectionType', 'Section',
    'ApartmentType', 'Apartment',
    'RoomType', 'Room',
    'Sector',
    'FloorType', 'Floor',
    'PostCode',
    'Manually',
    'NotAddress',
    'Comment',
    'AdditionalData',
)
labels = dict.fromkeys(_LABEL_NAMES, '')
# Load the trained spaCy pipeline whose entities are written to the report.
nlp = spacy.load('models/model-best')

# Input: ';'-separated file without a header row; the address text is taken
# from the first column. All values are read as strings.
addresses = pd.read_csv('addresses.csv', sep=';', dtype=str, header=None)

# newline='' is required by the csv module so that it controls line endings
# itself (otherwise every row is followed by a blank line on Windows).
# NOTE(review): no explicit encoding is given, so the platform default is
# used for the report -- confirm this is what downstream consumers expect.
with open('report.csv', 'w', newline='') as report:
    writer = csv.writer(report, delimiter=';')
    # Header row: one column per known label, in declaration order.
    writer.writerow(labels.keys())

    for _, row in addresses.iterrows():
        raw = row[0]
        if not isinstance(raw, str):
            # An empty first field is read by pandas as NaN (a float) even
            # with dtype=str; there is no address text to process.
            print('Skipped row without address => ', raw)
            continue
        raw = raw.lower()

        address = improver.improve_address(raw)
        doc = nlp(address)
        print(address)

        ent_list = [(ent.text, ent.label_) for ent in doc.ents]
        print('Address => ', raw)
        print('NLP => ', str(ent_list))
        print('****')

        # Start every row from a clean copy of the column template so that
        # values from a previous address can never leak into this one.
        record = dict.fromkeys(labels, '')
        record['Raw'] = raw
        print(ent_list)

        for text, label in ent_list:
            if label in record:
                # When a label occurs more than once, the last entity wins.
                record[label] = text
            else:
                # A label without a report column would make this row longer
                # than the header and misalign the CSV, so it is not written.
                print('Unknown label skipped => ', label)

        writer.writerow(record.values())