-
Notifications
You must be signed in to change notification settings - Fork 3
/
setup.py
68 lines (61 loc) · 2.73 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from setuptools import setup, find_packages
import ontonotes5
# Free-text project description; it is handed to setup() below through the
# `long_description` keyword. The literal is runtime data, so its wording is
# kept exactly as written.
# NOTE(review): the title underlined with '=' looks like reStructuredText, and
# setup() is not given a long_description_content_type -- confirm the text
# renders as intended on the package index.
long_description = '''
Ontonotes-5-Parsing
===================
A simple parser of the famous Ontonotes 5 dataset
https://catalog.ldc.upenn.edu/LDC2013T19
This dataset is very useful for experiments with NER, i.e. Named Entity
Recognition. Besides, Ontonotes 5 includes three languages (English,
Arabic, and Chinese), and this fact increases interest to use it in
experiments with multi-lingual NER. But the source format of Ontonotes 5
is very intricate, in my view. Conformably, the goal of this project is
the creation of a special parser to transform Ontonotes 5 into a simple
JSON format. In this format, each annotated sentence is represented as
a dictionary with five keys: text, morphology, syntax, entities, and
language. In their's turn, morphology, syntax, and entities are
specified as dictionaries too, where each dictionary describes labels
(part-of-speech labels, syntactical tags, or entity classes) and their
bounds in the corresponded text.
You can read more detailed information about this Ontonotes 5 parser
in the small documentation https://github.com/nsu-ai/ontonotes-5-parsing/blob/master/readme.md
'''
# Register the distribution with setuptools. Keyword arguments are grouped by
# purpose; their values are the same as before.
setup(
    # --- Identity -----------------------------------------------------------
    name="ontonotes-5-parsing",
    # The version is read from the imported `ontonotes5` package, so that
    # package must be importable whenever this script runs.
    version=ontonotes5.__version__,
    url="https://github.com/nsu-ai/ontonotes-5-parsing",
    author="Ivan Bondarenko",
    author_email="i.bondarenko@g.nsu.ru",
    license="Apache License Version 2.0",

    # --- Descriptions and index metadata ------------------------------------
    description=(
        "Ontonotes-5-parsing: parser of Ontonotes 5.0 to transform "
        "this corpus to a simple JSON format."
    ),
    long_description=long_description,
    keywords=[
        "ontonotes",
        "ontonotes5",
        "ontonotes-5",
        "ner",
        "nlp",
        "multi-lingual",
        "named-entity-recognizer",
        "named-entity-recognition",
    ],
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Science/Research",
        "Intended Audience :: Developers",
        "Topic :: Software Development",
        "Topic :: Scientific/Engineering",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Text Processing",
        "Topic :: Text Processing :: Linguistic",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
    ],

    # --- Contents -----------------------------------------------------------
    # Collect every package that find_packages() discovers, except `tests`.
    packages=find_packages(exclude=["tests"]),
    include_package_data=True,

    # --- Dependencies and tests ---------------------------------------------
    install_requires=[
        "tqdm>=4.43.0",
        "numpy>=1.18.1",
        "nltk>=3.5",
        "python-Levenshtein>=0.12.0",
    ],
    test_suite="tests",

    # --- Command-line entry points ------------------------------------------
    # Each entry has the form "<command> = <module>:<callable>".
    # NOTE(review): the targets look like top-level single-file modules, and
    # find_packages() only collects packages -- confirm these modules are
    # actually shipped with the distribution (e.g. via `py_modules`).
    entry_points={
        "console_scripts": [
            "ontonotes5_to_json = ontonotes5_to_json:main",
            "reduce_entities = reduce_entities:main",
            "show_statistics = show_statistics:main",
        ],
    },
)