From 7fe37c84d8a246cf98e51986eb8e5e0a9ebe2f0f Mon Sep 17 00:00:00 2001 From: Riccardo Orlando Date: Mon, 10 Aug 2020 13:34:28 +0200 Subject: [PATCH] Fix dataset reader --- README.md | 3 ++- setup.py | 2 +- transformer_srl/dataset_readers.py | 12 +++++------- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1b78b4c..ec14f9e 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,14 @@ # Semantic Role Lableing with BERT -Semantic Role Labeling based on [AllenNLP implementation](https://demo.allennlp.org/semantic-role-labeling) of [Shi et al, 2019](https://arxiv.org/abs/1904.05255). It uses [VerbAatlas](http://verbatlas.org/) inventory and it's trained also on predicate disambiguation, in addition to arguments identification and disambiguation. +Semantic Role Labeling based on [AllenNLP implementation](https://demo.allennlp.org/semantic-role-labeling) of [Shi et al, 2019](https://arxiv.org/abs/1904.05255). Can be trained using both PropBank and [VerbAatlas](http://verbatlas.org/) inventories and implements also the predicate disambiguation task, in addition to arguments identification and disambiguation. ### To-Dos - [x] Works with both PropBank and VerbAtlas (infer inventory from dataset reader) - [ ] Compatibility with all models from Huggingface's Transformers. - Now works only with models that accept 1 as token type id +- [ ] Predicate identification (without using spacy) ### Infos diff --git a/setup.py b/setup.py index f692c09..ade319d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="transformer_srl", # Replace with your own username - version="2.2rc13", + version="2.2rc14", author="Riccardo Orlando", author_email="orlandoricc@gmail.com", description="SRL Transformer model", diff --git a/transformer_srl/dataset_readers.py b/transformer_srl/dataset_readers.py index e8c4bd1..a9c6a09 100644 --- a/transformer_srl/dataset_readers.py +++ b/transformer_srl/dataset_readers.py @@ -1,5 +1,7 @@ import logging +import logging from typing import Dict, List, Iterable, Tuple, Any +from typing import Dict, Tuple, List from allennlp.common.file_utils import cached_path from allennlp.data.dataset_readers.dataset_reader import DatasetReader @@ -13,15 +15,10 @@ from allennlp.data.tokenizers import Token from allennlp_models.common.ontonotes import Ontonotes, OntonotesSentence from allennlp_models.structured_prediction import SrlReader +from conllu import parse_incr from overrides import overrides from transformers import AutoTokenizer -from typing import Dict, Tuple, List -import logging - -from conllu import parse_incr - - logger = logging.getLogger(__name__) """ @@ -357,7 +354,6 @@ def _convert_tags_to_wordpiece_tags(self, tags: List[str], offsets: List[int]) - return ["O"] + new_tags + ["O"] def _get_predicate_labels(self, sentence, verb_indicator): - frames = [f if v == 1 else "O" for f, v in zip(frame_labels, verb_indicator)] labels = [] for i, v in enumerate(verb_indicator): if v == 1: @@ -367,6 +363,8 @@ def _get_predicate_labels(self, sentence, verb_indicator): else sentence.predicate_framenet_ids[i] ) labels.append(label) + else: + labels.append("O") return labels