Merge pull request #42 from xiangking/develop
Bug fix
xiangking authored Mar 26, 2022
2 parents 165d35c + f146fe1 commit f8aff87
Showing 3 changed files with 12 additions and 7 deletions.
@@ -66,7 +66,7 @@ def _convert_to_transfomer_ids(self, bert_tokenizer):
                     continue
                 global_label[self.cat2id[info_['type']], start_idx+1, end_idx+1] = 1
 
-            global_label = torch.tensor(global_label).to_sparse()
+            global_label = global_label.to_sparse()
 
             features.append({
                 'input_ids': input_ids,
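Note on this change: dropping the torch.tensor() wrapper suggests global_label is presumably already a torch tensor at this point, so re-wrapping it only copied the data and triggered PyTorch's copy-construct UserWarning. A minimal sketch of the fixed pattern, assuming the label cube is built upstream with torch.zeros (the shape below is illustrative, not from the diff):

import torch

# Hypothetical (num_types, seq_len+2, seq_len+2) global-pointer label cube;
# the +2 mirrors the start_idx+1 / end_idx+1 offsets seen in the diff.
num_types, seq_len = 3, 8
global_label = torch.zeros(num_types, seq_len + 2, seq_len + 2)
global_label[1, 3, 6] = 1  # an entity of type 1 spanning positions 3..6

# Old: torch.tensor(global_label).to_sparse() copies an existing tensor and
# warns "To copy construct from a tensor, it is recommended to use
# sourceTensor.clone().detach()".
# New: convert the existing tensor to sparse COO format directly.
global_label = global_label.to_sparse()
print(global_label._nnz())  # 1 stored value instead of a dense cube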
ark_nlp/model/ner/crf_bert/__init__.py (8 changes: 4 additions & 4 deletions)
@@ -13,8 +13,8 @@
 from ark_nlp.factory.optimizer import get_default_crf_bert_optimizer as get_default_model_optimizer
 from ark_nlp.factory.optimizer import get_default_crf_bert_optimizer as get_default_crf_bert_optimizer
 
-from ark_nlp.factory.task import BIONERTask as Task
-from ark_nlp.factory.task import BIONERTask as CrfBertNERTask
+from ark_nlp.factory.task import CRFNERTask as Task
+from ark_nlp.factory.task import CRFNERTask as CrfBertNERTask
 
-from ark_nlp.factory.predictor import BIONERPredictor as Predictor
-from ark_nlp.factory.predictor import BIONERPredictor as CrfBertNERPredictor
+from ark_nlp.factory.predictor import CRFNERPredictor as Predictor
+from ark_nlp.factory.predictor import CRFNERPredictor as CrfBertNERPredictor
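Note on this file: the package-level aliases previously pointed at the BIO-scheme task and predictor, so crf_bert users importing the generic names got the BIO classes rather than the CRF ones; the aliases now resolve to the CRF-specific classes. A small usage sketch (assuming ark_nlp at this commit is installed):

from ark_nlp.model.ner.crf_bert import Task, Predictor

# With the fix, the generic names resolve to the CRF variants.
assert Task.__name__ == 'CRFNERTask'
assert Predictor.__name__ == 'CRFNERPredictor'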
ark_nlp/processor/tokenizer/transfomer.py (9 changes: 7 additions & 2 deletions)
@@ -229,8 +229,13 @@ class TokenTokenizer(TransfomerTokenizer):
 
     def tokenize(self, text, **kwargs):
         tokens = []
-        text = ' '.join([token_ for token_ in text])
-        tokens = self.vocab.tokenize(text)
+        for token_ in text:
+            tokenized_token_ = self.vocab.tokenize(token_)
+            if tokenized_token_ == []:
+                tokens.extend([token_])
+            else:
+                tokens.extend(tokenized_token_)
 
         return tokens
 
     def sequence_to_ids(self, sequence, **kwargs):
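Note on this change: joining the characters with spaces and tokenizing the whole string let the vocab silently drop any character it cannot handle, so the token sequence could end up shorter than the character sequence and misalign token-level NER labels. Tokenizing one character at a time and falling back to the raw character whenever the vocab returns an empty list keeps the lengths equal. A sketch of the failure mode, assuming self.vocab is a Hugging Face BertTokenizer and using '\u2002' (EN SPACE) as a stand-in for any character the tokenizer drops:

from transformers import BertTokenizer

vocab = BertTokenizer.from_pretrained('bert-base-chinese')
text = '早\u2002好'  # 3 characters, so 3 token-level labels are expected

# Old behaviour: the unknown character vanishes, leaving 2 tokens for 3 labels.
old_tokens = vocab.tokenize(' '.join(text))

# New behaviour: per-character tokenization with a raw-character fallback.
new_tokens = []
for token_ in text:
    pieces = vocab.tokenize(token_)
    new_tokens.extend(pieces if pieces else [token_])

print(len(old_tokens), len(new_tokens))  # 2 3 -- alignment is preserved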
