Commit

Fixing tokenizer.
Narsil committed Aug 7, 2024
1 parent 183702c commit 61cfeb7
Showing 1 changed file with 2 additions and 4 deletions.
bindings/python/tests/bindings/test_tokenizer.py: 6 changes (2 additions, 4 deletions)
@@ -555,8 +555,7 @@ def test_decode_special(self):
 class TestTokenizerRepr:
     def test_repr(self):
         tokenizer = Tokenizer(BPE())
-        out = tokenizer.repr()
-        print(out)
+        out = repr(tokenizer)
         assert (
             out
             == 'Tokenizer(version="1.0", truncation=None, padding=None, added_tokens=[], normalizer=None, pre_tokenizer=None, post_processor=None, decoder=None, model=BPE(dropout=None, unk_token=None, continuing_subword_prefix=None, end_of_word_suffix=None, fuse_unk=False, byte_fallback=False, ignore_merges=False, vocab={}, merges=[]))'
@@ -571,8 +570,7 @@ def test_repr_complete(self):
             special_tokens=[("[CLS]", 1), ("[SEP]", 0)],
         )
         tokenizer.normalizer = Sequence([Lowercase(), Strip()])
-        out = tokenizer.repr()
-        print(out)
+        out = repr(tokenizer)
         assert (
             out
             == 'Tokenizer(version="1.0", truncation=None, padding=None, added_tokens=[], normalizer=Sequence(normalizers=[Lowercase(), Strip(strip_left=True, strip_right=True)]), pre_tokenizer=ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True), post_processor=TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=1), SpecialToken(id="[SEP]", type_id=1)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[1], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[0], tokens=["[SEP]"])}), decoder=None, model=BPE(dropout=None, unk_token=None, continuing_subword_prefix=None, end_of_word_suffix=None, fuse_unk=False, byte_fallback=False, ignore_merges=False, vocab={}, merges=[]))'

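The change replaces calls to a nonexistent tokenizer.repr() method with Python's built-in repr(), which dispatches to the object's __repr__, and drops a leftover debug print(out). A minimal sketch of why the built-in is the right entry point; the Toy class here is illustrative only, not part of the tokenizers API:

class Toy:
    """Stand-in for an object that defines __repr__ but no plain repr() method."""

    def __repr__(self):
        return 'Toy(version="1.0")'


toy = Toy()

# The built-in repr() dispatches to __repr__, which is what the tests now exercise.
assert repr(toy) == 'Toy(version="1.0")'

# No .repr() method was ever defined, so the old test code would raise:
try:
    toy.repr()
except AttributeError:
    pass  # expected: 'Toy' object has no attribute 'repr'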