Commit b1bc507

Authored Mar 29, 2024
Merge pull request #15 from Mdperez19/LissL/FirstUnitTests
LissL/FirstUnitTests
2 parents 1a9bc94 + 1fd9b53 commit b1bc507

File tree

7 files changed: +81 −2 lines changed

‎pytest.ini (+6)
@@ -0,0 +1,6 @@
+[pytest]
+markers=
+    SentenceTokenizerNlkt
+    WordTokenizerNlkt
+    NormalizerPython
+testpaths = tests
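Registering the three marks in pytest.ini keeps pytest from emitting PytestUnknownMarkWarning at collection time and lets each suite run in isolation, e.g. pytest -m NormalizerPython; testpaths = tests restricts collection to the tests directory.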

‎requirements.txt (572 Bytes; binary file not shown)

‎spellchecker/natural_language_processing_tools/text_preprocessing/normalizer/Normalizer.py (+3 −1)
@@ -3,5 +3,7 @@
 
 class Normalizer(metaclass=ABCMeta):
     @abstractmethod
-    def normalize_sentences_tokens(self, sentences_tokens: list) -> list:
+    def normalize_sentences_tokens(self,
+                                   sentences_tokens: list[list[str]]
+                                   ) -> list[list[str]]:
         pass
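Note that the new list[list[str]] annotations use the built-in generics from PEP 585, so this code requires Python 3.9 or newer; older interpreters would need typing.List instead.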

‎spellchecker/natural_language_processing_tools/text_preprocessing/normalizer/NormalizerPython.py (+3 −1)
@@ -5,7 +5,9 @@ class NormalizerPython(Normalizer):
     def __init__(self):
         pass
 
-    def normalize_sentences_tokens(self, sentences_tokens: list) -> list:
+    def normalize_sentences_tokens(self,
+                                   sentences_tokens: list[list[str]]
+                                   ) -> list[list[str]]:
         normalized_sentences_tokens = []
         for tokens_by_sentence in sentences_tokens:
             normalized_tokens_by_sentence = []
‎tests/… (new file: TestNormalizerPython, +28)
@@ -0,0 +1,28 @@
+import pytest
+
+from spellchecker.natural_language_processing_tools.text_preprocessing.normalizer.NormalizerPython import NormalizerPython
+
+class TestNormalizerPython:
+
+    @pytest.fixture
+    def normalizer(self):
+        return NormalizerPython()
+
+    @pytest.mark.NormalizerPython
+    def test_normalize_sentences_tokens(self, normalizer):
+        # Given
+        tokens = [["¿Cómo", "estás&?"], ["Bien", "gracias"]]
+        expected_normalized_tokens = [["cómo", "estás"], ["bien", "gracias"]]
+        # When
+        normalized_tokens = normalizer.normalize_sentences_tokens(tokens)
+        # Then
+        assert normalized_tokens == expected_normalized_tokens
+    @pytest.mark.NormalizerPython
+    def test_build_normalized_token(self, normalizer):
+        # Given
+        token = "Hola%_)*+áéÍÓú"
+        expected_normalized_token = "holaáéíóú"
+        # When
+        normalized_token = normalizer.build_normalized_token(token)
+        # Then
+        assert normalized_token == expected_normalized_token
‎tests/… (new file: TestSentenceTokenizerNlkt, +22)
@@ -0,0 +1,22 @@
+import pytest
+
+from spellchecker.natural_language_processing_tools.text_preprocessing.tokenizer.sentence.SentenceTokenizerNltk import \
+    SentenceTokenizerNltk
+
+class TestSentenceTokenizerNlkt:
+
+    @pytest.fixture
+    def sentence_tokenizer(self):
+        return SentenceTokenizerNltk()
+
+    @pytest.mark.SentenceTokenizerNlkt
+    def test_tokenize_text_by_sentence(self, sentence_tokenizer):
+        # Given
+        text = "Esto es una oración de prueba. Esta es otra oración de prueba."
+        expected_tokens = ["Esto es una oración de prueba.", "Esta es otra oración de prueba."]
+        # When
+        tokens = sentence_tokenizer.tokenize_text_by_sentence(text)
+        # Then
+        assert len(tokens) == 2
+        assert isinstance(tokens, list)
+        assert tokens == expected_tokens
‎tests/… (new file: TestWordTokenizerNlkt, +19)
@@ -0,0 +1,19 @@
+import pytest
+
+from spellchecker.natural_language_processing_tools.text_preprocessing.tokenizer.word.WordTokenizerNltk import \
+    WordTokenizerNltk
+
+
+class TestWordTokenizerNlkt:
+
+    @pytest.fixture
+    def word_tokenizer(self):
+        return WordTokenizerNltk()
+
+    @pytest.mark.WordTokenizerNlkt
+    def test_tokenize_sentence_by_words(self, word_tokenizer):
+        sentence = ["Esto es una oración de prueba que debe mostrar 10 tokens",
+                    "Esta es otra oración que tendrá 8 tokens"]
+        tokens = word_tokenizer.tokenize_sentence_by_words(sentence)
+        assert len(tokens) == 2
+        assert isinstance(tokens, list)
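As above, WordTokenizerNltk is not shown in this diff; judging by the test it takes a list of sentences and returns one token list per sentence. A hedged sketch assuming NLTK's word_tokenize:

    from nltk.tokenize import word_tokenize

    class WordTokenizerNltk:
        def tokenize_sentence_by_words(self, sentences: list[str]) -> list[list[str]]:
            # One token list per input sentence; also relies on the punkt model.
            return [word_tokenize(sentence, language="spanish") for sentence in sentences]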
