Skip to content

Commit

Permalink
Subwords as parameter.
Browse files Browse the repository at this point in the history
  • Loading branch information
mgraffg committed Jun 21, 2024
1 parent 4330385 commit ddf4f3c
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
2 changes: 1 addition & 1 deletion dialectid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__version__ = '0.0.3'
__version__ = '0.0.4'

from dialectid.text_repr import BoW
from dialectid.model import DialectId
15 changes: 12 additions & 3 deletions dialectid/tests/test_text_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,18 @@ def test_bow():
"""Test BoW"""
from b4msa.textmodel import TextModel

bow = BoW(lang='es')
bow = BoW(lang='es', voc_size_exponent=13)
assert isinstance(bow.bow, TextModel)
X = bow.transform(['Buenos dias'])
bow2 = BoW(lang='es', loc='mx')
bow2 = BoW(lang='es', loc='mx', voc_size_exponent=13)
X2 = bow2.transform(['Buenos dias'])
assert (X - X2).sum() != 0
assert (X - X2).sum() != 0


def test_subwords():
    """Check that a BoW built with ``subwords=True`` can transform text."""

    # Smoke test: construction and a single transform call must not raise.
    model = BoW(lang='es', subwords=True, voc_size_exponent=13)
    model.transform(['Hola'])

4 changes: 4 additions & 0 deletions dialectid/text_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,14 @@ def __init__(self, pretrain: bool=True,
v1: bool=False,
estimator_kwargs: dict=None,
loc: str=None,
subwords: bool=False,
**kwargs):
assert pretrain
assert not v1
self._bow = None
if subwords:
assert loc is None
loc = 'qgrams'
self.loc = loc
if estimator_kwargs is None:
estimator_kwargs = {'dual': True, 'class_weight': 'balanced'}
Expand Down

0 comments on commit ddf4f3c

Please sign in to comment.