Skip to content

Commit

Permalink
Skip "aux" MWT (multi-word token) handling in French
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jemoka committed Feb 4, 2024
1 parent 755cd55 commit 64bd308
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 17 deletions.
4 changes: 2 additions & 2 deletions batchalign/pipelines/morphosyntax/ud.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,8 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$"
auxiliaries.append(token.id[0]+1)
elif lang=="fr" and token.text.strip() == "aujourd'":
auxiliaries.append(token.id[-1])
elif lang=="fr" and token.text.strip() == "aux":
auxiliaries.append(token.id[0])
# elif lang=="fr" and token.text.strip() == "aux":
# auxiliaries.append(token.id[0])
elif (lang=="fr" and token.text.strip() == "au" and
type(token.id) == tuple and indx != 0
and sentence.tokens[indx-1].text != "jusqu'"):
Expand Down
6 changes: 3 additions & 3 deletions batchalign/version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0.4.3-beta.0
Jan 30th, 2024
Beta Evaluation Engine
0.4.3-beta.1
Feb 4th, 2024
Fix aux tagging in fr
25 changes: 13 additions & 12 deletions scratchpad.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,25 @@
from batchalign import *

########### The Batchalign Core Test Harness ###########
from batchalign.formats.chat.parser import chat_parse_utterance
# from batchalign.formats.chat.parser import chat_parse_utterance

text = "Hey yo kids, whacha gonna do?"
# text = "on va jouer aux arbres ?"

function = "morphosyntax,eval"
lang = "eng"
num_speakers = 1
# function = "morphosyntax"
# lang = "fra"
# num_speakers = 1

forms, delim = chat_parse_utterance(text, None, None, None, None)
utterance = Utterance(content=forms, delim=delim)
# forms, delim = chat_parse_utterance(text, None, None, None, None)
# utterance = Utterance(content=forms, delim=delim)

# utterance = utterance(content=text)
# # utterance = utterance(content=text)

ut = Document(content=[utterance], langs=[lang])
# ut = Document(content=[utterance], langs=[lang])

pipeline = BatchalignPipeline.new(function, lang=lang, num_speakers=num_speakers)
res = pipeline(ut, gold=ut)
# pipeline = BatchalignPipeline.new(function, lang=lang, num_speakers=num_speakers)
# res = pipeline(ut)

# print(str(CHATFile(doc=doc)))
# print(str(CHATFile(doc=res)))

########### The Batchalign String Test Harness ###########
# from batchalign.formats.chat.parser import chat_parse_utterance
Expand Down Expand Up @@ -117,6 +117,7 @@
# # .replace(".mp3", ".cha"))

# _dispatch(function, lang, num_speakers, [in_format], Context(),

# in_dir, out_dir,
# loader, writer, Console())

Expand Down

0 comments on commit 64bd308

Please sign in to comment.