From 64bd308dbcc8afc5568db68fc9adbedaf4dd6439 Mon Sep 17 00:00:00 2001 From: Houjun Liu Date: Sun, 4 Feb 2024 14:44:59 -0800 Subject: [PATCH] skip "aux" mwt in french --- batchalign/pipelines/morphosyntax/ud.py | 4 ++-- batchalign/version | 6 +++--- scratchpad.py | 25 +++++++++++++------------ 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/batchalign/pipelines/morphosyntax/ud.py b/batchalign/pipelines/morphosyntax/ud.py index 1086266..80d68aa 100644 --- a/batchalign/pipelines/morphosyntax/ud.py +++ b/batchalign/pipelines/morphosyntax/ud.py @@ -349,8 +349,8 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$" auxiliaries.append(token.id[0]+1) elif lang=="fr" and token.text.strip() == "aujourd'": auxiliaries.append(token.id[-1]) - elif lang=="fr" and token.text.strip() == "aux": - auxiliaries.append(token.id[0]) + # elif lang=="fr" and token.text.strip() == "aux": + # auxiliaries.append(token.id[0]) elif (lang=="fr" and token.text.strip() == "au" and type(token.id) == tuple and indx != 0 and sentence.tokens[indx-1].text != "jusqu'"): diff --git a/batchalign/version b/batchalign/version index b5dd43e..1df925a 100644 --- a/batchalign/version +++ b/batchalign/version @@ -1,3 +1,3 @@ -0.4.3-beta.0 -Jan 30th, 2024 -Beta Evaluation Engine \ No newline at end of file +0.4.3-beta.1 +Feb 4th, 2024 +Fix aux tagging in fr \ No newline at end of file diff --git a/scratchpad.py b/scratchpad.py index f85da54..0aaa6c5 100644 --- a/scratchpad.py +++ b/scratchpad.py @@ -18,25 +18,25 @@ from batchalign import * ########### The Batchalign Core Test Harness ########### -from batchalign.formats.chat.parser import chat_parse_utterance +# from batchalign.formats.chat.parser import chat_parse_utterance -text = "Hey yo kids, whacha gonna do?" +# text = "on va jouer aux arbres ?" -function = "morphosyntax,eval" -lang = "eng" -num_speakers = 1 +# function = "morphosyntax" +# lang = "fra" +# num_speakers = 1 -forms, delim = chat_parse_utterance(text, None, None, None, None) -utterance = Utterance(content=forms, delim=delim) +# forms, delim = chat_parse_utterance(text, None, None, None, None) +# utterance = Utterance(content=forms, delim=delim) -# utterance = utterance(content=text) +# # utterance = utterance(content=text) -ut = Document(content=[utterance], langs=[lang]) +# ut = Document(content=[utterance], langs=[lang]) -pipeline = BatchalignPipeline.new(function, lang=lang, num_speakers=num_speakers) -res = pipeline(ut, gold=ut) +# pipeline = BatchalignPipeline.new(function, lang=lang, num_speakers=num_speakers) +# res = pipeline(ut) -# print(str(CHATFile(doc=doc))) +# print(str(CHATFile(doc=res))) ########### The Batchalign String Test Harness ########### # from batchalign.formats.chat.parser import chat_parse_utterance @@ -117,6 +117,7 @@ # # .replace(".mp3", ".cha")) # _dispatch(function, lang, num_speakers, [in_format], Context(), + # in_dir, out_dir, # loader, writer, Console())