Skip to content

Commit

Permalink
clean up combining diacritic behavior (closes #7)
Browse files Browse the repository at this point in the history
backslash + diacritic → diacritic
colon + diacritic → colon + backslash + diacritic
  • Loading branch information
mr-martian committed Jun 10, 2023
1 parent 32d969e commit afeae77
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 14 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
AC_PREREQ(2.61)

AC_INIT([lexd], [1.3.3], [awesomeevildudes@gmail.com])
AC_INIT([lexd], [1.3.4], [awesomeevildudes@gmail.com])
AM_INIT_AUTOMAKE
AC_CONFIG_MACRO_DIR([m4])

Expand Down
34 changes: 21 additions & 13 deletions src/lexdcompiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -308,18 +308,29 @@ LexdCompiler::readTagFilter(char_iter& iter, UnicodeString& line)
return tag_filter_t();
}

void
LexdCompiler::appendSymbol(const UnicodeString& s, lex_token_t& tok)
{
if (shouldCombine) {
tok.symbols.push_back(alphabet_lookup(s));
} else {
for (int c = 0; c < s.length(); c++) {
tok.symbols.push_back(alphabet_lookup(s[c]));
}
}
}

void
LexdCompiler::readSymbol(char_iter& iter, UnicodeString& line, lex_token_t& tok)
{
if (*iter == "\\") {
if (shouldCombine) {
tok.symbols.push_back(alphabet_lookup(*++iter));
if ((*iter).startsWith("\\")) {
if ((*iter).length() == 1) {
appendSymbol(*++iter, tok);
} else {
++iter;
for (int c = 0; c < (*iter).length(); c++) {
tok.symbols.push_back(alphabet_lookup((*iter)[c]));
}
appendSymbol((*iter).tempSubString(1), tok);
}
} else if ((*iter).startsWith(":")) {
appendSymbol((*iter).tempSubString(1), tok);
} else if (*iter == "{" || *iter == "<") {
UChar end = (*iter == "{") ? '}' : '>';
int i = iter.span().first;
Expand All @@ -330,12 +341,8 @@ LexdCompiler::readSymbol(char_iter& iter, UnicodeString& line, lex_token_t& tok)
} else {
die("Multichar symbol didn't end; searching for %S", err(end));
}
} else if (shouldCombine) {
tok.symbols.push_back(alphabet_lookup(*iter));
} else {
for (int c = 0; c < (*iter).length(); c++) {
tok.symbols.push_back(alphabet_lookup((*iter)[c]));
}
appendSymbol(*iter, tok);
}
}

Expand Down Expand Up @@ -554,12 +561,13 @@ LexdCompiler::processLexiconSegment(char_iter& iter, UnicodeString& line, unsign
--iter;
tags_applied = true;
}
else if(*iter == ":")
else if((*iter).startsWith(":"))
{
if(inleft)
inleft = false;
else
die("Lexicon entry contains multiple colons");
if ((*iter).length() > 1) readSymbol(iter, line, seg.right);
}
else readSymbol(iter, line, (inleft ? seg.left : seg.right));
}
Expand Down
1 change: 1 addition & 0 deletions src/lexdcompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ class LexdCompiler
RepeatMode readModifier(char_iter& iter);
tag_filter_t readTagFilter(char_iter& iter, UnicodeString& line);
tags_t readTags(char_iter& iter, UnicodeString& line);
void appendSymbol(const UnicodeString& s, lex_token_t& tok);
void readSymbol(char_iter& iter, UnicodeString& line, lex_token_t& tok);
int processRegexTokenSeq(char_iter& iter, UnicodeString& line, Transducer* trans, int start_state);
int processRegexGroup(char_iter& iter, UnicodeString& line, Transducer* trans, int start_state, unsigned int depth);
Expand Down
1 change: 1 addition & 0 deletions tests/feature/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ tests = \
anonpat-nospaces \
anonpat-ops \
conflicting-tags \
diacritic \
disjoint-opt \
empty \
empty-patterns \
Expand Down
12 changes: 12 additions & 0 deletions tests/feature/test-diacritic.lexd
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
PATTERNS
X
Y(2)

LEXICON X
:\ֻ
x\ַ

LEXICON Y(2)
a ַ
4 changes: 4 additions & 0 deletions tests/feature/test-diacritic.lexd.txt.strings.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ַ

0 comments on commit afeae77

Please sign in to comment.