Skip to content

Commit

Permalink
redo internals of State
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-martian committed Aug 29, 2024
1 parent 47e4bf9 commit a67d1e0
Show file tree
Hide file tree
Showing 6 changed files with 518 additions and 8 deletions.
2 changes: 2 additions & 0 deletions lttoolbox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ set(LIBLTTOOLBOX_HEADERS
node.h
pattern_list.h
regexp_compiler.h
reusable_state.h
serialiser.h
sorted_vector.h
sorted_vector.hpp
Expand Down Expand Up @@ -53,6 +54,7 @@ set(LIBLTTOOLBOX_SOURCES
node.cc
pattern_list.cc
regexp_compiler.cc
reusable_state.cc
sorted_vector.cc
state.cc
string_utils.cc
Expand Down
30 changes: 24 additions & 6 deletions lttoolbox/fst_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,20 @@ FSTProcessor::filterFinals(const State& state, UStringView casefrom)
uppercase, firstupper, 0);
}

/**
 * Filter the finals of a ReusableState, deriving the casing flags from the
 * surface form.
 *
 * When dictionaryCase is not set, `firstupper` is taken from the first
 * character of casefrom, and `uppercase` is set only when casefrom has more
 * than one character and both its first and last characters are uppercase.
 * Both flags are forwarded to state.filterFinals().
 *
 * @param state    state whose filterFinals() is invoked
 * @param casefrom surface form the casing flags are read from; an empty
 *                 view leaves both flags false instead of being indexed
 * @return the string produced by state.filterFinals()
 */
UString
FSTProcessor::filterFinals(const ReusableState& state, UStringView casefrom)
{
  bool firstupper = false;
  bool uppercase = false;
  const auto len = casefrom.size();
  // Guard on the length: indexing position 0 of an empty view is out of bounds.
  if (!dictionaryCase && len > 0) {
    firstupper = u_isupper(casefrom[0]);
    uppercase = (len > 1 && firstupper && u_isupper(casefrom[len - 1]));
  }
  return state.filterFinals(all_finals, alphabet, escaped_chars,
                            displayWeightsMode, maxAnalyses, maxWeightClasses,
                            uppercase, firstupper, 0);
}

void
FSTProcessor::writeEscaped(UStringView str, UFILE *output)
{
Expand Down Expand Up @@ -886,7 +900,9 @@ FSTProcessor::analysis(InputFile& input, UFILE *output)
bool last_incond = false;
bool last_postblank = false;
bool last_preblank = false;
State current_state = initial_state;
//State current_state = initial_state;
ReusableState current_state;
current_state.init(&root);
UString lf; // analysis (lexical form and tags)
UString sf; // surface form
UString lf_spcmp; // space compound analysis
Expand Down Expand Up @@ -1141,7 +1157,7 @@ FSTProcessor::analysis(InputFile& input, UFILE *output)
}
}

current_state = initial_state;
current_state.init(&root);
lf.clear();
sf.clear();
last_start = input_buffer.getPos();
Expand Down Expand Up @@ -1343,7 +1359,8 @@ FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode)
generation_wrapper_null_flush(input, output, mode);
}

State current_state = initial_state;
ReusableState current_state;
current_state.init(&root);
UString sf;

outOfWord = false;
Expand Down Expand Up @@ -1468,7 +1485,7 @@ FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode)
}
}

current_state = initial_state;
current_state.init(&root);
sf.clear();
}
else if(u_isspace(val) && sf.size() == 0)
Expand Down Expand Up @@ -1525,7 +1542,8 @@ FSTProcessor::transliteration(InputFile& input, UFILE *output)
size_t cur_word = 0;
size_t cur_pos = 0;
size_t match_pos = 0;
State current_state = initial_state;
ReusableState current_state;
current_state.init(&root);
UString last_match;
int space_diff = 0;

Expand Down Expand Up @@ -1705,7 +1723,7 @@ FSTProcessor::transliteration(InputFile& input, UFILE *output)
firstupper = false;
have_first = false;
have_second = false;
current_state = initial_state;
current_state.init(&root);
}
}
}
Expand Down
11 changes: 9 additions & 2 deletions lttoolbox/fst_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <lttoolbox/buffer.h>
#include <lttoolbox/my_stdio.h>
#include <lttoolbox/state.h>
#include <lttoolbox/reusable_state.h>
#include <lttoolbox/trans_exe.h>
#include <lttoolbox/input_file.h>
#include <libxml/xmlreader.h>
Expand Down Expand Up @@ -328,6 +329,7 @@ class FSTProcessor
* Assumes that casefrom is non-empty
*/
UString filterFinals(const State& state, UStringView casefrom);
UString filterFinals(const ReusableState& state, UStringView casefrom);

/**
* Write a string to an output stream,
Expand Down Expand Up @@ -450,11 +452,11 @@ class FSTProcessor
*
* @return true if running with --case-sensitive or if the state size has reached max_case_insensitive_state_size
*/
bool beCaseSensitive(const State& state) {
bool beCaseSensitive(size_t size) {
if(caseSensitive) {
return true;
}
else if(state.size() < max_case_insensitive_state_size) {
else if(size < max_case_insensitive_state_size) {
return false; // ie. do case-folding
}
else {
Expand All @@ -467,6 +469,11 @@ class FSTProcessor
}
}

/** Overload taking a State: forwards its size() to the size-based check. */
bool beCaseSensitive(const State& s)
{
  const auto state_size = s.size();
  return beCaseSensitive(state_size);
}
/** Overload taking a ReusableState: forwards its size() to the size-based check. */
bool beCaseSensitive(const ReusableState& s)
{
  const auto state_size = s.size();
  return beCaseSensitive(state_size);
}

public:

/*
Expand Down
3 changes: 3 additions & 0 deletions lttoolbox/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <map>

class State;
class ReusableState;
class Node;


Expand All @@ -35,6 +36,7 @@ class Dest
double *out_weight;

friend class State;
friend class ReusableState;
friend class Node;

void copy(Dest const &d)
Expand Down Expand Up @@ -112,6 +114,7 @@ class Node
{
private:
friend class State;
friend class ReusableState;

/**
* The outgoing transitions of this node.
Expand Down
Loading

0 comments on commit a67d1e0

Please sign in to comment.