From ec2c68f3bdf81e76899c6dec5ab04b983ae75dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Vep=C5=A1tas?= Date: Wed, 27 Mar 2024 18:42:38 -0500 Subject: [PATCH 1/7] Move file-dictionary-private data for file dictionary A rather unimportant code cleanup. Just general housekeeping. --- link-grammar/dict-common/dict-common.h | 13 +- link-grammar/dict-file/dict-file.h | 27 +++ link-grammar/dict-file/dictionary.c | 7 +- link-grammar/dict-file/read-dict.c | 250 ++++++++++++------------- 4 files changed, 158 insertions(+), 139 deletions(-) create mode 100644 link-grammar/dict-file/dict-file.h diff --git a/link-grammar/dict-common/dict-common.h b/link-grammar/dict-common/dict-common.h index d4d58d2c78..502a064df6 100644 --- a/link-grammar/dict-common/dict-common.h +++ b/link-grammar/dict-common/dict-common.h @@ -177,6 +177,7 @@ struct Dictionary_s #ifdef HAVE_ATOMESE void * as_server; /* cogserver connection */ #endif + void * file_data; /* file-back dictionary */ void (*insert_entry)(Dictionary, Dict_node *, int); @@ -205,19 +206,9 @@ struct Dictionary_s Category * category; /* Word lists - indexed by category number */ bool generate_walls; /* Generate walls too for wildcard words */ - /* Private data elements that come in play only while file-backed - * dictionaries are being read, and are not otherwise used. - * TODO: These should be moved to a private structure, accessible - * only to the file backend. 
- */ - const char * input; - const char * pin; - bool recursive_error; - bool is_special; - int already_got_it; /* For char, but needs to hold EOF */ + /* File I/O cruft */ int line_number; char current_idiom[IDIOM_LINK_SZ]; - char token[MAX_TOKEN_LENGTH]; }; bool is_stem(const char *); diff --git a/link-grammar/dict-file/dict-file.h b/link-grammar/dict-file/dict-file.h new file mode 100644 index 0000000000..664af711a2 --- /dev/null +++ b/link-grammar/dict-file/dict-file.h @@ -0,0 +1,27 @@ +/*************************************************************************/ +/* Copyright (c) 2004 */ +/* Daniel Sleator, David Temperley, and John Lafferty */ +/* Copyright (c) 2013, 2014 Linas Vepstas */ +/* All rights reserved */ +/* */ +/* Use of the link grammar parsing system is subject to the terms of the */ +/* license set forth in the LICENSE file included with this software. */ +/* This license allows free redistribution and use in source and binary */ +/* forms, with or without modification, subject to certain conditions. */ +/* */ +/*************************************************************************/ + +/* Private state, used only while file-backed dictionaries are being + * read. Invalid soon after read is complete. 
+ */ +struct File_Dictionary_s +{ + const char * input; + const char * pin; + bool recursive_error; + bool is_special; + int already_got_it; /* For char, but needs to hold EOF */ + char token[MAX_TOKEN_LENGTH]; +}; + +typedef struct File_Dictionary_s * File_Dictionary; diff --git a/link-grammar/dict-file/dictionary.c b/link-grammar/dict-file/dictionary.c index 120b7a66b3..920e2ec9fc 100644 --- a/link-grammar/dict-file/dictionary.c +++ b/link-grammar/dict-file/dictionary.c @@ -24,6 +24,7 @@ #include "dict-common/file-utils.h" #include "dict-common/idiom.h" #include "dict-common/regex-morph.h" +#include "dict-file/dict-file.h" #include "dict-ram/dict-ram.h" #include "post-process/pp_knowledge.h" #include "read-dialect.h" @@ -228,9 +229,11 @@ dictionary_six_str(const char * lang, /*align*/false, /*exact*/false); /* Read dictionary from the input string. */ + File_Dictionary fdict = malloc(sizeof(struct File_Dictionary_s)); + dict->file_data = fdict; - dict->input = input; - dict->pin = dict->input; + fdict->input = input; + fdict->pin = fdict->input; if (!read_dictionary(dict)) { goto failure; diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index 2daaa261fa..0301202b12 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -21,6 +21,7 @@ #include "dict-common/dict-utils.h" // patch_subscript #include "dict-common/file-utils.h" #include "dict-common/idiom.h" +#include "dict-file/dict-file.h" #include "dict-ram/dict-ram.h" #include "error.h" #include "externs.h" @@ -125,11 +126,6 @@ static bool link_advance(Dictionary dict); void dict_error2(Dictionary dict, const char * s, const char *s2) { -#define ERRBUFLEN 1024 - char tokens[ERRBUFLEN], t[ERRBUFLEN]; - int pos = 1; - int i; - if (IS_DYNAMIC_DICT(dict)) { if (s2) @@ -145,34 +141,39 @@ void dict_error2(Dictionary dict, const char * s, const char *s2) return; } + File_Dictionary fdict = dict->file_data; + /* The link_advance used to print the 
error message can * throw more errors while printing... */ - if (dict->recursive_error) return; - dict->recursive_error = true; + if (fdict->recursive_error) return; + fdict->recursive_error = true; char token[MAX_TOKEN_LENGTH]; - strcpy(token, dict->token); - bool save_is_special = dict->is_special; - const char * save_input = dict->input; - const char * save_pin = dict->pin; - int save_already_got_it = dict->already_got_it; + strcpy(token, fdict->token); + bool save_is_special = fdict->is_special; + const char * save_input = fdict->input; + const char * save_pin = fdict->pin; + int save_already_got_it = fdict->already_got_it; int save_line_number = dict->line_number; +#define ERRBUFLEN 1024 + char tokens[ERRBUFLEN], t[ERRBUFLEN]; + int pos = 1; tokens[0] = '\0'; - for (i=0; i<5 && dict->token[0] != '\0'; i++) + for (int i=0; i<5 && fdict->token[0] != '\0'; i++) { - pos += snprintf(t, ERRBUFLEN, "\"%s\" ", dict->token); + pos += snprintf(t, ERRBUFLEN, "\"%s\" ", fdict->token); strncat(tokens, t, ERRBUFLEN-1-pos); if (!link_advance(dict)) break; } tokens[pos] = '\0'; - strcpy(dict->token, token); - dict->is_special = save_is_special; - dict->input = save_input; - dict->pin = save_pin; - dict->already_got_it = save_already_got_it; - dict->line_number = save_line_number; + strcpy(fdict->token, token); + fdict->is_special = save_is_special; + fdict->input = save_input; + fdict->pin = save_pin; + fdict->already_got_it = save_already_got_it; + dict->line_number = save_line_number; if (s2) { @@ -186,7 +187,7 @@ void dict_error2(Dictionary dict, const char * s, const char *s2) "%s\n\t Line %d, next tokens: %s\n", dict->name, s, dict->line_number, tokens); } - dict->recursive_error = false; + fdict->recursive_error = false; } static void dict_error(Dictionary dict, const char * s) @@ -196,9 +197,10 @@ static void dict_error(Dictionary dict, const char * s) static void warning(Dictionary dict, const char * s) { + File_Dictionary fdict = dict->file_data; prt_error("Warning: 
%s\n" "\tline %d, current token = \"%s\"\n", - s, dict->line_number, dict->token); + s, dict->line_number, fdict->token); } /** @@ -210,16 +212,17 @@ static void warning(Dictionary dict, const char * s) typedef char utf8char[MAXUTFLEN]; static bool get_character(Dictionary dict, int quote_mode, utf8char uc) { - int i = 0; + File_Dictionary fdict = dict->file_data; + int i = 0; while (1) { - char c = *(dict->pin++); + char c = *(fdict->pin++); /* Skip over all comments */ if ((c == '%') && (!quote_mode)) { - while ((c != 0x0) && (c != '\n')) c = *(dict->pin++); + while ((c != 0x0) && (c != '\n')) c = *(fdict->pin++); if (c == 0x0) break; dict->line_number++; continue; @@ -241,11 +244,11 @@ static bool get_character(Dictionary dict, int quote_mode, utf8char uc) i = 1; while (i < MAXUTFLEN-1) { - c = *(dict->pin++); + c = *(fdict->pin++); /* If we're onto the next char, we're done. */ if (((c & 0x80) == 0x0) || ((c & 0xc0) == 0xc0)) { - dict->pin--; + fdict->pin--; uc[i] = 0x0; return true; } @@ -291,25 +294,25 @@ static bool char_is_special(char c) NO_SAN_DICT static bool link_advance(Dictionary dict) { - utf8char c; - int nr, i; + File_Dictionary fdict = dict->file_data; bool quote_mode = false; - dict->is_special = false; + fdict->is_special = false; - if (dict->already_got_it != '\0') + if (fdict->already_got_it != '\0') { - dict->is_special = char_is_special(dict->already_got_it); - if (dict->already_got_it == EOF) { - dict->token[0] = '\0'; + fdict->is_special = char_is_special(fdict->already_got_it); + if (fdict->already_got_it == EOF) { + fdict->token[0] = '\0'; } else { - dict->token[0] = (char)dict->already_got_it; /* specials are one byte */ - dict->token[1] = '\0'; + fdict->token[0] = (char)fdict->already_got_it; /* specials are one byte */ + fdict->token[1] = '\0'; } - dict->already_got_it = '\0'; + fdict->already_got_it = '\0'; return true; } + utf8char c; do { bool ok = get_character(dict, false, c); @@ -317,7 +320,7 @@ static bool 
link_advance(Dictionary dict) } while (lg_isspace((unsigned char)c[0])); - i = 0; + int i = 0; for (;;) { if (i > MAX_TOKEN_LENGTH-3) { @@ -337,13 +340,12 @@ static bool link_advance(Dictionary dict) * when a quoted string is used with a #define statement. */ if (quote_mode) { - if (c[0] == '"' && /* Check the next character too, to allow " in words */ - (*dict->pin == ':' || *dict->pin == ';' || - lg_isspace((unsigned char)*dict->pin))) { + (*fdict->pin == ':' || *fdict->pin == ';' || + lg_isspace((unsigned char)*fdict->pin))) { - dict->token[i] = '\0'; + fdict->token[i] = '\0'; return true; } @@ -354,36 +356,36 @@ static bool link_advance(Dictionary dict) } /* Copy all of the UTF8 bytes. */ - nr = 0; - while (c[nr]) {dict->token[i] = c[nr]; i++; nr++; } + int nr = 0; + while (c[nr]) {fdict->token[i] = c[nr]; i++; nr++; } } else { if ('\0' == c[1] && char_is_special(c[0])) { if (i == 0) { - dict->token[0] = c[0]; /* special toks are one char always */ - dict->token[1] = '\0'; - dict->is_special = true; + fdict->token[0] = c[0]; /* special toks are one char always */ + fdict->token[1] = '\0'; + fdict->is_special = true; return true; } - dict->token[i] = '\0'; - dict->already_got_it = c[0]; + fdict->token[i] = '\0'; + fdict->already_got_it = c[0]; return true; } if (c[0] == 0x0) { - if (i != 0) dict->already_got_it = '\0'; - dict->token[0] = '\0'; + if (i != 0) fdict->already_got_it = '\0'; + fdict->token[0] = '\0'; return true; } if (lg_isspace((unsigned char)c[0])) { - dict->token[i] = '\0'; + fdict->token[i] = '\0'; return true; } if (c[0] == '\"') { quote_mode = true; } else { - nr = 0; - while (c[nr]) {dict->token[i] = c[nr]; i++; nr++; } + int nr = 0; + while (c[nr]) {fdict->token[i] = c[nr]; i++; nr++; } } } bool ok = get_character(dict, quote_mode, c); @@ -397,9 +399,10 @@ static bool link_advance(Dictionary dict) */ static int is_equal(Dictionary dict, char c) { - return (dict->is_special && - c == dict->token[0] && - dict->token[1] == '\0'); + 
File_Dictionary fdict = dict->file_data; + return (fdict->is_special && + c == fdict->token[0] && + fdict->token[1] == '\0'); } /** @@ -458,18 +461,19 @@ static bool check_connector(Dictionary dict, const char * s) */ static Exp * make_dir_connector(Dictionary dict, int i) { + File_Dictionary fdict = dict->file_data; char *constring; bool multi = false; - char dir = dict->token[i]; - dict->token[i] = '\0'; /* get rid of the + or - */ - if (dict->token[0] == '@') + char dir = fdict->token[i]; + fdict->token[i] = '\0'; /* get rid of the + or - */ + if (fdict->token[0] == '@') { - constring = dict->token+1; + constring = fdict->token+1; multi = true; } else - constring = dict->token; + constring = fdict->token; return make_connector_node(dict, dict->Exp_pool, constring, dir, multi); @@ -506,30 +510,29 @@ static unsigned int exptag_macro_add(Dictionary dict, const char *tag) */ static Exp * make_connector(Dictionary dict) { + File_Dictionary fdict = dict->file_data; Exp * n; - Dict_node *dn; - int i; - i = strlen(dict->token) - 1; /* this must be +, - or $ if a connector */ - if ((dict->token[i] != '+') && - (dict->token[i] != '-') && - (dict->token[i] != ANY_DIR)) + int i = strlen(fdict->token) - 1; /* this must be +, - or $ if a connector */ + if ((fdict->token[i] != '+') && + (fdict->token[i] != '-') && + (fdict->token[i] != ANY_DIR)) { /* If we are here, token is a word */ - patch_subscript(dict->token); - dn = strict_lookup_list(dict, dict->token); + patch_subscript(fdict->token); + Dict_node * dn = strict_lookup_list(dict, fdict->token); if (dn == NULL) { dict_error2(dict, "Perhaps missing + or - in a connector.\n" "Or perhaps you forgot the subscript on a word.\n" "Or perhaps the word is used before it is defined:", - dict->token); + fdict->token); return NULL; } if (dn->right != NULL) { dict_node_free_list(dn); - dict_error2(dict, "Referencing a duplicate word:", dict->token); + dict_error2(dict, "Referencing a duplicate word:", fdict->token); /* Note: A word 
which becomes duplicate latter evades this check. */ return NULL; } @@ -544,25 +547,25 @@ static Exp * make_connector(Dictionary dict) else { /* If we are here, token is a connector */ - if (!check_connector(dict, dict->token)) + if (!check_connector(dict, fdict->token)) { return NULL; } - if ((dict->token[i] == '+') || (dict->token[i] == '-')) + if ((fdict->token[i] == '+') || (fdict->token[i] == '-')) { /* A simple, unidirectional connector. Just make that. */ n = make_dir_connector(dict, i); if (NULL == n) return NULL; } - else if (dict->token[i] == ANY_DIR) + else if (fdict->token[i] == ANY_DIR) { Exp *plu, *min; /* If we are here, then it's a bi-directional connector. * Make both a + and a - version, and or them together. */ - dict->token[i] = '+'; + fdict->token[i] = '+'; plu = make_dir_connector(dict, i); if (NULL == plu) return NULL; - dict->token[i] = '-'; + fdict->token[i] = '-'; min = make_dir_connector(dict, i); if (NULL == min) return NULL; @@ -607,6 +610,7 @@ static bool is_number(const char * str) */ static Exp *make_expression(Dictionary dict) { + File_Dictionary fdict = dict->file_data; Exp *nl = NULL; Exp *e_head = NULL; Exp *e_tail = NULL; /* last part of the expression */ @@ -672,11 +676,11 @@ static Exp *make_expression(Dictionary dict) * is used as an expression tag. Else, the cost of a * square bracket is 1.0. 
*/ - if (is_number(dict->token)) + if (is_number(fdict->token)) { float cost; - if (strtofC(dict->token, &cost)) + if (strtofC(fdict->token, &cost)) { nl->cost += cost; } @@ -689,11 +693,11 @@ static Exp *make_expression(Dictionary dict) return NULL; } } - else if ((strcmp(dict->token, "or") != 0) && - (strcmp(dict->token, "and") != 0) && - isalpha((unsigned char)dict->token[0])) + else if ((strcmp(fdict->token, "or") != 0) && + (strcmp(fdict->token, "and") != 0) && + isalpha((unsigned char)fdict->token[0])) { - const char *bad = valid_dialect_name(dict->token); + const char *bad = valid_dialect_name(fdict->token); if (bad != NULL) { char badchar[] = { *bad, '\0' }; @@ -705,7 +709,7 @@ static Exp *make_expression(Dictionary dict) { nl = make_unary_node(dict->Exp_pool, nl); } - nl->tag_id = exptag_dialect_add(dict, dict->token); + nl->tag_id = exptag_dialect_add(dict, fdict->token); nl->tag_type = Exptag_dialect; if (!link_advance(dict)) { return NULL; @@ -716,7 +720,7 @@ static Exp *make_expression(Dictionary dict) nl->cost += 1.0F; } } - else if (!dict->is_special) + else if (!fdict->is_special) { nl = make_connector(dict); if (nl == NULL) { @@ -763,17 +767,17 @@ static Exp *make_expression(Dictionary dict) Exp_type op; /* Non-commuting AND */ - if (is_equal(dict, '&') || (strcmp(dict->token, "and") == 0)) + if (is_equal(dict, '&') || (strcmp(fdict->token, "and") == 0)) { op = AND_type; } /* Commuting OR */ - else if (is_equal(dict, '|') || (strcmp(dict->token, "or") == 0)) + else if (is_equal(dict, '|') || (strcmp(fdict->token, "or") == 0)) { op = OR_type; } /* Commuting AND */ - else if (is_equal(dict, SYM_AND) || (strcmp(dict->token, "sym") == 0)) + else if (is_equal(dict, SYM_AND) || (strcmp(fdict->token, "sym") == 0)) { /* Part 1/2 of SYM_AND processing */ op = AND_type; /* allow mixing with ordinary ands at the same level */ @@ -942,14 +946,12 @@ void insert_list(Dictionary dict, Dict_node * p, int l) */ static bool read_entry(Dictionary dict) { - Exp *n; - 
int i; - + File_Dictionary fdict = dict->file_data; Dict_node *dnx, *dn = NULL; while (!is_equal(dict, ':')) { - if (dict->is_special) + if (fdict->is_special) { dict_error(dict, "I expected a word but didn\'t get it."); goto syntax_error; @@ -959,19 +961,19 @@ static bool read_entry(Dictionary dict) /* However, be careful to reject "/.v" which is the division symbol * used in equations (.v means verb-like). Also reject an affix regex * specification (may appear only in the affix file). */ - if ((dict->token[0] == '/') && - (dict->token[1] != '.') && (get_affix_regex_cg(dict->token) < 0)) + if ((fdict->token[0] == '/') && + (fdict->token[1] != '.') && (get_affix_regex_cg(fdict->token) < 0)) { - Dict_node *new_dn = read_word_file(dict, dn, dict->token); + Dict_node *new_dn = read_word_file(dict, dn, fdict->token); if (new_dn == NULL) { - prt_error("Error: Cannot open word file \"%s\".\n", dict->token); + prt_error("Error: Cannot open word file \"%s\".\n", fdict->token); goto syntax_error; /* not a syntax error, but need to free dn */ } dn = new_dn; } - else if (0 == strcmp(dict->token, "#include")) + else if (0 == strcmp(fdict->token, "#include")) { bool rc; char* instr; @@ -986,13 +988,13 @@ static bool read_entry(Dictionary dict) if (!link_advance(dict)) goto syntax_error; - skip_slash = ('/' == dict->token[0]) ? 1 : 0; - dict_name = strdupa(dict->token); + skip_slash = ('/' == fdict->token[0]) ? 1 : 0; + dict_name = strdupa(fdict->token); save_name = dict->name; - save_is_special = dict->is_special; - save_input = dict->input; - save_pin = dict->pin; - save_already_got_it = dict->already_got_it; + save_is_special = fdict->is_special; + save_input = fdict->input; + save_pin = fdict->pin; + save_already_got_it = fdict->already_got_it; save_line_number = dict->line_number; /* OK, token contains the filename to read ... 
*/ @@ -1004,8 +1006,8 @@ static bool read_entry(Dictionary dict) dict->name, dict->line_number-1, dict_name); goto syntax_error; } - dict->input = instr; - dict->pin = dict->input; + fdict->input = instr; + fdict->pin = fdict->input; /* The line number and dict name are used for error reporting */ dict->line_number = 1; @@ -1014,12 +1016,12 @@ static bool read_entry(Dictionary dict) /* Now read the thing in. */ rc = read_dictionary(dict); - dict->name = save_name; - dict->is_special = save_is_special; - dict->input = save_input; - dict->pin = save_pin; - dict->already_got_it = save_already_got_it; - dict->line_number = save_line_number; + dict->name = save_name; + fdict->is_special = save_is_special; + fdict->input = save_input; + fdict->pin = save_pin; + fdict->already_got_it = save_already_got_it; + dict->line_number = save_line_number; free_file_contents(instr); if (!rc) goto syntax_error; @@ -1028,21 +1030,21 @@ static bool read_entry(Dictionary dict) if (!link_advance(dict)) goto syntax_error; /* If a semicolon follows the include, that's OK... ignore it. */ - if (';' == dict->token[0]) + if (';' == fdict->token[0]) { if (!link_advance(dict)) goto syntax_error; } return true; } - else if (0 == strcmp(dict->token, "#define")) + else if (0 == strcmp(fdict->token, "#define")) { if (!link_advance(dict)) goto syntax_error; - const char *name = strdupa(dict->token); + const char *name = strdupa(fdict->token); /* Get the value. */ if (!link_advance(dict)) goto syntax_error; - add_define(dict, name, dict->token); + add_define(dict, name, fdict->token); if (!link_advance(dict)) goto syntax_error; if (!is_equal(dict, ';')) @@ -1062,8 +1064,8 @@ static bool read_entry(Dictionary dict) /* Note: The following patches a dot in regexes appearing in * the affix file... It is corrected later. 
*/ - patch_subscript(dict->token); - dn->string = string_set_add(dict->token, dict->string_set); + patch_subscript(fdict->token); + dn->string = string_set_add(fdict->token, dict->string_set); } /* Advance to next entry, unless error */ @@ -1076,11 +1078,9 @@ static bool read_entry(Dictionary dict) goto syntax_error; } - n = make_expression(dict); + Exp * n = make_expression(dict); if (n == NULL) - { goto syntax_error; - } if (!is_equal(dict, ';')) { @@ -1096,7 +1096,7 @@ static bool read_entry(Dictionary dict) /* At this point, dn points to a list of Dict_nodes connected by * their left pointers. These are to be inserted into the dictionary. */ - i = 0; + int i = 0; for (dnx = dn; dnx != NULL; dnx = dnx->left) { dnx->exp = n; @@ -1124,19 +1124,17 @@ static bool read_entry(Dictionary dict) bool read_dictionary(Dictionary dict) { if (!link_advance(dict)) - { return false; - } + /* The last character of a dictionary is NUL. * Note: At the end of reading a dictionary, dict->pin points to one * character after the input. Referring its [-1] element is safe even if * the dict file size is 0. */ - while ('\0' != dict->pin[-1]) + File_Dictionary fdict = dict->file_data; + while ('\0' != fdict->pin[-1]) { if (!read_entry(dict)) - { return false; - } } if (dict->category != NULL) From c971d8345a853bcf100206932ec99027b2cb77a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Vep=C5=A1tas?= Date: Wed, 27 Mar 2024 18:57:32 -0500 Subject: [PATCH 2/7] The file cursor can be made completely private. It could be passed as an argument, as needed. That would be a better design, actually. 
--- link-grammar/dict-file/dict-file.h | 27 -------------------------- link-grammar/dict-file/dictionary.c | 10 +--------- link-grammar/dict-file/read-dict.c | 30 +++++++++++++++++++++-------- link-grammar/dict-file/read-dict.h | 2 +- 4 files changed, 24 insertions(+), 45 deletions(-) delete mode 100644 link-grammar/dict-file/dict-file.h diff --git a/link-grammar/dict-file/dict-file.h b/link-grammar/dict-file/dict-file.h deleted file mode 100644 index 664af711a2..0000000000 --- a/link-grammar/dict-file/dict-file.h +++ /dev/null @@ -1,27 +0,0 @@ -/*************************************************************************/ -/* Copyright (c) 2004 */ -/* Daniel Sleator, David Temperley, and John Lafferty */ -/* Copyright (c) 2013, 2014 Linas Vepstas */ -/* All rights reserved */ -/* */ -/* Use of the link grammar parsing system is subject to the terms of the */ -/* license set forth in the LICENSE file included with this software. */ -/* This license allows free redistribution and use in source and binary */ -/* forms, with or without modification, subject to certain conditions. */ -/* */ -/*************************************************************************/ - -/* Private state, used only while file-backed dictionaries are being - * read. Invalid soon after read is complete. 
- */ -struct File_Dictionary_s -{ - const char * input; - const char * pin; - bool recursive_error; - bool is_special; - int already_got_it; /* For char, but needs to hold EOF */ - char token[MAX_TOKEN_LENGTH]; -}; - -typedef struct File_Dictionary_s * File_Dictionary; diff --git a/link-grammar/dict-file/dictionary.c b/link-grammar/dict-file/dictionary.c index 920e2ec9fc..9feedee2ad 100644 --- a/link-grammar/dict-file/dictionary.c +++ b/link-grammar/dict-file/dictionary.c @@ -24,7 +24,6 @@ #include "dict-common/file-utils.h" #include "dict-common/idiom.h" #include "dict-common/regex-morph.h" -#include "dict-file/dict-file.h" #include "dict-ram/dict-ram.h" #include "post-process/pp_knowledge.h" #include "read-dialect.h" @@ -229,15 +228,8 @@ dictionary_six_str(const char * lang, /*align*/false, /*exact*/false); /* Read dictionary from the input string. */ - File_Dictionary fdict = malloc(sizeof(struct File_Dictionary_s)); - dict->file_data = fdict; - - fdict->input = input; - fdict->pin = fdict->input; - if (!read_dictionary(dict)) - { + if (!read_dictionary(dict, input)) goto failure; - } if (NULL == affix_name) { diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index 0301202b12..ecc3bd7496 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -21,7 +21,6 @@ #include "dict-common/dict-utils.h" // patch_subscript #include "dict-common/file-utils.h" #include "dict-common/idiom.h" -#include "dict-file/dict-file.h" #include "dict-ram/dict-ram.h" #include "error.h" #include "externs.h" @@ -122,6 +121,17 @@ automatically generated (currently only for idioms). 
*/ +struct File_Dictionary_s +{ + const char * input; + const char * pin; + bool recursive_error; + bool is_special; + int already_got_it; /* For char, but needs to hold EOF */ + char token[MAX_TOKEN_LENGTH]; +}; +typedef struct File_Dictionary_s * File_Dictionary; + static bool link_advance(Dictionary dict); void dict_error2(Dictionary dict, const char * s, const char *s2) @@ -1006,15 +1016,12 @@ static bool read_entry(Dictionary dict) dict->name, dict->line_number-1, dict_name); goto syntax_error; } - fdict->input = instr; - fdict->pin = fdict->input; - /* The line number and dict name are used for error reporting */ - dict->line_number = 1; + /* The dict name are used for error reporting */ dict->name = dict_name; /* Now read the thing in. */ - rc = read_dictionary(dict); + rc = read_dictionary(dict, instr); dict->name = save_name; fdict->is_special = save_is_special; @@ -1121,8 +1128,16 @@ static bool read_entry(Dictionary dict) return false; } -bool read_dictionary(Dictionary dict) +bool read_dictionary(Dictionary dict, const char * input) { + File_Dictionary fdict = dict->file_data; + if (NULL == fdict) + fdict = malloc(sizeof(struct File_Dictionary_s)); + + fdict->input = input; + fdict->pin = fdict->input; + dict->line_number = 1; + if (!link_advance(dict)) return false; @@ -1130,7 +1145,6 @@ bool read_dictionary(Dictionary dict) * Note: At the end of reading a dictionary, dict->pin points to one * character after the input. Referring its [-1] element is safe even if * the dict file size is 0. 
*/ - File_Dictionary fdict = dict->file_data; while ('\0' != fdict->pin[-1]) { if (!read_entry(dict)) diff --git a/link-grammar/dict-file/read-dict.h b/link-grammar/dict-file/read-dict.h index 82a79efad8..8299e9390a 100644 --- a/link-grammar/dict-file/read-dict.h +++ b/link-grammar/dict-file/read-dict.h @@ -19,7 +19,7 @@ Dictionary dictionary_six(const char *lang, const char *dict_name, const char *pp_name, const char *cons_name, const char *affix_name, const char *regex_name); Dictionary dictionary_create_from_file(const char *lang); -bool read_dictionary(Dictionary dict); +bool read_dictionary(Dictionary dict, const char *input); void dict_error2(Dictionary dict, const char *s, const char *s2); void insert_list(Dictionary dict, Dict_node * p, int l); From 7654d6c1a9b3fce0990962a36b4f986a0025d1b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Vep=C5=A1tas?= Date: Wed, 27 Mar 2024 19:01:09 -0500 Subject: [PATCH 3/7] Use a shorter name --- link-grammar/dict-file/read-dict.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index ecc3bd7496..a3a4210443 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -121,7 +121,7 @@ automatically generated (currently only for idioms). */ -struct File_Dictionary_s +struct File_Dict_s { const char * input; const char * pin; @@ -130,7 +130,7 @@ struct File_Dictionary_s int already_got_it; /* For char, but needs to hold EOF */ char token[MAX_TOKEN_LENGTH]; }; -typedef struct File_Dictionary_s * File_Dictionary; +typedef struct File_Dict_s * File_Dict; static bool link_advance(Dictionary dict); @@ -151,7 +151,7 @@ void dict_error2(Dictionary dict, const char * s, const char *s2) return; } - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; /* The link_advance used to print the error message can * throw more errors while printing... 
*/ @@ -207,7 +207,7 @@ static void dict_error(Dictionary dict, const char * s) static void warning(Dictionary dict, const char * s) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; prt_error("Warning: %s\n" "\tline %d, current token = \"%s\"\n", s, dict->line_number, fdict->token); @@ -222,7 +222,7 @@ static void warning(Dictionary dict, const char * s) typedef char utf8char[MAXUTFLEN]; static bool get_character(Dictionary dict, int quote_mode, utf8char uc) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; int i = 0; while (1) @@ -304,7 +304,7 @@ static bool char_is_special(char c) NO_SAN_DICT static bool link_advance(Dictionary dict) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; bool quote_mode = false; fdict->is_special = false; @@ -409,7 +409,7 @@ static bool link_advance(Dictionary dict) */ static int is_equal(Dictionary dict, char c) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; return (fdict->is_special && c == fdict->token[0] && fdict->token[1] == '\0'); @@ -471,7 +471,7 @@ static bool check_connector(Dictionary dict, const char * s) */ static Exp * make_dir_connector(Dictionary dict, int i) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; char *constring; bool multi = false; @@ -520,7 +520,7 @@ static unsigned int exptag_macro_add(Dictionary dict, const char *tag) */ static Exp * make_connector(Dictionary dict) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; Exp * n; int i = strlen(fdict->token) - 1; /* this must be +, - or $ if a connector */ @@ -620,7 +620,7 @@ static bool is_number(const char * str) */ static Exp *make_expression(Dictionary dict) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; Exp *nl = NULL; Exp *e_head = NULL; Exp *e_tail = NULL; /* last part of the expression */ @@ -956,7 +956,7 @@ void 
insert_list(Dictionary dict, Dict_node * p, int l) */ static bool read_entry(Dictionary dict) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; Dict_node *dnx, *dn = NULL; while (!is_equal(dict, ':')) @@ -1130,9 +1130,9 @@ static bool read_entry(Dictionary dict) bool read_dictionary(Dictionary dict, const char * input) { - File_Dictionary fdict = dict->file_data; + File_Dict fdict = dict->file_data; if (NULL == fdict) - fdict = malloc(sizeof(struct File_Dictionary_s)); + fdict = malloc(sizeof(struct File_Dict_s)); fdict->input = input; fdict->pin = fdict->input; From 4976529c2444de443d5c41b40a6051a230870235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Vep=C5=A1tas?= Date: Wed, 27 Mar 2024 19:57:06 -0500 Subject: [PATCH 4/7] Convert file cursor into a real cursor --- link-grammar/dict-common/dict-common.h | 1 - link-grammar/dict-file/read-dict.c | 215 ++++++++++++------------- link-grammar/dict-file/read-dict.h | 1 - link-grammar/dict-ram/dict-ram.c | 17 +- 4 files changed, 112 insertions(+), 122 deletions(-) diff --git a/link-grammar/dict-common/dict-common.h b/link-grammar/dict-common/dict-common.h index 502a064df6..4f4bba989a 100644 --- a/link-grammar/dict-common/dict-common.h +++ b/link-grammar/dict-common/dict-common.h @@ -177,7 +177,6 @@ struct Dictionary_s #ifdef HAVE_ATOMESE void * as_server; /* cogserver connection */ #endif - void * file_data; /* file-back dictionary */ void (*insert_entry)(Dictionary, Dict_node *, int); diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index a3a4210443..d6a70619e6 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -123,6 +123,7 @@ struct File_Dict_s { + Dictionary dict; const char * input; const char * pin; bool recursive_error; @@ -132,32 +133,16 @@ struct File_Dict_s }; typedef struct File_Dict_s * File_Dict; -static bool link_advance(Dictionary dict); +static bool link_advance(File_Dict); -void 
dict_error2(Dictionary dict, const char * s, const char *s2) +static void dict_error2(File_Dict fdict, const char * s, const char *s2) { - if (IS_DYNAMIC_DICT(dict)) - { - if (s2) - { - prt_error("Error: While handling storage-node\n \"%s\":\n" - "%s \"%s\"\n", dict->name, s, s2); - } - else - { - prt_error("Error: While handling storage-node\n \"%s\":\n" - "%s\n", dict->name, s); - } - return; - } - - File_Dict fdict = dict->file_data; - /* The link_advance used to print the error message can * throw more errors while printing... */ if (fdict->recursive_error) return; fdict->recursive_error = true; + Dictionary dict = fdict->dict; char token[MAX_TOKEN_LENGTH]; strcpy(token, fdict->token); bool save_is_special = fdict->is_special; @@ -174,7 +159,7 @@ void dict_error2(Dictionary dict, const char * s, const char *s2) { pos += snprintf(t, ERRBUFLEN, "\"%s\" ", fdict->token); strncat(tokens, t, ERRBUFLEN-1-pos); - if (!link_advance(dict)) break; + if (!link_advance(fdict)) break; } tokens[pos] = '\0'; @@ -200,17 +185,16 @@ void dict_error2(Dictionary dict, const char * s, const char *s2) fdict->recursive_error = false; } -static void dict_error(Dictionary dict, const char * s) +static void dict_error(File_Dict fdict, const char * s) { - dict_error2(dict, s, NULL); + dict_error2(fdict, s, NULL); } -static void warning(Dictionary dict, const char * s) +static void warning(File_Dict fdict, const char * s) { - File_Dict fdict = dict->file_data; prt_error("Warning: %s\n" "\tline %d, current token = \"%s\"\n", - s, dict->line_number, fdict->token); + s, fdict->dict->line_number, fdict->token); } /** @@ -220,9 +204,9 @@ static void warning(Dictionary dict, const char * s) */ #define MAXUTFLEN 7 typedef char utf8char[MAXUTFLEN]; -static bool get_character(Dictionary dict, int quote_mode, utf8char uc) +static bool get_character(File_Dict fdict, int quote_mode, utf8char uc) { - File_Dict fdict = dict->file_data; + Dictionary dict = fdict->dict; int i = 0; while (1) @@ -265,7 
+249,7 @@ static bool get_character(Dictionary dict, int quote_mode, utf8char uc) uc[i] = c; i++; } - dict_error(dict, "UTF8 char is too long."); + dict_error(fdict, "UTF8 char is too long."); return false; } uc[0] = 0x0; @@ -302,11 +286,9 @@ static bool char_is_special(char c) * Return 1 if a character was read, else return 0 (and print a warning). */ NO_SAN_DICT -static bool link_advance(Dictionary dict) +static bool link_advance(File_Dict fdict) { - File_Dict fdict = dict->file_data; bool quote_mode = false; - fdict->is_special = false; if (fdict->already_got_it != '\0') @@ -325,7 +307,7 @@ static bool link_advance(Dictionary dict) utf8char c; do { - bool ok = get_character(dict, false, c); + bool ok = get_character(fdict, false, c); if (!ok) return false; } while (lg_isspace((unsigned char)c[0])); @@ -334,7 +316,7 @@ static bool link_advance(Dictionary dict) for (;;) { if (i > MAX_TOKEN_LENGTH-3) { - dict_error(dict, "Token too long."); + dict_error(fdict, "Token too long."); return false; } @@ -361,7 +343,7 @@ static bool link_advance(Dictionary dict) if (c[0] == '\0') { - dict_error(dict, "EOF while reading quoted token."); + dict_error(fdict, "EOF while reading quoted token."); return false; } @@ -398,7 +380,7 @@ static bool link_advance(Dictionary dict) while (c[nr]) {fdict->token[i] = c[nr]; i++; nr++; } } } - bool ok = get_character(dict, quote_mode, c); + bool ok = get_character(fdict, quote_mode, c); if (!ok) return false; } /* unreachable */ @@ -407,9 +389,8 @@ static bool link_advance(Dictionary dict) /** * Returns true if this token is a special token and it is equal to c */ -static int is_equal(Dictionary dict, char c) +static int is_equal(File_Dict fdict, char c) { - File_Dict fdict = dict->file_data; return (fdict->is_special && c == fdict->token[0] && fdict->token[1] == '\0'); @@ -420,30 +401,30 @@ static int is_equal(Dictionary dict, char c) * Return true if the connector is valid, else return false, * and print an appropriate warning message. 
*/ -static bool check_connector(Dictionary dict, const char * s) +static bool check_connector(File_Dict fdict, const char * s) { int i; i = strlen(s); if (i < 1) { - dict_error(dict, "Expecting a connector."); + dict_error(fdict, "Expecting a connector."); return false; } i = s[i-1]; /* the last character of the token */ if ((i != '+') && (i != '-') && (i != ANY_DIR)) { - dict_error(dict, "A connector must end in a \"+\", \"-\" or \"$\"."); + dict_error(fdict, "A connector must end in a \"+\", \"-\" or \"$\"."); return false; } if (*s == '@') s++; if (('h' == *s) || ('d' == *s)) s++; if (!is_connector_name_char(*s)) { - dict_error2(dict, "Invalid character in connector " + dict_error2(fdict, "Invalid character in connector " "(connectors must start with an uppercase letter " "after an optional \"h\" or \"d\"):", (char[]){*s, '\0'}); return false; } if (*s == '_') { - dict_error(dict, "Invalid character in connector " + dict_error(fdict, "Invalid character in connector " "(an initial \"_\" is reserved for internal use)."); return false; } @@ -452,7 +433,7 @@ static bool check_connector(Dictionary dict, const char * s) do { s++; } while (is_connector_name_char(*s)); while (s[1]) { if (!is_connector_subscript_char(*s) && (*s != WILD_TYPE)) { - dict_error2(dict, "Invalid character in connector subscript " + dict_error2(fdict, "Invalid character in connector subscript " "(only lowercase letters, digits, and \"*\" are allowed):", (char[]){*s, '\0'}); return false; @@ -469,9 +450,8 @@ static bool check_connector(Dictionary dict, const char * s) * * Assumes the current token is the connector. */ -static Exp * make_dir_connector(Dictionary dict, int i) +static Exp * make_dir_connector(Dictionary dict, File_Dict fdict, int i) { - File_Dict fdict = dict->file_data; char *constring; bool multi = false; @@ -518,9 +498,9 @@ static unsigned int exptag_macro_add(Dictionary dict, const char *tag) * * Assumes the current token is a connector or dictionary word. 
*/ -static Exp * make_connector(Dictionary dict) +static Exp * make_connector(File_Dict fdict) { - File_Dict fdict = dict->file_data; + Dictionary dict = fdict->dict; Exp * n; int i = strlen(fdict->token) - 1; /* this must be +, - or $ if a connector */ @@ -533,7 +513,7 @@ static Exp * make_connector(Dictionary dict) Dict_node * dn = strict_lookup_list(dict, fdict->token); if (dn == NULL) { - dict_error2(dict, "Perhaps missing + or - in a connector.\n" + dict_error2(fdict, "Perhaps missing + or - in a connector.\n" "Or perhaps you forgot the subscript on a word.\n" "Or perhaps the word is used before it is defined:", fdict->token); @@ -542,7 +522,7 @@ static Exp * make_connector(Dictionary dict) if (dn->right != NULL) { dict_node_free_list(dn); - dict_error2(dict, "Referencing a duplicate word:", fdict->token); + dict_error2(fdict, "Referencing a duplicate word:", fdict->token); /* Note: A word which becomes duplicate latter evades this check. */ return NULL; } @@ -557,14 +537,14 @@ static Exp * make_connector(Dictionary dict) else { /* If we are here, token is a connector */ - if (!check_connector(dict, fdict->token)) + if (!check_connector(fdict, fdict->token)) { return NULL; } if ((fdict->token[i] == '+') || (fdict->token[i] == '-')) { /* A simple, unidirectional connector. Just make that. */ - n = make_dir_connector(dict, i); + n = make_dir_connector(dict, fdict, i); if (NULL == n) return NULL; } else if (fdict->token[i] == ANY_DIR) @@ -573,22 +553,22 @@ static Exp * make_connector(Dictionary dict) /* If we are here, then it's a bi-directional connector. * Make both a + and a - version, and or them together. 
*/ fdict->token[i] = '+'; - plu = make_dir_connector(dict, i); + plu = make_dir_connector(dict, fdict, i); if (NULL == plu) return NULL; fdict->token[i] = '-'; - min = make_dir_connector(dict, i); + min = make_dir_connector(dict, fdict, i); if (NULL == min) return NULL; n = make_or_node(dict->Exp_pool, plu, min); } else { - dict_error(dict, "Unknown connector direction type."); + dict_error(fdict, "Unknown connector direction type."); return NULL; } } - if (!link_advance(dict)) + if (!link_advance(fdict)) { free(n); return NULL; @@ -618,9 +598,10 @@ static bool is_number(const char * str) * with the current token. At the end, the token is the first one not * part of this expression. */ -static Exp *make_expression(Dictionary dict) +static Exp *make_expression(File_Dict fdict) { - File_Dict fdict = dict->file_data; + Dictionary dict = fdict->dict; + Exp *nl = NULL; Exp *e_head = NULL; Exp *e_tail = NULL; /* last part of the expression */ @@ -628,55 +609,55 @@ static Exp *make_expression(Dictionary dict) while (true) { - if (is_equal(dict, '(')) + if (is_equal(fdict, '(')) { - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } - nl = make_expression(dict); + nl = make_expression(fdict); if (nl == NULL) { return NULL; } - if (!is_equal(dict, ')')) { - dict_error(dict, "Expecting a \")\"."); + if (!is_equal(fdict, ')')) { + dict_error(fdict, "Expecting a \")\"."); return NULL; } - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } } - else if (is_equal(dict, '{')) + else if (is_equal(fdict, '{')) { - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } - nl = make_expression(dict); + nl = make_expression(fdict); if (nl == NULL) { return NULL; } - if (!is_equal(dict, '}')) { - dict_error(dict, "Expecting a \"}\"."); + if (!is_equal(fdict, '}')) { + dict_error(fdict, "Expecting a \"}\"."); return NULL; } - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } nl = 
make_optional_node(dict->Exp_pool, nl); } - else if (is_equal(dict, '[')) + else if (is_equal(fdict, '[')) { - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } - nl = make_expression(dict); + nl = make_expression(fdict); if (nl == NULL) { return NULL; } - if (!is_equal(dict, ']')) { - dict_error(dict, "Expecting a \"]\"."); + if (!is_equal(fdict, ']')) { + dict_error(fdict, "Expecting a \"]\"."); return NULL; } - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } @@ -696,10 +677,10 @@ static Exp *make_expression(Dictionary dict) } else { - warning(dict, "Invalid cost (using 1.0)\n"); + warning(fdict, "Invalid cost (using 1.0)\n"); nl->cost += 1.0F; } - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } } @@ -711,7 +692,7 @@ static Exp *make_expression(Dictionary dict) if (bad != NULL) { char badchar[] = { *bad, '\0' }; - dict_error2(dict, "Invalid character in dialect tag name:", + dict_error2(fdict, "Invalid character in dialect tag name:", badchar); return NULL; } @@ -721,7 +702,7 @@ static Exp *make_expression(Dictionary dict) } nl->tag_id = exptag_dialect_add(dict, fdict->token); nl->tag_type = Exptag_dialect; - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } } @@ -732,19 +713,19 @@ static Exp *make_expression(Dictionary dict) } else if (!fdict->is_special) { - nl = make_connector(dict); + nl = make_connector(fdict); if (nl == NULL) { return NULL; } } - else if (is_equal(dict, ')') || is_equal(dict, ']')) + else if (is_equal(fdict, ')') || is_equal(fdict, ']')) { /* allows "()" or "[]" */ nl = make_zeroary_node(dict->Exp_pool); } else { - dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected."); + dict_error(fdict, "Connector, \"(\", \"[\", or \"{\" expected."); return NULL; } @@ -777,17 +758,17 @@ static Exp *make_expression(Dictionary dict) Exp_type op; /* Non-commuting AND */ - if (is_equal(dict, '&') || (strcmp(fdict->token, "and") == 0)) + if (is_equal(fdict, 
'&') || (strcmp(fdict->token, "and") == 0)) { op = AND_type; } /* Commuting OR */ - else if (is_equal(dict, '|') || (strcmp(fdict->token, "or") == 0)) + else if (is_equal(fdict, '|') || (strcmp(fdict->token, "or") == 0)) { op = OR_type; } /* Commuting AND */ - else if (is_equal(dict, SYM_AND) || (strcmp(fdict->token, "sym") == 0)) + else if (is_equal(fdict, SYM_AND) || (strcmp(fdict->token, "sym") == 0)) { /* Part 1/2 of SYM_AND processing */ op = AND_type; /* allow mixing with ordinary ands at the same level */ @@ -810,12 +791,12 @@ static Exp *make_expression(Dictionary dict) { if (e_head->type != op) { - dict_error(dict, "\"and\" and \"or\" at the same level in an expression."); + dict_error(fdict, "\"and\" and \"or\" at the same level in an expression."); return NULL; } } - if (!link_advance(dict)) { + if (!link_advance(fdict)) { return NULL; } @@ -954,16 +935,15 @@ void insert_list(Dictionary dict, Dict_node * p, int l) * and is terminated by a semi-colon. * Add these words to the dictionary. 
*/ -static bool read_entry(Dictionary dict) +static bool read_entry(File_Dict fdict) { - File_Dict fdict = dict->file_data; Dict_node *dnx, *dn = NULL; - while (!is_equal(dict, ':')) + while (!is_equal(fdict, ':')) { if (fdict->is_special) { - dict_error(dict, "I expected a word but didn\'t get it."); + dict_error(fdict, "I expected a word but didn\'t get it."); goto syntax_error; } @@ -974,7 +954,7 @@ static bool read_entry(Dictionary dict) if ((fdict->token[0] == '/') && (fdict->token[1] != '.') && (get_affix_regex_cg(fdict->token) < 0)) { - Dict_node *new_dn = read_word_file(dict, dn, fdict->token); + Dict_node *new_dn = read_word_file(fdict->dict, dn, fdict->token); if (new_dn == NULL) { prt_error("Error: Cannot open word file \"%s\".\n", fdict->token); @@ -996,11 +976,13 @@ static bool read_entry(Dictionary dict) int save_line_number; size_t skip_slash; - if (!link_advance(dict)) goto syntax_error; + if (!link_advance(fdict)) goto syntax_error; + + Dictionary dict = fdict->dict; skip_slash = ('/' == fdict->token[0]) ? 1 : 0; dict_name = strdupa(fdict->token); - save_name = dict->name; + save_name = fdict->dict->name; save_is_special = fdict->is_special; save_input = fdict->input; save_pin = fdict->pin; @@ -1034,29 +1016,29 @@ static bool read_entry(Dictionary dict) if (!rc) goto syntax_error; /* when we return, point to the next entry */ - if (!link_advance(dict)) goto syntax_error; + if (!link_advance(fdict)) goto syntax_error; /* If a semicolon follows the include, that's OK... ignore it. */ if (';' == fdict->token[0]) { - if (!link_advance(dict)) goto syntax_error; + if (!link_advance(fdict)) goto syntax_error; } return true; } else if (0 == strcmp(fdict->token, "#define")) { - if (!link_advance(dict)) goto syntax_error; + if (!link_advance(fdict)) goto syntax_error; const char *name = strdupa(fdict->token); /* Get the value. 
*/ - if (!link_advance(dict)) goto syntax_error; - add_define(dict, name, fdict->token); + if (!link_advance(fdict)) goto syntax_error; + add_define(fdict->dict, name, fdict->token); - if (!link_advance(dict)) goto syntax_error; - if (!is_equal(dict, ';')) + if (!link_advance(fdict)) goto syntax_error; + if (!is_equal(fdict, ';')) { - dict_error(dict, "Expecting \";\" at the end of #define."); + dict_error(fdict, "Expecting \";\" at the end of #define."); goto syntax_error; } } @@ -1072,32 +1054,32 @@ static bool read_entry(Dictionary dict) /* Note: The following patches a dot in regexes appearing in * the affix file... It is corrected later. */ patch_subscript(fdict->token); - dn->string = string_set_add(fdict->token, dict->string_set); + dn->string = string_set_add(fdict->token, fdict->dict->string_set); } /* Advance to next entry, unless error */ - if (!link_advance(dict)) goto syntax_error; + if (!link_advance(fdict)) goto syntax_error; } /* pass the : */ - if (!link_advance(dict)) + if (!link_advance(fdict)) { goto syntax_error; } - Exp * n = make_expression(dict); + Exp * n = make_expression(fdict); if (n == NULL) goto syntax_error; - if (!is_equal(dict, ';')) + if (!is_equal(fdict, ';')) { - dict_error(dict, "Expecting \";\" at the end of an entry."); + dict_error(fdict, "Expecting \";\" at the end of an entry."); goto syntax_error; } if (dn == NULL) { - dict_error(dict, "Expecting a token before \":\"."); + dict_error(fdict, "Expecting a token before \":\"."); goto syntax_error; } @@ -1109,13 +1091,15 @@ static bool read_entry(Dictionary dict) dnx->exp = n; i++; } + + Dictionary dict = fdict->dict; if (IS_GENERATION(dict)) add_category(dict, n, dn, i); dict->insert_entry(dict, dn, i); /* pass the ; */ - if (!link_advance(dict)) + if (!link_advance(fdict)) { /* Avoid freeing dn, since it is already inserted into the dict. 
*/ return false; @@ -1130,15 +1114,14 @@ static bool read_entry(Dictionary dict) bool read_dictionary(Dictionary dict, const char * input) { - File_Dict fdict = dict->file_data; - if (NULL == fdict) - fdict = malloc(sizeof(struct File_Dict_s)); + File_Dict fdict = alloca(sizeof(struct File_Dict_s)); + dict->line_number = 1; + fdict->dict = dict; fdict->input = input; fdict->pin = fdict->input; - dict->line_number = 1; - if (!link_advance(dict)) + if (!link_advance(fdict)) return false; /* The last character of a dictionary is NUL. @@ -1147,7 +1130,7 @@ bool read_dictionary(Dictionary dict, const char * input) * the dict file size is 0. */ while ('\0' != fdict->pin[-1]) { - if (!read_entry(dict)) + if (!read_entry(fdict)) return false; } diff --git a/link-grammar/dict-file/read-dict.h b/link-grammar/dict-file/read-dict.h index 8299e9390a..03e98aed1d 100644 --- a/link-grammar/dict-file/read-dict.h +++ b/link-grammar/dict-file/read-dict.h @@ -20,7 +20,6 @@ Dictionary dictionary_six(const char *lang, const char *dict_name, const char *affix_name, const char *regex_name); Dictionary dictionary_create_from_file(const char *lang); bool read_dictionary(Dictionary dict, const char *input); -void dict_error2(Dictionary dict, const char *s, const char *s2); void insert_list(Dictionary dict, Dict_node * p, int l); void free_insert_list(Dict_node *ilist); diff --git a/link-grammar/dict-ram/dict-ram.c b/link-grammar/dict-ram/dict-ram.c index eed2d0aa96..48e0bd5087 100644 --- a/link-grammar/dict-ram/dict-ram.c +++ b/link-grammar/dict-ram/dict-ram.c @@ -17,7 +17,6 @@ #include "dict-common/dict-internals.h" #include "dict-common/dict-utils.h" // patch_subscript #include "dict-common/idiom.h" -#include "dict-file/read-dict.h" // dict_error2 #include "string-id.h" #include "string-set.h" @@ -591,7 +590,6 @@ static int dup_word_status(Dictionary dict, const Dict_node *newnode) static bool dup_word_error(Dictionary dict, Dict_node *newnode) { - if (dup_word_status(dict, newnode) == 1) 
return false; if (dict->allow_duplicate_words == 0) @@ -606,8 +604,19 @@ static bool dup_word_error(Dictionary dict, Dict_node *newnode) if (dup_word_status(dict, newnode) == 1) return false; } - dict_error2(dict, "Ignoring word which has been multiply defined:", - newnode->string); + + if (IS_DYNAMIC_DICT(dict)) + { + prt_error("Error: While handling storage-node\n \"%s\":\n" + "Ignoring word which has been multiply defined: \"%s\"\n", + dict->name, newnode->string); + } else { + // File-backed dictionary sets a line number, for debugging. + prt_error("Error: While parsing dictionary \"%s\":\n" + "Ignoring word which has been multiply defined: \"%s\"\n" + "\t Line %d\n", + dict->name, newnode->string, dict->line_number); + } /* Too late to skip insertion - insert it with a null expression. */ newnode->exp = make_zeroary_node(dict->Exp_pool); From bd414a00657db42d054c9ccc02df90060674be39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Vep=C5=A1tas?= Date: Wed, 27 Mar 2024 19:59:25 -0500 Subject: [PATCH 5/7] Change name of file cursor data struct If it is a cursor, should just call it a cursor --- link-grammar/dict-file/read-dict.c | 370 ++++++++++++++--------------- 1 file changed, 185 insertions(+), 185 deletions(-) diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index d6a70619e6..f9e73980ef 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -121,7 +121,7 @@ automatically generated (currently only for idioms). 
*/ -struct File_Dict_s +struct FileCursor_s { Dictionary dict; const char * input; @@ -131,43 +131,43 @@ struct File_Dict_s int already_got_it; /* For char, but needs to hold EOF */ char token[MAX_TOKEN_LENGTH]; }; -typedef struct File_Dict_s * File_Dict; +typedef struct FileCursor_s * FileCursor; -static bool link_advance(File_Dict); +static bool link_advance(FileCursor); -static void dict_error2(File_Dict fdict, const char * s, const char *s2) +static void dict_error2(FileCursor fcurs, const char * s, const char *s2) { /* The link_advance used to print the error message can * throw more errors while printing... */ - if (fdict->recursive_error) return; - fdict->recursive_error = true; + if (fcurs->recursive_error) return; + fcurs->recursive_error = true; - Dictionary dict = fdict->dict; + Dictionary dict = fcurs->dict; char token[MAX_TOKEN_LENGTH]; - strcpy(token, fdict->token); - bool save_is_special = fdict->is_special; - const char * save_input = fdict->input; - const char * save_pin = fdict->pin; - int save_already_got_it = fdict->already_got_it; + strcpy(token, fcurs->token); + bool save_is_special = fcurs->is_special; + const char * save_input = fcurs->input; + const char * save_pin = fcurs->pin; + int save_already_got_it = fcurs->already_got_it; int save_line_number = dict->line_number; #define ERRBUFLEN 1024 char tokens[ERRBUFLEN], t[ERRBUFLEN]; int pos = 1; tokens[0] = '\0'; - for (int i=0; i<5 && fdict->token[0] != '\0'; i++) + for (int i=0; i<5 && fcurs->token[0] != '\0'; i++) { - pos += snprintf(t, ERRBUFLEN, "\"%s\" ", fdict->token); + pos += snprintf(t, ERRBUFLEN, "\"%s\" ", fcurs->token); strncat(tokens, t, ERRBUFLEN-1-pos); - if (!link_advance(fdict)) break; + if (!link_advance(fcurs)) break; } tokens[pos] = '\0'; - strcpy(fdict->token, token); - fdict->is_special = save_is_special; - fdict->input = save_input; - fdict->pin = save_pin; - fdict->already_got_it = save_already_got_it; + strcpy(fcurs->token, token); + fcurs->is_special = 
save_is_special; + fcurs->input = save_input; + fcurs->pin = save_pin; + fcurs->already_got_it = save_already_got_it; dict->line_number = save_line_number; if (s2) @@ -182,19 +182,19 @@ static void dict_error2(File_Dict fdict, const char * s, const char *s2) "%s\n\t Line %d, next tokens: %s\n", dict->name, s, dict->line_number, tokens); } - fdict->recursive_error = false; + fcurs->recursive_error = false; } -static void dict_error(File_Dict fdict, const char * s) +static void dict_error(FileCursor fcurs, const char * s) { - dict_error2(fdict, s, NULL); + dict_error2(fcurs, s, NULL); } -static void warning(File_Dict fdict, const char * s) +static void warning(FileCursor fcurs, const char * s) { prt_error("Warning: %s\n" "\tline %d, current token = \"%s\"\n", - s, fdict->dict->line_number, fdict->token); + s, fcurs->dict->line_number, fcurs->token); } /** @@ -204,19 +204,19 @@ static void warning(File_Dict fdict, const char * s) */ #define MAXUTFLEN 7 typedef char utf8char[MAXUTFLEN]; -static bool get_character(File_Dict fdict, int quote_mode, utf8char uc) +static bool get_character(FileCursor fcurs, int quote_mode, utf8char uc) { - Dictionary dict = fdict->dict; + Dictionary dict = fcurs->dict; int i = 0; while (1) { - char c = *(fdict->pin++); + char c = *(fcurs->pin++); /* Skip over all comments */ if ((c == '%') && (!quote_mode)) { - while ((c != 0x0) && (c != '\n')) c = *(fdict->pin++); + while ((c != 0x0) && (c != '\n')) c = *(fcurs->pin++); if (c == 0x0) break; dict->line_number++; continue; @@ -238,18 +238,18 @@ static bool get_character(File_Dict fdict, int quote_mode, utf8char uc) i = 1; while (i < MAXUTFLEN-1) { - c = *(fdict->pin++); + c = *(fcurs->pin++); /* If we're onto the next char, we're done. 
*/ if (((c & 0x80) == 0x0) || ((c & 0xc0) == 0xc0)) { - fdict->pin--; + fcurs->pin--; uc[i] = 0x0; return true; } uc[i] = c; i++; } - dict_error(fdict, "UTF8 char is too long."); + dict_error(fcurs, "UTF8 char is too long."); return false; } uc[0] = 0x0; @@ -286,28 +286,28 @@ static bool char_is_special(char c) * Return 1 if a character was read, else return 0 (and print a warning). */ NO_SAN_DICT -static bool link_advance(File_Dict fdict) +static bool link_advance(FileCursor fcurs) { bool quote_mode = false; - fdict->is_special = false; + fcurs->is_special = false; - if (fdict->already_got_it != '\0') + if (fcurs->already_got_it != '\0') { - fdict->is_special = char_is_special(fdict->already_got_it); - if (fdict->already_got_it == EOF) { - fdict->token[0] = '\0'; + fcurs->is_special = char_is_special(fcurs->already_got_it); + if (fcurs->already_got_it == EOF) { + fcurs->token[0] = '\0'; } else { - fdict->token[0] = (char)fdict->already_got_it; /* specials are one byte */ - fdict->token[1] = '\0'; + fcurs->token[0] = (char)fcurs->already_got_it; /* specials are one byte */ + fcurs->token[1] = '\0'; } - fdict->already_got_it = '\0'; + fcurs->already_got_it = '\0'; return true; } utf8char c; do { - bool ok = get_character(fdict, false, c); + bool ok = get_character(fcurs, false, c); if (!ok) return false; } while (lg_isspace((unsigned char)c[0])); @@ -316,7 +316,7 @@ static bool link_advance(File_Dict fdict) for (;;) { if (i > MAX_TOKEN_LENGTH-3) { - dict_error(fdict, "Token too long."); + dict_error(fcurs, "Token too long."); return false; } @@ -334,53 +334,53 @@ static bool link_advance(File_Dict fdict) if (quote_mode) { if (c[0] == '"' && /* Check the next character too, to allow " in words */ - (*fdict->pin == ':' || *fdict->pin == ';' || - lg_isspace((unsigned char)*fdict->pin))) { + (*fcurs->pin == ':' || *fcurs->pin == ';' || + lg_isspace((unsigned char)*fcurs->pin))) { - fdict->token[i] = '\0'; + fcurs->token[i] = '\0'; return true; } if (c[0] == '\0') { - 
dict_error(fdict, "EOF while reading quoted token."); + dict_error(fcurs, "EOF while reading quoted token."); return false; } /* Copy all of the UTF8 bytes. */ int nr = 0; - while (c[nr]) {fdict->token[i] = c[nr]; i++; nr++; } + while (c[nr]) {fcurs->token[i] = c[nr]; i++; nr++; } } else { if ('\0' == c[1] && char_is_special(c[0])) { if (i == 0) { - fdict->token[0] = c[0]; /* special toks are one char always */ - fdict->token[1] = '\0'; - fdict->is_special = true; + fcurs->token[0] = c[0]; /* special toks are one char always */ + fcurs->token[1] = '\0'; + fcurs->is_special = true; return true; } - fdict->token[i] = '\0'; - fdict->already_got_it = c[0]; + fcurs->token[i] = '\0'; + fcurs->already_got_it = c[0]; return true; } if (c[0] == 0x0) { - if (i != 0) fdict->already_got_it = '\0'; - fdict->token[0] = '\0'; + if (i != 0) fcurs->already_got_it = '\0'; + fcurs->token[0] = '\0'; return true; } if (lg_isspace((unsigned char)c[0])) { - fdict->token[i] = '\0'; + fcurs->token[i] = '\0'; return true; } if (c[0] == '\"') { quote_mode = true; } else { int nr = 0; - while (c[nr]) {fdict->token[i] = c[nr]; i++; nr++; } + while (c[nr]) {fcurs->token[i] = c[nr]; i++; nr++; } } } - bool ok = get_character(fdict, quote_mode, c); + bool ok = get_character(fcurs, quote_mode, c); if (!ok) return false; } /* unreachable */ @@ -389,11 +389,11 @@ static bool link_advance(File_Dict fdict) /** * Returns true if this token is a special token and it is equal to c */ -static int is_equal(File_Dict fdict, char c) +static int is_equal(FileCursor fcurs, char c) { - return (fdict->is_special && - c == fdict->token[0] && - fdict->token[1] == '\0'); + return (fcurs->is_special && + c == fcurs->token[0] && + fcurs->token[1] == '\0'); } /** @@ -401,30 +401,30 @@ static int is_equal(File_Dict fdict, char c) * Return true if the connector is valid, else return false, * and print an appropriate warning message. 
*/ -static bool check_connector(File_Dict fdict, const char * s) +static bool check_connector(FileCursor fcurs, const char * s) { int i; i = strlen(s); if (i < 1) { - dict_error(fdict, "Expecting a connector."); + dict_error(fcurs, "Expecting a connector."); return false; } i = s[i-1]; /* the last character of the token */ if ((i != '+') && (i != '-') && (i != ANY_DIR)) { - dict_error(fdict, "A connector must end in a \"+\", \"-\" or \"$\"."); + dict_error(fcurs, "A connector must end in a \"+\", \"-\" or \"$\"."); return false; } if (*s == '@') s++; if (('h' == *s) || ('d' == *s)) s++; if (!is_connector_name_char(*s)) { - dict_error2(fdict, "Invalid character in connector " + dict_error2(fcurs, "Invalid character in connector " "(connectors must start with an uppercase letter " "after an optional \"h\" or \"d\"):", (char[]){*s, '\0'}); return false; } if (*s == '_') { - dict_error(fdict, "Invalid character in connector " + dict_error(fcurs, "Invalid character in connector " "(an initial \"_\" is reserved for internal use)."); return false; } @@ -433,7 +433,7 @@ static bool check_connector(File_Dict fdict, const char * s) do { s++; } while (is_connector_name_char(*s)); while (s[1]) { if (!is_connector_subscript_char(*s) && (*s != WILD_TYPE)) { - dict_error2(fdict, "Invalid character in connector subscript " + dict_error2(fcurs, "Invalid character in connector subscript " "(only lowercase letters, digits, and \"*\" are allowed):", (char[]){*s, '\0'}); return false; @@ -450,20 +450,20 @@ static bool check_connector(File_Dict fdict, const char * s) * * Assumes the current token is the connector. 
*/ -static Exp * make_dir_connector(Dictionary dict, File_Dict fdict, int i) +static Exp * make_dir_connector(Dictionary dict, FileCursor fcurs, int i) { char *constring; bool multi = false; - char dir = fdict->token[i]; - fdict->token[i] = '\0'; /* get rid of the + or - */ - if (fdict->token[0] == '@') + char dir = fcurs->token[i]; + fcurs->token[i] = '\0'; /* get rid of the + or - */ + if (fcurs->token[0] == '@') { - constring = fdict->token+1; + constring = fcurs->token+1; multi = true; } else - constring = fdict->token; + constring = fcurs->token; return make_connector_node(dict, dict->Exp_pool, constring, dir, multi); @@ -498,31 +498,31 @@ static unsigned int exptag_macro_add(Dictionary dict, const char *tag) * * Assumes the current token is a connector or dictionary word. */ -static Exp * make_connector(File_Dict fdict) +static Exp * make_connector(FileCursor fcurs) { - Dictionary dict = fdict->dict; + Dictionary dict = fcurs->dict; Exp * n; - int i = strlen(fdict->token) - 1; /* this must be +, - or $ if a connector */ - if ((fdict->token[i] != '+') && - (fdict->token[i] != '-') && - (fdict->token[i] != ANY_DIR)) + int i = strlen(fcurs->token) - 1; /* this must be +, - or $ if a connector */ + if ((fcurs->token[i] != '+') && + (fcurs->token[i] != '-') && + (fcurs->token[i] != ANY_DIR)) { /* If we are here, token is a word */ - patch_subscript(fdict->token); - Dict_node * dn = strict_lookup_list(dict, fdict->token); + patch_subscript(fcurs->token); + Dict_node * dn = strict_lookup_list(dict, fcurs->token); if (dn == NULL) { - dict_error2(fdict, "Perhaps missing + or - in a connector.\n" + dict_error2(fcurs, "Perhaps missing + or - in a connector.\n" "Or perhaps you forgot the subscript on a word.\n" "Or perhaps the word is used before it is defined:", - fdict->token); + fcurs->token); return NULL; } if (dn->right != NULL) { dict_node_free_list(dn); - dict_error2(fdict, "Referencing a duplicate word:", fdict->token); + dict_error2(fcurs, "Referencing a 
duplicate word:", fcurs->token); /* Note: A word which becomes duplicate latter evades this check. */ return NULL; } @@ -537,38 +537,38 @@ static Exp * make_connector(File_Dict fdict) else { /* If we are here, token is a connector */ - if (!check_connector(fdict, fdict->token)) + if (!check_connector(fcurs, fcurs->token)) { return NULL; } - if ((fdict->token[i] == '+') || (fdict->token[i] == '-')) + if ((fcurs->token[i] == '+') || (fcurs->token[i] == '-')) { /* A simple, unidirectional connector. Just make that. */ - n = make_dir_connector(dict, fdict, i); + n = make_dir_connector(dict, fcurs, i); if (NULL == n) return NULL; } - else if (fdict->token[i] == ANY_DIR) + else if (fcurs->token[i] == ANY_DIR) { Exp *plu, *min; /* If we are here, then it's a bi-directional connector. * Make both a + and a - version, and or them together. */ - fdict->token[i] = '+'; - plu = make_dir_connector(dict, fdict, i); + fcurs->token[i] = '+'; + plu = make_dir_connector(dict, fcurs, i); if (NULL == plu) return NULL; - fdict->token[i] = '-'; - min = make_dir_connector(dict, fdict, i); + fcurs->token[i] = '-'; + min = make_dir_connector(dict, fcurs, i); if (NULL == min) return NULL; n = make_or_node(dict->Exp_pool, plu, min); } else { - dict_error(fdict, "Unknown connector direction type."); + dict_error(fcurs, "Unknown connector direction type."); return NULL; } } - if (!link_advance(fdict)) + if (!link_advance(fcurs)) { free(n); return NULL; @@ -598,9 +598,9 @@ static bool is_number(const char * str) * with the current token. At the end, the token is the first one not * part of this expression. 
*/ -static Exp *make_expression(File_Dict fdict) +static Exp *make_expression(FileCursor fcurs) { - Dictionary dict = fdict->dict; + Dictionary dict = fcurs->dict; Exp *nl = NULL; Exp *e_head = NULL; @@ -609,55 +609,55 @@ static Exp *make_expression(File_Dict fdict) while (true) { - if (is_equal(fdict, '(')) + if (is_equal(fcurs, '(')) { - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } - nl = make_expression(fdict); + nl = make_expression(fcurs); if (nl == NULL) { return NULL; } - if (!is_equal(fdict, ')')) { - dict_error(fdict, "Expecting a \")\"."); + if (!is_equal(fcurs, ')')) { + dict_error(fcurs, "Expecting a \")\"."); return NULL; } - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } } - else if (is_equal(fdict, '{')) + else if (is_equal(fcurs, '{')) { - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } - nl = make_expression(fdict); + nl = make_expression(fcurs); if (nl == NULL) { return NULL; } - if (!is_equal(fdict, '}')) { - dict_error(fdict, "Expecting a \"}\"."); + if (!is_equal(fcurs, '}')) { + dict_error(fcurs, "Expecting a \"}\"."); return NULL; } - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } nl = make_optional_node(dict->Exp_pool, nl); } - else if (is_equal(fdict, '[')) + else if (is_equal(fcurs, '[')) { - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } - nl = make_expression(fdict); + nl = make_expression(fcurs); if (nl == NULL) { return NULL; } - if (!is_equal(fdict, ']')) { - dict_error(fdict, "Expecting a \"]\"."); + if (!is_equal(fcurs, ']')) { + dict_error(fcurs, "Expecting a \"]\"."); return NULL; } - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } @@ -667,32 +667,32 @@ static Exp *make_expression(File_Dict fdict) * is used as an expression tag. Else, the cost of a * square bracket is 1.0. 
*/ - if (is_number(fdict->token)) + if (is_number(fcurs->token)) { float cost; - if (strtofC(fdict->token, &cost)) + if (strtofC(fcurs->token, &cost)) { nl->cost += cost; } else { - warning(fdict, "Invalid cost (using 1.0)\n"); + warning(fcurs, "Invalid cost (using 1.0)\n"); nl->cost += 1.0F; } - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } } - else if ((strcmp(fdict->token, "or") != 0) && - (strcmp(fdict->token, "and") != 0) && - isalpha((unsigned char)fdict->token[0])) + else if ((strcmp(fcurs->token, "or") != 0) && + (strcmp(fcurs->token, "and") != 0) && + isalpha((unsigned char)fcurs->token[0])) { - const char *bad = valid_dialect_name(fdict->token); + const char *bad = valid_dialect_name(fcurs->token); if (bad != NULL) { char badchar[] = { *bad, '\0' }; - dict_error2(fdict, "Invalid character in dialect tag name:", + dict_error2(fcurs, "Invalid character in dialect tag name:", badchar); return NULL; } @@ -700,9 +700,9 @@ static Exp *make_expression(File_Dict fdict) { nl = make_unary_node(dict->Exp_pool, nl); } - nl->tag_id = exptag_dialect_add(dict, fdict->token); + nl->tag_id = exptag_dialect_add(dict, fcurs->token); nl->tag_type = Exptag_dialect; - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } } @@ -711,21 +711,21 @@ static Exp *make_expression(File_Dict fdict) nl->cost += 1.0F; } } - else if (!fdict->is_special) + else if (!fcurs->is_special) { - nl = make_connector(fdict); + nl = make_connector(fcurs); if (nl == NULL) { return NULL; } } - else if (is_equal(fdict, ')') || is_equal(fdict, ']')) + else if (is_equal(fcurs, ')') || is_equal(fcurs, ']')) { /* allows "()" or "[]" */ nl = make_zeroary_node(dict->Exp_pool); } else { - dict_error(fdict, "Connector, \"(\", \"[\", or \"{\" expected."); + dict_error(fcurs, "Connector, \"(\", \"[\", or \"{\" expected."); return NULL; } @@ -758,17 +758,17 @@ static Exp *make_expression(File_Dict fdict) Exp_type op; /* Non-commuting AND */ - if (is_equal(fdict, '&') 
|| (strcmp(fdict->token, "and") == 0)) + if (is_equal(fcurs, '&') || (strcmp(fcurs->token, "and") == 0)) { op = AND_type; } /* Commuting OR */ - else if (is_equal(fdict, '|') || (strcmp(fdict->token, "or") == 0)) + else if (is_equal(fcurs, '|') || (strcmp(fcurs->token, "or") == 0)) { op = OR_type; } /* Commuting AND */ - else if (is_equal(fdict, SYM_AND) || (strcmp(fdict->token, "sym") == 0)) + else if (is_equal(fcurs, SYM_AND) || (strcmp(fcurs->token, "sym") == 0)) { /* Part 1/2 of SYM_AND processing */ op = AND_type; /* allow mixing with ordinary ands at the same level */ @@ -791,12 +791,12 @@ static Exp *make_expression(File_Dict fdict) { if (e_head->type != op) { - dict_error(fdict, "\"and\" and \"or\" at the same level in an expression."); + dict_error(fcurs, "\"and\" and \"or\" at the same level in an expression."); return NULL; } } - if (!link_advance(fdict)) { + if (!link_advance(fcurs)) { return NULL; } @@ -935,15 +935,15 @@ void insert_list(Dictionary dict, Dict_node * p, int l) * and is terminated by a semi-colon. * Add these words to the dictionary. */ -static bool read_entry(File_Dict fdict) +static bool read_entry(FileCursor fcurs) { Dict_node *dnx, *dn = NULL; - while (!is_equal(fdict, ':')) + while (!is_equal(fcurs, ':')) { - if (fdict->is_special) + if (fcurs->is_special) { - dict_error(fdict, "I expected a word but didn\'t get it."); + dict_error(fcurs, "I expected a word but didn\'t get it."); goto syntax_error; } @@ -951,19 +951,19 @@ static bool read_entry(File_Dict fdict) /* However, be careful to reject "/.v" which is the division symbol * used in equations (.v means verb-like). Also reject an affix regex * specification (may appear only in the affix file). 
*/ - if ((fdict->token[0] == '/') && - (fdict->token[1] != '.') && (get_affix_regex_cg(fdict->token) < 0)) + if ((fcurs->token[0] == '/') && + (fcurs->token[1] != '.') && (get_affix_regex_cg(fcurs->token) < 0)) { - Dict_node *new_dn = read_word_file(fdict->dict, dn, fdict->token); + Dict_node *new_dn = read_word_file(fcurs->dict, dn, fcurs->token); if (new_dn == NULL) { - prt_error("Error: Cannot open word file \"%s\".\n", fdict->token); + prt_error("Error: Cannot open word file \"%s\".\n", fcurs->token); goto syntax_error; /* not a syntax error, but need to free dn */ } dn = new_dn; } - else if (0 == strcmp(fdict->token, "#include")) + else if (0 == strcmp(fcurs->token, "#include")) { bool rc; char* instr; @@ -976,17 +976,17 @@ static bool read_entry(File_Dict fdict) int save_line_number; size_t skip_slash; - if (!link_advance(fdict)) goto syntax_error; + if (!link_advance(fcurs)) goto syntax_error; - Dictionary dict = fdict->dict; + Dictionary dict = fcurs->dict; - skip_slash = ('/' == fdict->token[0]) ? 1 : 0; - dict_name = strdupa(fdict->token); - save_name = fdict->dict->name; - save_is_special = fdict->is_special; - save_input = fdict->input; - save_pin = fdict->pin; - save_already_got_it = fdict->already_got_it; + skip_slash = ('/' == fcurs->token[0]) ? 1 : 0; + dict_name = strdupa(fcurs->token); + save_name = fcurs->dict->name; + save_is_special = fcurs->is_special; + save_input = fcurs->input; + save_pin = fcurs->pin; + save_already_got_it = fcurs->already_got_it; save_line_number = dict->line_number; /* OK, token contains the filename to read ... 
*/ @@ -1006,39 +1006,39 @@ static bool read_entry(File_Dict fdict) rc = read_dictionary(dict, instr); dict->name = save_name; - fdict->is_special = save_is_special; - fdict->input = save_input; - fdict->pin = save_pin; - fdict->already_got_it = save_already_got_it; + fcurs->is_special = save_is_special; + fcurs->input = save_input; + fcurs->pin = save_pin; + fcurs->already_got_it = save_already_got_it; dict->line_number = save_line_number; free_file_contents(instr); if (!rc) goto syntax_error; /* when we return, point to the next entry */ - if (!link_advance(fdict)) goto syntax_error; + if (!link_advance(fcurs)) goto syntax_error; /* If a semicolon follows the include, that's OK... ignore it. */ - if (';' == fdict->token[0]) + if (';' == fcurs->token[0]) { - if (!link_advance(fdict)) goto syntax_error; + if (!link_advance(fcurs)) goto syntax_error; } return true; } - else if (0 == strcmp(fdict->token, "#define")) + else if (0 == strcmp(fcurs->token, "#define")) { - if (!link_advance(fdict)) goto syntax_error; - const char *name = strdupa(fdict->token); + if (!link_advance(fcurs)) goto syntax_error; + const char *name = strdupa(fcurs->token); /* Get the value. */ - if (!link_advance(fdict)) goto syntax_error; - add_define(fdict->dict, name, fdict->token); + if (!link_advance(fcurs)) goto syntax_error; + add_define(fcurs->dict, name, fcurs->token); - if (!link_advance(fdict)) goto syntax_error; - if (!is_equal(fdict, ';')) + if (!link_advance(fcurs)) goto syntax_error; + if (!is_equal(fcurs, ';')) { - dict_error(fdict, "Expecting \";\" at the end of #define."); + dict_error(fcurs, "Expecting \";\" at the end of #define."); goto syntax_error; } } @@ -1053,33 +1053,33 @@ static bool read_entry(File_Dict fdict) /* Note: The following patches a dot in regexes appearing in * the affix file... It is corrected later. 
*/ - patch_subscript(fdict->token); - dn->string = string_set_add(fdict->token, fdict->dict->string_set); + patch_subscript(fcurs->token); + dn->string = string_set_add(fcurs->token, fcurs->dict->string_set); } /* Advance to next entry, unless error */ - if (!link_advance(fdict)) goto syntax_error; + if (!link_advance(fcurs)) goto syntax_error; } /* pass the : */ - if (!link_advance(fdict)) + if (!link_advance(fcurs)) { goto syntax_error; } - Exp * n = make_expression(fdict); + Exp * n = make_expression(fcurs); if (n == NULL) goto syntax_error; - if (!is_equal(fdict, ';')) + if (!is_equal(fcurs, ';')) { - dict_error(fdict, "Expecting \";\" at the end of an entry."); + dict_error(fcurs, "Expecting \";\" at the end of an entry."); goto syntax_error; } if (dn == NULL) { - dict_error(fdict, "Expecting a token before \":\"."); + dict_error(fcurs, "Expecting a token before \":\"."); goto syntax_error; } @@ -1092,14 +1092,14 @@ static bool read_entry(File_Dict fdict) i++; } - Dictionary dict = fdict->dict; + Dictionary dict = fcurs->dict; if (IS_GENERATION(dict)) add_category(dict, n, dn, i); dict->insert_entry(dict, dn, i); /* pass the ; */ - if (!link_advance(fdict)) + if (!link_advance(fcurs)) { /* Avoid freeing dn, since it is already inserted into the dict. */ return false; @@ -1114,23 +1114,23 @@ static bool read_entry(File_Dict fdict) bool read_dictionary(Dictionary dict, const char * input) { - File_Dict fdict = alloca(sizeof(struct File_Dict_s)); + FileCursor fcurs = alloca(sizeof(struct FileCursor_s)); dict->line_number = 1; - fdict->dict = dict; - fdict->input = input; - fdict->pin = fdict->input; + fcurs->dict = dict; + fcurs->input = input; + fcurs->pin = fcurs->input; - if (!link_advance(fdict)) + if (!link_advance(fcurs)) return false; /* The last character of a dictionary is NUL. * Note: At the end of reading a dictionary, dict->pin points to one * character after the input. Referring its [-1] element is safe even if * the dict file size is 0. 
*/ - while ('\0' != fdict->pin[-1]) + while ('\0' != fcurs->pin[-1]) { - if (!read_entry(fdict)) + if (!read_entry(fcurs)) return false; } From 87d7e9f8a44b39e7de55685038b6d3323574be67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Vep=C5=A1tas?= Date: Wed, 27 Mar 2024 20:24:52 -0500 Subject: [PATCH 6/7] Create a new file-cursor for each file to be read. --- link-grammar/dict-file/read-dict.c | 60 ++++++++++++------------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index f9e73980ef..06202bbac1 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -965,51 +965,31 @@ static bool read_entry(FileCursor fcurs) } else if (0 == strcmp(fcurs->token, "#include")) { - bool rc; - char* instr; - char* dict_name; - const char * save_name; - bool save_is_special; - const char * save_input; - const char * save_pin; - int save_already_got_it; - int save_line_number; - size_t skip_slash; - if (!link_advance(fcurs)) goto syntax_error; - Dictionary dict = fcurs->dict; - - skip_slash = ('/' == fcurs->token[0]) ? 1 : 0; - dict_name = strdupa(fcurs->token); - save_name = fcurs->dict->name; - save_is_special = fcurs->is_special; - save_input = fcurs->input; - save_pin = fcurs->pin; - save_already_got_it = fcurs->already_got_it; - save_line_number = dict->line_number; - /* OK, token contains the filename to read ... */ - instr = get_file_contents(dict_name + skip_slash); + char* dict_name = strdupa(fcurs->token); + size_t skip_slash = ('/' == fcurs->token[0]) ? 
1 : 0; + char* instr = get_file_contents(dict_name + skip_slash); if (NULL == instr) { + Dictionary dict = fcurs->dict; prt_error("Error: While parsing dictionary \"%s\":\n" "\t Line %d: Could not open subdictionary \"%s\"\n", dict->name, dict->line_number-1, dict_name); goto syntax_error; } - /* The dict name are used for error reporting */ + /* The dict name and line-number are used for error reporting */ + Dictionary dict = fcurs->dict; + const char * save_name = dict->name; + int save_line_number = dict->line_number; dict->name = dict_name; /* Now read the thing in. */ - rc = read_dictionary(dict, instr); + bool rc = read_dictionary(dict, instr); dict->name = save_name; - fcurs->is_special = save_is_special; - fcurs->input = save_input; - fcurs->pin = save_pin; - fcurs->already_got_it = save_already_got_it; dict->line_number = save_line_number; free_file_contents(instr); @@ -1112,15 +1092,8 @@ static bool read_entry(FileCursor fcurs) return false; } -bool read_dictionary(Dictionary dict, const char * input) +static bool fread_dict(FileCursor fcurs) { - FileCursor fcurs = alloca(sizeof(struct FileCursor_s)); - - dict->line_number = 1; - fcurs->dict = dict; - fcurs->input = input; - fcurs->pin = fcurs->input; - if (!link_advance(fcurs)) return false; @@ -1134,6 +1107,7 @@ bool read_dictionary(Dictionary dict, const char * input) return false; } + Dictionary dict = fcurs->dict; if (dict->category != NULL) { /* Create a category element which contains 0 words, to signify the @@ -1150,4 +1124,16 @@ bool read_dictionary(Dictionary dict, const char * input) return true; } +bool read_dictionary(Dictionary dict, const char * input) +{ + FileCursor fcurs = alloca(sizeof(struct FileCursor_s)); + + dict->line_number = 1; + fcurs->dict = dict; + fcurs->input = input; + fcurs->pin = fcurs->input; + + return fread_dict(fcurs); +} + /* ======================================================================= */ From c8655bd75e2afe1516e04d0b38518c396f581cd6 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Linas=20Vep=C5=A1tas?= Date: Wed, 27 Mar 2024 20:50:08 -0500 Subject: [PATCH 7/7] Initialize all fields appropriately --- link-grammar/dict-file/read-dict.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index 06202bbac1..5a315f33b6 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -1132,6 +1132,10 @@ bool read_dictionary(Dictionary dict, const char * input) fcurs->dict = dict; fcurs->input = input; fcurs->pin = fcurs->input; + fcurs->recursive_error = false; + fcurs->is_special = false; + fcurs->already_got_it = false; + fcurs->token[0] = 0; return fread_dict(fcurs); }