Skip to content

Commit

Permalink
Merge pull request #15 from dnv-opensource/14-parsing-single-character-variables-fails
Browse files Browse the repository at this point in the history

14 parsing single character variables fails
  • Loading branch information
ClaasRostock authored Jan 9, 2024
2 parents fb266f2 + a6ea48f commit b903cbd
Show file tree
Hide file tree
Showing 7 changed files with 167 additions and 8 deletions.
6 changes: 3 additions & 3 deletions src/dictIO/dictReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def _eval_expressions(dict: CppDict):
# Collect all references contained in expressions
_references: List[str] = []
for item in dict.expressions.values():
_refs: List[str] = re.findall(r"\$\w[\w\[\]]+", item["expression"])
_refs: List[str] = re.findall(r"\$\w[\w\[\]]*", item["expression"])
_references.extend(_refs)
# Resolve references
variables: Dict[str, Any] = dict.variables
Expand All @@ -233,7 +233,7 @@ def _eval_expressions(dict: CppDict):
for key, item in expressions_copy.items():
placeholder: str = item["name"]
expression: str = item["expression"]
_refs: List[str] = re.findall(r"\$\w[\w\[\]]+", expression)
_refs: List[str] = re.findall(r"\$\w[\w\[\]]*", expression)
for ref in _refs:
if ref in references_resolved:
expression = re.sub(
Expand Down Expand Up @@ -265,7 +265,7 @@ def _eval_expressions(dict: CppDict):
# At the end of each iteration, re-resolve all references based on the now updated variables table of dict
_references = []
for item in dict.expressions.values():
_refs = re.findall(r"\$\w[\w\[\]]+", item["expression"])
_refs = re.findall(r"\$\w[\w\[\]]*", item["expression"])
_references.extend(_refs)
variables = dict.variables
references = {ref: __class__._resolve_reference(ref, variables) for ref in _references}
Expand Down
2 changes: 1 addition & 1 deletion src/dictIO/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def format_string(self, arg: str) -> str:
the formatted string
"""
if re.search(r"[$]", arg):
if re.search(r"^\$\w[\w\[\]]+$", arg): # reference
if re.search(r"^\$\w[\w\[\]]*$", arg): # reference
return self.format_reference_string(arg)
else: # expression
return self.format_expression_string(arg)
Expand Down
6 changes: 3 additions & 3 deletions src/dictIO/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ def _extract_expressions(self, dict: CppDict):
dict.expressions |= {index: {"expression": expression, "name": placeholder}}

# Step 2: Find references in .block_content (single references to key'd entries that are NOT in double quotes).
search_pattern = r"\$\w[\w\[\]]+"
search_pattern = r"\$\w[\w\[\]]*"
while match := re.search(search_pattern, dict.block_content, re.MULTILINE):
reference = match[0]
index = self.counter()
Expand Down Expand Up @@ -1346,14 +1346,14 @@ def _extract_expression(
# References are denoted using the '$' syntax familiar from shell programming.
# Any key'd entries in a dict are considered variables and can be referenced.
# If string does not contain minimum one reference, return.
search_pattern = r"\$\w[\w\[\]]+"
search_pattern = r"\$\w[\w\[\]]*"
references = re.findall(search_pattern, string, re.MULTILINE)
if not references:
return string

# Case 1: Reference
# The string contains only a single plain reference (single reference to a key'd entry in the parsed dict).
search_pattern = r"^\s*(\$\w[\w\[\]]+){1}\s*$"
search_pattern = r"^\s*(\$\w[\w\[\]]*){1}\s*$"
if match := re.search(search_pattern, string, re.MULTILINE):
reference: str = match.groups()[0]
# Replace the reference in string with a placeholder (EXPRESSION000000) and register it in parsed_dict:
Expand Down
34 changes: 33 additions & 1 deletion tests/test_dictReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def test_compare_expressions_in_dict_format_with_expressions_in_json_format():
def _get_references_in_expressions(dict: CppDict) -> List[str]:
    """Collect every ``$reference`` token found in the expressions of ``dict``.

    Iterates over the values of ``dict.expressions`` and scans each item's
    ``"expression"`` string for references: a ``$`` followed by one word
    character and then any number of word characters or square brackets
    (e.g. ``$a``, ``$a[0]``, ``$varName1[1][2]``).

    Parameters
    ----------
    dict : CppDict
        The dict whose ``expressions`` table is scanned.

    Returns
    -------
    List[str]
        All references found, in iteration order of ``dict.expressions``.
        Duplicates are kept.
    """
    # The trailing quantifier is `*`, not `+`: with `+` a reference needed at
    # least two characters after the `$`, so single-character names such as
    # `$a` were silently missed. Only this one search is performed; the
    # superseded `+` search, whose result was overwritten anyway, is gone.
    reference_pattern = r"\$\w[\w\[\]]*"
    return [
        reference
        for item in dict.expressions.values()
        for reference in re.findall(reference_pattern, item["expression"])
    ]

Expand Down Expand Up @@ -562,6 +562,38 @@ def test_reread_strings_dict():
assert reread_dict["subDict"]["string_07_sq_escdq_word"] == r"quote(\"string_07_sq_escdq_word\")"


def test_single_character_vars():
    """Single-character keys are parsed like any other key (includes disabled)."""
    # Read the fixture without resolving its #include directive.
    parsed = DictReader.read(Path("test_single_character_vars_dict"), includes=False)

    # Top-level single-character variables
    assert parsed["a"] == 1.0
    assert parsed["b"] is True

    # Single-character variables nested inside sub-dict "c"
    nested = parsed["c"]
    assert nested["d"] == 4.0
    assert nested["e"] is False


def test_single_character_references():
    """Single-character references resolve once the included dict is merged."""
    # Read the fixture with default settings.
    parsed = DictReader.read(Path("test_single_character_vars_dict"))

    # References inside sub-dict "c" must have been resolved
    nested = parsed["c"]
    assert nested["f"] == 6.0
    assert nested["g"] is True
    assert nested["h"] == 8.0
    assert nested["i"] is False

    # Top-level references to single-character variables
    assert parsed["j"] == 10.0
    assert parsed["k"] == 12.0

    # Indexed references into single-character list variables
    assert parsed["u"] == 3
    assert parsed["v"] == "Alice"
    assert parsed["w"] == "paragliding contest"
    assert parsed["ww"] == "AliceandBobfailtheparagliding contest"


class SetupHelper:
@staticmethod
def prepare_dict_until(
Expand Down
29 changes: 29 additions & 0 deletions tests/test_dicts/test_single_character_vars_dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*---------------------------------*- C++ -*----------------------------------*\
filetype dictionary; coding utf-8; version 0.1; local --; purpose --;
\*----------------------------------------------------------------------------*/

#include 'test_single_character_vars_paramDict'

a 1.0;
b true;

n (0 1 2 3);
m (Alice and Bob fail the "paragliding contest");

c
{
d 4.0;
e false;
f $x;
g $y;
h $h;
i $i;
}

j $z;
k $k;

u $n[3];
v $m[0];
w $m[5];
ww "$m[0]$m[1]$m[2]$m[3]$m[4]$m[5]";
12 changes: 12 additions & 0 deletions tests/test_dicts/test_single_character_vars_paramDict
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*---------------------------------*- C++ -*----------------------------------*\
filetype dictionary; coding utf-8; version 0.1; local --; purpose --;
\*----------------------------------------------------------------------------*/

a -1.0;
b false;
x 6.0;
y true;
h 8.0;
i false;
z 10.0;
k 12.0;
86 changes: 86 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,92 @@ def test_extract_expressions(self):
assert list(dict.expressions.values())[8]["name"][:10] == "EXPRESSION"
assert list(dict.expressions.values())[8]["expression"] == "$varName1[1][2]"

def test_extract_single_character_expressions(self):
    """_extract_expressions() handles references to single-character variables.

    Builds a text block containing plain references, double-quoted
    expressions and string literals that all use one-character variable
    names, runs string-literal extraction followed by expression extraction,
    and checks both the substituted text and the registered expressions.
    """
    # Prepare
    # Segments that are identical in the input and in the expected output.
    preamble = (
        "This is a text block\n"
        "with multiple lines. Within this text block, there are key value pairs where the value\n"
        "is a string surrounded by double quotes and containing at least one reference to a variable starting with $.\n"
        "Such strings are identified as expressions. Expressions will be evaluated by DictReader.\n"
        "The following examples will be identified as expressions:\n"
    )
    literal_heading = "The following example will NOT be identified as expression but as string literal:\n"
    epilogue = (
        "_extract_expressions() will extract expressions and substitute them with a placeholder\n"
        "in the form E X P R E S S I O N 0 0 0 0 0 0."
        "The actual evaluation of an expression is not part of _extract_expressions(). The evaluation is done within ()."
    )

    text_block_in = (
        preamble
        + " reference1 $a\n"
        + " reference2 $a[0]\n"
        + " reference3 $a[1][2]\n"
        + ' expression1 "$a"\n'
        + ' expression2 "$b + 4"\n'
        + ' expression3 "4 + $b"\n'
        + ' expression4 "$b + $c" and some blabla thereafter\n'
        + ' expression5 "$a + $b + $c" and some blabla thereafter\n'
        + ' expression6 "$b + $c + $a" and some blabla thereafter\n'
        + literal_heading
        + " string1 '$a is not an expression but a string literal because it is in single instead of double quotes'\n"
        + ' string2 "not an expression but a string literal as it does not contain a Dollar character"\n'
        + epilogue
    )
    text_block_expected = (
        preamble
        + " reference1 EXPRESSION000000\n"
        + " reference2 EXPRESSION000000\n"
        + " reference3 EXPRESSION000000\n"
        + " expression1 EXPRESSION000000\n"
        + " expression2 EXPRESSION000000\n"
        + " expression3 EXPRESSION000000\n"
        + " expression4 EXPRESSION000000 and some blabla thereafter\n"
        + " expression5 EXPRESSION000000 and some blabla thereafter\n"
        + " expression6 EXPRESSION000000 and some blabla thereafter\n"
        + literal_heading
        + " string1 STRINGLITERAL000000\n"
        + " string2 STRINGLITERAL000000\n"
        + epilogue
    )
    # Expressions in the order they are expected in the expressions table:
    # the double-quoted expressions first, then the plain references.
    expected_expressions = (
        "$a",
        "$b + 4",
        "4 + $b",
        "$b + $c",
        "$a + $b + $c",
        "$b + $c + $a",
        "$a",
        "$a[0]",
        "$a[1][2]",
    )

    cpp_dict = CppDict()
    parser = CppParser()
    cpp_dict.block_content = text_block_in
    parser._extract_string_literals(cpp_dict)

    # Execute
    parser._extract_expressions(cpp_dict)

    # Assert
    # Normalise every six-digit run so the placeholders compare equal
    # regardless of the actual counter values.
    text_block_out = re.sub(r"[0-9]{6}", "000000", cpp_dict.block_content)
    assert text_block_out == text_block_expected
    string_diff(text_block_out, text_block_expected)
    assert len(cpp_dict.expressions) == 9

    registered = list(cpp_dict.expressions.values())
    for position, expected_expression in enumerate(expected_expressions):
        entry = registered[position]
        assert entry["name"][:10] == "EXPRESSION"
        assert entry["expression"] == expected_expression

def test_separate_delimiters(self):
# sourcery skip: avoid-builtin-shadow
# sourcery skip: no-loop-in-tests
Expand Down

0 comments on commit b903cbd

Please sign in to comment.