Skip to content

Commit

Permalink
Major refactoring, separated statements (#27)
Browse files Browse the repository at this point in the history
* Major refactoring, separated statements

* new statements : text, table, image

* tag version v1.3.0 in readme

* new unit tests
  • Loading branch information
philippe-levan authored Nov 16, 2023
1 parent 844b9a0 commit b38c07f
Show file tree
Hide file tree
Showing 18 changed files with 982 additions and 854 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,11 @@ For trying to fix these problems, you can try:
- use variable formatting instead of the one of the character before

## Versions :

- v1.3.0, 2023-11-16 :
- major refactoring. No evolution for the user.
- new unit tests on tables and images
- no BC Break (theoretically)
- v1.2.8, 2023-09-01 :
- fix bug in TextShape var replacement
- v1.2.7, 2023-08-30 :
Expand Down
282 changes: 282 additions & 0 deletions lotemplate/Statement/ForStatement.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
import re
from sorcery import dict_of
import lotemplate.errors as errors
from typing import Union
from com.sun.star.lang import XComponent
from lotemplate.Statement.IfStatement import IfStatement

class ForStatement:
"""
Class representing an for statement in a template libreoffice
"""
start_regex = r"""
\[\s*for\s* # [if detection
\$ # var start with $
(\w+ # basic var name
(\( # parsing of fonction var
((?: # ?: is for non capturing group : the regex inside the parenthesis must be matched but does not create the capturing group
\\.|. # everything that is escaped or every simple char
)*?) # the ? before the ) in order to be not greedy (stop on the first unescaped ")"
\))
?) # the ? before the ) in order to be not greedy (won't go until the last ")")
\s*\]
"""
# remove comments, spaces and newlines
start_regex = re.sub(r'#.*', '', start_regex).replace("\n", "").replace("\t", "").replace(" ", "")
# print(start_regex)
# \[\s*for\s*\$(\w+(\(((?:\\.|.)*?)\))?)\s*\]
foritem_regex = r"""
\[\s*foritem\s* # [foritem detection
(
\w+ # simple var of type abc
(?:\.\w+)* # composite var name like abc.def
)
(?:\s+(escape_html|raw))? # option pour escaper le contenu de la variable
\s*\]
"""
foritem_regex = re.sub(r'#.*', '', foritem_regex).replace("\n", "").replace("\t", "").replace(" ", "")
# print(foritem_regex)
# \[\s*foritem\s*((\w+)(?:\.\w+)*)\s*\]

# [forindex] is replaced by the counter of the loop. A string starting at 0
forindex_regex = r'\[\s*forindex\s*\]'

end_regex = r'\[\s*endfor\s*\]'

def __init__(self, for_string):
self.for_string = for_string
match = re.search(self.start_regex, for_string, re.IGNORECASE)
self.variable_name = match.group(1)

def scan_for(doc: XComponent) -> dict:
"""
scan for statement. return list of vars.
We verify that
- there is and endfor for each for statement
- vars sent are lists
"""

def scan_single_for(local_x_found) -> str:
"""
scan for a single for statement
"""
for_statement = ForStatement(local_x_found.getString())
position_in_text = len(for_statement.for_string)
text = local_x_found.getText()
cursor = text.createTextCursorByRange(local_x_found)
while True:
if not cursor.goRight(1, True):
raise errors.TemplateError(
'no_endfor_found',
f"The statement {for_statement.for_string} has no endfor",
dict_of(for_statement.for_string)
)
position_in_text = position_in_text + 1
selected_string = cursor.String
match = re.search(ForStatement.end_regex, selected_string, re.IGNORECASE)
if match is not None:
break
return for_statement.variable_name

search = doc.createSearchDescriptor()
search.SearchString = ForStatement.start_regex
search.SearchRegularExpression = True
search.SearchCaseSensitive = False
x_found = doc.findFirst(search)

for_vars = {}
while x_found is not None:
variable_name = scan_single_for(x_found)
for_vars[variable_name] = {'type': 'array', 'value': []}
x_found = doc.findNext(x_found.End, search)
return for_vars


def for_replace(doc: XComponent, local_variables: dict[str, dict[str, Union[str, list[str]]]]) -> None:
"""
Parse statements like [for $myvar]...[endfor]
We replace the for and endfor statements with the text between them, for each value in the variable.
:param doc: the document to fill
:param local_variables: the variables
:return: None
"""

def compute_for(doc, local_x_found):
"""
for one single for statement, cut and paste the content of the for
:param local_x_found:
:return:
"""

def escape_html(s):
"""
Replace special characters "&", "<" and ">" to HTML-safe sequences.
If the optional flag quote is true, the quotation mark character (")
is also translated.
"""
s = s.replace("&", "&amp;") # Must be done first!
s = s.replace("<", "&lt;")
s = s.replace(">", "&gt;")
s = s.replace('"', "&quot;")
return s

def getForitemValue(match_var_name, match_escaping, foritem_var):
"""
we are in a for loop on values of an array.
a regex just detected [foritem match_var_name match_escaping]
mathch escaping, can exist or not. If it exists, it can be raw or escape_html
:param match_var_name:
:param match_escaping:
:return:
"""
# get separate the var_name by "." to get the value in the dict
var_name_hierarchy = match_var_name.split('.')
# get the variable value from the hierarchy
value = foritem_var
for var_name in var_name_hierarchy:
value = value[var_name]

# get the escaping
escaping = 'raw'
if match_escaping is not None:
escaping = match_escaping
# escape the value
if escaping == 'escape_html':
value = escape_html(value)
return str(value)

def manage_if_inside_for(content, local_variables, foritem_var, forindex):
"""
manage the if statements inside a for loop.
It uses a recursive approach : we search an if statement inside the content string. Then we create a
subcontent with the content after the if statement and call the function again on this substring.
With this recursive call, the process begins with the last if statement and update the content for
the previous call.
:param content:
:param local_variables:
:param foritem_var:
:return:
"""
# look for the first if statement
match_if = re.search(IfStatement.start_regex, content, re.IGNORECASE)
if match_if is None:
return content

# get content before the if statement and call recursively the function
subcontent = content[match_if.end():]
subcontent = manage_if_inside_for(subcontent, local_variables, foritem_var, forindex)

# update the content with the result of the recursive call
content = content[:match_if.end()] + subcontent

# get the if statement values
if_statement = IfStatement(match_if.group(0))

# precontent is the content before the if statement
precontent = content[:match_if.start()]
postcontent = content[match_if.start():]
match_if_postcontent = re.search(IfStatement.start_regex, postcontent, re.IGNORECASE)

# if no endif => throw error
match_endif_postcontent = re.search(IfStatement.end_regex, postcontent, re.IGNORECASE)
if match_endif_postcontent is None:
raise errors.TemplateError(
'no_endif_found',
f"The statement {if_statement.if_string} has no endif",
dict_of(if_statement.if_string)
)

# get value associated to the if statement
value = None
if if_statement.variable_name is not None:
computed_variable_name = re.sub(ForStatement.forindex_regex, forindex, if_statement.variable_name)
value = local_variables[computed_variable_name]['value']
if if_statement.foritem_name is not None:
value = getForitemValue(if_statement.foritem_name, if_statement.foritem_escaping, foritem_var)
if if_statement.forindex is not None:
value = forindex
if_result = if_statement.get_if_result(value)

if if_result:
postcontent = postcontent[:match_endif_postcontent.start()] + postcontent[match_endif_postcontent.end():]
postcontent = postcontent[:match_if_postcontent.start()] + postcontent[match_if_postcontent.end():]
if not if_result:
postcontent = postcontent[:match_if_postcontent.start()] + postcontent[match_endif_postcontent.end():]

return precontent + postcontent


for_statement = ForStatement(local_x_found.getString())
foritem_vars = local_variables[for_statement.variable_name]['value']

# remove the for statement from the odt
text = local_x_found.getText()
cursor = text.createTextCursorByRange(local_x_found)
cursor.String = ''

# select content between for and endfor (including endfor)
while True:
if not cursor.goRight(1, True):
raise errors.TemplateError(
'no_endfor_found',
f"The statement {for_statement.for_string} has no endif",
dict_of(for_statement.for_string)
)
selected_string = cursor.String
match = re.search(ForStatement.end_regex, selected_string, re.IGNORECASE)
if match is not None:
break

# get the content between the for and the endfor
cursor.goLeft(len(match.group(0)), True)
template = cursor.String
# remove the content from the file
cursor.String = ''
# remove the endfor at the end
cursor.goRight(len(match.group(0)), True)
cursor.String = ''

# loop on values of the variable
counter = 0
for foritem_var in foritem_vars:
content = template
# parse if inside for before managing foritem replacements
content = manage_if_inside_for(content, local_variables, foritem_var, str(counter))

# search [forindex] and remplace by my counter
content = re.sub(ForStatement.forindex_regex, str(counter), content, flags=re.IGNORECASE)

# replace inside the selected content selected
for match in re.finditer(ForStatement.foritem_regex, content, re.IGNORECASE):
getForitemValue(match.group(1), match.group(2), foritem_var)
# replace the variable by its value
content = content.replace(
match.group(0),
getForitemValue(match.group(1), match.group(2), foritem_var)
)

# paste the content
text.insertString(cursor, content, False)

# counter increment
counter += 1

# main of for_replace
search = doc.createSearchDescriptor()
search.SearchString = ForStatement.start_regex
search.SearchRegularExpression = True
search.SearchCaseSensitive = False
x_found = doc.findFirst(search)
while x_found is not None:
compute_for(doc, x_found)
x_found = doc.findNext(x_found.End, search)

Loading

0 comments on commit b38c07f

Please sign in to comment.