diff --git a/README.md b/README.md index e4fdbff..496d84a 100644 --- a/README.md +++ b/README.md @@ -610,6 +610,11 @@ For trying to fix these problems, you can try: - use variable formatting instead of the one of the character before ## Versions : + +- v1.3.0, 2023-11-16 : + - major refactoring; no functional change for the user. + - new unit tests on tables and images + - no backward-compatibility (BC) break, in theory - v1.2.8, 2023-09-01 : - fix bug in TextShape var replacement - v1.2.7, 2023-08-30 : diff --git a/lotemplate/Statement/ForStatement.py b/lotemplate/Statement/ForStatement.py new file mode 100644 index 0000000..879529f --- /dev/null +++ b/lotemplate/Statement/ForStatement.py @@ -0,0 +1,282 @@ +import re +from sorcery import dict_of +import lotemplate.errors as errors +from typing import Union +from com.sun.star.lang import XComponent +from lotemplate.Statement.IfStatement import IfStatement + +class ForStatement: + """ + Class representing an for statement in a template libreoffice + """ + start_regex = r""" + \[\s*for\s* # [if detection + \$ # var start with $ + (\w+ # basic var name + (\( # parsing of fonction var + ((?: # ?: is for non capturing group : the regex inside the parenthesis must be matched but does not create the capturing group + \\.|. # everything that is escaped or every simple char + )*?) # the ? before the ) in order to be not greedy (stop on the first unescaped ")" + \)) + ?) # the ? before the ) in order to be not greedy (won't go until the last ")") + \s*\] + """ + # remove comments, spaces and newlines + start_regex = re.sub(r'#.*', '', start_regex).replace("\n", "").replace("\t", "").replace(" ", "") + # print(start_regex) + # \[\s*for\s*\$(\w+(\(((?:\\.|.)*?)\))?)\s*\] + foritem_regex = r""" + \[\s*foritem\s* # [foritem detection + ( + \w+ # simple var of type abc + (?:\.\w+)* # composite var name like abc.def + ) + (?:\s+(escape_html|raw))? 
# option pour escaper le contenu de la variable + \s*\] + """ + foritem_regex = re.sub(r'#.*', '', foritem_regex).replace("\n", "").replace("\t", "").replace(" ", "") + # print(foritem_regex) + # \[\s*foritem\s*((\w+)(?:\.\w+)*)\s*\] + + # [forindex] is replaced by the counter of the loop. A string starting at 0 + forindex_regex = r'\[\s*forindex\s*\]' + + end_regex = r'\[\s*endfor\s*\]' + + def __init__(self, for_string): + self.for_string = for_string + match = re.search(self.start_regex, for_string, re.IGNORECASE) + self.variable_name = match.group(1) + + def scan_for(doc: XComponent) -> dict: + """ + scan for statement. return list of vars. + + We verify that + - there is and endfor for each for statement + - vars sent are lists + """ + + def scan_single_for(local_x_found) -> str: + """ + scan for a single for statement + """ + for_statement = ForStatement(local_x_found.getString()) + position_in_text = len(for_statement.for_string) + text = local_x_found.getText() + cursor = text.createTextCursorByRange(local_x_found) + while True: + if not cursor.goRight(1, True): + raise errors.TemplateError( + 'no_endfor_found', + f"The statement {for_statement.for_string} has no endfor", + dict_of(for_statement.for_string) + ) + position_in_text = position_in_text + 1 + selected_string = cursor.String + match = re.search(ForStatement.end_regex, selected_string, re.IGNORECASE) + if match is not None: + break + return for_statement.variable_name + + search = doc.createSearchDescriptor() + search.SearchString = ForStatement.start_regex + search.SearchRegularExpression = True + search.SearchCaseSensitive = False + x_found = doc.findFirst(search) + + for_vars = {} + while x_found is not None: + variable_name = scan_single_for(x_found) + for_vars[variable_name] = {'type': 'array', 'value': []} + x_found = doc.findNext(x_found.End, search) + return for_vars + + + def for_replace(doc: XComponent, local_variables: dict[str, dict[str, Union[str, list[str]]]]) -> None: + """ + Parse 
statements like [for $myvar]...[endfor] + + We replace the for and endfor statements with the text between them, for each value in the variable. + + :param doc: the document to fill + :param local_variables: the variables + :return: None + """ + + def compute_for(doc, local_x_found): + """ + for one single for statement, cut and paste the content of the for + :param local_x_found: + :return: + """ + + def escape_html(s): + """ + Replace special characters "&", "<" and ">" to HTML-safe sequences. + If the optional flag quote is true, the quotation mark character (") + is also translated. + """ + s = s.replace("&", "&") # Must be done first! + s = s.replace("<", "<") + s = s.replace(">", ">") + s = s.replace('"', """) + return s + + def getForitemValue(match_var_name, match_escaping, foritem_var): + """ + we are in a for loop on values of an array. + + a regex just detected [foritem match_var_name match_escaping] + + mathch escaping, can exist or not. If it exists, it can be raw or escape_html + + :param match_var_name: + :param match_escaping: + :return: + """ + # get separate the var_name by "." to get the value in the dict + var_name_hierarchy = match_var_name.split('.') + # get the variable value from the hierarchy + value = foritem_var + for var_name in var_name_hierarchy: + value = value[var_name] + + # get the escaping + escaping = 'raw' + if match_escaping is not None: + escaping = match_escaping + # escape the value + if escaping == 'escape_html': + value = escape_html(value) + return str(value) + + def manage_if_inside_for(content, local_variables, foritem_var, forindex): + """ + manage the if statements inside a for loop. + + It uses a recursive approach : we search an if statement inside the content string. Then we create a + subcontent with the content after the if statement and call the function again on this substring. + + With this recursive call, the process begins with the last if statement and update the content for + the previous call. 
+ + :param content: + :param local_variables: + :param foritem_var: + :return: + """ + # look for the first if statement + match_if = re.search(IfStatement.start_regex, content, re.IGNORECASE) + if match_if is None: + return content + + # get content before the if statement and call recursively the function + subcontent = content[match_if.end():] + subcontent = manage_if_inside_for(subcontent, local_variables, foritem_var, forindex) + + # update the content with the result of the recursive call + content = content[:match_if.end()] + subcontent + + # get the if statement values + if_statement = IfStatement(match_if.group(0)) + + # precontent is the content before the if statement + precontent = content[:match_if.start()] + postcontent = content[match_if.start():] + match_if_postcontent = re.search(IfStatement.start_regex, postcontent, re.IGNORECASE) + + # if no endif => throw error + match_endif_postcontent = re.search(IfStatement.end_regex, postcontent, re.IGNORECASE) + if match_endif_postcontent is None: + raise errors.TemplateError( + 'no_endif_found', + f"The statement {if_statement.if_string} has no endif", + dict_of(if_statement.if_string) + ) + + # get value associated to the if statement + value = None + if if_statement.variable_name is not None: + computed_variable_name = re.sub(ForStatement.forindex_regex, forindex, if_statement.variable_name) + value = local_variables[computed_variable_name]['value'] + if if_statement.foritem_name is not None: + value = getForitemValue(if_statement.foritem_name, if_statement.foritem_escaping, foritem_var) + if if_statement.forindex is not None: + value = forindex + if_result = if_statement.get_if_result(value) + + if if_result: + postcontent = postcontent[:match_endif_postcontent.start()] + postcontent[match_endif_postcontent.end():] + postcontent = postcontent[:match_if_postcontent.start()] + postcontent[match_if_postcontent.end():] + if not if_result: + postcontent = postcontent[:match_if_postcontent.start()] + 
postcontent[match_endif_postcontent.end():] + + return precontent + postcontent + + + for_statement = ForStatement(local_x_found.getString()) + foritem_vars = local_variables[for_statement.variable_name]['value'] + + # remove the for statement from the odt + text = local_x_found.getText() + cursor = text.createTextCursorByRange(local_x_found) + cursor.String = '' + + # select content between for and endfor (including endfor) + while True: + if not cursor.goRight(1, True): + raise errors.TemplateError( + 'no_endfor_found', + f"The statement {for_statement.for_string} has no endif", + dict_of(for_statement.for_string) + ) + selected_string = cursor.String + match = re.search(ForStatement.end_regex, selected_string, re.IGNORECASE) + if match is not None: + break + + # get the content between the for and the endfor + cursor.goLeft(len(match.group(0)), True) + template = cursor.String + # remove the content from the file + cursor.String = '' + # remove the endfor at the end + cursor.goRight(len(match.group(0)), True) + cursor.String = '' + + # loop on values of the variable + counter = 0 + for foritem_var in foritem_vars: + content = template + # parse if inside for before managing foritem replacements + content = manage_if_inside_for(content, local_variables, foritem_var, str(counter)) + + # search [forindex] and remplace by my counter + content = re.sub(ForStatement.forindex_regex, str(counter), content, flags=re.IGNORECASE) + + # replace inside the selected content selected + for match in re.finditer(ForStatement.foritem_regex, content, re.IGNORECASE): + getForitemValue(match.group(1), match.group(2), foritem_var) + # replace the variable by its value + content = content.replace( + match.group(0), + getForitemValue(match.group(1), match.group(2), foritem_var) + ) + + # paste the content + text.insertString(cursor, content, False) + + # counter increment + counter += 1 + + # main of for_replace + search = doc.createSearchDescriptor() + search.SearchString = 
ForStatement.start_regex + search.SearchRegularExpression = True + search.SearchCaseSensitive = False + x_found = doc.findFirst(search) + while x_found is not None: + compute_for(doc, x_found) + x_found = doc.findNext(x_found.End, search) + diff --git a/lotemplate/Statement/HtmlStatement.py b/lotemplate/Statement/HtmlStatement.py new file mode 100644 index 0000000..61f0979 --- /dev/null +++ b/lotemplate/Statement/HtmlStatement.py @@ -0,0 +1,117 @@ +import re +from sorcery import dict_of +import lotemplate.errors as errors +from com.sun.star.lang import XComponent + +class HtmlStatement: + """ + Class representing an html statement in a template libreoffice + """ + start_regex = r'\[\s*html\s*\]' + end_regex = r'\[\s*endhtml\s*\]' + def __init__(self, html_string): + self.html_string = html_string + + def scan_html(doc: XComponent) -> None: + """ + scan html statement. + + We verify that + - there is and endhtml for each html statement + """ + + def scan_single_html(local_x_found) -> None: + """ + scan for a single for statement + """ + html_statement = HtmlStatement(local_x_found.getString()) + position_in_text = len(html_statement.html_string) + text = local_x_found.getText() + cursor = text.createTextCursorByRange(local_x_found) + while True: + if not cursor.goRight(1, True): + raise errors.TemplateError( + 'no_endhtml_found', + f"The statement {html_statement.html_string} has no endhtml", + dict_of(html_statement.html_string) + ) + position_in_text = position_in_text + 1 + selected_string = cursor.String + match = re.search(HtmlStatement.end_regex, selected_string, re.IGNORECASE) + if match is not None: + break + + search = doc.createSearchDescriptor() + search.SearchString = HtmlStatement.start_regex + search.SearchRegularExpression = True + search.SearchCaseSensitive = False + x_found = doc.findFirst(search) + + while x_found is not None: + scan_single_html(x_found) + x_found = doc.findNext(x_found.End, search) + + + def html_replace(template, doc: XComponent) 
-> None: + """ + Replace the content inside [html] and [endhtml] with a pasted html code inside the doc + """ + + def compute_html(doc, local_x_found): + html_statement = HtmlStatement(local_x_found.getString()) + text = local_x_found.getText() + cursor = text.createTextCursorByRange(local_x_found) + while True: + if not cursor.goRight(1, True): + raise errors.TemplateError( + 'no_endhtml_found', + f"The statement [html] has no endhtml", + dict_of(html_statement.html_string) + ) + + selected_string = cursor.String + match = re.search(HtmlStatement.end_regex, selected_string, re.IGNORECASE) + if match is not None: + break + cursor.String = '' + html_string = re.sub(HtmlStatement.end_regex, '', selected_string, flags=re.IGNORECASE) + html_string = re.sub(HtmlStatement.start_regex, '', html_string, flags=re.IGNORECASE) + template.pasteHtml(html_string, cursor) + + # main of for_replace + search = doc.createSearchDescriptor() + search.SearchString = HtmlStatement.start_regex + search.SearchRegularExpression = True + search.SearchCaseSensitive = False + x_found = doc.findFirst(search) + while x_found is not None: + compute_html(doc, x_found) + x_found = doc.findNext(x_found.End, search) + + def html_fill(template, doc: XComponent, variable: str, value: str) -> None: + """ + Fills all the html-related content (contents of type "html" in the json file) + + :param doc: the document to fill + :param variable: the variable to search + :param value: the value to replace with + :return: None + """ + + search = doc.createSearchDescriptor() + search.SearchString = variable + founded = doc.findAll(search) + for x_found in founded: + text = x_found.getText() + cursor = text.createTextCursorByRange(x_found) + cursor.String = "" + template.pasteHtml(value, cursor) + + for page in doc.getDrawPages(): + for shape in page: + if shape.getShapeType() == "com.sun.star.drawing.TextShape": + shape.String = shape.String.replace(variable, value) + # we wanted to use the pasteHtml function, 
but it doesn't work in a shape + # cursor = shape.createTextCursor() + # oldString = cursor.String + # self.pasteHtml(oldString.replace(variable, value), cursor) diff --git a/lotemplate/Statement/IfStatement.py b/lotemplate/Statement/IfStatement.py new file mode 100644 index 0000000..983f9a5 --- /dev/null +++ b/lotemplate/Statement/IfStatement.py @@ -0,0 +1,236 @@ +import re + +from sorcery import dict_of +import lotemplate.errors as errors +from typing import Union +from com.sun.star.lang import XComponent + +class IfStatement: + """ + Class representing an if statement in a template libreoffice + """ + start_regex = r""" + \[\s*if\s* # [if detection + (?: + (?: # parsing of var + \$ # var start with $ + (\w+ # basic var name + (\( # parsing of fonction var + ((?: # ?: is for non capturing group : the regex inside the parenthesis must be matched but does not create the capturing group + \\.|. # everything that is escaped or every simple char + )*?) # the ? before the ) in order to be not greedy (stop on the first unescaped ")" + \)) + ?) # the ? before the ) in order to be not greedy (won't go until the last ")") + ) + | + (?: # parsing of foritem + \[\s*foritem\s* # [foritem detection + ( + \w+ # simple var of type abc + (?:\.\w+)* # composite var name like abc.def + ) + (?:\s+(escape_html|raw))? # option pour escaper le contenu de la variable + \s*\] + ) + | + (\[\s*forindex\s*\]) # parsing of forindex + ) + \s* + ( # catch whether + (?: # for syntax == var or != var + ( # equality + \=\=| + \!\=| + \=\=\=| + \!\=\=| + CONTAINS| + NOT_CONTAINS + )\s* + ( # value is anything, should escape [ and ] + (?: + \\.|. + )* + ?) 
# not too greedy + ) + | + (IS_EMPTY|IS_NOT_EMPTY) # for syntax [if $toto IS_EMPTY] or [if $toto IS_NOT_EMPTY] + ) + \s*\] + """ + # remove comments, spaces and newlines + start_regex = re.sub(r'#.*', '', start_regex).replace("\n", "").replace("\t", "").replace(" ", "") + # print(start_regex) + # \[\s*if\s*\$(\w+(\(((?:\\.|.)*?)\))?)\s*((?:(\=\=|\!\=)\s*((?:\\.|.)*?))|(IS_EMPTY|IS_NOT_EMPTY))\s*\] + + end_regex = r'\[\s*endif\s*\]' + + def __init__(self, if_string): + self.if_string = if_string + match = re.search(self.start_regex, if_string, re.IGNORECASE) + + # for standard if outside for statements + self.variable_name = match.group(1) + # foritem parsing is used by if statements inside for statements if we want to check the value of a foritem value. + self.foritem_name = match.group(4) + self.foritem_escaping = match.group(5) + # forindex parsing + self.forindex = match.group(6) + + if match.group(8) is not None: + # syntaxes like [if $foo == bar] or [if $foo != bar] + self.operator = match.group(8) + self.value = match.group(9) + else: + # syntaxes like [if $foo IS_EMPTY] or [if $foo IS_NOT_EMPTY] + self.operator = match.group(10) + + def get_if_result(self, value): + if self.operator == '==': + return value.lower() == self.value.lower() + if self.operator == '!=': + return value.lower() != self.value.lower() + if self.operator == '===': + return value == self.value + if self.operator == '!==': + return value != self.value + if self.operator == 'CONTAINS': + return self.value.lower() in value.lower() + if self.operator == 'NOT_CONTAINS': + return self.value.lower() not in value.lower() + if self.operator == 'IS_EMPTY': + return re.search(r'^[\s\t\n]*$', value) is not None + if self.operator == 'IS_NOT_EMPTY': + return re.search(r'^[\s\t\n]*$', value) is None + return False + + def scan_if(template) -> None: + """ + scan for if statement. No return. 
We just verify that there is + and endif for each if statement + """ + def compute_if(x_found, x_found_endif): + """ + Compute the if statement. + """ + if_text = x_found.getText() + endif_text = x_found_endif.getText() + if_cursor = if_text.createTextCursorByRange(x_found) + endif_cursor = endif_text.createTextCursorByRange(x_found_endif) + content_cursor = if_text.createTextCursorByRange(x_found.End) + content_cursor.gotoRange(x_found_endif.Start, True) + + if_cursor.String = '' + endif_cursor.String = '' + content_cursor.String = '' + + def find_if_to_compute(doc, search, x_found): + """ + Find the if statement to compute. + """ + if x_found is None: + return None + while True: + x_found_after = doc.findNext(x_found.End, search) + if x_found_after is not None: + find_if_to_compute(doc, search, x_found_after) + else: + break + + endif_search = doc.createSearchDescriptor() + endif_search.SearchString = IfStatement.end_regex + endif_search.SearchRegularExpression = True + endif_search.SearchCaseSensitive = False + + x_found_endif = doc.findNext(x_found.End, endif_search) + if x_found_endif is None: + cursor = x_found.getText().createTextCursorByRange(x_found) + raise errors.TemplateError( + 'no_endif_found', + f"The statement {cursor.String} has no endif", + dict_of(cursor.String) + ) + compute_if(x_found, x_found_endif) + + + # main of if_replace + doc = template.open_doc_from_url() + search = doc.createSearchDescriptor() + search.SearchString = IfStatement.start_regex + search.SearchRegularExpression = True + search.SearchCaseSensitive = False + x_found = doc.findFirst(search) + find_if_to_compute(doc, search, x_found) + doc.dispose() + + + def if_replace(doc: XComponent, local_variables: dict[str, dict[str, Union[str, list[str]]]]) -> None: + """ + Parse statements like [if $myvar==TOTO]...[endif] + + If the condition matches we remove the if and endif statement. + It the condition doesn't match, we remove the statements and the text between the statements. 
+ + :param doc: the document to fill + :param local_variables: the variables + :return: None + """ + + def compute_if(x_found, x_found_endif): + """ + Compute the if statement. + """ + if_text = x_found.getText() + endif_text = x_found_endif.getText() + if_cursor = if_text.createTextCursorByRange(x_found) + endif_cursor = endif_text.createTextCursorByRange(x_found_endif) + content_cursor = if_text.createTextCursorByRange(x_found.End) + content_cursor.gotoRange(x_found_endif.Start, True) + if_statement = IfStatement(if_cursor.String) + if_result = if_statement.get_if_result(local_variables[if_statement.variable_name]['value']) + + if not if_result: + # if the if statement is not verified, we remove the paragraph with the if + if_cursor.String = '' + endif_cursor.String = '' + content_cursor.String = '' + elif if_result: + # if the if statement is verified, we remove the if and endif statements + if_cursor.String = '' + endif_cursor.String = '' + + def find_if_to_compute(doc, search, x_found): + """ + Find the if statement to compute. 
+ """ + if x_found is None: + return None + while True: + x_found_after = doc.findNext(x_found.End, search) + if x_found_after is not None: + find_if_to_compute(doc, search, x_found_after) + else: + break + + endif_search = doc.createSearchDescriptor() + endif_search.SearchString = IfStatement.end_regex + endif_search.SearchRegularExpression = True + endif_search.SearchCaseSensitive = False + + x_found_endif = doc.findNext(x_found.End, endif_search) + if x_found_endif is None: + cursor = x_found.getText().createTextCursorByRange(x_found) + raise errors.TemplateError( + 'no_endif_found', + f"The statement {cursor.String} has no endif", + dict_of(cursor.String) + ) + compute_if(x_found, x_found_endif) + + + # main of if_replace + search = doc.createSearchDescriptor() + search.SearchString = IfStatement.start_regex + search.SearchRegularExpression = True + search.SearchCaseSensitive = False + x_found = doc.findFirst(search) + find_if_to_compute(doc, search, x_found) diff --git a/lotemplate/Statement/ImageStatement.py b/lotemplate/Statement/ImageStatement.py new file mode 100644 index 0000000..398de43 --- /dev/null +++ b/lotemplate/Statement/ImageStatement.py @@ -0,0 +1,57 @@ +from com.sun.star.beans import PropertyValue, UnknownPropertyException +from com.sun.star.lang import XComponent +import regex +from urllib import request +from PIL import Image +from lotemplate.utils import get_file_url, is_network_based +from com.sun.star.awt import Size + + +class ImageStatement: + image_regex = regex.compile(r'\$\w+') + + def scan_image(doc: XComponent) -> dict[str, dict[str, str]]: + """ + scan for images in the given doc + + :param doc: the document to scan + :return: the scanned variables + """ + + return { + elem.LinkDisplayName[1:]: {'type': 'image', 'value': ''} + for elem in doc.getGraphicObjects() + if ImageStatement.image_regex.fullmatch(elem.LinkDisplayName) + } + + def image_fill(doc: XComponent, graphic_provider, variable: str, path: str, should_resize=True) -> 
None: + """ + Fills all the image-related content + + :param should_resize: specify if the image should be resized to keep his original size ratio + :param graphic_provider: the graphic provider, from the established connection + :param doc: the document to fill + :param variable: the variable to search + :param path: the path of the image to replace with + :return: None + """ + + if not path: + return + + for graphic_object in doc.getGraphicObjects(): + if graphic_object.LinkDisplayName != variable: + continue + + new_image = graphic_provider.queryGraphic((PropertyValue('URL', 0, get_file_url(path), 0),)) + + if should_resize: + with Image.open(request.urlopen(path) if is_network_based(path) else path) as image: + ratio = image.width / image.height + new_size = Size() + new_size.Height = graphic_object.Size.Height + new_size.Width = graphic_object.Size.Height * ratio + graphic_object.setSize(new_size) + + graphic_object.Graphic = new_image + diff --git a/lotemplate/Statement/TableStatement.py b/lotemplate/Statement/TableStatement.py new file mode 100644 index 0000000..7c814ff --- /dev/null +++ b/lotemplate/Statement/TableStatement.py @@ -0,0 +1,109 @@ +from com.sun.star.lang import XComponent +from typing import Union +import lotemplate.errors as errors +import regex + + +class TableStatement: + table_regex = regex.compile( + r'\$\w+' + r'(?:\((?(?R)|"[^"]*"|[^$&"\s()][^\s()]*)(?:[+ ](?&arg))*\))?' 
+ r'|(?&\w+)' + ) + + def scan_table(doc: XComponent, get_list=False) -> Union[dict, list]: + """ + scan for tables in the given doc + + :param get_list: indicates if the function should return a list + of variables or the formatted dictionary of variables + :param doc: the document to scan + :return: the scanned variables + """ + + def scan_cell(cell) -> None: + """ + scan for variables in the given cell + + :param cell: the cell to scan + :return: None + """ + for match in TableStatement.table_regex.finditer(cell): + if not match.captures('var'): + continue + if row_i != nb_rows - 1: + raise errors.TemplateError( + 'variable_not_in_last_row', + f"The variable {match[0]!r} (table {t_name!r}) " + f"isn't in the last row (got: row {row_i + 1!r}, " + f"expected: row {nb_rows!r})", + dict(table=t_name, actual_row=row_i + 1, + expected_row=nb_rows, variable=match[0]) + ) + tab_vars[match[0][1:]] = {'type': 'table', 'value': ['']} + list_tab_vars.append(match[0]) + + tab_vars = {} + list_tab_vars = [] + for i in range(doc.getTextTables().getCount()): + table_data = doc.getTextTables().getByIndex(i).getDataArray() + t_name = doc.getTextTables().getByIndex(i).getName() + nb_rows = len(table_data) + for row_i, row in enumerate(table_data): + for column in row: + scan_cell(column) + + return list_tab_vars if get_list else tab_vars + + def tables_fill(doc: XComponent, variables: dict[str, dict[str, Union[str, list[str]]]], text_prefix: str, + table_prefix: str) -> None: + """ + Fills all the table-related content + + :param doc: the document to fill + :param text_prefix: the prefix for text variables + :param table_prefix: the prefix for table variables + :return: None + """ + search = doc.createSearchDescriptor() + matches = [] + for element, infos in sorted(variables.items(), key=lambda s: -len(s[0])): + if infos['type'] != 'table': + continue + search.SearchString = (text_prefix if '(' in element else table_prefix) + element + founded = doc.findAll(search) + matches += 
[founded.getByIndex(i) for i in range(founded.getCount()) if founded.getByIndex(i).TextTable] + tab_vars = [{ + "table": variable.TextTable, + "var": variable.String + } for variable in matches] + + tables = [ + {'table': tab, 'vars': + {tab_var['var']: variables[tab_var['var'][1:]]['value'] + for tab_var in tab_vars if tab_var['table'] == tab} + } for tab in list(set(variable['table'] for variable in tab_vars)) + ] + + for element in tables: + + table = element['table'] + table_vars = element['vars'] + var_row_pos = len(table.getRows()) - 1 + nb_rows_to_add = max([len(variable) for variable in table_vars.values()]) + table.getRows().insertByIndex(var_row_pos + 1, nb_rows_to_add - 1) + table_values = table.getDataArray() + var_row = table_values[var_row_pos] + static_rows = table_values[:var_row_pos] + + for i in range(nb_rows_to_add): + new_row = var_row + for variable_name, variable_value in sorted(table_vars.items(), key=lambda s: -len(s[0])): + new_row = tuple( + elem.replace( + variable_name, variable_value[i] + if i < len(variable_value) else "" + ) for elem in new_row + ) + static_rows += (new_row,) + table.setDataArray(static_rows) diff --git a/lotemplate/Statement/TextStatement.py b/lotemplate/Statement/TextStatement.py new file mode 100644 index 0000000..10093b1 --- /dev/null +++ b/lotemplate/Statement/TextStatement.py @@ -0,0 +1,74 @@ +import re +from com.sun.star.lang import XComponent +from com.sun.star.beans import PropertyValue, UnknownPropertyException +import regex +from lotemplate.Statement.ForStatement import ForStatement +from lotemplate.Statement.TableStatement import TableStatement + + +class TextStatement: + text_regex = regex.compile(r'\$(\w+(\(((?:\\.|.)*?)\))?)') + def __init__(self, text_string): + self.text_string = text_string + + def scan_text(doc: XComponent) -> dict[str, dict[str, str]]: + """ + scan for text in the given doc + + :param doc: the document to scan + :return: the scanned variables + """ + + raw_string = 
doc.getText().getString() + matches = TextStatement.text_regex.finditer(raw_string) + plain_vars = {} + for var in matches: + key_name = var[0][1:] + # add to plain_vars if it doesn't matche ForStatement.foritem_regex + if not re.search(ForStatement.forindex_regex, key_name, re.IGNORECASE): + plain_vars[key_name] = {'type': 'text', 'value': ''} + + text_fields_vars = {} + for page in doc.getDrawPages(): + for shape in page: + try: + matches = TextStatement.text_regex.finditer(shape.String) + except (AttributeError, UnknownPropertyException): + continue + text_fields_vars = (text_fields_vars | + {var.group(0)[1:]: {'type': 'text', 'value': ''} for var in matches}) + + for var in TableStatement.scan_table(doc, get_list=True): + if '$' + var in plain_vars: + del plain_vars[var] + + for var in ForStatement.scan_for(doc): + if var in plain_vars: + del plain_vars[var] + + return plain_vars | text_fields_vars + + + def text_fill(doc: XComponent, variable: str, value: str) -> None: + """ + Fills all the text-related content + + :param doc: the document to fill + :param variable: the variable to search + :param value: the value to replace with + :return: None + """ + + search = doc.createSearchDescriptor() + search.SearchString = variable + founded = doc.findAll(search) + + for x_found in founded: + text = x_found.getText() + cursor = text.createTextCursorByRange(x_found) + cursor.String = value + + for page in doc.getDrawPages(): + for shape in page: + if shape.getShapeType() == "com.sun.star.drawing.TextShape": + shape.String = shape.String.replace(variable, value) diff --git a/lotemplate/Statement/__init__.py b/lotemplate/Statement/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lotemplate/classes.py b/lotemplate/classes.py index 85c416a..a49d590 100644 --- a/lotemplate/classes.py +++ b/lotemplate/classes.py @@ -12,25 +12,27 @@ import os from typing import Union -from urllib import request -from PIL import Image from sorcery import dict_of import uno 
-import re import unohelper -from com.sun.star.beans import PropertyValue, UnknownPropertyException +from com.sun.star.beans import PropertyValue from com.sun.star.io import IOException from com.sun.star.lang import IllegalArgumentException, DisposedException from com.sun.star.connection import NoConnectException from com.sun.star.uno import RuntimeException -from com.sun.star.awt import Size from com.sun.star.text.ControlCharacter import PARAGRAPH_BREAK from com.sun.star.style.BreakType import PAGE_AFTER from . import errors from .utils import * +from lotemplate.Statement.ForStatement import ForStatement +from lotemplate.Statement.HtmlStatement import HtmlStatement +from lotemplate.Statement.IfStatement import IfStatement +from lotemplate.Statement.TextStatement import TextStatement +from lotemplate.Statement.TableStatement import TableStatement +from lotemplate.Statement.ImageStatement import ImageStatement class Connexion: @@ -79,159 +81,6 @@ def restart(self) -> None: self.__init__(self.host, self.port) - -class IfStatement: - """ - Class representing an if statement in a template libreoffice - """ - start_regex = r""" - \[\s*if\s* # [if detection - (?: - (?: # parsing of var - \$ # var start with $ - (\w+ # basic var name - (\( # parsing of fonction var - ((?: # ?: is for non capturing group : the regex inside the parenthesis must be matched but does not create the capturing group - \\.|. # everything that is escaped or every simple char - )*?) # the ? before the ) in order to be not greedy (stop on the first unescaped ")" - \)) - ?) # the ? before the ) in order to be not greedy (won't go until the last ")") - ) - | - (?: # parsing of foritem - \[\s*foritem\s* # [foritem detection - ( - \w+ # simple var of type abc - (?:\.\w+)* # composite var name like abc.def - ) - (?:\s+(escape_html|raw))? 
# option pour escaper le contenu de la variable - \s*\] - ) - | - (\[\s*forindex\s*\]) # parsing of forindex - ) - \s* - ( # catch whether - (?: # for syntax == var or != var - ( # equality - \=\=| - \!\=| - \=\=\=| - \!\=\=| - CONTAINS| - NOT_CONTAINS - )\s* - ( # value is anything, should escape [ and ] - (?: - \\.|. - )* - ?) # not too greedy - ) - | - (IS_EMPTY|IS_NOT_EMPTY) # for syntax [if $toto IS_EMPTY] or [if $toto IS_NOT_EMPTY] - ) - \s*\] - """ - # remove comments, spaces and newlines - start_regex = re.sub(r'#.*', '', start_regex).replace("\n", "").replace("\t", "").replace(" ", "") - # print(start_regex) - # \[\s*if\s*\$(\w+(\(((?:\\.|.)*?)\))?)\s*((?:(\=\=|\!\=)\s*((?:\\.|.)*?))|(IS_EMPTY|IS_NOT_EMPTY))\s*\] - - end_regex = r'\[\s*endif\s*\]' - - def __init__(self, if_string): - self.if_string = if_string - match = re.search(self.start_regex, if_string, re.IGNORECASE) - - # for standard if outside for statements - self.variable_name = match.group(1) - # foritem parsing is used by if statements inside for statements if we want to check the value of a foritem value. 
- self.foritem_name = match.group(4) - self.foritem_escaping = match.group(5) - # forindex parsing - self.forindex = match.group(6) - - if match.group(8) is not None: - # syntaxes like [if $foo == bar] or [if $foo != bar] - self.operator = match.group(8) - self.value = match.group(9) - else: - # syntaxes like [if $foo IS_EMPTY] or [if $foo IS_NOT_EMPTY] - self.operator = match.group(10) - - def get_if_result(self, value): - if self.operator == '==': - return value.lower() == self.value.lower() - if self.operator == '!=': - return value.lower() != self.value.lower() - if self.operator == '===': - return value == self.value - if self.operator == '!==': - return value != self.value - if self.operator == 'CONTAINS': - return self.value.lower() in value.lower() - if self.operator == 'NOT_CONTAINS': - return self.value.lower() not in value.lower() - if self.operator == 'IS_EMPTY': - return re.search(r'^[\s\t\n]*$', value) is not None - if self.operator == 'IS_NOT_EMPTY': - return re.search(r'^[\s\t\n]*$', value) is None - return False - - -class ForStatement: - """ - Class representing an for statement in a template libreoffice - """ - start_regex = r""" - \[\s*for\s* # [if detection - \$ # var start with $ - (\w+ # basic var name - (\( # parsing of fonction var - ((?: # ?: is for non capturing group : the regex inside the parenthesis must be matched but does not create the capturing group - \\.|. # everything that is escaped or every simple char - )*?) # the ? before the ) in order to be not greedy (stop on the first unescaped ")" - \)) - ?) # the ? 
before the ) in order to be not greedy (won't go until the last ")") - \s*\] - """ - # remove comments, spaces and newlines - start_regex = re.sub(r'#.*', '', start_regex).replace("\n", "").replace("\t", "").replace(" ", "") - # print(start_regex) - # \[\s*for\s*\$(\w+(\(((?:\\.|.)*?)\))?)\s*\] - foritem_regex = r""" - \[\s*foritem\s* # [foritem detection - ( - \w+ # simple var of type abc - (?:\.\w+)* # composite var name like abc.def - ) - (?:\s+(escape_html|raw))? # option pour escaper le contenu de la variable - \s*\] - """ - foritem_regex = re.sub(r'#.*', '', foritem_regex).replace("\n", "").replace("\t", "").replace(" ", "") - # print(foritem_regex) - # \[\s*foritem\s*((\w+)(?:\.\w+)*)\s*\] - - # [forindex] is replaced by the counter of the loop. A string starting at 0 - forindex_regex = r'\[\s*forindex\s*\]' - - end_regex = r'\[\s*endfor\s*\]' - - def __init__(self, for_string): - self.for_string = for_string - match = re.search(self.start_regex, for_string, re.IGNORECASE) - self.variable_name = match.group(1) - -class HtmlStatement: - """ - Class representing an html statement in a template libreoffice - """ - start_regex = r'\[\s*html\s*\]' - end_regex = r'\[\s*endhtml\s*\]' - def __init__(self, html_string): - self.html_string = html_string - - class Template: def __enter__(self): @@ -343,251 +192,13 @@ def scan(self, **kwargs) -> dict[str: dict[str, Union[str, list[str]]]]: should_close = kwargs.get("should_close", False) - def scan_text(doc) -> dict[str, dict[str, str]]: - """ - scan for text in the given doc - - :param doc: the document to scan - :return: the scanned variables - """ - - raw_string = doc.getText().getString() - - matches = var_regexes['text'].finditer(raw_string) - plain_vars = {} - for var in matches: - key_name = var[0][1:] - # add to plain_vars if it doesn't matche ForStatement.foritem_regex - if not re.search(ForStatement.forindex_regex, key_name, re.IGNORECASE): - plain_vars[key_name] = {'type': 'text', 'value': ''} - - 
text_fields_vars = {} - for page in doc.getDrawPages(): - for shape in page: - try: - matches = var_regexes['text'].finditer(shape.String) - except (AttributeError, UnknownPropertyException): - continue - text_fields_vars = (text_fields_vars | - {var.group(0)[1:]: {'type': 'text', 'value': ''} for var in matches}) - - for var in scan_table(doc, get_list=True): - if '$' + var in plain_vars: - del plain_vars[var] - - for var in scan_for(doc): - if var in plain_vars: - del plain_vars[var] - - return plain_vars | text_fields_vars - - def scan_if() -> None: - """ - scan for if statement. No return. We just verify that there is - and endif for each if statement - """ - def compute_if(x_found, x_found_endif): - """ - Compute the if statement. - """ - if_text = x_found.getText() - endif_text = x_found_endif.getText() - if_cursor = if_text.createTextCursorByRange(x_found) - endif_cursor = endif_text.createTextCursorByRange(x_found_endif) - content_cursor = if_text.createTextCursorByRange(x_found.End) - content_cursor.gotoRange(x_found_endif.Start, True) - - if_cursor.String = '' - endif_cursor.String = '' - content_cursor.String = '' - - def find_if_to_compute(doc, search, x_found): - """ - Find the if statement to compute. 
- """ - if x_found is None: - return None - while True: - x_found_after = doc.findNext(x_found.End, search) - if x_found_after is not None: - find_if_to_compute(doc, search, x_found_after) - else: - break - - endif_search = doc.createSearchDescriptor() - endif_search.SearchString = IfStatement.end_regex - endif_search.SearchRegularExpression = True - endif_search.SearchCaseSensitive = False - - x_found_endif = doc.findNext(x_found.End, endif_search) - if x_found_endif is None: - cursor = x_found.getText().createTextCursorByRange(x_found) - raise errors.TemplateError( - 'no_endif_found', - f"The statement {cursor.String} has no endif", - dict_of(cursor.String) - ) - compute_if(x_found, x_found_endif) - - - # main of if_replace - doc = self.open_doc_from_url() - search = doc.createSearchDescriptor() - search.SearchString = IfStatement.start_regex - search.SearchRegularExpression = True - search.SearchCaseSensitive = False - x_found = doc.findFirst(search) - find_if_to_compute(doc, search, x_found) - doc.dispose() - - def scan_for(doc) -> dict: - """ - scan for statement. return list of vars. 
- - We verify that - - there is and endfor for each for statement - - vars sent are lists - """ - - def scan_single_for(local_x_found) -> str: - """ - scan for a single for statement - """ - for_statement = ForStatement(local_x_found.getString()) - position_in_text = len(for_statement.for_string) - text = local_x_found.getText() - cursor = text.createTextCursorByRange(local_x_found) - while True: - if not cursor.goRight(1, True): - raise errors.TemplateError( - 'no_endfor_found', - f"The statement {for_statement.for_string} has no endfor", - dict_of(for_statement.for_string) - ) - position_in_text = position_in_text + 1 - selected_string = cursor.String - match = re.search(ForStatement.end_regex, selected_string, re.IGNORECASE) - if match is not None: - break - return for_statement.variable_name - - search = doc.createSearchDescriptor() - search.SearchString = ForStatement.start_regex - search.SearchRegularExpression = True - search.SearchCaseSensitive = False - x_found = doc.findFirst(search) - - for_vars = {} - while x_found is not None: - variable_name = scan_single_for(x_found) - for_vars[variable_name] = {'type': 'array', 'value': []} - x_found = doc.findNext(x_found.End, search) - return for_vars - - def scan_html(doc) -> None: - """ - scan html statement. 
- - We verify that - - there is and endhtml for each html statement - """ - - def scan_single_html(local_x_found) -> None: - """ - scan for a single for statement - """ - html_statement = HtmlStatement(local_x_found.getString()) - position_in_text = len(html_statement.html_string) - text = local_x_found.getText() - cursor = text.createTextCursorByRange(local_x_found) - while True: - if not cursor.goRight(1, True): - raise errors.TemplateError( - 'no_endhtml_found', - f"The statement {html_statement.html_string} has no endhtml", - dict_of(html_statement.html_string) - ) - position_in_text = position_in_text + 1 - selected_string = cursor.String - match = re.search(HtmlStatement.end_regex, selected_string, re.IGNORECASE) - if match is not None: - break - - search = doc.createSearchDescriptor() - search.SearchString = HtmlStatement.start_regex - search.SearchRegularExpression = True - search.SearchCaseSensitive = False - x_found = doc.findFirst(search) - - while x_found is not None: - scan_single_html(x_found) - x_found = doc.findNext(x_found.End, search) - - def scan_table(doc, get_list=False) -> Union[dict, list]: - """ - scan for tables in the given doc - - :param get_list: indicates if the function should return a list - of variables or the formatted dictionary of variables - :param doc: the document to scan - :return: the scanned variables - """ - - def scan_cell(cell) -> None: - """ - scan for variables in the given cell - - :param cell: the cell to scan - :return: None - """ - for match in var_regexes['table'].finditer(cell): - if not match.captures('var'): - continue - if row_i != nb_rows - 1: - raise errors.TemplateError( - 'variable_not_in_last_row', - f"The variable {match[0]!r} (table {t_name!r}) " - f"isn't in the last row (got: row {row_i + 1!r}, " - f"expected: row {nb_rows!r})", - dict(table=t_name, actual_row=row_i + 1, - expected_row=nb_rows, variable=match[0]) - ) - tab_vars[match[0][1:]] = {'type': 'table', 'value': ['']} - 
list_tab_vars.append(match[0]) - - tab_vars = {} - list_tab_vars = [] - for i in range(doc.getTextTables().getCount()): - table_data = doc.getTextTables().getByIndex(i).getDataArray() - t_name = doc.getTextTables().getByIndex(i).getName() - nb_rows = len(table_data) - for row_i, row in enumerate(table_data): - for column in row: - scan_cell(column) - - return list_tab_vars if get_list else tab_vars - - def scan_image(doc) -> dict[str, dict[str, str]]: - """ - scan for images in the given doc - - :param doc: the document to scan - :return: the scanned variables - """ - - return { - elem.LinkDisplayName[1:]: {'type': 'image', 'value': ''} - for elem in doc.getGraphicObjects() - if var_regexes['image'].fullmatch(elem.LinkDisplayName) - } - - texts = scan_text(self.doc) + texts = TextStatement.scan_text(self.doc) # we use another document for if statement scanning because it modifies the file - scan_if() - tables = scan_table(self.doc) - images = scan_image(self.doc) - fors = scan_for(self.doc) - scan_html(self.doc) + IfStatement.scan_if(template = self) + tables = TableStatement.scan_table(self.doc) + images = ImageStatement.scan_image(self.doc) + fors = ForStatement.scan_for(self.doc) + HtmlStatement.scan_html(self.doc) variables_list = list(texts.keys()) + list(tables.keys()) + list(images.keys()) + list(fors.keys()) duplicates = [variable for variable in variables_list if variables_list.count(variable) > 1] @@ -652,437 +263,6 @@ def fill(self, variables: dict[str, dict[str, Union[str, list[str]]]]) -> None: :return: None """ - def html_replace(doc) -> None: - """ - Replace the content inside [html] and [endhtml] with a pasted html code inside the doc - """ - - def compute_html(doc, local_x_found): - html_statement = HtmlStatement(local_x_found.getString()) - text = local_x_found.getText() - cursor = text.createTextCursorByRange(local_x_found) - while True: - if not cursor.goRight(1, True): - raise errors.TemplateError( - 'no_endhtml_found', - f"The statement [html] 
has no endhtml", - dict_of(html_statement.html_string) - ) - - selected_string = cursor.String - match = re.search(HtmlStatement.end_regex, selected_string, re.IGNORECASE) - if match is not None: - break - cursor.String = '' - html_string = re.sub(HtmlStatement.end_regex, '', selected_string, flags=re.IGNORECASE) - html_string = re.sub(HtmlStatement.start_regex, '', html_string, flags=re.IGNORECASE) - self.pasteHtml(html_string, cursor) - - # main of for_replace - search = doc.createSearchDescriptor() - search.SearchString = HtmlStatement.start_regex - search.SearchRegularExpression = True - search.SearchCaseSensitive = False - x_found = doc.findFirst(search) - while x_found is not None: - compute_html(doc, x_found) - x_found = doc.findNext(x_found.End, search) - - def for_replace(doc, local_variables: dict[str, dict[str, Union[str, list[str]]]]) -> None: - """ - Parse statements like [for $myvar]...[endfor] - - We replace the for and endfor statements with the text between them, for each value in the variable. - - :param doc: the document to fill - :param local_variables: the variables - :return: None - """ - - def compute_for(doc, local_x_found): - """ - for one single for statement, cut and paste the content of the for - :param local_x_found: - :return: - """ - - def escape_html(s): - """ - Replace special characters "&", "<" and ">" to HTML-safe sequences. - If the optional flag quote is true, the quotation mark character (") - is also translated. - """ - s = s.replace("&", "&") # Must be done first! - s = s.replace("<", "<") - s = s.replace(">", ">") - s = s.replace('"', """) - return s - - def getForitemValue(match_var_name, match_escaping, foritem_var): - """ - we are in a for loop on values of an array. - - a regex just detected [foritem match_var_name match_escaping] - - mathch escaping, can exist or not. If it exists, it can be raw or escape_html - - :param match_var_name: - :param match_escaping: - :return: - """ - # get separate the var_name by "." 
to get the value in the dict - var_name_hierarchy = match_var_name.split('.') - # get the variable value from the hierarchy - value = foritem_var - for var_name in var_name_hierarchy: - value = value[var_name] - - # get the escaping - escaping = 'raw' - if match_escaping is not None: - escaping = match_escaping - # escape the value - if escaping == 'escape_html': - value = escape_html(value) - return str(value) - - def manage_if_inside_for(content, local_variables, foritem_var, forindex): - """ - manage the if statements inside a for loop. - - It uses a recursive approach : we search an if statement inside the content string. Then we create a - subcontent with the content after the if statement and call the function again on this substring. - - With this recursive call, the process begins with the last if statement and update the content for - the previous call. - - :param content: - :param local_variables: - :param foritem_var: - :return: - """ - # look for the first if statement - match_if = re.search(IfStatement.start_regex, content, re.IGNORECASE) - if match_if is None: - return content - - # get content before the if statement and call recursively the function - subcontent = content[match_if.end():] - subcontent = manage_if_inside_for(subcontent, local_variables, foritem_var, forindex) - - # update the content with the result of the recursive call - content = content[:match_if.end()] + subcontent - - # get the if statement values - if_statement = IfStatement(match_if.group(0)) - - # precontent is the content before the if statement - precontent = content[:match_if.start()] - postcontent = content[match_if.start():] - match_if_postcontent = re.search(IfStatement.start_regex, postcontent, re.IGNORECASE) - - # if no endif => throw error - match_endif_postcontent = re.search(IfStatement.end_regex, postcontent, re.IGNORECASE) - if match_endif_postcontent is None: - raise errors.TemplateError( - 'no_endif_found', - f"The statement {if_statement.if_string} has no 
endif", - dict_of(if_statement.if_string) - ) - - # get value associated to the if statement - value = None - if if_statement.variable_name is not None: - computed_variable_name = re.sub(ForStatement.forindex_regex, forindex, if_statement.variable_name) - value = local_variables[computed_variable_name]['value'] - if if_statement.foritem_name is not None: - value = getForitemValue(if_statement.foritem_name, if_statement.foritem_escaping, foritem_var) - if if_statement.forindex is not None: - value = forindex - if_result = if_statement.get_if_result(value) - - if if_result: - postcontent = postcontent[:match_endif_postcontent.start()] + postcontent[match_endif_postcontent.end():] - postcontent = postcontent[:match_if_postcontent.start()] + postcontent[match_if_postcontent.end():] - if not if_result: - postcontent = postcontent[:match_if_postcontent.start()] + postcontent[match_endif_postcontent.end():] - - return precontent + postcontent - - - for_statement = ForStatement(local_x_found.getString()) - foritem_vars = local_variables[for_statement.variable_name]['value'] - - # remove the for statement from the odt - text = local_x_found.getText() - cursor = text.createTextCursorByRange(local_x_found) - cursor.String = '' - - # select content between for and endfor (including endfor) - while True: - if not cursor.goRight(1, True): - raise errors.TemplateError( - 'no_endfor_found', - f"The statement {for_statement.for_string} has no endif", - dict_of(for_statement.for_string) - ) - selected_string = cursor.String - match = re.search(ForStatement.end_regex, selected_string, re.IGNORECASE) - if match is not None: - break - - # get the content between the for and the endfor - cursor.goLeft(len(match.group(0)), True) - template = cursor.String - # remove the content from the file - cursor.String = '' - # remove the endfor at the end - cursor.goRight(len(match.group(0)), True) - cursor.String = '' - - # loop on values of the variable - counter = 0 - for foritem_var in 
foritem_vars: - content = template - # parse if inside for before managing foritem replacements - content = manage_if_inside_for(content, local_variables, foritem_var, str(counter)) - - # search [forindex] and remplace by my counter - content = re.sub(ForStatement.forindex_regex, str(counter), content, flags=re.IGNORECASE) - - # replace inside the selected content selected - for match in re.finditer(ForStatement.foritem_regex, content, re.IGNORECASE): - getForitemValue(match.group(1), match.group(2), foritem_var) - # replace the variable by its value - content = content.replace( - match.group(0), - getForitemValue(match.group(1), match.group(2), foritem_var) - ) - - # paste the content - text.insertString(cursor, content, False) - - # counter increment - counter += 1 - - # main of for_replace - search = doc.createSearchDescriptor() - search.SearchString = ForStatement.start_regex - search.SearchRegularExpression = True - search.SearchCaseSensitive = False - x_found = doc.findFirst(search) - while x_found is not None: - compute_for(doc, x_found) - x_found = doc.findNext(x_found.End, search) - - def if_replace(doc, local_variables: dict[str, dict[str, Union[str, list[str]]]]) -> None: - """ - Parse statements like [if $myvar==TOTO]...[endif] - - If the condition matches we remove the if and endif statement. - It the condition doesn't match, we remove the statements and the text between the statements. - - :param doc: the document to fill - :param local_variables: the variables - :return: None - """ - - def compute_if(x_found, x_found_endif): - """ - Compute the if statement. 
- """ - if_text = x_found.getText() - endif_text = x_found_endif.getText() - if_cursor = if_text.createTextCursorByRange(x_found) - endif_cursor = endif_text.createTextCursorByRange(x_found_endif) - content_cursor = if_text.createTextCursorByRange(x_found.End) - content_cursor.gotoRange(x_found_endif.Start, True) - if_statement = IfStatement(if_cursor.String) - if_result = if_statement.get_if_result(local_variables[if_statement.variable_name]['value']) - - if not if_result: - # if the if statement is not verified, we remove the paragraph with the if - if_cursor.String = '' - endif_cursor.String = '' - content_cursor.String = '' - elif if_result: - # if the if statement is verified, we remove the if and endif statements - if_cursor.String = '' - endif_cursor.String = '' - - def find_if_to_compute(doc, search, x_found): - """ - Find the if statement to compute. - """ - if x_found is None: - return None; - while True: - x_found_after = doc.findNext(x_found.End, search) - if x_found_after is not None: - find_if_to_compute(doc, search, x_found_after) - else: - break - - endif_search = doc.createSearchDescriptor() - endif_search.SearchString = IfStatement.end_regex - endif_search.SearchRegularExpression = True - endif_search.SearchCaseSensitive = False - - x_found_endif = doc.findNext(x_found.End, endif_search) - if x_found_endif is None: - cursor = x_found.getText().createTextCursorByRange(x_found) - raise errors.TemplateError( - 'no_endif_found', - f"The statement {cursor.String} has no endif", - dict_of(cursor.String) - ) - compute_if(x_found, x_found_endif) - - - # main of if_replace - search = doc.createSearchDescriptor() - search.SearchString = IfStatement.start_regex - search.SearchRegularExpression = True - search.SearchCaseSensitive = False - x_found = doc.findFirst(search) - find_if_to_compute(doc, search, x_found) - - def text_fill(doc, variable: str, value: str) -> None: - """ - Fills all the text-related content - - :param doc: the document to fill - :param 
variable: the variable to search - :param value: the value to replace with - :return: None - """ - - search = doc.createSearchDescriptor() - search.SearchString = variable - founded = doc.findAll(search) - - for x_found in founded: - text = x_found.getText() - cursor = text.createTextCursorByRange(x_found) - cursor.String = value - - for page in doc.getDrawPages(): - for shape in page: - if shape.getShapeType() == "com.sun.star.drawing.TextShape": - shape.String = shape.String.replace(variable, value) - - def html_fill(doc, variable: str, value: str) -> None: - """ - Fills all the html-related content - - :param doc: the document to fill - :param variable: the variable to search - :param value: the value to replace with - :return: None - """ - - search = doc.createSearchDescriptor() - search.SearchString = variable - founded = doc.findAll(search) - for x_found in founded: - text = x_found.getText() - cursor = text.createTextCursorByRange(x_found) - cursor.String = "" - self.pasteHtml(value, cursor) - - for page in doc.getDrawPages(): - for shape in page: - if shape.getShapeType() == "com.sun.star.drawing.TextShape": - shape.String = shape.String.replace(variable, value) - # we wanted to use the pasteHtml function, but it doesn't work in a shape - # cursor = shape.createTextCursor() - # oldString = cursor.String - # self.pasteHtml(oldString.replace(variable, value), cursor) - - def image_fill(doc, graphic_provider, variable: str, path: str, should_resize=True) -> None: - """ - Fills all the image-related content - - :param should_resize: specify if the image should be resized to keep his original size ratio - :param graphic_provider: the graphic provider, from the established connection - :param doc: the document to fill - :param variable: the variable to search - :param path: the path of the image to replace with - :return: None - """ - - if not path: - return - - for graphic_object in doc.getGraphicObjects(): - if graphic_object.LinkDisplayName != variable: - 
continue - - new_image = graphic_provider.queryGraphic((PropertyValue('URL', 0, get_file_url(path), 0),)) - - if should_resize: - with Image.open(request.urlopen(path) if is_network_based(path) else path) as image: - ratio = image.width / image.height - new_size = Size() - new_size.Height = graphic_object.Size.Height - new_size.Width = graphic_object.Size.Height * ratio - graphic_object.setSize(new_size) - - graphic_object.Graphic = new_image - - def tables_fill(doc, text_prefix: str, table_prefix: str) -> None: - """ - Fills all the table-related content - - :param doc: the document to fill - :param text_prefix: the prefix for text variables - :param table_prefix: the prefix for table variables - :return: None - """ - - search = doc.createSearchDescriptor() - matches = [] - for element, infos in sorted(variables.items(), key=lambda s: -len(s[0])): - if infos['type'] != 'table': - continue - search.SearchString = (text_prefix if '(' in element else table_prefix) + element - founded = doc.findAll(search) - matches += [founded.getByIndex(i) for i in range(founded.getCount()) if founded.getByIndex(i).TextTable] - tab_vars = [{ - "table": variable.TextTable, - "var": variable.String - } for variable in matches] - - tables = [ - {'table': tab, 'vars': - {tab_var['var']: variables[tab_var['var'][1:]]['value'] - for tab_var in tab_vars if tab_var['table'] == tab} - } for tab in list(set(variable['table'] for variable in tab_vars)) - ] - - for element in tables: - - table = element['table'] - table_vars = element['vars'] - var_row_pos = len(table.getRows()) - 1 - nb_rows_to_add = max([len(variable) for variable in table_vars.values()]) - table.getRows().insertByIndex(var_row_pos + 1, nb_rows_to_add - 1) - table_values = table.getDataArray() - var_row = table_values[var_row_pos] - static_rows = table_values[:var_row_pos] - - for i in range(nb_rows_to_add): - new_row = var_row - for variable_name, variable_value in sorted(table_vars.items(), key=lambda s: -len(s[0])): - 
new_row = tuple( - elem.replace( - variable_name, variable_value[i] - if i < len(variable_value) else "" - ) for elem in new_row - ) - static_rows += (new_row,) - table.setDataArray(static_rows) - if self.new: self.new.dispose() self.new.close(True) @@ -1109,21 +289,21 @@ def tables_fill(doc, text_prefix: str, table_prefix: str) -> None: ### ### main calls ### - for_replace(self.new, variables) + ForStatement.for_replace(self.new, variables) - if_replace(self.new, variables) + IfStatement.if_replace(self.new, variables) for var, details in sorted(variables.items(), key=lambda s: -len(s[0])): if details['type'] == 'text': - text_fill(self.new, "$" + var, details['value']) + TextStatement.text_fill(self.new, "$" + var, details['value']) elif details['type'] == 'image': - image_fill(self.new, self.cnx.graphic_provider, "$" + var, details['value']) + ImageStatement.image_fill(self.new, self.cnx.graphic_provider, "$" + var, details['value']) elif details['type'] == 'html': - html_fill(self.new, "$" + var, details['value']) + HtmlStatement.html_fill(template=self, doc=self.new, variable="$" + var, value=details['value']) - html_replace(self.new) + HtmlStatement.html_replace(template=self, doc=self.new) - tables_fill(self.new, '$', '&') + TableStatement.tables_fill(self.new, variables, '$', '&') def export(self, name: str, should_replace=False) -> Union[str, None]: """ diff --git a/lotemplate/unittest/files/content/image.expected.txt b/lotemplate/unittest/files/content/image.expected.txt new file mode 100644 index 0000000..e33256f --- /dev/null +++ b/lotemplate/unittest/files/content/image.expected.txt @@ -0,0 +1,22 @@ +Test image modification + + + + + + + + + + + + + + + + +Here we are after the image + +Warning : this test does not test automatically the image insertion. You should check the result in the content folder in the image.unittest.odt to see if the image is well inserted. + +It only test that the system can find the image on the disk. 
diff --git a/lotemplate/unittest/files/content/image.jpg b/lotemplate/unittest/files/content/image.jpg new file mode 100644 index 0000000..6c52545 Binary files /dev/null and b/lotemplate/unittest/files/content/image.jpg differ diff --git a/lotemplate/unittest/files/content/image.json b/lotemplate/unittest/files/content/image.json new file mode 100644 index 0000000..d15750e --- /dev/null +++ b/lotemplate/unittest/files/content/image.json @@ -0,0 +1 @@ +{"image": {"type": "image", "value": "lotemplate/unittest/files/content/image.jpg"}} \ No newline at end of file diff --git a/lotemplate/unittest/files/content/image.odt b/lotemplate/unittest/files/content/image.odt new file mode 100644 index 0000000..51824f9 Binary files /dev/null and b/lotemplate/unittest/files/content/image.odt differ diff --git a/lotemplate/unittest/files/content/table.expected.txt b/lotemplate/unittest/files/content/table.expected.txt new file mode 100644 index 0000000..0e67f24 --- /dev/null +++ b/lotemplate/unittest/files/content/table.expected.txt @@ -0,0 +1,26 @@ + +Column1 +Column2 +Column3 +a +static +A +b +static +B +c +static +C +d +static +D +e +static +E +f +static +F +g +static +G + diff --git a/lotemplate/unittest/files/content/table.json b/lotemplate/unittest/files/content/table.json new file mode 100644 index 0000000..37508be --- /dev/null +++ b/lotemplate/unittest/files/content/table.json @@ -0,0 +1,26 @@ +{ + "var1": { + "type": "table", + "value": [ + "a", + "b", + "c", + "d", + "e", + "f", + "g" + ] + }, + "var2": { + "type": "table", + "value": [ + "A", + "B", + "C", + "D", + "E", + "F", + "G" + ] + } +} \ No newline at end of file diff --git a/lotemplate/unittest/files/content/table.odt b/lotemplate/unittest/files/content/table.odt new file mode 100644 index 0000000..685ee77 Binary files /dev/null and b/lotemplate/unittest/files/content/table.odt differ diff --git a/lotemplate/unittest/test_content.py b/lotemplate/unittest/test_content.py index 8b5850d..0951b78 100644 --- 
a/lotemplate/unittest/test_content.py +++ b/lotemplate/unittest/test_content.py @@ -102,5 +102,11 @@ def test_if_inside_for(self): def test_html_vars(self): self.assertTrue(compare_files('html_vars')) + def test_table(self): + self.assertTrue(compare_files('table')) + + def test_image(self): + self.assertTrue(compare_files('image')) + def test_debug(self): self.assertTrue(compare_files('debug')) diff --git a/lotemplate/utils.py b/lotemplate/utils.py index 223d765..7af1eea 100644 --- a/lotemplate/utils.py +++ b/lotemplate/utils.py @@ -8,8 +8,7 @@ __all__ = ( 'convert_to_datas_template', 'is_network_based', - 'get_file_url', - 'var_regexes' + 'get_file_url' ) import functools @@ -20,7 +19,6 @@ from typing import Union from sorcery import dict_of from copy import deepcopy -import regex from . import errors @@ -285,14 +283,3 @@ def get_file_url(file: str) -> str: """ return file if is_network_based(file) else ( "file://" + ((os.getcwd() + "/" + file) if file[0] != '/' else file)) - - -var_regexes = { - 'image': regex.compile(r'\$\w+'), - 'text': regex.compile( - r'\$(\w+(\(((?:\\.|.)*?)\))?)'), - 'table': regex.compile( - r'\$\w+' - r'(?:\((?(?R)|"[^"]*"|[^$&"\s()][^\s()]*)(?:[+ ](?&arg))*\))?' - r'|(?&\w+)'), -}