From 0436ae9c656135d63330bb845379d9a1924a443c Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Thu, 28 Sep 2023 11:07:38 -0600 Subject: [PATCH 01/11] update file-level --- cli.py | 63 ++++++++++--------- devtale/utils.py | 158 ++++++++++++++++++++++++++++------------------- 2 files changed, 128 insertions(+), 93 deletions(-) diff --git a/cli.py b/cli.py index 5084e27..877d223 100644 --- a/cli.py +++ b/cli.py @@ -7,12 +7,6 @@ import click from dotenv import load_dotenv -from devtale.aggregators import ( - GoAggregator, - JavascriptAggregator, - PHPAggregator, - PythonAggregator, -) from devtale.constants import ( ALLOWED_EXTENSIONS, ALLOWED_NO_CODE_EXTENSIONS, @@ -22,6 +16,7 @@ from devtale.utils import ( build_project_tree, extract_code_elements, + fuse_documentation, fuse_tales, get_unit_tale, prepare_code_elements, @@ -306,15 +301,20 @@ def process_file( debug: bool = False, cost_estimation: bool = False, ) -> None: + """It creates a dev tale for the file input.""" cost = 0 file_name = os.path.basename(file_path) file_ext = os.path.splitext(file_name)[-1] save_path = os.path.join(output_path, f"{file_name}.json") + # For the debugging mode we do not want to process the file, we only want + # to verify input. Useful to verify the repository/directories flow. if debug: logger.debug(f"FILE INFO:\nfile_path: {file_path}\nsave_path: {save_path}") return {"file_docstring": "-"}, cost + # Create output dir if it does not exists and only if we are not + # pre-estimating the cost. if not os.path.exists(output_path) and not cost_estimation: os.makedirs(output_path) @@ -322,9 +322,13 @@ def process_file( with open(file_path, "r") as file: code = file.read() + # Return empty devtale if the input file is empty if not code: return {"file_docstring": ""}, cost + # Avoid processing a file twice if we already have a tale for it. + # Only fuse it again. Useful to avoid GPT calls in case of debugging + # aggregators. if os.path.exists(save_path): logger.info(f"Skipping {file_name} as its tale file already exists.") with open(save_path, "r") as file: @@ -333,9 +337,12 @@ def process_file( fuse_documentation(code, found_tale, output_path, file_name, file_ext) return found_tale, cost + # For config/bash files we do not aim to document the file itself. We + # care about understanding what the file does. if not file_ext or file_ext in ALLOWED_NO_CODE_EXTENSIONS: # a small single chunk is enough no_code_file = split_text(code, chunk_size=5000)[0].page_content + # prepare input no_code_file_data = { "file_name": file_name, "file_content": no_code_file, @@ -350,6 +357,10 @@ def process_file( return {"file_docstring": file_docstring}, cost + # big_docs reduces the number of GPT-4 calls as we want to extract + # functions/classes names, while short_docs allows GPT-4 to focus in + # a more granular context to accurately generate the docstring for each + # function/class that it found. logger.info("split dev draft ideas") big_docs = split_code(code, language=LANGUAGES[file_ext], chunk_size=10000) short_docs = split_code(code, language=LANGUAGES[file_ext], chunk_size=3000) @@ -364,13 +375,16 @@ def process_file( if elements_set: code_elements.append(elements_set) + # Combine all the code elements extracted into a single general Dict + # without duplicates. 
logger.info("prepare code elements") code_elements_dict = prepare_code_elements(code_elements) # Make a copy to keep the original dict intact code_elements_copy = copy.deepcopy(code_elements_dict) - # clean + # Clean dict copy to remove keys with empty values and the summaries + # of each code chunk. code_elements_copy.pop("summary", None) if not code_elements_copy["classes"]: code_elements_copy.pop("classes", None) @@ -379,7 +393,8 @@ def process_file( logger.info("create tale sections") tales_list = [] - # process only if we have elements to document + # Generate a docstring for each class and function/method in the + # code_elements if code_elements_copy or cost_estimation: for idx, doc in enumerate(short_docs): tale, call_cost = get_unit_tale( @@ -392,15 +407,20 @@ def process_file( tales_list.append(tale) logger.info(f"tale section {str(idx+1)}/{len(short_docs)} done.") + # Combine all generated docstrings JSON-formated ouputs into a single, + # general one. logger.info("create dev tale") tale, errors = fuse_tales(tales_list, code, code_elements_dict) + # Check if we discarded some docstrings if len(errors) > 0: logger.info( f"We encountered errors while fusing the following \ tales for {file_name} - Corrupted tales: {errors}" ) + # Generate a top-level docstrings using as context all the summaries we got + # from each big_doc code chunk output logger.info("add dev tale summary") summaries = split_text(str(code_elements_dict["summary"]), chunk_size=9000) @@ -412,15 +432,18 @@ def process_file( ) cost += call_cost + # Add the docstrings in the code file if fuse and not cost_estimation: - # add docstring label only to insert it along the docstring into the code + # add devtale label into the top-file summary tale["file_docstring"] = DOCSTRING_LABEL + "\n" + file_docstring - fuse_documentation(code, tale, output_path, file_name, file_ext) + fused_save_path = os.path.join(output_path, file_name) + logger.info(f"save fused dev tale in: {fused_save_path}") + fuse_documentation(code, tale, file_ext, save_path=fused_save_path) + # remove devtale label tale["file_docstring"] = file_docstring logger.info(f"save dev tale in: {save_path}") - if not cost_estimation: with open(save_path, "w") as json_file: json.dump(tale, json_file, indent=2) @@ -428,24 +451,6 @@ def process_file( return tale, cost -def fuse_documentation(code, tale, output_path, file_name, file_ext): - save_path = os.path.join(output_path, file_name) - logger.info(f"save fused dev tale in: {save_path}") - - if file_ext == ".py": - aggregator = PythonAggregator() - elif file_ext == ".php": - aggregator = PHPAggregator() - elif file_ext == ".go": - aggregator = GoAggregator() - elif file_ext == ".js" or file_ext == ".ts" or file_ext == ".tsx": - aggregator = JavascriptAggregator() - - fused_tale = aggregator.document(code=code, documentation=tale) - with open(save_path, "w") as file: - file.write(fused_tale) - - @click.command() @click.option( "-p", diff --git a/devtale/utils.py b/devtale/utils.py index ec7dd8e..12e6345 100644 --- a/devtale/utils.py +++ b/devtale/utils.py @@ -11,6 +11,12 @@ from langchain.output_parsers import PydanticOutputParser from langchain.text_splitter import RecursiveCharacterTextSplitter +from devtale.aggregators import ( + GoAggregator, + JavascriptAggregator, + PHPAggregator, + PythonAggregator, +) from devtale.constants import DOCSTRING_LABEL, GPT_PRICE from devtale.schema import FileDocumentation from devtale.templates import ( @@ -82,7 +88,25 @@ def extract_code_elements( return result_string["text"], 
cost +def prepare_code_elements(code_elements): + """Convert GPT text output into a dictionary and combine each + dictionary into a single, general one + """ + elements = {"classes": [], "methods": [], "summary": []} + for code_element in code_elements: + info = _process_extracted_code_element(code_element) + elements["classes"].extend(info["classes"]) + elements["methods"].extend(info["methods"]) + elements["summary"].append(info["summary"]) + + # remove duplicates + elements["classes"] = list(set(elements["classes"])) + elements["methods"] = list(set(elements["methods"])) + return elements + + def _process_extracted_code_element(text: str): + """It converts GPT text output into a dictionary of code elements""" classes_match = re.search(r"classes=(\[.*?\])", text) methods_match = re.search(r"methods=(\[.*?\])", text) summary_match = re.search(r'summary="([^"]*)"', text) @@ -105,18 +129,62 @@ def _process_extracted_code_element(text: str): return {"classes": classes, "methods": methods, "summary": summary} -def prepare_code_elements(code_elements): - elements = {"classes": [], "methods": [], "summary": []} - for code_element in code_elements: - info = _process_extracted_code_element(code_element) - elements["classes"].extend(info["classes"]) - elements["methods"].extend(info["methods"]) - elements["summary"].append(info["summary"]) +def fuse_tales(tales_list, code, code_elements_dict): + """Combine all the generated docstrings JSON-formatted GPT outputs into + a single one, remove hallucinations and duplicates. + """ + fused_tale = {"classes": [], "methods": []} + errors = [] + unique_methods = set() + unique_classes = set() - # remove duplicates - elements["classes"] = list(set(elements["classes"])) - elements["methods"] = list(set(elements["methods"])) - return elements + for tale in tales_list: + if "classes" in tale: + for class_info in tale["classes"]: + if isinstance(class_info, dict): + class_name = class_info["class_name"] + if class_name not in unique_classes and not _is_hallucination( + class_name, code, code_elements_dict["classes"] + ): + unique_classes.add(class_name) + # Attach the devtale label on each docstring + class_info["class_docstring"] = ( + DOCSTRING_LABEL + "\n" + class_info["class_docstring"] + ) + fused_tale["classes"].append(class_info) + else: + if tale not in errors: + errors.append(tale) + + if "methods" in tale: + for method_info in tale["methods"]: + if isinstance(method_info, dict): + method_name = method_info["method_name"] + if method_name not in unique_methods and not _is_hallucination( + method_name, code, code_elements_dict["methods"] + ): + unique_methods.add(method_name) + # Attach the dectale label on each docstring + method_info["method_docstring"] = ( + DOCSTRING_LABEL + "\n" + method_info["method_docstring"] + ) + fused_tale["methods"].append(method_info) + else: + if tale not in errors: + errors.append(tale) + + return fused_tale, errors + + +def _is_hallucination(code_definition, code, expected_definitions): + # Verify that the code_definition is expected + if code_definition not in expected_definitions: + return True + + # Check if the code_definition exists within the code + if not re.search(r"\b" + re.escape(code_definition) + r"\b", code): + return True + return False def redact_tale_information( @@ -210,59 +278,6 @@ def get_unit_tale( return json_answer, cost -def is_hallucination(code_definition, code, expected_definitions): - # Verify that the code_definition is expected - if code_definition not in expected_definitions: - return True - 
- # Check if the code_definition exists within the code - if not re.search(r"\b" + re.escape(code_definition) + r"\b", code): - return True - return False - - -def fuse_tales(tales_list, code, code_elements_dict): - fused_tale = {"classes": [], "methods": []} - errors = [] - unique_methods = set() - unique_classes = set() - - for tale in tales_list: - if "classes" in tale: - for class_info in tale["classes"]: - if isinstance(class_info, dict): - class_name = class_info["class_name"] - if class_name not in unique_classes and not is_hallucination( - class_name, code, code_elements_dict["classes"] - ): - unique_classes.add(class_name) - class_info["class_docstring"] = ( - DOCSTRING_LABEL + "\n" + class_info["class_docstring"] - ) - fused_tale["classes"].append(class_info) - else: - if tale not in errors: - errors.append(tale) - - if "methods" in tale: - for method_info in tale["methods"]: - if isinstance(method_info, dict): - method_name = method_info["method_name"] - if method_name not in unique_methods and not is_hallucination( - method_name, code, code_elements_dict["methods"] - ): - unique_methods.add(method_name) - method_info["method_docstring"] = ( - DOCSTRING_LABEL + "\n" + method_info["method_docstring"] - ) - fused_tale["methods"].append(method_info) - else: - if tale not in errors: - errors.append(tale) - - return fused_tale, errors - - def _add_escape_characters(invalid_json): control_char_pattern = re.compile(r"[\x00-\x1F\x7F-\x9F]") unescaped_chars = control_char_pattern.findall(invalid_json) @@ -306,3 +321,18 @@ def build_project_tree(root_dir, indent="", gitignore_patterns=None): file_paths.append(item_path) return tree, file_paths + + +def fuse_documentation(code, tale, file_ext, save_path): + if file_ext == ".py": + aggregator = PythonAggregator() + elif file_ext == ".php": + aggregator = PHPAggregator() + elif file_ext == ".go": + aggregator = GoAggregator() + elif file_ext == ".js" or file_ext == ".ts" or file_ext == ".tsx": + aggregator = JavascriptAggregator() + + fused_tale = aggregator.document(code=code, documentation=tale) + with open(save_path, "w") as file: + file.write(fused_tale) From 2c90cf2f1d1eb025810945745720ba613f5af338 Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Thu, 28 Sep 2023 11:09:24 -0600 Subject: [PATCH 02/11] fix comments format --- cli.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cli.py b/cli.py index 877d223..cc21a6b 100644 --- a/cli.py +++ b/cli.py @@ -322,7 +322,7 @@ def process_file( with open(file_path, "r") as file: code = file.read() - # Return empty devtale if the input file is empty + # Return empty devtale if the input file is empty. if not code: return {"file_docstring": ""}, cost @@ -380,7 +380,7 @@ def process_file( logger.info("prepare code elements") code_elements_dict = prepare_code_elements(code_elements) - # Make a copy to keep the original dict intact + # Make a copy to keep the original dict intact. code_elements_copy = copy.deepcopy(code_elements_dict) # Clean dict copy to remove keys with empty values and the summaries @@ -394,7 +394,7 @@ def process_file( logger.info("create tale sections") tales_list = [] # Generate a docstring for each class and function/method in the - # code_elements + # code_elements. 
if code_elements_copy or cost_estimation: for idx, doc in enumerate(short_docs): tale, call_cost = get_unit_tale( @@ -412,7 +412,7 @@ def process_file( logger.info("create dev tale") tale, errors = fuse_tales(tales_list, code, code_elements_dict) - # Check if we discarded some docstrings + # Check if we discarded some docstrings. if len(errors) > 0: logger.info( f"We encountered errors while fusing the following \ @@ -420,7 +420,7 @@ def process_file( ) # Generate a top-level docstrings using as context all the summaries we got - # from each big_doc code chunk output + # from each big_doc code chunk output. logger.info("add dev tale summary") summaries = split_text(str(code_elements_dict["summary"]), chunk_size=9000) @@ -432,15 +432,15 @@ def process_file( ) cost += call_cost - # Add the docstrings in the code file + # Add the docstrings in the code file. if fuse and not cost_estimation: - # add devtale label into the top-file summary + # add devtale label into the top-file summary. tale["file_docstring"] = DOCSTRING_LABEL + "\n" + file_docstring fused_save_path = os.path.join(output_path, file_name) logger.info(f"save fused dev tale in: {fused_save_path}") fuse_documentation(code, tale, file_ext, save_path=fused_save_path) - # remove devtale label + # Remove devtale label. tale["file_docstring"] = file_docstring logger.info(f"save dev tale in: {save_path}") From 048052b4492117442580c30bf42651618af45ef4 Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Thu, 28 Sep 2023 11:55:55 -0600 Subject: [PATCH 03/11] update folder-level --- cli.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/cli.py b/cli.py index cc21a6b..9746c4d 100644 --- a/cli.py +++ b/cli.py @@ -184,18 +184,24 @@ def process_folder( folder_full_name: str = None, cost_estimation: bool = False, ) -> None: + """It creates a dev tale for each file in the directory without exploring + subdirectories, and it generates a README section for the folder. + """ cost = 0 save_path = os.path.join(output_path, os.path.basename(folder_path)) tales = [] + # Iterate through each file in the folder for file_name in os.listdir(folder_path): file_path = os.path.join(folder_path, file_name) + # Check it if is a file that we need to process if os.path.isfile(file_path) and ( os.path.splitext(file_name)[1] in ALLOWED_EXTENSIONS or os.path.splitext(file_name)[1] in ALLOWED_NO_CODE_EXTENSIONS ): logger.info(f"processing {file_path}") + # Create dev tale for the file try: file_tale, file_cost = process_file( file_path, save_path, model_name, fuse, debug, cost_estimation @@ -207,6 +213,8 @@ def process_folder( ) file_tale = None + # Create a dictionary with the tale's file_docstrings values to use them + # as context for the folder's README section if file_tale is not None: if file_tale["file_docstring"]: if not folder_full_name: @@ -214,9 +222,13 @@ def process_folder( os.path.abspath(folder_path) ) + # If this is a root folder, make its name more aesthetic. if folder_full_name == ".": folder_full_name = "./" + # Check if we already have the folder_name as key, if yes, then + # append the file_docstring on it. Useful when working in a + # repository level. folder_entry = next( ( item @@ -230,7 +242,8 @@ def process_folder( "folder_name": folder_full_name, "folder_files": [], } - if folder_full_name == ".": + # Add a generic description in case this is a root directory. 
+                    if folder_full_name == "./":
                         folder_entry[
                             "folder_description"
                         ] = """
@@ -247,28 +260,33 @@ def process_folder(
                 }
             )
 
+    # For the debugging mode we do not want to generate the folder's README
+    # section. We only want to verify the input flow.
     if debug:
-        logger.debug(
-            f"""FOLDER INFO:
-            folder_path: {folder_path}
-            output_path: {output_path}
-            save_path: {save_path}
-            """
-        )
+        logger.debug(f"FOLDER INFO: folder_path: {folder_path}")
+        logger.debug(f"FOLDER INFO: output_path: {output_path}")
+        logger.debug(f"FOLDER INFO: save_path: {save_path}")
         logger.debug(f"FILE_TALES: {tales}")
         return "-", "-", cost
 
     if tales:
+        # Generate the folder's README section using the tale summaries as context.
        files_summaries = split_text(str(tales), chunk_size=10000)
-        # split into two calls to avoid issues with json decoding markdow text.
        folder_readme, fl_cost = redact_tale_information(
            "folder-level",
            files_summaries,
            model_name="gpt-3.5-turbo-16k",
            cost_estimation=cost_estimation,
        )
+
+        # Because of the template, GPT might also add the line separator, so we need
+        # to clean it.
        folder_readme = folder_readme.replace("----------", "")
 
+        # Generate a folder one-line description using the folder's readme as context.
+        # This is a separate call to avoid issues with json attempting to decode
+        # markdown text, and its purpose is to be used as context for the repository
+        # mode.
        folder_overview, fd_cost = redact_tale_information(
            "folder-description",
            folder_readme,
            model_name="gpt-3.5-turbo-16k",
            cost_estimation=cost_estimation,
        )
 
        cost += fl_cost + fd_cost
 
+        # save folder tale if we are not pre-estimating cost.
        if not cost_estimation:
            logger.info("save folder json..")
            with open(os.path.join(save_path, "folder_level.json"), "w") as json_file:

From 4021f01c5ff5ef3964e6628ace83538341c09b12 Mon Sep 17 00:00:00 2001
From: Alberto Gaona
Date: Thu, 28 Sep 2023 12:49:14 -0600
Subject: [PATCH 04/11] update repository-level

---
 cli.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/cli.py b/cli.py
index 9746c4d..583c92e 100644
--- a/cli.py
+++ b/cli.py
@@ -41,13 +41,16 @@ def process_repository(
     debug: bool = False,
     cost_estimation: bool = False,
 ) -> None:
+    """It creates a dev tale for each file in the repository, and it
+    generates a README for the whole repository.
+    """
     cost = 0
     folder_tales = {
         "repository_name": os.path.basename(os.path.abspath(root_path)),
         "folders": [],
     }
 
-    # get original readme before creating a new one
+    # Extract the content of the original README if there is one already.
     original_readme_content = None
     for file_name in ["readme.md", "README.md"]:
         readme_path = os.path.join(root_path, file_name)
@@ -61,7 +64,8 @@ def process_repository(
             logger.info(f"Error keeping the original readme file: {e}")
             break
 
-    # get project structure before we modify it
+    # Check if we have a gitignore file to extract the correct project tree
+    # and files.
     gitignore_path = os.path.join(root_path, ".gitignore")
     if os.path.exists(gitignore_path):
         with open(gitignore_path, "r") as gitignore_file:
@@ -71,22 +75,33 @@ def process_repository(
     else:
         gitignore_patterns = None
 
+    # Get the project tree before modifying it, along with the complete list of
+    # files that the repository has.
     project_tree, file_paths = build_project_tree(
         root_path, gitignore_patterns=gitignore_patterns
     )
     project_tree = ".\n" + project_tree
 
+    # Extract the folder paths from the files list. This allows us to avoid
+    # processing folders that should be ignored, and to use the process_folder logic. 
     folders = list(set([os.path.dirname(file_path) for file_path in file_paths]))
+
+    # sort to always have the root folder at the beginning of the list.
     folders = sorted(folders, key=lambda path: path.count("/"))
 
+    # Get the folder's README section of each folder while creating a dev tale
+    # for each file.
     folders_readmes = []
     for folder_path in folders:
         try:
+            # Fix the folder path to avoid issues with the file system.
             if not folder_path.endswith("/"):
                 folder_path += "/"
 
             folder_full_name = os.path.relpath(folder_path, root_path)
+
+            # Generate the folder's README, its one-line description, and
+            # extract the cost of documenting the folder.
             folder_readme, folder_tale, folder_cost = process_folder(
                 folder_path=folder_path,
                 output_path=os.path.join(output_path, folder_full_name)
@@ -107,9 +122,11 @@ def process_repository(
                 )
                 folder_tale = None
 
+            # Create a dictionary with the folder's info that serves as context for
+            # generating the main repository README.
             if folder_tale:
                 folders_readmes.append("\n\n" + folder_readme)
-                # add root folder summary information
+                # Fill in the root folder summary information.
                 if folder_path == folders[0]:
                     folder_tales["folders"].append(
                         {
@@ -126,11 +143,13 @@ def process_repository(
                 }
             )
 
+    # For debugging, we only care about seeing the file input workflow.
     if debug:
         logger.debug(f"FOLDER_TALES: {folder_tales}")
         return None
 
     if folder_tales:
+        # Generate the main README using the folder summaries as context.
         folder_summaries = split_text(str(folder_tales), chunk_size=15000)
         root_readme, call_cost = redact_tale_information(
             "root-level",
@@ -139,18 +158,21 @@ def process_repository(
             cost_estimation=cost_estimation,
         )
         cost += call_cost
+
+        # Because of the template, GPT might also add the line separator, so we need
+        # to clean it.
         root_readme = root_readme.replace("----------", "")
 
-        # inject folders information
+        # Append the folders README sections.
         if folders_readmes:
             folders_information = "\n\n## Folders" + "".join(folders_readmes)
             root_readme = root_readme + folders_information
 
-        # inject project tree
+        # Append the project tree.
         tree = f"\n\n## Project Tree\n```bash\n{project_tree}```\n\n"
         root_readme = root_readme + tree
 
-        # inject original readme if there is one
+        # Append the original readme content as extra notes, removing the header.
         if original_readme_content:
             filtered_original_readme = [
                 line for line in original_readme_content if not line.startswith("# ")
@@ -158,9 +180,9 @@ def process_repository(
             modified_original_readme = "\n\n## Extra notes\n\n" + "".join(
                 filtered_original_readme
             )
-            root_readme = root_readme + modified_original_readme
 
+    # save main README if we are not pre-estimating cost. 
if not cost_estimation: logger.info("save root json..") with open(os.path.join(output_path, "root_level.json"), "w") as json_file: From fb26eea52916e7da9ecb090a0db74e8717d16764 Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Thu, 28 Sep 2023 13:11:20 -0600 Subject: [PATCH 05/11] clean --- cli.py | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/cli.py b/cli.py index 583c92e..7dc88fc 100644 --- a/cli.py +++ b/cli.py @@ -26,8 +26,6 @@ ) DEFAULT_OUTPUT_PATH = "devtale_demo/" -DEFAULT_MODEL_NAME = "gpt-4" - logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -36,7 +34,6 @@ def process_repository( root_path: str, output_path: str = DEFAULT_OUTPUT_PATH, - model_name: str = DEFAULT_MODEL_NAME, fuse: bool = False, debug: bool = False, cost_estimation: bool = False, @@ -107,7 +104,6 @@ def process_repository( output_path=os.path.join(output_path, folder_full_name) if folder_full_name != "." else output_path, - model_name=model_name, fuse=fuse, debug=debug, folder_full_name=folder_full_name, @@ -200,7 +196,6 @@ def process_repository( def process_folder( folder_path: str, output_path: str = DEFAULT_OUTPUT_PATH, - model_name: str = DEFAULT_MODEL_NAME, fuse: bool = False, debug: bool = False, folder_full_name: str = None, @@ -226,7 +221,7 @@ def process_folder( # Create dev tale for the file try: file_tale, file_cost = process_file( - file_path, save_path, model_name, fuse, debug, cost_estimation + file_path, save_path, fuse, debug, cost_estimation ) cost += file_cost except Exception as e: @@ -337,7 +332,6 @@ def process_folder( def process_file( file_path: str, output_path: str = DEFAULT_OUTPUT_PATH, - model_name: str = DEFAULT_MODEL_NAME, fuse: bool = False, debug: bool = False, cost_estimation: bool = False, @@ -375,7 +369,12 @@ def process_file( with open(save_path, "r") as file: found_tale = json.load(file) if fuse: - fuse_documentation(code, found_tale, output_path, file_name, file_ext) + fuse_documentation( + code=code, + tale=found_tale, + file_ext=file_ext, + save_path=os.path.join(output_path, file_name), + ) return found_tale, cost # For config/bash files we do not aim to document the file itself. We @@ -410,7 +409,7 @@ def process_file( code_elements = [] for idx, doc in enumerate(big_docs): elements_set, call_cost = extract_code_elements( - big_doc=doc, model_name=model_name, cost_estimation=cost_estimation + big_doc=doc, model_name="gpt-4", cost_estimation=cost_estimation ) cost += call_cost if elements_set: @@ -441,7 +440,7 @@ def process_file( tale, call_cost = get_unit_tale( short_doc=doc, code_elements=code_elements_copy, - model_name=model_name, + model_name="gpt-4", cost_estimation=cost_estimation, ) cost += call_cost @@ -522,37 +521,28 @@ def process_file( "output_path", required=False, default=DEFAULT_OUTPUT_PATH, - help="The destination folder where you want to save the documentation outputs", -) -@click.option( - "-n", - "--model-name", - "model_name", - required=False, - default=DEFAULT_MODEL_NAME, - help="The OpenAI model name you want to use. \ - https://platform.openai.com/docs/models", + help="The destination folder where you want to save the documentation outputs. 
\ + Default: devtale_demo/", ) @click.option( "--debug", "debug", is_flag=True, default=False, - help="Mock answer and avoid GPT calls", + help="Mock answers avoiding any GPT call.", ) @click.option( "--estimation", "cost_estimation", is_flag=True, default=False, - help="When true, estimate the cost of openAI's API usage, without making any call", + help="When true, estimate the cost of openAI's API usage, without making any call.", ) def main( path: str, recursive: bool, fuse: bool, output_path: str = DEFAULT_OUTPUT_PATH, - model_name: str = DEFAULT_MODEL_NAME, debug: bool = False, cost_estimation: bool = False, ): @@ -569,7 +559,6 @@ def main( price = process_repository( root_path=path, output_path=output_path, - model_name=model_name, fuse=fuse, debug=debug, cost_estimation=cost_estimation, @@ -579,7 +568,6 @@ def main( _, _, price = process_folder( folder_path=path, output_path=output_path, - model_name=model_name, fuse=fuse, debug=debug, cost_estimation=cost_estimation, @@ -589,7 +577,6 @@ def main( _, price = process_file( file_path=path, output_path=output_path, - model_name=model_name, fuse=fuse, debug=debug, cost_estimation=cost_estimation, From e684743f00a9a27a472b4f5e850f508d4905081c Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Thu, 28 Sep 2023 13:33:08 -0600 Subject: [PATCH 06/11] update action --- README.md | 3 ++- action.yml | 26 +++++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f0eba83..bd29578 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # devtale -Every software product depends on some legacy, undocumented code repositories, whose authors left the company years ago. +Every software product depends on some legacy, undocumented code repositories, whose authors left the company years ago. Who isn't afraid to make a change, if the code is unreadable? @@ -60,6 +60,7 @@ jobs: path: ${{ github.workspace }} recursive: true target_branch: main + save_tales: false ``` The `recursive` option allows you to document the entire repository. Alternatively, you can specify a specific path to document a single file or folder and set `recursive` to `false`. The workflow action will automatically create the `devtale/documentation` branch and push a new pull request for your review towards the `target_branch`, including the added documentation. diff --git a/action.yml b/action.yml index 9535024..a9bb9ae 100644 --- a/action.yml +++ b/action.yml @@ -6,18 +6,22 @@ branding: inputs: openai_api_key: - description: "Your OpenAI API key" + description: "Your OpenAI API key." required: true path: description: "Path to your repository, folder, or file." required: true recursive: - description: "True if you want to document the full repository. Otherwise False" + description: "True if you want to document the full repository. Otherwise False." required: false default: false target_branch: - description: "Branch name for the documentation pull request." + description: "Base branch name to which the documentation pull request should point." required: true + save_tales: + description: "True if you want to keep the tale files. Otherwise False to remove them." + required: false + default: false runs: using: "composite" @@ -48,12 +52,16 @@ runs: - name: Clean Documentation Files run: | - rm -f *.py.json - rm -f *.php.json - rm -f *.go.json - rm -f *.js.json - rm -f *folder_level.json - rm -f *root_level.json + if ! 
${{ inputs.save_tales }}; then + rm -f *.py.json + rm -f *.php.json + rm -f *.go.json + rm -f *.js.json + rm -f *.ts.json + rm -f *.tsx.json + rm -f *folder_level.json + rm -f *root_level.json + fi shell: bash - name: Push PR From e8d3f0dfa833e82333e6e043f9d785afb3f4ac99 Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Thu, 28 Sep 2023 13:58:50 -0600 Subject: [PATCH 07/11] update utils --- cli.py | 4 +- devtale/utils.py | 290 +++++++++++++++++++++++------------------------ 2 files changed, 147 insertions(+), 147 deletions(-) diff --git a/cli.py b/cli.py index 7dc88fc..69d5479 100644 --- a/cli.py +++ b/cli.py @@ -17,7 +17,7 @@ build_project_tree, extract_code_elements, fuse_documentation, - fuse_tales, + fuse_tales_chunks, get_unit_tale, prepare_code_elements, redact_tale_information, @@ -450,7 +450,7 @@ def process_file( # Combine all generated docstrings JSON-formated ouputs into a single, # general one. logger.info("create dev tale") - tale, errors = fuse_tales(tales_list, code, code_elements_dict) + tale, errors = fuse_tales_chunks(tales_list, code, code_elements_dict) # Check if we discarded some docstrings. if len(errors) > 0: diff --git a/devtale/utils.py b/devtale/utils.py index 12e6345..74fdd1b 100644 --- a/devtale/utils.py +++ b/devtale/utils.py @@ -38,16 +38,6 @@ } -def calculate_cost(input: str, model: str): - if model == "text-davinci-003": - encoding = "p50k_base" - else: - encoding = "cl100k_base" - - tokens = tiktoken.get_encoding(encoding).encode(input) - return (len(tokens) / 1000) * GPT_PRICE[model] - - def split_text(text, chunk_size=1000, chunk_overlap=0): text_splitter = RecursiveCharacterTextSplitter( chunk_size=chunk_size, chunk_overlap=chunk_overlap @@ -76,7 +66,7 @@ def extract_code_elements( ) if cost_estimation: - estimated_cost = calculate_cost( + estimated_cost = _calculate_cost( prompt.format(code=big_doc.page_content), model_name ) return "", estimated_cost @@ -88,6 +78,72 @@ def extract_code_elements( return result_string["text"], cost +def get_unit_tale( + short_doc, code_elements, model_name="gpt-4", verbose=False, cost_estimation=False +): + parser = PydanticOutputParser(pydantic_object=FileDocumentation) + prompt = PromptTemplate( + template=CODE_LEVEL_TEMPLATE, + input_variables=["code", "code_elements"], + partial_variables={"format_instructions": parser.get_format_instructions()}, + ) + teller_of_tales = LLMChain( + llm=ChatOpenAI(model_name=model_name), prompt=prompt, verbose=verbose + ) + + if cost_estimation: + estimated_cost = _calculate_cost( + prompt.format( + code=short_doc.page_content, code_elements=str(code_elements) + ), + model_name, + ) + return {"classes": [], "methods": []}, estimated_cost + + with get_openai_callback() as cb: + result_string = teller_of_tales( + {"code": short_doc.page_content, "code_elements": code_elements} + ) + cost = cb.total_cost + + json_answer = _convert_to_json(result_string) + if not json_answer: + print("Returning empty JSON due to a failure") + json_answer = {"classes": [], "methods": []} + return json_answer, cost + + +def redact_tale_information( + content_type, + docs, + verbose=False, + model_name="text-davinci-003", + cost_estimation=False, +): + prompt = PromptTemplate( + template=TYPE_INFORMATION[content_type], input_variables=["information"] + ) + teller_of_tales = LLMChain( + llm=OpenAI(model_name=model_name), prompt=prompt, verbose=verbose + ) + if content_type not in ["no-code-file", "folder-description"]: + information = str(docs[0].page_content) + else: + information = str(docs) + + if 
cost_estimation: + estimated_cost = _calculate_cost( + prompt.format(information=information), model_name + ) + return "", estimated_cost + + with get_openai_callback() as cb: + text_answer = teller_of_tales({"information": information}) + cost = cb.total_cost + + return text_answer["text"], cost + + def prepare_code_elements(code_elements): """Convert GPT text output into a dictionary and combine each dictionary into a single, general one @@ -105,31 +161,7 @@ def prepare_code_elements(code_elements): return elements -def _process_extracted_code_element(text: str): - """It converts GPT text output into a dictionary of code elements""" - classes_match = re.search(r"classes=(\[.*?\])", text) - methods_match = re.search(r"methods=(\[.*?\])", text) - summary_match = re.search(r'summary="([^"]*)"', text) - - classes = [] - methods = [] - summary = "" - - if classes_match: - classes_str = classes_match.group(1) - classes = re.findall(r'"(.*?)"', classes_str) - - if methods_match: - methods_str = methods_match.group(1) - methods = re.findall(r'"(.*?)"', methods_str) - - if summary_match: - summary = summary_match.group(1) - - return {"classes": classes, "methods": methods, "summary": summary} - - -def fuse_tales(tales_list, code, code_elements_dict): +def fuse_tales_chunks(tales_list, code, code_elements_dict): """Combine all the generated docstrings JSON-formatted GPT outputs into a single one, remove hallucinations and duplicates. """ @@ -176,49 +208,58 @@ def fuse_tales(tales_list, code, code_elements_dict): return fused_tale, errors -def _is_hallucination(code_definition, code, expected_definitions): - # Verify that the code_definition is expected - if code_definition not in expected_definitions: - return True +def build_project_tree(root_dir, indent="", gitignore_patterns=None): + if gitignore_patterns is None: + gitignore_patterns = [] - # Check if the code_definition exists within the code - if not re.search(r"\b" + re.escape(code_definition) + r"\b", code): - return True - return False + tree = "" + items = [item for item in os.listdir(root_dir) if not item.startswith(".")] + file_paths = [] + for item in sorted(items): + item_path = os.path.join(root_dir, item) + if _should_ignore(item_path, gitignore_patterns): + continue + if os.path.isdir(item_path): + tree += indent + "├── " + item + "\n" + subtree, subfile_paths = build_project_tree( + item_path, indent + "│ ", gitignore_patterns + ) + tree += subtree + file_paths.extend(subfile_paths) + else: + tree += indent + "└── " + item + "\n" + file_paths.append(item_path) -def redact_tale_information( - content_type, - docs, - verbose=False, - model_name="text-davinci-003", - cost_estimation=False, -): - prompt = PromptTemplate( - template=TYPE_INFORMATION[content_type], input_variables=["information"] - ) - teller_of_tales = LLMChain( - llm=OpenAI(model_name=model_name), prompt=prompt, verbose=verbose - ) - if content_type not in ["no-code-file", "folder-description"]: - information = str(docs[0].page_content) - else: - information = str(docs) + return tree, file_paths - if cost_estimation: - estimated_cost = calculate_cost( - prompt.format(information=information), model_name - ) - return "", estimated_cost - with get_openai_callback() as cb: - text_answer = teller_of_tales({"information": information}) - cost = cb.total_cost +def fuse_documentation(code, tale, file_ext, save_path): + if file_ext == ".py": + aggregator = PythonAggregator() + elif file_ext == ".php": + aggregator = PHPAggregator() + elif file_ext == ".go": + aggregator = 
GoAggregator() + elif file_ext == ".js" or file_ext == ".ts" or file_ext == ".tsx": + aggregator = JavascriptAggregator() - return text_answer["text"], cost + fused_tale = aggregator.document(code=code, documentation=tale) + with open(save_path, "w") as file: + file.write(fused_tale) -def convert_to_json(text_answer): +def _calculate_cost(input: str, model: str): + if model == "text-davinci-003": + encoding = "p50k_base" + else: + encoding = "cl100k_base" + + tokens = tiktoken.get_encoding(encoding).encode(input) + return (len(tokens) / 1000) * GPT_PRICE[model] + + +def _convert_to_json(text_answer): try: result_json = json.loads(text_answer["text"]) return result_json @@ -243,41 +284,6 @@ def convert_to_json(text_answer): return None -def get_unit_tale( - short_doc, code_elements, model_name="gpt-4", verbose=False, cost_estimation=False -): - parser = PydanticOutputParser(pydantic_object=FileDocumentation) - prompt = PromptTemplate( - template=CODE_LEVEL_TEMPLATE, - input_variables=["code", "code_elements"], - partial_variables={"format_instructions": parser.get_format_instructions()}, - ) - teller_of_tales = LLMChain( - llm=ChatOpenAI(model_name=model_name), prompt=prompt, verbose=verbose - ) - - if cost_estimation: - estimated_cost = calculate_cost( - prompt.format( - code=short_doc.page_content, code_elements=str(code_elements) - ), - model_name, - ) - return {"classes": [], "methods": []}, estimated_cost - - with get_openai_callback() as cb: - result_string = teller_of_tales( - {"code": short_doc.page_content, "code_elements": code_elements} - ) - cost = cb.total_cost - - json_answer = convert_to_json(result_string) - if not json_answer: - print("Returning empty JSON due to a failure") - json_answer = {"classes": [], "methods": []} - return json_answer, cost - - def _add_escape_characters(invalid_json): control_char_pattern = re.compile(r"[\x00-\x1F\x7F-\x9F]") unescaped_chars = control_char_pattern.findall(invalid_json) @@ -289,50 +295,44 @@ def _add_escape_characters(invalid_json): return invalid_json -def _should_ignore(path, gitignore_patterns): - path = Path(path) - for pattern in gitignore_patterns: - if path.match(pattern) or any(p.match(pattern) for p in path.parents): - return True - return False +def _process_extracted_code_element(text: str): + """It converts GPT text output into a dictionary of code elements""" + classes_match = re.search(r"classes=(\[.*?\])", text) + methods_match = re.search(r"methods=(\[.*?\])", text) + summary_match = re.search(r'summary="([^"]*)"', text) + classes = [] + methods = [] + summary = "" -def build_project_tree(root_dir, indent="", gitignore_patterns=None): - if gitignore_patterns is None: - gitignore_patterns = [] + if classes_match: + classes_str = classes_match.group(1) + classes = re.findall(r'"(.*?)"', classes_str) - tree = "" - items = [item for item in os.listdir(root_dir) if not item.startswith(".")] - file_paths = [] + if methods_match: + methods_str = methods_match.group(1) + methods = re.findall(r'"(.*?)"', methods_str) - for item in sorted(items): - item_path = os.path.join(root_dir, item) - if _should_ignore(item_path, gitignore_patterns): - continue - if os.path.isdir(item_path): - tree += indent + "├── " + item + "\n" - subtree, subfile_paths = build_project_tree( - item_path, indent + "│ ", gitignore_patterns - ) - tree += subtree - file_paths.extend(subfile_paths) - else: - tree += indent + "└── " + item + "\n" - file_paths.append(item_path) + if summary_match: + summary = summary_match.group(1) - return tree, file_paths + 
return {"classes": classes, "methods": methods, "summary": summary} -def fuse_documentation(code, tale, file_ext, save_path): - if file_ext == ".py": - aggregator = PythonAggregator() - elif file_ext == ".php": - aggregator = PHPAggregator() - elif file_ext == ".go": - aggregator = GoAggregator() - elif file_ext == ".js" or file_ext == ".ts" or file_ext == ".tsx": - aggregator = JavascriptAggregator() +def _is_hallucination(code_definition, code, expected_definitions): + # Verify that the code_definition is expected + if code_definition not in expected_definitions: + return True - fused_tale = aggregator.document(code=code, documentation=tale) - with open(save_path, "w") as file: - file.write(fused_tale) + # Check if the code_definition exists within the code + if not re.search(r"\b" + re.escape(code_definition) + r"\b", code): + return True + return False + + +def _should_ignore(path, gitignore_patterns): + path = Path(path) + for pattern in gitignore_patterns: + if path.match(pattern) or any(p.match(pattern) for p in path.parents): + return True + return False From 3250becb5d84797f3d10ab6e9226b733b7d95c19 Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Fri, 29 Sep 2023 18:58:14 -0600 Subject: [PATCH 08/11] update javascript aggregator --- devtale/aggregators/javascript.py | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/devtale/aggregators/javascript.py b/devtale/aggregators/javascript.py index 3ad8878..8e25ac6 100644 --- a/devtale/aggregators/javascript.py +++ b/devtale/aggregators/javascript.py @@ -11,6 +11,7 @@ def document(self, documentation, code): documented_code = self._add_docstrings( documentation, documented_code, type="methods" ) + documented_code = self._add_tsx_docstrings(documentation, documented_code) documented_code = self._add_docstrings( documentation, documented_code, type="classes" ) @@ -49,6 +50,41 @@ def _add_docstrings(self, documentation, code, type="methods"): for i, line in enumerate(lines): if re.findall(pattern, line, re.MULTILINE): if previous_line: + # Check if the function or class is already documented + if "*/" not in previous_line and "//" not in previous_line: + indentation = self._extract_indentation(line) + fixed_docstring = self._break_large_strings(docstring) + fixed_docstring = self._format_docstring( + fixed_docstring, indentation + ) + lines.insert(i, fixed_docstring) + break + elif line.strip(): + previous_line = line + + return "\n".join(lines) + + def _add_tsx_docstrings(self, documentation, code): + entities = documentation["methods"] + lines = code.splitlines() + previous_line = None + + for entity in entities: + name_to_search = entity["method_name"] + docstring = entity["method_docstring"] + + pattern = ( + r"" + + re.escape(name_to_search) + + "\s*=\s*(\(\s*\)\s*=>\s*{|\(\s*([^)]*)\s*\)\s*=>)|" + + re.escape(name_to_search) + + r"\(\)\s*=>\s*{\)" + ) + + for i, line in enumerate(lines): + if re.findall(pattern, line, re.MULTILINE): + if previous_line: + # Check if the function or class is already documented if "*/" not in previous_line and "//" not in previous_line: indentation = self._extract_indentation(line) fixed_docstring = self._break_large_strings(docstring) @@ -74,6 +110,7 @@ def _extract_indentation(self, code_line): return indentation def _format_docstring(self, docstring, indentation): + """It adds the in-line comment key character.""" lines = docstring.split("\n") js_docstring = "\n" + " " * indentation + "/*\n" for line in lines: @@ -84,6 +121,7 @@ def _format_docstring(self, docstring, 
indentation): def _document_file(self, documentation, code): file_description = self._break_large_strings(documentation["file_docstring"]) words = code.split() + # Check if the file already has a top-file docstring if words[0] != "//" and words[0] != "/*" and not words[0].startswith("/*"): code = "/*" + file_description + "*/\n" + code From 3539bb11a893aa585889d1e0bb13830bceeefdc8 Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Sat, 30 Sep 2023 19:26:01 -0600 Subject: [PATCH 09/11] update go --- devtale/aggregators/go.py | 4 ++++ devtale/aggregators/javascript.py | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/devtale/aggregators/go.py b/devtale/aggregators/go.py index 066baad..ea53103 100644 --- a/devtale/aggregators/go.py +++ b/devtale/aggregators/go.py @@ -65,6 +65,9 @@ def _add_docstrings(self, documentation, code, type="method"): return documented_code def _break_large_strings(self, string, max_lenght=90): + """Avoid very long in-line comments by breaking them into smaller + segments with a maximum length. + """ words = string.replace("\\n", " \n ").split() lines = [] current_line = "" @@ -82,6 +85,7 @@ def _break_large_strings(self, string, max_lenght=90): return "\n".join(["// " + line for line in lines]) def _document_file(self, documentation, code): + """Add a top-level docstring if there isn't one already.""" file_description = self._break_large_strings(documentation["file_docstring"]) words = code.split() if words[0] != "//" and words[0] != "/*": diff --git a/devtale/aggregators/javascript.py b/devtale/aggregators/javascript.py index 8e25ac6..a8c0172 100644 --- a/devtale/aggregators/javascript.py +++ b/devtale/aggregators/javascript.py @@ -110,7 +110,7 @@ def _extract_indentation(self, code_line): return indentation def _format_docstring(self, docstring, indentation): - """It adds the in-line comment key character.""" + """Add the in-line comment character key""" lines = docstring.split("\n") js_docstring = "\n" + " " * indentation + "/*\n" for line in lines: @@ -119,6 +119,7 @@ def _format_docstring(self, docstring, indentation): return js_docstring def _document_file(self, documentation, code): + """Add a top-level docstring if there isn't one already.""" file_description = self._break_large_strings(documentation["file_docstring"]) words = code.split() # Check if the file already has a top-file docstring @@ -128,6 +129,9 @@ def _document_file(self, documentation, code): return code def _break_large_strings(self, string, max_lenght=90): + """Avoid very long in-line comments by breaking them into smaller + segments with a maximum length. 
+ """ words = string.replace("\\n", " \n ").split() lines = [] current_line = "" From 3580f74619c6d0eaeb34dbecdad37ba7deb89504 Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Sat, 30 Sep 2023 19:34:43 -0600 Subject: [PATCH 10/11] update php --- devtale/aggregators/php.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/devtale/aggregators/php.py b/devtale/aggregators/php.py index 4df7c4e..5961578 100644 --- a/devtale/aggregators/php.py +++ b/devtale/aggregators/php.py @@ -83,6 +83,7 @@ def _document_classes(self, documentation, code): return code def _format_docstring(self, docstring, indentation): + """Add the in-line comment character key""" lines = docstring.split("\n") php_docstring = "\n" + " " * indentation + "/**\n" for line in lines: @@ -109,6 +110,9 @@ def _extract_indentation(self, text, code_line): return indentation def _break_large_strings(self, string, max_lenght=90): + """Avoid very long in-line comments by breaking them into smaller + segments with a maximum length. + """ words = string.split() lines = [] current_line = "" From 0716b5879b23d82c8c09e97ba30f85dbce565963 Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Sat, 30 Sep 2023 20:39:19 -0600 Subject: [PATCH 11/11] update python --- devtale/aggregators/python.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/devtale/aggregators/python.py b/devtale/aggregators/python.py index 0f31089..a644b19 100644 --- a/devtale/aggregators/python.py +++ b/devtale/aggregators/python.py @@ -29,19 +29,26 @@ def document(self, documentation, code): code_definitions = self._get_code_definitions(code_w_placeholders) documented_code = code + # For each function/method or class definition we found using AST, we match + # it with the dev tale info. for name, definition in code_definitions.items(): splited_definition = definition.split() - prefix = splited_definition[0] - postfix = splited_definition[-1] + prefix = splited_definition[0] # def, class + postfix = splited_definition[-1] # last text Eg. "->None", "):", etc type_item = "method" if prefix == "def" else "class" - if len(splited_definition) == 2: + # Extract only the last character if we have conflicting text that won't + # allow us to match the pattern. + if len(splited_definition) == 2 or "'" in postfix or '"' in postfix: postfix = postfix[-1] + pattern = r"" + prefix + "\s+" + name + "[\s\S]*?" + re.escape(postfix) docstring = self._get_docstring(type_item, name, documentation) - docstring = self._fix_docstring(docstring) + + # docstring = self._fix_docstring(docstring) + docstring = self._break_large_strings(docstring) comment = f'\n"""{docstring}"""' match = re.findall(pattern, documented_code) @@ -69,6 +76,7 @@ def document(self, documentation, code): return documented_code def _add_file_level_docstring(self, code: str, documentation): + """Add a top-level docstring if there isn't one already.""" file_description = self._break_large_strings(documentation["file_docstring"]) docstring = f'"""{file_description}\n"""\n' @@ -79,6 +87,11 @@ def _add_file_level_docstring(self, code: str, documentation): return code def _add_placeholders(self, code: str): + """AST is capable of adding docstrings to the code; however, it reformats + the file. To avoid this, we add a placeholder that we later search for in + the process. This helps us determine the location where the docstring + should be attached. 
+ """ code_tree = ast.parse(code) placeholder_adder = Placeholder() modified_ast = placeholder_adder.visit(code_tree) @@ -87,6 +100,9 @@ def _add_placeholders(self, code: str): return modified_code def _get_code_definitions(self, code_w_placeholders): + """Search for the placeholder we added and extract the function/method or + class signature. + """ code_definitions = {} lines = code_w_placeholders.splitlines() @@ -139,6 +155,9 @@ def _extract_indentation(self, text, code_line): return indentation_size def _break_large_strings(self, string, max_lenght=90): + """Avoid very long in-line comments by breaking them into smaller + segments with a maximum length. + """ words = string.split() lines = [] current_line = "" @@ -153,7 +172,7 @@ def _break_large_strings(self, string, max_lenght=90): if current_line: lines.append(current_line) - return "\n".join(lines) + return "\n".join([line for line in lines]) def _fix_docstring(self, docstring): pattern = r"^(.*?)(?=Args:|Returns:|$)"