Merge pull request #12 from ML-Dev-Hub/cli_python

Cli python
ML-Dev-Hub · Mar 24, 2023 · 844f169
2 parents 4718c4c + 41efe8e
commit 844f169
Show file tree

Hide file tree

Showing 6 changed files with 135 additions and 46 deletions.
diff --git a/__main__.py b/__main__.py
diff --git a/cli.py b/cli.py
diff --git a/csv_trans/cli.py b/csv_trans/cli.py
@@ -0,0 +1,28 @@
+from argparse import ArgumentParser
+from csv_trans.translate import translate
+
+def parse_arguments_from_cli(parser : ArgumentParser):
+    '''
+    Register the command-line options on the given parser
+
+    ----------------
+        Parameters:
+            parser: ArgumentParser
+                The parser on which the command-line options are registered
+    ----------------
+        Returns:
+            parser: ArgumentParser
+                The same parser with the options added (parsing is done by the caller)
+    '''
+
+    parser.add_argument('-f', '--file_path', type=str, required=True, help='file path')
+    parser.add_argument('-fs', '--file_separator', type=str, default=',', required=False, help='file separator')
+    parser.add_argument('-sl', '--source_language', type=str, required=True, help='source language')
+    parser.add_argument('-tl', '--target_language', type=str, required=True, help='target language')
+    return parser
+
+def main():
+    parser = ArgumentParser()
+    parser = parse_arguments_from_cli(parser)
+    args = parser.parse_args()
+    translate(args.file_path, args.source_language, args.target_language, args.file_separator)
diff --git a/csv_trans/translate.py b/csv_trans/translate.py
@@ -9,7 +9,22 @@
 def translate(file: str, source_lang: str, target_lang: str, sep: str = ',') -> None:
     """
     Translates the data in the file to the target language and saves the result
+
+    ----------------
+        Parameters:
+            file: str
+                The path to the file
+            source_lang: str
+                The source language of the data
+            target_lang: str
+                The target language to translate the data to
+            sep: str
+                The separator of the file
+    ----------------
+        Returns:
+            None
     """
+
     encoding_scheme = detect_encoding_scheme(file)
     # read the data from the csv file
     data = read_csv_file(file, encoding_scheme, sep)
@@ -27,7 +42,7 @@ def translate(file: str, source_lang: str, target_lang: str, sep: str = ',') ->
         pbar.update()
 
     # save the data to the csv file
-    save_csv_file(data, file, encoding_scheme)
+    save_csv_file(data, file, encoding_scheme, target_lang)
 
 
 # make file_separator optional

diff --git a/csv_trans/utils.py b/csv_trans/utils.py
@@ -14,7 +14,18 @@
 
 
 def detect_encoding_scheme(file_path):
-    """Detect encoding scheme of a CSV file"""
+    """
+    Detect encoding scheme of a CSV file
+
+    --------------------
+    Parameters:
+        file_path: str
+            The path to the file
+    --------------------
+    Returns:
+        encoding_scheme: str
+            The encoding scheme of the file
+    """
     try:
         with open(file_path, 'rb') as f:
             rawdata = f.read(200)
@@ -28,6 +39,15 @@ def detect_encoding_scheme(file_path):
 def validate_dataframe(df):
     """
     Check if the data is a valid dataframe and not empty.
+
+    --------------------
+    Parameters:
+        df: pandas.DataFrame
+            The dataframe to be validated
+    --------------------
+    Returns:
+        bool
+            True if the dataframe is valid and not empty, False otherwise
     """
     if isinstance(df, pd.DataFrame) and not df.empty:
         return True
@@ -38,13 +58,23 @@ def translate_text(texts, target_language, source_language='en', chunk_size=4000
     """
     Translate the text into the target language using Google Translator API
 
-    :param texts: List of texts to be translated
-    :param source_language: The language of the input text
-    :param target_language: The target language to translate the text to
-    :param chunk_size: The size of chunk to split the input text
-    :param timeout: Timeout length for the request
-    
-    :return:
+    --------------------
+    Parameters:
+        texts: list
+            List of texts to be translated
+        source_language: str
+            The language of the input text
+        target_language: str
+            The target language to translate the text to
+        chunk_size: int
+            The size of chunk to split the input text
+        timeout: int
+            Timeout length for the request
+
+    --------------------
+    Returns:
+        translations: list
+            The list of translated texts
     """
     translations = []
 
@@ -83,9 +113,17 @@ def translate_text(texts, target_language, source_language='en', chunk_size=4000
 def split_text_data(text, chunk_size):
     """
     Split the input data/text into a fixed chunk size
-    
-    :param text: the input data to be split
-    :param chunk_size: The chunk size to split the data
+
+    ----------------
+        Parameters:
+            text: str
+                The input data to be split
+            chunk_size: int
+                The chunk size to split the data
+    ----------------
+        Returns:
+            chunks: list
+                The list of chunks
     """
     chunks = []
     start = 0
@@ -112,6 +150,19 @@ def split_text_data(text, chunk_size):
 def translate_dataframe(df, source_language, target_language):
     """
     Translate a given pandas DataFrame to a desired language
+
+    ----------------
+        Parameters:
+            df: pandas.DataFrame
+                The dataframe to be translated
+            source_language: str
+                The language of the input text
+            target_language: str
+                The target language to translate the text to
+    ----------------
+        Returns:
+            result_df: pandas.DataFrame
+                The translated dataframe
     """
     # Determine the number of threads to use: at most one per column, capped at the number of available CPU cores
     num_threads = min(cpu_count(), len(df.columns))
@@ -132,11 +183,21 @@ def translate_dataframe(df, source_language, target_language):
 def read_csv_file(file_path, encoding_scheme, separator=','):
     """
     Read a CSV file using the given encoding scheme and delimiter
-    
-    :param file_path: the path to the input file
-    :param encoding_scheme: the encoding to use when reading the file
-    :param separator: the delimiter to use when reading the CSV file
+
+    ----------------
+        Parameters:
+            file_path: str
+                The path to the input file
+            encoding_scheme: str
+                The encoding to use when reading the file
+            separator: str
+                The delimiter to use when reading the CSV file
+    ----------------
+        Returns:
+            data: pandas.DataFrame
+                The DataFrame containing the data from the CSV file
     """
+
     try:
         data = pd.read_csv(file_path, encoding=encoding_scheme, sep=separator, engine='pyarrow')
         return data
@@ -145,20 +206,27 @@ def read_csv_file(file_path, encoding_scheme, separator=','):
         return None
 
 
-def save_csv_file(df, file_path, encoding_scheme):
+def save_csv_file(df, file_path, encoding_scheme, target_language):
     """
     Save a pandas DataFrame to a CSV file
-    
-    :param df: the DataFrame to be saved as CSV
-    :param file_path: the full path including the filename of the output file
-    :param encoding_scheme: the encoding scheme to use when saving the CSV file
+
+    ----------------
+    Parameters:
+        df: pandas.DataFrame
+            The DataFrame to be saved as CSV
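+        file_path: str
+            The path of the original file; the output is written next to it as "translated_<target_language>_<file name>"
+        encoding_scheme: str
+            The encoding scheme to use when saving the CSV file (utf-8 is used instead on UnicodeEncodeError)
+        target_language: str
+            The target language of the translated text, included in the output file name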
     """
 
     path = os.path.dirname(file_path)
     file_name = os.path.basename(file_path)
     try:
-        df.to_csv(os.path.join(path, "translated_" + file_name), encoding=encoding_scheme, index=False)
+        df.to_csv(os.path.join(path, "translated_" + target_language + '_' + file_name), encoding=encoding_scheme, index=False)
     except UnicodeEncodeError:
-        df.to_csv(os.path.join(path, "translated_" + file_name), encoding='utf-8', index=False)
+        df.to_csv(os.path.join(path, "translated_" + target_language + '_' + file_name), encoding='utf-8', index=False)
     except Exception as e:
         print(f"Error saving file {file_name}: {e}")
diff --git a/setup.py b/setup.py
@@ -92,7 +92,7 @@
         ],
     entry_points={
         "console_scripts": [
-            "csv_trans = csv_trans.__main__:cli"
+            "csv_trans = csv_trans.cli:main"
         ]
     },
     keywords=['python', 'csv', 'translate', 'translator', 'google',