Introduced support for opening/parsing files with a fallback encoding mechanism.

Supported encodings: ISO-8859-1 and UTF-8 (the default fallback order).
* Encodings are configurable and can be extended by changing the fallback_order parameter of the readfile_FallbackEncoding helper in /utils/file_utils.py (see the usage sketch after this list).
* Encoding configuration will later be moved to a configuration file, so users can change it without editing the code.
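A minimal usage sketch of the new helper (illustrative only: it assumes utils/file_utils.py is importable as utils.file_utils, keeps the signature shown in the diff below, and uses a made-up target path):

    # Hypothetical caller; mirrors how core/parser.py now opens target files.
    from utils.file_utils import readfile_FallbackEncoding

    # Tries each encoding in order and returns the first file object that opens.
    # Note: text-mode open() decodes lazily, so a decoding error can still
    # surface while iterating over the file.
    with readfile_FallbackEncoding("scans/target_source.php",
                                   fallback_order=("ISO-8859-1", "utf-8")) as fo_target:
        for line in fo_target:
            pass  # apply the regex rules to each line here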

Improved Console Output:
* Long file paths displayed during scanning are now shortened (the middle of the path is elided) to prevent unnecessary line wrapping and excessive screen scrolling; see the sketch below.
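The shortening follows the getShortPath changes in utils/file_utils.py further down. A simplified, self-contained sketch of the same idea (short_display_path and the sample path are invented for illustration):

    import os

    def short_display_path(file_path, max_name_len=40):
        # Keep the first directory component and the filename; elide the middle.
        directory, filename = os.path.split(file_path)
        if len(filename) > max_name_len:
            # Very long filenames are replaced with a placeholder, keeping the extension.
            _base, ext = os.path.splitext(filename)
            filename = f"[FILENAME-TOO-LONG]{ext}"
        parts = directory.split(os.sep)
        first = parts[1] if len(parts) > 1 else parts[0]
        return f"{os.sep}{first}{os.sep}..[SHORTENED]..{os.sep}{filename}"

    print(short_display_path("/project/src/deep/nested/dir/report_generator.py"))
    # -> /project/..[SHORTENED]../report_generator.py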
coffeeandsecurity committed Nov 11, 2024
1 parent 4df60b5 commit 5032ca8
Showing 2 changed files with 32 additions and 144 deletions.
147 changes: 4 additions & 143 deletions core/parser.py
@@ -1,7 +1,7 @@
import re
import sys
import json
import ast

import xml.etree.ElementTree as ET
from pathlib import Path
from timeit import default_timer as timer
@@ -90,7 +90,7 @@ def sourceParser(rule_input, targetfile, outputfile):
iCnt += 1

if str(state.verbosity) == '1':
- if len(filepath) > 60:
+ if len(filepath) > 50:
print('\t Parsing file: ' + "["+str(iCnt)+"] "+ futils.getShortPath(filepath), end='\r')
else:
print('\t Parsing file: ' + "["+str(iCnt)+"] "+ filepath, end='\r')
@@ -101,7 +101,8 @@ def sourceParser(rule_input, targetfile, outputfile):
#sys.stdout.write("\033[F\033[K") # move the cursor up one line and clear line to prevent overlap of texts

try:
- with open(filepath, 'r', encoding='ISO-8859-1') as fo_target:
+ #with open(filepath, 'r', encoding='ISO-8859-1') as fo_target:
+ with futils.readfile_FallbackEncoding(filepath) as fo_target:
linecount = 0
flag_fpath = False

@@ -163,146 +164,6 @@ def sourceParser(rule_input, targetfile, outputfile):
return matched_rules, unmatched_rules


'''
def sourceParser(rule_input, targetfile, outputfile):
"""
Parses rules from XML files and applies them to target files.
Supports both individual Path and dictionary of Paths as input.
Parameters:
rule_input (dict or Path): Rule file paths or a single Path to an XML file.
targetfile (str): File containing paths of target source files.
outputfile (str or file object): File path or open file object to write scan results.
Returns:
tuple: (matched_rules, unmatched_rules) - Lists of rule titles for matched and unmatched patterns.
"""
# If outputfile is a string (path), open the file for writing
if isinstance(outputfile, str):
f_scanout = open(outputfile, "a")
else:
# Otherwise, assume it's already an open file object
f_scanout = outputfile
# Determine if input is a dict or a single Path
if isinstance(rule_input, dict):
rule_paths = rule_input.values()
elif isinstance(rule_input, Path):
rule_paths = [rule_input]
else:
raise TypeError(f"Expected a dict or Path, but got {type(rule_input)}")
iCnt = 0
rule_no = 0
error_count = 0
unmatched_rules = [] # Store unmatched patterns
matched_rules = [] # Store matched patterns
# Process each rule path (which corresponds to a platform like PHP, JAVA, etc.)
for rule_path in rule_paths:
platform_name = rule_path.stem.upper() # Extract platform name from the XML file (e.g., PHP, JAVA)
# Reset rule count for each platform
rule_no = 0
# Write platform heading
f_scanout.write(f"\n-- {platform_name} Findings ---\n")
# Parse the XML rule file
xmltree = ET.parse(rule_path)
root = xmltree.getroot()
# Loop through categories and rules in the XML file
for category in root:
category_name = category.get('name')
if category_name:
print(f" [-] Category: {category_name}")
# Process each rule in the category
for rule in category:
rule_title = rule.find("name").text
pattern = rule.find("regex").text
rule_desc = rule.find("rule_desc").text
vuln_desc = rule.find("vuln_desc").text
dev_note = rule.find("developer").text
rev_note = rule.find("reviewer").text
exclude = rule.find("exclude").text if rule.find("exclude") is not None else ""
flag_title_desc = False
print(f" [-] Applying Rule: {rule_title}")
# Process each target file
for eachfilepath in targetfile:
filepath = eachfilepath.rstrip()
iCnt += 1
# Print the file being processed
print(f"\t Parsing file: [{iCnt}] {filepath}", end='\r')
try:
with open(filepath, 'r', encoding='ISO-8859-1') as fo_target:
linecount = 0
flag_fpath = False
# Scan each line of the target file
for line in fo_target:
linecount += 1
if len(line) > 500:
continue # Skip overly long lines
# Apply regex matching
if re.findall(pattern, line):
if exclude and re.search(exclude, line, re.IGNORECASE):
continue # Skip if exclude rule matches
line = (line[:75] + '..') if len(line) > 300 else line
if not flag_title_desc:
# Increment rule_no and write rule details to the output file
rule_no += 1
matched_rules.append(rule_title)
f_scanout.write(
f"\n{platform_name}-{rule_no}. Rule Title: {rule_title}\n"
f"\n\t Rule Description : {rule_desc}"
f"\n\t Issue Description : {vuln_desc}"
f"\n\t Developer Note : {dev_note}"
f"\n\t Reviewer Note : {rev_note} \n"
)
flag_title_desc = True
if not flag_fpath:
flag_fpath = True
f_scanout.write(
f"\n\t -> Source File: {filepath}\n"
f"\t\t [{linecount}] {line}"
)
else:
f_scanout.write(f"\t\t [{linecount}] {line}")
if rule_title not in matched_rules:
unmatched_rules.append(rule_title)
except (FileNotFoundError, PermissionError, UnicodeError) as e:
print(f"Error processing {filepath}: {e}")
error_count += 1
# Remove duplicates from the matched and unmatched rules lists
matched_rules = list(set(matched_rules))
unmatched_rules = list(set(unmatched_rules))
# Update error count in the state object
state.parseErrorCnt += error_count
# Close the file if we opened it
if isinstance(outputfile, str):
f_scanout.close()
return matched_rules, unmatched_rules
'''




29 changes: 28 additions & 1 deletion utils/file_utils.py
@@ -25,6 +25,29 @@ def detectEncodingType(targetfile):
return result['encoding']


def readfile_FallbackEncoding(filepath, fallback_order=("ISO-8859-1", "utf-8")):
"""
Opens a file with specified encodings in fallback order.
Parameters:
filepath (str): The path to the file to open.
fallback_order (tuple): Encodings to try in order.
Returns:
file object: The file object opened with the first successful encoding.
Raises:
IOError: If all encodings fail.
"""
for encoding in fallback_order:
try:
return open(filepath, 'r', encoding=encoding)
except (UnicodeDecodeError, ValueError):
continue
raise IOError(f"Could not open file {filepath} with any of the specified encodings: {fallback_order}")




def getRelativePath(fpath):
"""
@@ -108,10 +131,14 @@ def getSourceFilePath(project_dir, source_file):


def getShortPath(file_path):

short_file_path = getSourceFilePath(runtime.sourcedir, file_path)

directory, filename = os.path.split(file_path)
# Check if the filename length, including the extension, is greater than 40 characters
if len(filename) > 40:
base, ext = os.path.splitext(filename)
filename = f"[FILENAME-TOO-LONG]{ext}" # Updated name

shortened = '..[SHORTENED]..'
return f"{os.sep}{directory.split(os.sep)[1]}{os.sep}{shortened}{os.sep}{filename}"

