Introduced support for opening/parsing files with a fallback encoding mechanism.

Supported encodings: ISO-8859-1 and UTF-8 (the default fallback order).
* Encodings are configurable and can be extended by changing the fallback_order parameter of the readfile_FallbackEncoding helper in /utils/file_utils.py (see the usage sketch after this list).
* Encoding configuration will later be moved to a configuration file, so users can change it without editing the code.
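A minimal usage sketch of the new helper (illustrative only: it assumes utils/file_utils.py is importable as utils.file_utils, keeps the signature shown in the diff below, and uses a made-up target path):

    # Hypothetical caller; mirrors how core/parser.py now opens target files.
    from utils.file_utils import readfile_FallbackEncoding

    # Tries each encoding in order and returns the first file object that opens.
    # Note: text-mode open() decodes lazily, so a decoding error can still
    # surface while iterating over the file.
    with readfile_FallbackEncoding("scans/target_source.php",
                                   fallback_order=("ISO-8859-1", "utf-8")) as fo_target:
        for line in fo_target:
            pass  # apply the regex rules to each line here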

Improved Console Output:
* Long file paths displayed during scanning are now shortened (the middle of the path is elided) to prevent unnecessary line wrapping and excessive screen scrolling; see the sketch below.
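The shortening follows the getShortPath changes in utils/file_utils.py further down. A simplified, self-contained sketch of the same idea (short_display_path and the sample path are invented for illustration):

    import os

    def short_display_path(file_path, max_name_len=40):
        # Keep the first directory component and the filename; elide the middle.
        directory, filename = os.path.split(file_path)
        if len(filename) > max_name_len:
            # Very long filenames are replaced with a placeholder, keeping the extension.
            _base, ext = os.path.splitext(filename)
            filename = f"[FILENAME-TOO-LONG]{ext}"
        parts = directory.split(os.sep)
        first = parts[1] if len(parts) > 1 else parts[0]
        return f"{os.sep}{first}{os.sep}..[SHORTENED]..{os.sep}{filename}"

    print(short_display_path("/project/src/deep/nested/dir/report_generator.py"))
    # -> /project/..[SHORTENED]../report_generator.py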
coffeeandsecurity committed Nov 11, 2024
1 parent 4df60b5 commit 5032ca8
Showing 2 changed files with 32 additions and 144 deletions.
147 changes: 4 additions & 143 deletions core/parser.py
@@ -1,7 +1,7 @@
import re
import sys
import json
import ast

import xml.etree.ElementTree as ET
from pathlib import Path
from timeit import default_timer as timer
@@ -90,7 +90,7 @@ def sourceParser(rule_input, targetfile, outputfile):
iCnt += 1

if str(state.verbosity) == '1':
- if len(filepath) > 60:
+ if len(filepath) > 50:
print('\t Parsing file: ' + "["+str(iCnt)+"] "+ futils.getShortPath(filepath), end='\r')
else:
print('\t Parsing file: ' + "["+str(iCnt)+"] "+ filepath, end='\r')
@@ -101,7 +101,8 @@ def sourceParser(rule_input, targetfile, outputfile):
#sys.stdout.write("\033[F\033[K") # move the cursor up one line and clear line to prevent overlap of texts

try:
- with open(filepath, 'r', encoding='ISO-8859-1') as fo_target:
+ #with open(filepath, 'r', encoding='ISO-8859-1') as fo_target:
+ with futils.readfile_FallbackEncoding(filepath) as fo_target:
linecount = 0
flag_fpath = False

@@ -163,146 +164,6 @@ def sourceParser(rule_input, targetfile, outputfile):
return matched_rules, unmatched_rules


'''
def sourceParser(rule_input, targetfile, outputfile):
"""
Parses rules from XML files and applies them to target files.
Supports both individual Path and dictionary of Paths as input.
Parameters:
rule_input (dict or Path): Rule file paths or a single Path to an XML file.
targetfile (str): File containing paths of target source files.
outputfile (str or file object): File path or open file object to write scan results.
Returns:
tuple: (matched_rules, unmatched_rules) - Lists of rule titles for matched and unmatched patterns.
"""
# If outputfile is a string (path), open the file for writing
if isinstance(outputfile, str):
f_scanout = open(outputfile, "a")
else:
# Otherwise, assume it's already an open file object
f_scanout = outputfile
# Determine if input is a dict or a single Path
if isinstance(rule_input, dict):
rule_paths = rule_input.values()
elif isinstance(rule_input, Path):
rule_paths = [rule_input]
else:
raise TypeError(f"Expected a dict or Path, but got {type(rule_input)}")
iCnt = 0
rule_no = 0
error_count = 0
unmatched_rules = [] # Store unmatched patterns
matched_rules = [] # Store matched patterns
# Process each rule path (which corresponds to a platform like PHP, JAVA, etc.)
for rule_path in rule_paths:
platform_name = rule_path.stem.upper() # Extract platform name from the XML file (e.g., PHP, JAVA)
# Reset rule count for each platform
rule_no = 0
# Write platform heading
f_scanout.write(f"\n-- {platform_name} Findings ---\n")
# Parse the XML rule file
xmltree = ET.parse(rule_path)
root = xmltree.getroot()
# Loop through categories and rules in the XML file
for category in root:
category_name = category.get('name')
if category_name:
print(f" [-] Category: {category_name}")
# Process each rule in the category
for rule in category:
rule_title = rule.find("name").text
pattern = rule.find("regex").text
rule_desc = rule.find("rule_desc").text
vuln_desc = rule.find("vuln_desc").text
dev_note = rule.find("developer").text
rev_note = rule.find("reviewer").text
exclude = rule.find("exclude").text if rule.find("exclude") is not None else ""
flag_title_desc = False
print(f" [-] Applying Rule: {rule_title}")
# Process each target file
for eachfilepath in targetfile:
filepath = eachfilepath.rstrip()
iCnt += 1
# Print the file being processed
print(f"\t Parsing file: [{iCnt}] {filepath}", end='\r')
try:
with open(filepath, 'r', encoding='ISO-8859-1') as fo_target:
linecount = 0
flag_fpath = False
# Scan each line of the target file
for line in fo_target:
linecount += 1
if len(line) > 500:
continue # Skip overly long lines
# Apply regex matching
if re.findall(pattern, line):
if exclude and re.search(exclude, line, re.IGNORECASE):
continue # Skip if exclude rule matches
line = (line[:75] + '..') if len(line) > 300 else line
if not flag_title_desc:
# Increment rule_no and write rule details to the output file
rule_no += 1
matched_rules.append(rule_title)
f_scanout.write(
f"\n{platform_name}-{rule_no}. Rule Title: {rule_title}\n"
f"\n\t Rule Description : {rule_desc}"
f"\n\t Issue Description : {vuln_desc}"
f"\n\t Developer Note : {dev_note}"
f"\n\t Reviewer Note : {rev_note} \n"
)
flag_title_desc = True
if not flag_fpath:
flag_fpath = True
f_scanout.write(
f"\n\t -> Source File: {filepath}\n"
f"\t\t [{linecount}] {line}"
)
else:
f_scanout.write(f"\t\t [{linecount}] {line}")
if rule_title not in matched_rules:
unmatched_rules.append(rule_title)
except (FileNotFoundError, PermissionError, UnicodeError) as e:
print(f"Error processing {filepath}: {e}")
error_count += 1
# Remove duplicates from the matched and unmatched rules lists
matched_rules = list(set(matched_rules))
unmatched_rules = list(set(unmatched_rules))
# Update error count in the state object
state.parseErrorCnt += error_count
# Close the file if we opened it
if isinstance(outputfile, str):
f_scanout.close()
return matched_rules, unmatched_rules
'''




29 changes: 28 additions & 1 deletion utils/file_utils.py
@@ -25,6 +25,29 @@ def detectEncodingType(targetfile):
return result['encoding']


def readfile_FallbackEncoding(filepath, fallback_order=("ISO-8859-1", "utf-8")):
"""
Opens a file with specified encodings in fallback order.
Parameters:
filepath (str): The path to the file to open.
fallback_order (tuple): Encodings to try in order.
Returns:
file object: The file object opened with the first successful encoding.
Raises:
IOError: If all encodings fail.
"""
for encoding in fallback_order:
try:
return open(filepath, 'r', encoding=encoding)
except (UnicodeDecodeError, ValueError):
continue
raise IOError(f"Could not open file {filepath} with any of the specified encodings: {fallback_order}")




def getRelativePath(fpath):
"""
@@ -108,10 +131,14 @@ def getSourceFilePath(project_dir, source_file):


def getShortPath(file_path):

short_file_path = getSourceFilePath(runtime.sourcedir, file_path)

directory, filename = os.path.split(file_path)
# Check if the filename length, including the extension, is greater than 40 characters
if len(filename) > 40:
base, ext = os.path.splitext(filename)
filename = f"[FILENAME-TOO-LONG]{ext}" # Updated name

shortened = '..[SHORTENED]..'
return f"{os.sep}{directory.split(os.sep)[1]}{os.sep}{shortened}{os.sep}{filename}"

