From 2fce538994c87fcbf1525db919c1f9c857537f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Wed, 4 Dec 2024 17:36:39 +0100 Subject: [PATCH] feat: added the capability to extract version constants to the FSR and also added a substitution option for software signatures to change the format (e.g. add dots) --- .../ipc/docker/ipc_analyzer/ipc_analyzer.py | 33 ++++++++++++++++++ .../code/software_components.py | 28 +++++++++++++-- .../internal/resolve_version_format_string.py | 7 +--- .../signatures/software.yara | 2 +- .../test/data/version_function_constant.elf | Bin 0 -> 15808 bytes .../test/test_plugin_software_components.py | 1 + .../test_resolve_version_format_string.py | 26 +++++--------- 7 files changed, 70 insertions(+), 27 deletions(-) create mode 100755 src/plugins/analysis/software_components/test/data/version_function_constant.elf diff --git a/src/plugins/analysis/ipc/docker/ipc_analyzer/ipc_analyzer.py b/src/plugins/analysis/ipc/docker/ipc_analyzer/ipc_analyzer.py index 26cc6def0..4e15902e6 100644 --- a/src/plugins/analysis/ipc/docker/ipc_analyzer/ipc_analyzer.py +++ b/src/plugins/analysis/ipc/docker/ipc_analyzer/ipc_analyzer.py @@ -336,6 +336,38 @@ def find_function_ref_strings(function_name): return strings +def find_function_constants(function_name): + """ + Get all constants that are used as operands in the function with name `function_name`. + + :param function_name: The name of the function. + :type function_name: str + :return: a list of int/long constants referenced in the function as strings + :rtype: list[str] + """ + try: + function = getGlobalFunctions(function_name)[0] + except (IndexError, TypeError): + print("Error: Function {} not found.".format(function_name)) + return [] + + constants = [] + if function is not None: + body = function.getBody() + instruction_iterator = currentProgram.getListing().getInstructions(body, True) + + for instruction in instruction_iterator: + for i in range(instruction.getNumOperands()): + for operand in instruction.getOpObjects(i): + try: + value = operand.getValue() + except AttributeError: + continue + if value is not None and isinstance(value, (int, long)): + constants.append(str(value)) + return constants + + def get_fstring_from_functions(ghidra_analysis, key_string, call_args, called_fstrings): """ :param ghidra_analysis: instance of GhidraAnalysis @@ -412,6 +444,7 @@ def find_version_strings(input_data, ghidra_analysis, result_path): print("Error: Function name not found.") return 1 result_list = find_function_ref_strings(function_name) + result_list.extend(find_function_constants(function_name)) else: print("Error: Invalid mode.") return 1 diff --git a/src/plugins/analysis/software_components/code/software_components.py b/src/plugins/analysis/software_components/code/software_components.py index 9c59bc267..363bfbc80 100644 --- a/src/plugins/analysis/software_components/code/software_components.py +++ b/src/plugins/analysis/software_components/code/software_components.py @@ -1,5 +1,7 @@ from __future__ import annotations +import json +import logging import re import string from typing import TYPE_CHECKING @@ -50,9 +52,26 @@ def get_version(self, input_string: str, meta_dict: dict) -> str: pattern = re.compile(regex) version = pattern.search(input_string) if version is not None: - return self._strip_leading_zeroes(version.group(0)) + version_string = version.group(0) + if '_sub_regex' in meta_dict: + version_string = self._convert_version_str(version_string, meta_dict) + else: + version_string = self._strip_leading_zeroes(version_string) + return version_string return '' + def _convert_version_str(self, version_str: str, meta_dict: dict): + """ + The metadata entry "_sub_regex" can be used to change the version string if it does not have the expected + format (e.g. add dots). The entry should contain a regex and replacement for `re.sub()` as JSON string + """ + try: + sub_regex, replacement = json.loads(meta_dict['_sub_regex']) + return re.sub(sub_regex, replacement, version_str) + except json.JSONDecodeError: + logging.warning(f'[{self.NAME}]: signature has invalid substitution regex: {meta_dict}') + return '' + @staticmethod def _get_summary(results: dict) -> list[str]: summary = set() @@ -86,9 +105,12 @@ def get_version_for_component(self, result, file_object: FileObject): 'mode': 'version_function', 'function_name': result['meta']['_version_function'], } - versions.update( - extract_data_from_ghidra(file_object.file_path, input_data, config.backend.docker_mount_base_dir) + ghidra_data = extract_data_from_ghidra( + file_object.file_path, input_data, config.backend.docker_mount_base_dir ) + for version_str in ghidra_data: + if version := self.get_version(version_str, result['meta']): + versions.add(version) if '' in versions and len(versions) > 1: # if there are actual version results, remove the "empty" result versions.remove('') result['meta']['version'] = list(versions) diff --git a/src/plugins/analysis/software_components/internal/resolve_version_format_string.py b/src/plugins/analysis/software_components/internal/resolve_version_format_string.py index bb5f300b6..03ce9a6cc 100644 --- a/src/plugins/analysis/software_components/internal/resolve_version_format_string.py +++ b/src/plugins/analysis/software_components/internal/resolve_version_format_string.py @@ -2,7 +2,6 @@ import json import logging -import re from contextlib import suppress from pathlib import Path from tempfile import TemporaryDirectory @@ -40,11 +39,7 @@ def extract_data_from_ghidra(file_path: str, input_data: dict, path: str) -> lis try: output_file = (tmp_dir_path / DOCKER_OUTPUT_FILE).read_text() - return filter_implausible_results(json.loads(output_file)) + return json.loads(output_file) except (json.JSONDecodeError, FileNotFoundError): logging.debug('[FSR]: output file could not be read') return [] - - -def filter_implausible_results(version_list: list[str]): - return [version for version in version_list if re.search(r'\d\.\d', version)] diff --git a/src/plugins/analysis/software_components/signatures/software.yara b/src/plugins/analysis/software_components/signatures/software.yara index c60637615..746c44850 100644 --- a/src/plugins/analysis/software_components/signatures/software.yara +++ b/src/plugins/analysis/software_components/signatures/software.yara @@ -79,9 +79,9 @@ rule OPKG { open_source = true website = "https://openwrt.org/docs/guide-user/additional-software/opkg" description = "Opkg lightweight embedded package manager" - // the version is not stored as a number; instead a git commit hash and a date is used: [hash] ([YYYY-MM-DD]) version_regex = "[0-9a-z]{40} \\(\\d{4}-\\d{2}-\\d{2}\\)" strings: + // the version is not stored as a number; instead a git commit hash and a date is used: [hash] ([YYYY-MM-DD]) // see https://github.com/openwrt/opkg-lede/blob/38eccbb1fd694d4798ac1baf88f9ba83d1eac616/src/opkg-cl.c#L158 $a = "opkg version %s\n" nocase ascii $b = /[0-9a-z]{40} \(\d{4}-\d{2}-\d{2}\)/ ascii diff --git a/src/plugins/analysis/software_components/test/data/version_function_constant.elf b/src/plugins/analysis/software_components/test/data/version_function_constant.elf new file mode 100755 index 0000000000000000000000000000000000000000..a0b1dbb165c818f2eda6fd129d82b85759d62b49 GIT binary patch literal 15808 zcmeHOU2Ggz6~4Q65{EkWCT)mGLNkJlXd))H8@F098e|7bk;leuCza9 zcb3`}Dn_BDQ5(@91mytvg;YEcQ7Rcxs3U%4KzKOk-t(;| zqcxQZ5)aI^cD{SQ^KSv>3s+ga(HFI5(@k#$a{iX0#+PcL~CeMfOv+v8UlMMX(|L1R|fD6MhX# zKBL$tVSrJ;y!b_NT>OCC2A6Yh`nh-0-hJZNZs{;8ii3t$K(zNs^3!<{{W}4M`K+|J z**K&m|3FEvI$SJdhXxN9bG^kvxxU!DI6TxlG-%f<_7UAQ_sgOe BA&npVkU{2Dg zlFLR$Jl5SNdba=l;&&hW-qx?C-?&`Jer2)h-TT@j9@}ug*@tboa6C*AmbuPtvW>~d zm#8LloA9dlvyvt@xv<}^7P>LwtQNep318lXzm@n8w1}e|hcY_r)O@$<3Xw^(@CQ>^Cv4ylvVQzyopBlZ#dXtO>rsGO;pQ%=sSdh>;v?^S0`jTI|pZ^q3Q zJ&HA7s+1+VV@z8bat0In&x}-pWbQBKOMP!)J7@hK;a^HbjUGJYYsU@GW5{EIay`U{$9*NlV~(-UYKZ6ggR&OldA`IY&On@jI0JD8 z;ta$Yh%*ppAkF}1;7{GR|2=czt&YqSTV6k`ROZo!pA3GUx$sKI6`i2q=rcqI2cM;D z_kpyw4-Wo_q`B?`v)QO$YP#JML(h)yI!_Al-bd0O^4zE5QAgWM-F_BZYo8T%pma~T)t|FRxbSbknMF=B6GzMWa@ zX;tc)=-*>#^|;_If<1z}1p(L_*^vE@+eFbP4WSwLCe_Ul&v0PVr; z>pe11*Ye;YVg5~9oQHOGBZ!)Co)e^bQW9Z_0q+-XWzXY~R9$L8=AoD?OZe|Z_yOTD zuVFt$m4xb1skC7*PQ1H1LElzjo*J!EU!V%Ni}wk5o_`5-tHQho{{yPD>i=f<^@5JG zORZRluz{`O{8-!5ERT%yYuat6YPUB0x%Ml1CDeNr=0CJIsM2cRcNZnZf>8*MX?~AV z=@3cPyNOS!JLh~T=)UvV+BX2KN zAwEUp+mnW+!4cwj(Kx1Mp6KtQNac$?<^#Nz#eP_xbNV|ZQa(yN_a|z9nRpA%A$(5a z9FjQtyDHMZL_D6cA#?Q$wolSiEi#|gx!J1UZ&zq%yPGXIzB{josQLA|IeS)Z?1Fdv zk~7Qu;A>==t2pz;O4cnpIloe^Ic|MX%~nbaMbGzgc3*1?-hp3m+-lWba=fx%T~c#Z zx8ymwda1NTE+NSwOaCU*&wJHcp;D$UInLzS(Nh!7#OZN|HsFune|q%P)ELoplEInC zh&D5RRyil9&zu;YcFs&ro|~9)W=2m;Pmo9Sd_Wq<0N!cqzn61;SLxfJC@h$B03_Ue zD8R|pDo)-l=lHmBZJu{)d1dF8%G88GziKq-o;DM~Azjrgy6m8A9$95C6wSq+uTaJJ7U{~< zgDfjL%eI%736al{KP2OCW`^-Vor1fi!Yp-%2g#-cE1RZdiDp{(ZSbz^y#AoK5IwU^ ziJm9J`a{&(7InrC(u>y!@Zap*6x(vay1+J7x3^LYbz&>d#+JE%n5>+^-@W(n4*bqas zAHDx?lAiAg`sQ<7p66H}OBmf2>Wtr}SJ>YYKcI+B7s$Y8BL01%0KyIdAOoL^_@_ky z?2HT;{1ESjh(G@S1M&F*_R;zKlK3~*CYD$VM(rU3&H89#{IDJZ4oWxx{2>37DtwC= z|6$8$fG--cdBymH_$5`k=!Nw?)`vK67`2BCe1m*>jR1eFN7s6Zqyjr5ZOGu?CI!zu z_&3*ya(=WSPL0+c{ zb7&vytd#gi?d5jhuG)xTPRyG;{O@csrUL&kV(i1F!xHd8=@`F{d5)q#c>ZlC4t)>% gKg}OVs9ZxsM$~&rw(?&%YOJE~zeqUsFTuF9VgLXD literal 0 HcmV?d00001 diff --git a/src/plugins/analysis/software_components/test/test_plugin_software_components.py b/src/plugins/analysis/software_components/test/test_plugin_software_components.py index 439532383..4147a9db3 100644 --- a/src/plugins/analysis/software_components/test/test_plugin_software_components.py +++ b/src/plugins/analysis/software_components/test/test_plugin_software_components.py @@ -51,6 +51,7 @@ def test_process_object(self, analysis_plugin): ('OpenSSL 0.9.8zh', '0.9.8zh', {'version_regex': '\\d\\.\\d\\.\\d[a-z]{0,2}'}), ('Foo v1.2.3', 'v1.2.3', {'version_regex': 'v?\\d\\.\\d\\.\\d'}), ('Bar a.b', 'a.b', {'version_regex': '[a-z]\\.[a-z]'}), + ('524', '5.24', {'version_regex': r'\d{3}', '_sub_regex': '["(\\\\d)(\\\\d{2})", "\\\\1.\\\\2"]'}), ], ) def test_get_version(self, analysis_plugin, version, expected_output, meta_dict): diff --git a/src/plugins/analysis/software_components/test/test_resolve_version_format_string.py b/src/plugins/analysis/software_components/test/test_resolve_version_format_string.py index 0dcfd2af5..78f5d5803 100644 --- a/src/plugins/analysis/software_components/test/test_resolve_version_format_string.py +++ b/src/plugins/analysis/software_components/test/test_resolve_version_format_string.py @@ -4,7 +4,6 @@ from ..internal.resolve_version_format_string import ( extract_data_from_ghidra, - filter_implausible_results, ) @@ -14,29 +13,22 @@ ( 'format_string_arm-linux-gnueabihf', {'mode': 'format_string', 'key_string_list': ['get_version v%s']}, - ['1.2.3'], + '1.2.3', ), ( 'fake-liblzma', {'mode': 'version_function', 'function_name': 'lzma_version_string'}, - ['5.2.1'], + '5.2.1', + ), + ( + 'version_function_constant.elf', + {'mode': 'version_function', 'function_name': 'get_version'}, + '524', ), ], ) def test_extract_data_from_ghidra(backend_config, test_file, input_data, expected_output): test_file = Path(__file__).parent / 'data' / test_file result = extract_data_from_ghidra(str(test_file), input_data, str(backend_config.docker_mount_base_dir)) - assert len(result) == 1 - assert result == expected_output - - -@pytest.mark.parametrize( - ('test_input', 'expected_output'), - [ - ([], []), - (['1.2.3.4', 'foobar'], ['1.2.3.4']), - (['v1.2-r1234'], ['v1.2-r1234']), - ], -) -def test_filter_implausible_results(test_input, expected_output): - assert filter_implausible_results(test_input) == expected_output + assert len(result) >= 1 + assert result[0] == expected_output