Skip to content

Commit

Permalink
Yara python fix (#1330)
Browse files Browse the repository at this point in the history
* fix: install yara-python from source

Why is this necessary? Plugins like software_components need support for YARA modules, and the pre-built version of yara-python from PyPI does not include them.

* refactor: moved get_yara_error method to backend (intercom)

Having the method in the frontend does not make a lot of sense: the YARA version of the backend could be different, so rules that compile in the frontend could still fail to compile in the backend.
  • Loading branch information
jstucke authored Jan 22, 2025
1 parent ccb8283 commit 57e7c49
Show file tree
Hide file tree
Showing 15 changed files with 118 additions and 40 deletions.
12 changes: 8 additions & 4 deletions src/helperFunctions/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,7 @@ def install_pip_packages(package_file: Path):
"""
for package in read_package_list_from_file(package_file):
try:
command = f'pip3 install -U {package} --prefer-binary' # prefer binary release to compiling latest
if not is_virtualenv():
command = 'sudo -EH ' + command
run_cmd_with_logging(command, silent=True)
install_single_pip_package(package)
except CalledProcessError as error:
# don't fail if a package is already installed using apt and can't be upgraded
if error.stdout is not None and 'distutils installed' in error.stdout:
Expand All @@ -285,6 +282,13 @@ def install_pip_packages(package_file: Path):
raise


def install_single_pip_package(package: str):
    """
    Install (or upgrade) a single package with ``pip3``.

    The command is prefixed with ``sudo -EH`` when not running inside a virtualenv.

    :param package: The package specifier that is passed to ``pip3 install -U``.
    """
    # prefer binary release to compiling latest
    pip_command = f'pip3 install -U {package} --prefer-binary'
    sudo_prefix = '' if is_virtualenv() else 'sudo -EH '
    run_cmd_with_logging(sudo_prefix + pip_command, silent=True)


def read_package_list_from_file(path: Path):
"""
Reads the file at `path` into a list.
Expand Down
26 changes: 0 additions & 26 deletions src/helperFunctions/yara_binary_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,29 +130,3 @@ def _prepare_temp_rule_file(temp_rule_file: NamedTemporaryFile, yara_rules: byte
compiled_rules = yara.compile(source=yara_rules.decode())
compiled_rules.save(file=temp_rule_file)
temp_rule_file.flush()


def is_valid_yara_rule_file(yara_rules: str | bytes) -> bool:
    """
    Tell whether ``yara_rules`` is a valid set of yara rules.

    :param yara_rules: The yara rules as string or bytes.
    :return: ``True`` if ``get_yara_error`` reports no error for the rules and ``False`` otherwise.
    """
    compile_error = get_yara_error(yara_rules)
    return compile_error is None


def get_yara_error(rules_file: str | bytes) -> Exception | None:
    """
    Try to compile ``rules_file`` with yara and hand back the exception this causes, if any.

    :param rules_file: The yara rules as string or bytes (bytes are decoded first).
    :result: The exception raised while decoding or compiling the rules, or ``None`` if there was none.
    """
    try:
        source = rules_file.decode() if isinstance(rules_file, bytes) else rules_file
        yara.compile(source=source)
    except (yara.Error, TypeError, UnicodeDecodeError) as error:
        return error
    return None
32 changes: 32 additions & 0 deletions src/install/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import os
import stat
import subprocess
import tempfile
from contextlib import suppress
from pathlib import Path
from shlex import split
from subprocess import PIPE, STDOUT

from compile_yara_signatures import main as compile_signatures
Expand All @@ -17,6 +19,7 @@
apt_install_packages,
dnf_install_packages,
install_pip_packages,
install_single_pip_package,
read_package_list_from_file,
)

Expand Down Expand Up @@ -153,6 +156,35 @@ def _install_yara():
cmd_process = subprocess.run(command, shell=True, stdout=PIPE, stderr=STDOUT, text=True, check=False)
if cmd_process.returncode != 0:
raise InstallationError(f'Error in yara installation.\n{cmd_process.stdout}')
_install_yara_python(version=yara_version)


def _install_yara_python(version: str):
    """
    Download, build and install yara-python from its GitHub source archive.

    yara-python must be installed from source, because the pre-built version from PyPI is missing the magic module

    :param version: The release tag of yara-python that should be installed.
    :raises InstallationError: If downloading, extracting, building or installing yara-python fails.
    """
    logging.info(f'Installing yara-python {version}')
    with tempfile.TemporaryDirectory() as tmp_dir:
        archive = f'{version}.tar.gz'
        url = f'https://github.com/VirusTotal/yara-python/archive/refs/tags/{archive}'
        with OperateInDirectory(tmp_dir):
            wget_process = subprocess.run(['wget', url], capture_output=True, text=True, check=False)
            if wget_process.returncode != 0:
                # wget writes its log (including errors) to stderr, which is captured separately from stdout
                wget_output = wget_process.stderr or wget_process.stdout
                raise InstallationError(f'Error downloading yara-python: {wget_output}')
            tar_process = subprocess.run(['tar', 'xf', archive], capture_output=True, text=True, check=False)
            if tar_process.returncode != 0:
                # report a failed extraction as InstallationError like every other step of this function
                raise InstallationError(f'Extracting yara-python failed: {tar_process.stderr}')
            Path(archive).unlink()
        output_paths = [p for p in Path(tmp_dir).iterdir() if p.name.startswith('yara-python')]
        if len(output_paths) != 1:
            raise InstallationError('Extracting yara-python failed.')
        with OperateInDirectory(output_paths[0]):
            try:
                # best effort: remove a previously installed (pre-built) version; a failure here is not fatal
                subprocess.run(['pip', 'uninstall', '-y', 'yara-python'], capture_output=True, text=True, check=False)
                subprocess.run(
                    ['python', 'setup.py', 'build', '--dynamic-linking'], capture_output=True, text=True, check=True
                )
                install_single_pip_package('.')
            except subprocess.CalledProcessError as error:
                process_output = error.stderr or error.stdout or ''
                raise InstallationError(f'Error during yara-python installation: {process_output}') from error


def _install_checksec():
Expand Down
1 change: 0 additions & 1 deletion src/install/requirements_common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ rich==12.6.0
sqlalchemy~=2.0.30
ssdeep==3.4
xmltodict==0.13.0
yara-python==4.5.0

# Config validation
pydantic==2.4.0
Expand Down
23 changes: 23 additions & 0 deletions src/intercom/back_end_binding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from pathlib import Path
from typing import TYPE_CHECKING

import yara

import config
from helperFunctions.process import stop_processes
from helperFunctions.yara_binary_search import YaraBinarySearchScanner
Expand Down Expand Up @@ -56,6 +58,7 @@ def __init__(
InterComBackEndPeekBinaryTask(),
InterComBackEndLogsTask(),
InterComBackEndCancelTask(self._cancel_task),
InterComBackEndCheckYaraRuleTask(),
]

def start(self):
Expand Down Expand Up @@ -224,3 +227,23 @@ def get_response(self, task): # noqa: ARG002
if backend_logs.is_file():
return backend_logs.read_text().splitlines()[-100:]
return []


class InterComBackEndCheckYaraRuleTask(InterComListenerAndResponder):
    """
    Intercom task that checks whether YARA rules can be compiled with the yara version of the backend.

    The response is an error message, or ``None`` if the rules are valid.
    """

    CONNECTION_TYPE = 'check_yara_rules_task'
    OUTGOING_CONNECTION_TYPE = 'check_yara_rules_task_resp'

    def get_response(self, task: str | bytes) -> str | None:
        """
        :param task: The YARA rules (source, not pre-compiled) as string or bytes.
        :return: A message describing the problem or ``None`` if the rules are valid.
        """
        return self._get_yara_error(task)

    @staticmethod
    def _get_yara_error(rules: str | bytes) -> str | None:
        """
        Try to compile ``rules`` and describe what went wrong.

        :param rules: The YARA rules (source) as string or bytes; undecodable bytes are dropped.
        :return: ``'<ExceptionClass>: <message>'`` if compiling fails, ``'No rules found'`` if the source
                 compiles but does not contain any rule, and ``None`` if the rules are valid.
        """
        if isinstance(rules, bytes):
            rules = rules.decode(errors='ignore')
        try:
            compiled_rules = yara.compile(source=rules)
        except (yara.Error, TypeError, UnicodeDecodeError) as error:
            return f'{error.__class__.__name__}: {error}'
        # a source without any rule (empty, whitespace or comments only) does not generate an error,
        # so the compiled result (not the source string) must be checked for contained rules
        if not list(compiled_rules):
            return 'No rules found'
        return None
3 changes: 3 additions & 0 deletions src/intercom/front_end_binding.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def peek_in_binary(self, uid: str, offset: int, length: int) -> bytes:
def get_repacked_binary_and_file_name(self, uid: str):
return self._request_response_listener(uid, 'tar_repack_task', 'tar_repack_task_resp')

def get_yara_error(self, yara_rule: str | bytes) -> str | None:
    """Send ``yara_rule`` through the ``check_yara_rules_task`` connection and return the response."""
    request_type, response_type = 'check_yara_rules_task', 'check_yara_rules_task_resp'
    return self._request_response_listener(yara_rule, request_type, response_type)

def add_binary_search_request(self, yara_rule_binary: bytes, firmware_uid: str | None = None):
request_id = generate_task_id(yara_rule_binary)
self._add_to_redis_queue('binary_search_task', (yara_rule_binary, firmware_uid), request_id)
Expand Down
13 changes: 13 additions & 0 deletions src/test/data/yara_magic.yara
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import "magic"

/*
The sole purpose of this rule file is to test if yara-python is installed correctly.
To be more specific, it is tested whether yara is installed with the magic module enabled, which is needed for some
plugins (for more info see https://yara.readthedocs.io/en/stable/modules/magic.html).
If you get an error like e.g. `invalid field name "mime_type"` when compiling these rules, then the module is missing.
*/

// The condition references the "magic" module, so compiling this rule fails if the module is missing.
rule test_magic_module_is_enabled {
condition:
magic.mime_type() == "text/plain"
}
2 changes: 1 addition & 1 deletion src/test/integration/intercom/test_backend_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from intercom.front_end_binding import InterComFrontEndBinding

# This number must be changed, whenever a listener is added or removed
NUMBER_OF_LISTENERS = 13
NUMBER_OF_LISTENERS = 14


class ServiceMock:
Expand Down
16 changes: 16 additions & 0 deletions src/test/integration/intercom/test_task_communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
InterComBackEndAnalysisTask,
InterComBackEndBinarySearchTask,
InterComBackEndCancelTask,
InterComBackEndCheckYaraRuleTask,
InterComBackEndCompareTask,
InterComBackEndFileDiffTask,
InterComBackEndLogsTask,
Expand Down Expand Up @@ -198,3 +199,18 @@ def test_cancel_task(self, intercom_frontend):
intercom_frontend.cancel_analysis(root_uid)
result = task.get_next_task()
assert result == root_uid

def test_get_yara_error(self, intercom_frontend):
    """An invalid rule sent by the frontend reaches the backend task and yields a compile error message."""
    broken_rule = 'rule foobar {}'
    backend_task = InterComBackEndCheckYaraRuleTask()
    intercom_frontend.get_yara_error(broken_rule)
    received_rule = backend_task.get_next_task()
    assert received_rule == broken_rule
    assert 'expecting <condition>' in backend_task.get_response(received_rule)

def test_get_yara_error_valid(self, intercom_frontend):
    """A syntactically valid rule must not produce an error response."""
    response = InterComBackEndCheckYaraRuleTask().get_response('rule valid {condition: true}')
    assert response is None, 'the rule should be valid and the error should be None'
7 changes: 7 additions & 0 deletions src/test/unit/analysis/test_addons_yara.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,10 @@ def test_output_is_compatible():
assert converted_match['strings'] == EXPECTED_RESULT['strings']
for key, value in EXPECTED_RESULT['meta'].items():
assert converted_match['meta'][key] == value


def test_compile():
    """Compile the rule file that imports the magic module to check the yara-python installation."""
    rule_file = Path(get_src_dir(), 'test/data/yara_magic.yara')
    assert rule_file.is_file()
    assert yara.compile(str(rule_file))
7 changes: 7 additions & 0 deletions src/test/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ def add_re_analyze_task(self, task, unpack=True):
def cancel_analysis(self, root_uid):
self.task_list.append(root_uid)

def get_yara_error(self, rule):
    """Mocked rule check: report a syntax error if the rule text contains ``invalid`` and ``None`` otherwise."""
    rule_text = rule.decode(errors='ignore') if isinstance(rule, bytes) else rule
    return 'SyntaxError: line 1: syntax error, unexpected identifier' if 'invalid' in rule_text else None


class FrontendDatabaseMock:
"""A class mocking :py:class:`~web_interface.frontend_database.FrontendDatabase`."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_no_rule_file(test_client):


def test_wrong_rule_file_format(test_client):
result = test_client.post('/rest/binary_search', json={'rule_file': 'not an actual rule file'}).json
result = test_client.post('/rest/binary_search', json={'rule_file': 'invalid rule file'}).json
assert 'Error in YARA rule file' in result['error_message']


Expand Down
2 changes: 1 addition & 1 deletion src/test/unit/web_interface/test_app_binary_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_app_binary_search_post_invalid_rule(self, test_client):
response = _post_binary_search(
test_client, {'file': (BytesIO(b'invalid_rule'), 'test_file.txt'), 'textarea': ''}
)
assert 'Error in YARA rules' in response
assert 'syntax error' in response

def test_app_binary_search_post_empty(self, test_client):
response = _post_binary_search(test_client, {'file': None, 'textarea': ''})
Expand Down
6 changes: 3 additions & 3 deletions src/web_interface/components/database_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from helperFunctions.task_conversion import get_file_name_and_binary_from_request
from helperFunctions.uid import is_uid
from helperFunctions.web_interface import apply_filters_to_query, filter_out_illegal_characters
from helperFunctions.yara_binary_search import get_yara_error, is_valid_yara_rule_file
from storage.graphql.interface import TEMPLATE_QUERIES, GraphQlInterface, GraphQLSearchError
from storage.query_conversion import QueryConversionException
from web_interface.components.component_base import GET, POST, AppRoute, ComponentBase
Expand Down Expand Up @@ -279,12 +278,13 @@ def start_binary_search(self):
if firmware_uid and not self._firmware_is_in_db(firmware_uid):
error = f'Error: Firmware with UID {firmware_uid!r} not found in database'
elif yara_rule_file is not None:
if is_valid_yara_rule_file(yara_rule_file):
yara_error = self.intercom.get_yara_error(yara_rule_file)
if yara_error is None:
request_id = self.intercom.add_binary_search_request(yara_rule_file, firmware_uid)
return redirect(
url_for('get_binary_search_results', request_id=request_id, only_firmware=only_firmware)
)
error = f'Error in YARA rules: {get_yara_error(yara_rule_file)} (pre-compiled rules are not supported!)'
error = f'Error in YARA rules: {yara_error} (pre-compiled rules are not supported!)'
else:
error = 'please select a file or enter rules in the text area'
return render_template('database/database_binary_search.html', error=error)
Expand Down
6 changes: 3 additions & 3 deletions src/web_interface/rest/rest_binary_search.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from flask import request
from flask_restx import Namespace, fields

from helperFunctions.yara_binary_search import is_valid_yara_rule_file
from web_interface.rest.helper import error_message, success_message
from web_interface.rest.rest_resource_base import RestResourceBase
from web_interface.security.decorator import roles_accepted
Expand Down Expand Up @@ -34,8 +33,9 @@ def post(self):
`rule_file` can be something like `rule rule_name {strings: $a = \"foobar\" condition: $a}`
"""
payload_data = self.validate_payload_data(binary_search_model)
if not is_valid_yara_rule_file(payload_data['rule_file']):
return error_message('Error in YARA rule file', self.URL, request_data=request.data)
yara_error = self.intercom.get_yara_error(payload_data['rule_file'])
if yara_error is not None:
return error_message(f'Error in YARA rule file: {yara_error}', self.URL, request_data=request.data)
if payload_data['uid'] and not self.db.frontend.is_firmware(payload_data['uid']):
return error_message(
f'Firmware with UID {payload_data["uid"]} not found in database', self.URL, request_data=request.data
Expand Down

0 comments on commit 57e7c49

Please sign in to comment.