diff --git a/docsrc/conf.py b/docsrc/conf.py index 42949f631c..6d8dceb58a 100644 --- a/docsrc/conf.py +++ b/docsrc/conf.py @@ -69,7 +69,6 @@ 'docker', 'docker-compose', 'email-validator', - 'fact_helper_file', 'flaky', 'flask', 'flask_login', diff --git a/src/helperFunctions/magic.py b/src/helperFunctions/magic.py index 15c3013f2a..0c2864e68e 100644 --- a/src/helperFunctions/magic.py +++ b/src/helperFunctions/magic.py @@ -3,36 +3,36 @@ files in the default api. """ import magic as pymagic -from ctypes import c_int, c_char_p +import os from helperFunctions.fileSystem import get_src_dir -_magic_getpath = pymagic.libmagic.magic_getpath -_magic_getpath.restype = c_char_p -_magic_getpath.argtypes = [c_char_p, c_int] - -_sys_magic = _magic_getpath(None, 1) +# Default magic database according to libmagic(3); the MAGIC environment variable overrides this path +_default_magic = os.getenv('MAGIC', '/usr/share/file/misc/magic.mgc') _fact_magic = f'{get_src_dir()}/bin/fact.mgc' _internal_symlink_magic = f'{get_src_dir()}/bin/internal_symlink.mgc' +_magic_file = f'{_internal_symlink_magic}:{_fact_magic}:{_default_magic}' -_magic_by_mime = {} +_mime_magic = pymagic.Magic( + mime=True, + magic_file=_magic_file, +) +_full_magic = pymagic.Magic( + mime=False, + magic_file=_magic_file, +) -def _get_magic(mime: bool) -> pymagic.Magic: - if mime not in _magic_by_mime: - _magic_by_mime[mime] = pymagic.Magic( - mime=mime, - magic_file=f'{_internal_symlink_magic}:{_fact_magic}:{_sys_magic}', - ) - return _magic_by_mime[mime] +def _get_magic(mime: bool) -> pymagic.Magic: + return _mime_magic if mime else _full_magic -def from_file(filename, mime=False): +def from_file(filename, mime=False) -> str: """A wrapper for pymagic's ``magic.Magic.from_file``""" return _get_magic(mime).from_file(filename) -def from_buffer(filename, mime=False): +def from_buffer(filename, mime=False) -> str: """A wrapper for pymagic's ``magic.Magic.from_buffer``""" return _get_magic(mime).from_buffer(filename) diff --git a/src/install/requirements_common.txt 
b/src/install/requirements_common.txt index 703894d820..e7bbbc6751 100644 --- a/src/install/requirements_common.txt +++ b/src/install/requirements_common.txt @@ -29,8 +29,6 @@ pydantic==2.1.1 # Config parsing toml==0.10.2 -git+https://github.com/fkie-cad/fact_helper_file.git - # Common code modules git+https://github.com/fkie-cad/common_helper_files.git git+https://github.com/fkie-cad/common_helper_filter.git diff --git a/src/plugins/analysis/file_type/code/file_type.py b/src/plugins/analysis/file_type/code/file_type.py index ed7af7953a..01770dedbe 100644 --- a/src/plugins/analysis/file_type/code/file_type.py +++ b/src/plugins/analysis/file_type/code/file_type.py @@ -1,7 +1,8 @@ -from fact_helper_file import get_file_type_from_path import pydantic from pydantic import Field +from helperFunctions import magic + from analysis.plugin import AnalysisPluginV0 from analysis.plugin.compat import AnalysisBasePluginAdapterMixin @@ -34,9 +35,7 @@ def summarize(self, result: Schema) -> List[str]: def analyze(self, file_handle: io.FileIO, virtual_file_path: str, analyses: dict) -> Schema: del virtual_file_path, analyses - file_dict = get_file_type_from_path(file_handle.name) - return AnalysisPlugin.Schema( - mime=file_dict['mime'], - full=file_dict['full'], + mime=magic.from_file(file_handle.name, mime=True), + full=magic.from_file(file_handle.name, mime=False), ) diff --git a/src/plugins/analysis/qemu_exec/code/qemu_exec.py b/src/plugins/analysis/qemu_exec/code/qemu_exec.py index 465110ac07..713dbffbfd 100644 --- a/src/plugins/analysis/qemu_exec/code/qemu_exec.py +++ b/src/plugins/analysis/qemu_exec/code/qemu_exec.py @@ -11,11 +11,11 @@ from multiprocessing import Manager from pathlib import Path from tempfile import TemporaryDirectory +from helperFunctions import magic from common_helper_files import get_binary_from_file, safe_rglob from docker.errors import DockerException from docker.types import Mount -from fact_helper_file import get_file_type_from_path from 
requests.exceptions import ReadTimeout import config @@ -125,7 +125,10 @@ def _find_relevant_files(self, extracted_files_dir: Path): result = [] for path in safe_rglob(extracted_files_dir): if path.is_file() and not path.is_symlink(): - file_type = get_file_type_from_path(path.absolute()) + file_type = { + 'full': magic.from_file(path.absolute(), mime=False), + 'mime': magic.from_file(path.absolute(), mime=True), + } if self._has_relevant_type(file_type): result.append((f'/{path.relative_to(Path(self.root_path))}', file_type['full'])) return result diff --git a/src/test/acceptance/test_io_routes.py b/src/test/acceptance/test_io_routes.py index 65570e189f..57e2a97007 100644 --- a/src/test/acceptance/test_io_routes.py +++ b/src/test/acceptance/test_io_routes.py @@ -1,8 +1,8 @@ import pytest -from fact_helper_file import get_file_type_from_binary from storage.db_interface_comparison import ComparisonDbInterface -from test.common_helper import create_test_firmware +from test.common_helper import create_test_firmware # pylint: disable=wrong-import-order +from helperFunctions import magic COMPARE_RESULT = { 'general': {'a': {'id1': '', 'id2': ''}, 'b': {'id1': '', 'id2': ''}}, @@ -68,4 +68,4 @@ def test_pdf_download(self, test_client, backend_db): assert response.status_code == 200, 'pdf download failed' # noqa: PLR2004 device = self.test_fw.device_name.replace(' ', '_') assert response.headers['Content-Disposition'] == f'attachment; filename={device}_analysis_report.pdf' - assert get_file_type_from_binary(response.data)['mime'] == 'application/pdf' + assert magic.from_buffer(response.data, mime=True) == 'application/pdf' diff --git a/src/test/integration/helperFunctions/test_pdf.py b/src/test/integration/helperFunctions/test_pdf.py index d1f6c87dec..99d640813f 100644 --- a/src/test/integration/helperFunctions/test_pdf.py +++ b/src/test/integration/helperFunctions/test_pdf.py @@ -2,9 +2,8 @@ import os from pathlib import Path -from fact_helper_file import get_file_type_from_binary 
- from helperFunctions.pdf import build_pdf_report +from helperFunctions import magic from test.common_helper import TEST_FW @@ -21,5 +20,5 @@ def test_build_pdf_report(): pdf_path = build_pdf_report(TEST_FW, docker_mount_base_dir) - assert get_file_type_from_binary(pdf_path.read_bytes())['mime'] == 'application/pdf' + assert magic.from_buffer(pdf_path.read_bytes(), mime=True) == 'application/pdf' assert pdf_path.name == f"{TEST_FW.device_name.replace(' ', '_')}_analysis_report.pdf" diff --git a/src/unpacker/unpack.py b/src/unpacker/unpack.py index d0505b159b..6a51ac6232 100644 --- a/src/unpacker/unpack.py +++ b/src/unpacker/unpack.py @@ -5,9 +5,8 @@ from pathlib import Path from time import time -from fact_helper_file import get_file_type_from_path - import config +from helperFunctions import magic from helperFunctions.fileSystem import file_is_empty, get_relative_object_path from helperFunctions.tag import TagColor from objects.file import FileObject @@ -95,7 +94,8 @@ def generate_objects_and_store_files( continue current_file = FileObject(file_path=str(path)) current_virtual_path = get_relative_object_path(path, extraction_dir) - current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime'] + current_file.temporary_data['parent_fo_type'] = magic.from_file(parent.file_path, mime=True) + if current_file.uid not in extracted_files: # the same file can be contained multiple times in one archive -> only the VFP needs an update self.unpacking_locks.set_unpacking_lock(current_file.uid) diff --git a/src/web_interface/components/io_routes.py b/src/web_interface/components/io_routes.py index 2f3659714f..16d068acfb 100644 --- a/src/web_interface/components/io_routes.py +++ b/src/web_interface/components/io_routes.py @@ -6,8 +6,8 @@ from time import sleep import requests -from fact_helper_file import get_file_type_from_binary from flask import make_response, redirect, render_template, request, Response +from helperFunctions import magic import 
config from helperFunctions.database import get_shared_session @@ -82,7 +82,7 @@ def _prepare_file_download(self, uid: str, packed: bool = False) -> str | Respon def _get_file_download_mime(self, binary: bytes, uid: str) -> str: type_analysis = self.db.frontend.get_analysis(uid, 'file_type') mime = type_analysis.get('mime') if type_analysis is not None else None - return mime or get_file_type_from_binary(binary)['mime'] + return mime or magic.from_buffer(binary, mime=True) @roles_accepted(*PRIVILEGES['download']) @AppRoute('/ida-download/', GET)