Skip to content

Commit

Permalink
refactor: Drop fact_helper_file dependency
Browse files Browse the repository at this point in the history
It is replaced by the much simpler magic.py.
Note that we do not compile the custom magic files, as it would need
to be updated once `file` is updated.
  • Loading branch information
maringuu committed Sep 12, 2024
1 parent fd453db commit ceb69ec
Show file tree
Hide file tree
Showing 10 changed files with 25 additions and 29 deletions.
1 change: 0 additions & 1 deletion docsrc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@
'docker',
'docker-compose',
'email-validator',
'fact_helper_file',
'flaky',
'flask',
'flask_login',
Expand Down
10 changes: 5 additions & 5 deletions src/helperFunctions/magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@

# On ubuntu this is provided by the libmagic-mgc package
_default_magic = os.getenv('MAGIC', '/usr/lib/file/magic.mgc')
_fact_magic = f'{get_src_dir()}/bin/firmware.mgc'
_internal_symlink_magic = f'{get_src_dir()}/bin/internal_symlink_magic.mgc'
_fact_magic = f'{get_src_dir()}/bin/firmware'
_internal_symlink_magic = f'{get_src_dir()}/bin/internal_symlink_magic'
_magic_file = f'{_internal_symlink_magic}:{_fact_magic}:{_default_magic}'

_instances = {}


def _get_magic_instance(**kwargs):
"""Returns an instance of pymagic.Maigc"""
"""Returns an instance of pymagic.Magic"""
# Dicts are not hashable but sorting and creating a tuple is a valid hash
key = hash(tuple(sorted(kwargs.items())))
instance = _instances.get(key)
Expand All @@ -34,8 +34,8 @@ def from_file(filename: bytes | str | PathLike, magic_file: str | None = _magic_
"""Like pymagic's ``magic.from_file`` but it accepts all keyword arguments
that ``magic.Magic`` accepts.
"""
m = _get_magic_instance(magic_file=magic_file, **kwargs)
return m.from_file(filename)
instance = _get_magic_instance(magic_file=magic_file, **kwargs)
return instance.from_file(filename)


def from_buffer(buf: bytes | str, magic_file: str | None = _magic_file, **kwargs) -> str:
Expand Down
7 changes: 3 additions & 4 deletions src/install/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,10 @@ def main(distribution):
BIN_DIR.mkdir(exist_ok=True)

run_cmd_with_logging(
f'wget -O {BIN_DIR / "firmware"} https://github.com/fkie-cad/firmware-magic-database/releases/download/v0.2.0/firmware'
f'wget -O {BIN_DIR / "firmware.xz"} https://github.com/fkie-cad/firmware-magic-database/releases/download/v0.2.1/firmware.xz'
)
run_cmd_with_logging(f'file -C -m {BIN_DIR / "firmware"}')
run_cmd_with_logging(f'file -C -m {INSTALL_DIR / "internal_symlink_magic"}')
run_cmd_with_logging(f'mv {INSTALL_DIR / "internal_symlink_magic.mgc"} {BIN_DIR}')
run_cmd_with_logging(f'unxz --force {BIN_DIR / "firmware.xz"}')
run_cmd_with_logging(f'cp --force {INSTALL_DIR / "internal_symlink_magic"} {BIN_DIR}')

apt_packages_path = INSTALL_DIR / 'apt-pkgs-common.txt'
dnf_packages_path = INSTALL_DIR / 'dnf-pkgs-common.txt'
Expand Down
2 changes: 0 additions & 2 deletions src/install/requirements_common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ pydantic==2.4.0
# Config parsing
toml==0.10.2

git+https://github.com/fkie-cad/fact_helper_file.git

# Common code modules
git+https://github.com/fkie-cad/common_helper_files.git
git+https://github.com/fkie-cad/common_helper_filter.git
Expand Down
8 changes: 3 additions & 5 deletions src/plugins/analysis/file_type/code/file_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from typing import List

import pydantic
from fact_helper_file import get_file_type_from_path
from pydantic import Field

from analysis.plugin import AnalysisPluginV0
from analysis.plugin.compat import AnalysisBasePluginAdapterMixin
from helperFunctions import magic

if typing.TYPE_CHECKING:
import io
Expand Down Expand Up @@ -39,9 +39,7 @@ def summarize(self, result: Schema) -> List[str]:
def analyze(self, file_handle: io.FileIO, virtual_file_path: str, analyses: dict) -> Schema:
del virtual_file_path, analyses

file_dict = get_file_type_from_path(file_handle.name)

return AnalysisPlugin.Schema(
mime=file_dict['mime'],
full=file_dict['full'],
mime=magic.from_file(file_handle.name, mime=True),
full=magic.from_file(file_handle.name, mime=False),
)
7 changes: 5 additions & 2 deletions src/plugins/analysis/qemu_exec/code/qemu_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
from common_helper_files import get_binary_from_file, safe_rglob
from docker.errors import DockerException
from docker.types import Mount
from fact_helper_file import get_file_type_from_path
from requests.exceptions import ReadTimeout

import config
from analysis.PluginBase import AnalysisBasePlugin
from helperFunctions import magic
from helperFunctions.docker import run_docker_container
from helperFunctions.tag import TagColor
from helperFunctions.uid import create_uid
Expand Down Expand Up @@ -125,7 +125,10 @@ def _find_relevant_files(self, extracted_files_dir: Path):
result = []
for path in safe_rglob(extracted_files_dir):
if path.is_file() and not path.is_symlink():
file_type = get_file_type_from_path(path.absolute())
file_type = {
'full': magic.from_file(path.absolute(), mime=False),
'mime': magic.from_file(path.absolute(), mime=True),
}
if self._has_relevant_type(file_type):
result.append((f'/{path.relative_to(Path(self.root_path))}', file_type['full']))
return result
Expand Down
4 changes: 2 additions & 2 deletions src/test/acceptance/test_io_routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
from fact_helper_file import get_file_type_from_binary

from helperFunctions import magic
from storage.db_interface_comparison import ComparisonDbInterface
from test.common_helper import create_test_firmware

Expand Down Expand Up @@ -68,4 +68,4 @@ def test_pdf_download(self, test_client, backend_db):
assert response.status_code == 200, 'pdf download failed' # noqa: PLR2004
device = self.test_fw.device_name.replace(' ', '_')
assert response.headers['Content-Disposition'] == f'attachment; filename={device}_analysis_report.pdf'
assert get_file_type_from_binary(response.data)['mime'] == 'application/pdf'
assert magic.from_buffer(response.data, mime=True) == 'application/pdf'
5 changes: 2 additions & 3 deletions src/test/integration/helperFunctions/test_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import os
from pathlib import Path

from fact_helper_file import get_file_type_from_binary

from helperFunctions import magic
from helperFunctions.pdf import build_pdf_report
from test.common_helper import TEST_FW

Expand All @@ -21,5 +20,5 @@ def test_build_pdf_report():

pdf_path = build_pdf_report(TEST_FW, docker_mount_base_dir)

assert get_file_type_from_binary(pdf_path.read_bytes())['mime'] == 'application/pdf'
assert magic.from_buffer(pdf_path.read_bytes(), mime=True) == 'application/pdf'
assert pdf_path.name == f"{TEST_FW.device_name.replace(' ', '_')}_analysis_report.pdf"
6 changes: 3 additions & 3 deletions src/unpacker/unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
from time import time
from typing import TYPE_CHECKING, Optional

from fact_helper_file import get_file_type_from_path

import config
from analysis.PluginBase import sanitize_processed_analysis
from helperFunctions import magic
from helperFunctions.fileSystem import file_is_empty, get_relative_object_path
from helperFunctions.tag import TagColor
from objects.file import FileObject
Expand Down Expand Up @@ -94,7 +93,8 @@ def generate_objects_and_store_files(
continue
current_file = FileObject(file_path=str(path))
current_virtual_path = get_relative_object_path(path, extraction_dir)
current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
current_file.temporary_data['parent_fo_type'] = magic.from_file(parent.file_path, mime=True)

if current_file.uid not in extracted_files:
# the same file can be contained multiple times in one archive -> only the VFP needs an update
self.unpacking_locks.set_unpacking_lock(current_file.uid)
Expand Down
4 changes: 2 additions & 2 deletions src/web_interface/components/io_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from time import sleep

import requests
from fact_helper_file import get_file_type_from_binary
from flask import Response, make_response, redirect, render_template, request

import config
from helperFunctions import magic
from helperFunctions.database import get_shared_session
from helperFunctions.pdf import build_pdf_report
from helperFunctions.task_conversion import check_for_errors, convert_analysis_task_to_fw_obj, create_analysis_task
Expand Down Expand Up @@ -82,7 +82,7 @@ def _prepare_file_download(self, uid: str, packed: bool = False) -> str | Respon
def _get_file_download_mime(self, binary: bytes, uid: str) -> str:
type_analysis = self.db.frontend.get_analysis(uid, 'file_type')
mime = type_analysis.get('mime') if type_analysis is not None else None
return mime or get_file_type_from_binary(binary)['mime']
return mime or magic.from_buffer(binary, mime=True)

@roles_accepted(*PRIVILEGES['download'])
@AppRoute('/ida-download/<compare_id>', GET)
Expand Down

0 comments on commit ceb69ec

Please sign in to comment.