From 3c8f3214dcf0b432cfd8ec4cf4b81275a3e5a9a4 Mon Sep 17 00:00:00 2001 From: Kevin Meinhardt <kmeinhardt@mozilla.com> Date: Mon, 6 Jan 2025 16:26:23 +0100 Subject: [PATCH] Add build_info.py including content hash verification for static build directories: - Removed the previous build information generation method and replaced it with a new Python script (scripts/build_info.py) that creates a build info file with content hashes for static assets and locales. - Updated Dockerfile to utilize the new build_info.py script for generating build information during the Docker build process. - Adjusted the static check in the Django app to validate the content hash of static files against the expected hash. - Cleaned up .dockerignore and .gitignore files by removing the exclusion of build*.py files. - Added unit tests for the new build_info.py script and its functionality. --- .dockerignore | 1 - .gitignore | 1 - Dockerfile | 65 ++++++++---- scripts/build_info.py | 80 ++++++++++++++ src/olympia/core/apps.py | 45 ++------ src/olympia/core/tests/test_apps.py | 86 ++++----------- tests/make/test_build_info.py | 158 ++++++++++++++++++++++++++++ 7 files changed, 313 insertions(+), 123 deletions(-) create mode 100755 scripts/build_info.py create mode 100644 tests/make/test_build_info.py diff --git a/.dockerignore b/.dockerignore index e7b5b4c3d5aa..7e1e1b89a565 100644 --- a/.dockerignore +++ b/.dockerignore @@ -26,7 +26,6 @@ .tox/ .vscode backups -build*.py buildx-bake-metadata.json deps/* docker*.yml diff --git a/.gitignore b/.gitignore index 1303a8a5131d..8b069175fbda 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,6 @@ .tox/ .vscode backups -build*.py buildx-bake-metadata.json deps/* docker*.yml diff --git a/Dockerfile b/Dockerfile index bbf5967e18d6..76bdf90a0d62 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,12 @@ FROM python:3.12-slim-bookworm AS olympia ENV BUILD_INFO=/build-info.json +# Set permissions to make the file readable by all but only writable by root +RUN <<EOF +echo "{}" > ${BUILD_INFO} +chmod 644 ${BUILD_INFO} +EOF + # Set shell to bash with logs and errors for build SHELL ["/bin/bash", "-xue", "-c"] @@ -20,29 +26,14 @@ ENV HOME=/data/olympia WORKDIR ${HOME} RUN chown -R olympia:olympia ${HOME} -FROM olympia AS info +FROM olympia AS build_info -# Build args that represent static build information -# These are passed to docker via the bake.hcl file and -# should not be overridden in the container environment. -ARG DOCKER_COMMIT -ARG DOCKER_VERSION -ARG DOCKER_BUILD +# We only need the DOCKER_TARGET at this point ARG DOCKER_TARGET - # Create the build file hard coding build variables to the image -RUN <<EOF -cat <<INNEREOF > ${BUILD_INFO} -{ - "commit": "${DOCKER_COMMIT}", - "version": "${DOCKER_VERSION}", - "build": "${DOCKER_BUILD}", - "target": "${DOCKER_TARGET}", - "source": "https://github.com/mozilla/addons-server" -} -INNEREOF -# Set permissions to make the file readable by all but only writable by root -chmod 644 ${BUILD_INFO} +RUN --mount=type=bind,source=scripts/build_info.py,target=${HOME}/scripts/build_info.py \ +<<EOF +${HOME}/scripts/build_info.py --output ${BUILD_INFO} --target ${DOCKER_TARGET} EOF FROM olympia AS base @@ -147,7 +138,7 @@ EOF FROM base AS development # Copy build info from info -COPY --from=info ${BUILD_INFO} ${BUILD_INFO} +COPY --from=build_info ${BUILD_INFO} ${BUILD_INFO} FROM base AS locales ARG LOCALE_DIR=${HOME}/locale @@ -178,12 +169,42 @@ COPY --chown=olympia:olympia static/ ${HOME}/static/ RUN \ --mount=type=bind,src=src,target=${HOME}/src \ --mount=type=bind,src=Makefile-docker,target=${HOME}/Makefile-docker \ + --mount=type=bind,src=scripts/build_info.py,target=${HOME}/scripts/build_info.py \ --mount=type=bind,src=scripts/update_assets.py,target=${HOME}/scripts/update_assets.py \ --mount=type=bind,src=manage.py,target=${HOME}/manage.py \ <<EOF make -f Makefile-docker update_assets EOF +# This stage extends from olympia so we can run as root +# prod_info includes production build directories to hash content +# ensuring built assets are not modified at runtime. +FROM olympia AS prod_info + +# Copy compiled locales from builder +COPY --from=locales --chown=olympia:olympia ${HOME}/locale ${HOME}/locale +# Copy assets from assets +COPY --from=assets --chown=olympia:olympia ${HOME}/site-static ${HOME}/site-static + +# Build args that represent static build information +# These are passed to docker via the bake.hcl file and +# should not be set in the container environment. +ARG DOCKER_COMMIT +ARG DOCKER_VERSION +ARG DOCKER_BUILD +ARG DOCKER_TARGET + +RUN \ + --mount=type=bind,source=scripts/build_info.py,target=${HOME}/scripts/build_info.py \ +<<EOF +${HOME}/scripts/build_info.py \ + --output "${BUILD_INFO}" \ + --commit "${DOCKER_COMMIT}" \ + --version "${DOCKER_VERSION}" \ + --build "${DOCKER_BUILD}" \ + --target "${DOCKER_TARGET}" +EOF + FROM base AS production # Copy the rest of the source files from the host COPY --chown=olympia:olympia . ${HOME} @@ -193,7 +214,7 @@ COPY --from=locales --chown=olympia:olympia ${HOME}/locale ${HOME}/locale COPY --from=assets --chown=olympia:olympia ${HOME}/site-static ${HOME}/site-static COPY --from=assets --chown=olympia:olympia ${HOME}/static-build ${HOME}/static-build # Copy build info from info -COPY --from=info ${BUILD_INFO} ${BUILD_INFO} +COPY --from=prod_info ${BUILD_INFO} ${BUILD_INFO} # Copy compiled locales from builder COPY --from=locales --chown=olympia:olympia ${HOME}/locale ${HOME}/locale # Copy dependencies from `pip_production` diff --git a/scripts/build_info.py b/scripts/build_info.py new file mode 100755 index 000000000000..100b57a8657a --- /dev/null +++ b/scripts/build_info.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 + +import argparse +import hashlib +import json +from pathlib import Path + + +root = Path(__file__).parent.parent + + +def hash_directory( + directory: Path, + exclude_patterns: list[str] = None, + verbose: bool = False, +) -> str: + if not directory.exists(): + return None + + hasher = hashlib.blake2b(digest_size=32) + + exclude_patterns = exclude_patterns or [] + + files = sorted(f for f in directory.rglob('*')) + + for f in files: + is_hidden = any(part.startswith('.') for part in f.parts) + is_excluded = any(f.match(pattern) for pattern in exclude_patterns) + if not is_hidden and not is_excluded: + if verbose: + print(f'Hashing {f}') + hasher.update(f.name.encode()) + if f.is_file(): + hasher.update(f.read_bytes()) + elif verbose: + print(f'Skipping {f}') + + return hasher.hexdigest() + + +def build_info( + commit: str = None, version: str = None, build: str = None, target: str = None +): + """ + Create a build info file with the current build information from the environment. + """ + return { + 'commit': commit, + 'version': version, + 'build': build, + 'target': target, + 'source': 'https://github.com/mozilla/addons-server', + 'content_hash': { + 'site_static_hash': hash_directory( + (root / 'site-static'), exclude_patterns=['staticfiles.json'] + ), + 'locale_hash': hash_directory((root / 'locale')), + }, + } + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--output', type=str, required=False) + + parser.add_argument('--commit', type=str, required=False) + parser.add_argument('--version', type=str, required=False) + parser.add_argument('--build', type=str, required=False) + # Docker target is used to determine build time and runtime behavior + parser.add_argument('--target', type=str, required=True) + + args = parser.parse_args() + + version = build_info(args.commit, args.version, args.build, args.target) + + if args.output: + with open(args.output, 'w') as f: + json.dump(version, f) + else: + print(json.dumps(version)) diff --git a/src/olympia/core/apps.py b/src/olympia/core/apps.py index 9d878f84781e..97198436e2bb 100644 --- a/src/olympia/core/apps.py +++ b/src/olympia/core/apps.py @@ -2,20 +2,18 @@ import os import subprocess import warnings -from io import StringIO from pwd import getpwnam from django.apps import AppConfig from django.conf import settings from django.core.checks import Error, Tags, register -from django.core.management import call_command -from django.core.management.base import CommandError from django.db import connection from django.utils.translation import gettext_lazy as _ import requests from olympia.core.utils import REQUIRED_VERSION_KEYS, get_version_json +from scripts.build_info import build_info log = logging.getLogger('z.startup') @@ -85,46 +83,25 @@ def version_check(app_configs, **kwargs): @register(CustomTags.custom_setup) def static_check(app_configs, **kwargs): - errors = [] - output = StringIO() + """ + Check that the static files have not been modified since the original build. + """ version = get_version_json() # We only run this check in production images. if version.get('target') != 'production': return [] - try: - call_command('compress_assets', dry_run=True, stdout=output) - stripped_output = output.getvalue().strip() - - if stripped_output: - file_paths = stripped_output.split('\n') - for file_path in file_paths: - if not os.path.exists(file_path): - error = f'Compressed asset file does not exist: {file_path}' - errors.append( - Error( - error, - id='setup.E003', - ) - ) - else: - errors.append( - Error( - 'No compressed asset files were found.', - id='setup.E003', - ) - ) - - except CommandError as e: - errors.append( + site_static_hash = build_info().get('content_hash').get('site_static_hash') + if site_static_hash != version.get('content_hash').get('site_static_hash'): + return [ Error( - f'Error running compress_assets command: {str(e)}', - id='setup.E004', + 'Site static directory does not match expected content hash', + id='setup.E003', ) - ) + ] - return errors + return [] @register(CustomTags.custom_setup) diff --git a/src/olympia/core/tests/test_apps.py b/src/olympia/core/tests/test_apps.py index 8c6823aeca15..d8368fe95d1c 100644 --- a/src/olympia/core/tests/test_apps.py +++ b/src/olympia/core/tests/test_apps.py @@ -2,7 +2,7 @@ import tempfile from unittest import mock -from django.core.management import CommandError, call_command +from django.core.management import call_command from django.core.management.base import SystemCheckError from django.test import TestCase from django.test.utils import override_settings @@ -21,6 +21,10 @@ def setUp(self): 'version': '1.0', 'build': 'http://example.com/build', 'source': 'https://github.com/mozilla/addons-server', + 'content_hash': { + 'site_static_hash': 'abc', + 'locale_hash': 'def', + }, } patch = mock.patch( 'olympia.core.apps.get_version_json', @@ -34,12 +38,10 @@ def setUp(self): with open(self.fake_css_file, 'w') as f: f.write('body { background: red; }') - patch_command = mock.patch('olympia.core.apps.call_command') - self.mock_call_command = patch_command.start() - self.mock_call_command.side_effect = ( - lambda command, dry_run, stdout: stdout.write(f'{self.fake_css_file}\n') - ) - self.addCleanup(patch_command.stop) + mock_build_info = mock.patch('olympia.core.apps.build_info') + self.mock_build_info = mock_build_info.start() + self.mock_build_info.return_value = self.default_version_json + self.addCleanup(mock_build_info.stop) self.media_root = tempfile.mkdtemp(prefix='media-root') @@ -104,64 +106,6 @@ def test_illegal_override_uid_check(self, mock_getpwnam): with override_settings(HOST_UID=1000): call_command('check') - def test_static_check_no_assets_found(self): - """ - Test static_check fails if compress_assets reports no files. - """ - self.mock_get_version_json.return_value['target'] = 'production' - # Simulate "compress_assets" returning no file paths. - self.mock_call_command.side_effect = ( - lambda command, dry_run, stdout: stdout.write('') - ) - with self.assertRaisesMessage( - SystemCheckError, 'No compressed asset files were found.' - ): - call_command('check') - - @mock.patch('os.path.exists') - def test_static_check_missing_assets(self, mock_exists): - """ - Test static_check fails if at least one specified compressed - asset file does not exist. - """ - self.mock_get_version_json.return_value['target'] = 'production' - # Simulate "compress_assets" returning a couple of files. - self.mock_call_command.side_effect = ( - lambda command, dry_run, stdout: stdout.write( - f'{self.fake_css_file}\nfoo.js\n' - ) - ) - # Pretend neither file exists on disk. - mock_exists.return_value = False - - with self.assertRaisesMessage( - SystemCheckError, - # Only the first missing file triggers the AssertionError message check - 'Compressed asset file does not exist: foo.js', - ): - call_command('check') - - def test_static_check_command_error(self): - """ - Test static_check fails if there's an error during compress_assets. - """ - self.mock_get_version_json.return_value['target'] = 'production' - self.mock_call_command.side_effect = CommandError('Oops') - with self.assertRaisesMessage( - SystemCheckError, 'Error running compress_assets command: Oops' - ): - call_command('check') - - def test_static_check_command_success(self): - """ - Test static_check succeeds if compress_assets runs without errors. - """ - self.mock_get_version_json.return_value['target'] = 'production' - self.mock_call_command.side_effect = ( - lambda command, dry_run, stdout: stdout.write(f'{self.fake_css_file}\n') - ) - call_command('check') - def test_nginx_skips_check_on_production_target(self): fake_media_root = '/fake/not/real' with override_settings(MEDIA_ROOT=fake_media_root): @@ -215,3 +159,15 @@ def test_nginx_raises_unexpected_content(self): def test_nginx_raises_unexpected_served_by(self): """Test that files are served by nginx and not redirected elsewhere.""" self._test_nginx_response('http://nginx/user-media', served_by='wow') + + def test_static_check(self): + self.mock_build_info.return_value = { + 'content_hash': { + 'site_static_hash': 'incorrect', + }, + } + with self.assertRaisesMessage( + SystemCheckError, + 'Site static directory does not match expected content hash', + ): + call_command('check') diff --git a/tests/make/test_build_info.py b/tests/make/test_build_info.py new file mode 100644 index 000000000000..6d2bcb43386e --- /dev/null +++ b/tests/make/test_build_info.py @@ -0,0 +1,158 @@ +import json +import subprocess +import tempfile +import unittest +from pathlib import Path +from unittest import mock + +from scripts.build_info import build_info, hash_directory + + +class TestHashDirectory(unittest.TestCase): + def setUp(self): + self.dir = Path(tempfile.mkdtemp()) + self.file_one = self.dir / 'file_one.txt' + + def test_missing_directory(self): + self.assertEqual(hash_directory(Path('missing')), None) + + def test_consistent_hash(self): + hash_directory(self.dir) + self.assertEqual(hash_directory(self.dir), hash_directory(self.dir)) + + def test_change_file_contents_updates_hash(self): + original_hash = hash_directory(self.dir) + self.file_one.write_text('file_one') + self.assertNotEqual(hash_directory(self.dir), original_hash) + + def test_change_file_name_updates_hash(self): + self.file_one.write_text('initial content') + original_hash = hash_directory(self.dir) + + self.file_one.rename(self.dir / 'file_two.txt') + self.assertNotEqual(hash_directory(self.dir), original_hash) + + def test_hidden_files_are_not_hashed(self): + original_hash = hash_directory(self.dir) + (self.dir / '.hidden').touch() + self.assertEqual(hash_directory(self.dir), original_hash) + + def test_excluded_files_are_not_hashed(self): + original_hash = hash_directory(self.dir) + (self.dir / 'file_one.txt').write_text('file_one') + self.assertEqual( + hash_directory(self.dir, exclude_patterns=['file_one.txt']), original_hash + ) + + def test_exclude_supports_wildcards(self): + original_hash = hash_directory(self.dir) + (self.dir / 'file_one.txt').write_text('file_one') + self.assertEqual( + hash_directory(self.dir, exclude_patterns=['*.txt']), original_hash + ) + (self.dir / 'readmer.md').write_text('readme') + self.assertNotEqual( + hash_directory(self.dir, exclude_patterns=['*.txt']), original_hash + ) + + def test_subdirectories_are_hashed(self): + subdir = self.dir / 'subdir' + subdir.mkdir() + (subdir / 'nested_file.txt').write_text('nested content') + original_hash = hash_directory(self.dir) + + (subdir / 'nested_file.txt').write_text('modified content') + self.assertNotEqual(hash_directory(self.dir), original_hash) + + def test_empty_directory(self): + # Should return consistent hash for empty directory + self.assertEqual(hash_directory(self.dir), hash_directory(self.dir)) + + def test_renamed_directory_updates_hash(self): + sub_dir = self.dir / 'subdir' + sub_dir.mkdir() + original_hash = hash_directory(self.dir) + + sub_dir.rename(self.dir / 'subdir_renamed') + self.assertNotEqual(hash_directory(self.dir), original_hash) + + def test_symlinks(self): + # Create a file and a symlink to it + self.file_one.write_text('content') + symlink = self.dir / 'link.txt' + symlink.symlink_to(self.file_one) + + original_hash = hash_directory(self.dir) + # Modify the target file + self.file_one.write_text('new content') + # Hash should change when target content changes + self.assertNotEqual(hash_directory(self.dir), original_hash) + + +class TestBuildInfo(unittest.TestCase): + def _run_build_info(self, *args): + result = subprocess.run( + ['./scripts/build_info.py', *args], + check=True, + capture_output=True, + text=True, + ) + + # If we use an output file then there are no logs + if '--output' in args: + return result + + return json.loads(result.stdout) + + def test_missing_target_raises(self): + with self.assertRaises(subprocess.CalledProcessError): + self._run_build_info() + + @mock.patch('scripts.build_info.hash_directory', return_value=None) + def test_default_build_info(self, mock_hash_directory): + self.assertEqual( + build_info(target='test'), + { + 'commit': None, + 'version': None, + 'build': None, + 'target': 'test', + 'source': 'https://github.com/mozilla/addons-server', + 'content_hash': {'site_static_hash': None, 'locale_hash': None}, + }, + ) + + site_static, locale = mock_hash_directory.call_args_list + + # Check we are hashing the correct directories + assert site_static[0][0].name == 'site-static' + assert locale[0][0].name == 'locale' + + # Asset we exclude staticfiles.json from site-static + assert 'staticfiles.json' in site_static[1]['exclude_patterns'] + + def test_build_info_with_args(self): + info = self._run_build_info( + '--commit', + '1234567890', + '--version', + '1.0.0', + '--build', + 'www.build.com', + '--target', + 'dev', + ) + + assert info['commit'] == '1234567890' + assert info['version'] == '1.0.0' + assert info['build'] == 'www.build.com' + assert info['target'] == 'dev' + + def test_build_info_output(self): + out_file = Path(tempfile.mkdtemp()) / 'output.txt' + + self._run_build_info('--target', 'test', '--output', out_file.as_posix()) + assert out_file.exists() + + json_output = json.loads(out_file.read_text()) + assert json_output['target'] == 'test'