From ea5c98baa584c5445dcbedf159d409dfeeddf7ce Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Mon, 6 May 2024 21:49:26 +0000 Subject: [PATCH 1/4] feat: add option to create footprints (#959) Co-authored-by: Paul Fouquet Co-authored-by: Alice Fage Co-authored-by: Megan Davidson --- .github/workflows/format-tests.yml | 12 ++++----- README.md | 2 +- scripts/standardise_validate.py | 16 +++++++++++ scripts/standardising.py | 43 +++++++++++++++++------------- 4 files changed, 47 insertions(+), 26 deletions(-) diff --git a/.github/workflows/format-tests.yml b/.github/workflows/format-tests.yml index d8d287857..0beefb408 100644 --- a/.github/workflows/format-tests.yml +++ b/.github/workflows/format-tests.yml @@ -32,23 +32,23 @@ jobs: - name: End to end test - Aerial Imagery run: | - docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m + docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true cmp --silent "${{ runner.temp }}/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff - name: End to end test - Elevation run: | - docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30m + docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30m --create-footprints=true cmp --silent "${{ runner.temp }}/BK39_10000_0102.tiff" ./scripts/tests/data/output/BK39_10000_0102.tiff cmp --silent "${{ runner.temp }}/BK39_10000_0101.tiff" ./scripts/tests/data/output/BK39_10000_0101.tiff - name: End to end test - Historical Aerial Imagery run: | - docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60m + docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60m --create-footprints=true cmp --silent "${{ runner.temp }}/BQ31_5000_0608.tiff" ./scripts/tests/data/output/BQ31_5000_0608.tiff - name: End to end test - Cutline (Aerial Imagery) run: | - docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10m + docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp 
--target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10m --create-footprints=true cmp --silent "${{ runner.temp }}/cutline/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829_cut.tiff - name: End to end test - Thumbnails (Topo50/Topo250) @@ -59,7 +59,7 @@ jobs: - name: End to end test - Restandardise Aerial Imagery run: | - docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m + docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true cmp --silent "${{ runner.temp }}/restandardise/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff - name: End to end test - Translate Ascii Files (Elevation) @@ -69,7 +69,7 @@ jobs: - name: End to end test - Remove empty files run: | - docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60m + docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60m --create-footprints=true empty_target_directory="$(find "${{ runner.temp }}/tmp-empty" -maxdepth 0 -type d -empty)" [[ -n "$empty_target_directory" ]] diff --git a/README.md b/README.md index d8195b5f5..ee676671b 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Run `docker run topo-imagery python standardise_validate.py --help` to get the l - Example of local execution. 
This example uses the test data available on this repo and create the output will be created in a `~/tmp/` on the local machine (volume share with `Docker`): ```bash -docker run -v ${HOME}/tmp/:/tmp/:rw topo-imagery python standardise_validate.py --preset webp --from-file ./tests/data/aerial.json --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --target /tmp/ --source-epsg 2193 --target-epsg 2193 --gsd 10m +docker run -v ${HOME}/tmp/:/tmp/:rw topo-imagery python standardise_validate.py --preset webp --from-file ./tests/data/aerial.json --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --target /tmp/ --source-epsg 2193 --target-epsg 2193 --gsd 10m --create-footprints=true ``` To use an AWS test dataset (input located in an AWS S3 bucket), log into the AWS account and add the following arguments to the `docker run` command: diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py index cd0cee9fe..07f20dab0 100644 --- a/scripts/standardise_validate.py +++ b/scripts/standardise_validate.py @@ -15,6 +15,14 @@ from scripts.standardising import run_standardising +def str_to_bool(value: str) -> bool: + if value == "true": + return True + if value == "false": + return False + raise argparse.ArgumentTypeError(f"Invalid boolean (must be exactly 'true' or 'false'): {value}") + + def main() -> None: # pylint: disable-msg=too-many-locals parser = argparse.ArgumentParser() @@ -30,6 +38,13 @@ def main() -> None: help="The target EPSG code. If different to source the imagery will be reprojected", ) parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset", type=str, required=True) + parser.add_argument( + "--create-footprints", + dest="create_footprints", + help="Create footprints for each tile ('true' / 'false')", + type=str_to_bool, + required=True, + ) parser.add_argument("--cutline", dest="cutline", help="Optional cutline to cut imagery to", required=False, nargs="?") parser.add_argument("--collection-id", dest="collection_id", help="Unique id for collection", required=True) parser.add_argument( @@ -60,6 +75,7 @@ def main() -> None: arguments.source_epsg, arguments.target_epsg, arguments.gsd, + arguments.create_footprints, arguments.target, ) diff --git a/scripts/standardising.py b/scripts/standardising.py index 0123b5a65..cfb170609 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -34,6 +34,7 @@ def run_standardising( source_epsg: str, target_epsg: str, gsd: str, + create_footprints: bool, target_output: str = "/tmp/", ) -> List[FileTiff]: """Run `standardising()` in parallel (`concurrency`). 
@@ -68,6 +69,7 @@ def run_standardising( target_epsg=target_epsg, target_output=target_output, gsd=gsd, + create_footprints=create_footprints, cutline=cutline, ), todo, @@ -107,6 +109,7 @@ def standardising( source_epsg: str, target_epsg: str, gsd: str, + create_footprints: bool, cutline: Optional[str], target_output: str = "/tmp/", ) -> Optional[FileTiff]: @@ -212,25 +215,27 @@ def standardising( with TiffFile(standardized_working_path) as file_handle: if any(tile_byte_count != 0 for tile_byte_count in file_handle.pages.first.tags["TileByteCounts"].value): - # Create footprint GeoJSON - run_gdal( - [ - "gdal_footprint", - "-t_srs", - EpsgCode.EPSG_4326, - "-max_points", - "unlimited", - "-simplify", - str(get_buffer_distance(gsd_to_float(gsd))), - ], - standardized_working_path, - footprint_tmp_path, - ) - write( - footprint_file_path, - read(footprint_tmp_path), - content_type=ContentType.GEOJSON.value, - ) + if create_footprints: + # Create footprint GeoJSON + run_gdal( + [ + "gdal_footprint", + "-t_srs", + EpsgCode.EPSG_4326, + "-max_points", + "unlimited", + "-simplify", + str(get_buffer_distance(gsd_to_float(gsd))), + ], + standardized_working_path, + footprint_tmp_path, + ) + write( + footprint_file_path, + read(footprint_tmp_path), + content_type=ContentType.GEOJSON.value, + ) + write(standardized_file_path, read(standardized_working_path), content_type=ContentType.GEOTIFF.value) return tiff From e71444fefa83b106249c5e7d359f934bae329ca4 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Tue, 7 May 2024 03:08:04 +0000 Subject: [PATCH 2/4] refactor: Pull out function for any epoch datetime TDE-1147 (#960) * refactor: Pull out function for any epoch datetime * refactor: Pull out test fixture to default location To be able to share between tests. * fix: Add to rather than replacing dictionary key To avoid clobbering existing data. --- scripts/files/tests/conftest.py | 17 +++++++++++++++++ scripts/files/tests/fs_local_test.py | 15 --------------- scripts/stac/imagery/item.py | 9 ++++----- scripts/tests/datetimes_test.py | 13 ++++++++++--- 4 files changed, 31 insertions(+), 23 deletions(-) create mode 100644 scripts/files/tests/conftest.py diff --git a/scripts/files/tests/conftest.py b/scripts/files/tests/conftest.py new file mode 100644 index 000000000..1fbf5d9a3 --- /dev/null +++ b/scripts/files/tests/conftest.py @@ -0,0 +1,17 @@ +from shutil import rmtree +from tempfile import mkdtemp +from typing import Generator + +import pytest + + +@pytest.fixture(name="setup", autouse=True) +def fixture_setup() -> Generator[str, None, None]: + """ + This function creates a temporary directory and deletes it after each test. + See following link for details: + https://docs.pytest.org/en/stable/fixture.html#yield-fixtures-recommended + """ + target = mkdtemp() + yield target + rmtree(target) diff --git a/scripts/files/tests/fs_local_test.py b/scripts/files/tests/fs_local_test.py index db08e1d84..839846e7b 100644 --- a/scripts/files/tests/fs_local_test.py +++ b/scripts/files/tests/fs_local_test.py @@ -1,25 +1,10 @@ import os -from shutil import rmtree -from tempfile import mkdtemp -from typing import Generator import pytest from scripts.files.fs_local import exists, read, write -@pytest.fixture(name="setup", autouse=True) -def fixture_setup() -> Generator[str, None, None]: - """ - This function creates a temporary directory and deletes it after each test. 
- See following link for details: - https://docs.pytest.org/en/stable/fixture.html#yield-fixtures-recommended - """ - target = mkdtemp() - yield target - rmtree(target) - - @pytest.mark.dependency(name="write") def test_write(setup: str) -> None: target = setup diff --git a/scripts/stac/imagery/item.py b/scripts/stac/imagery/item.py index 22267f5e5..9bc703623 100644 --- a/scripts/stac/imagery/item.py +++ b/scripts/stac/imagery/item.py @@ -36,11 +36,10 @@ def update_datetime(self, start_datetime: str, end_datetime: str) -> None: start_datetime: a start date in `YYYY-MM-DD` format end_datetime: a end date in `YYYY-MM-DD` format """ - self.stac["properties"] = { - "start_datetime": start_datetime, - "end_datetime": end_datetime, - "datetime": None, - } + self.stac.setdefault("properties", {}) + self.stac["properties"]["start_datetime"] = start_datetime + self.stac["properties"]["end_datetime"] = end_datetime + self.stac["properties"]["datetime"] = None # FIXME: redefine the 'Any' def update_spatial(self, geometry: Dict[str, Any], bbox: Tuple[float, ...]) -> None: diff --git a/scripts/tests/datetimes_test.py b/scripts/tests/datetimes_test.py index 044d3d646..ffb8d3b95 100644 --- a/scripts/tests/datetimes_test.py +++ b/scripts/tests/datetimes_test.py @@ -40,9 +40,7 @@ def test_should_raise_error_when_formatting_a_naive_datetime(subtests: SubTests) def test_should_be_able_to_invert_conversion() -> None: - start = datetime(1800, 1, 1, tzinfo=timezone.utc) - end = datetime(2100, 1, 1, tzinfo=timezone.utc) - original_datetime = any_datetime_between(start, end) + original_datetime = any_epoch_datetime() assert parse_rfc_3339_datetime(format_rfc_3339_datetime_string(original_datetime)) == original_datetime @@ -51,6 +49,15 @@ def test_should_format_rfc_3339_nz_midnight_datetime_string() -> None: assert format_rfc_3339_nz_midnight_datetime_string(datetime_object) == "2001-02-02T11:00:00Z" +def any_epoch_datetime() -> datetime: + """ + Get arbitrary datetime + """ + start = datetime(1970, 1, 1, tzinfo=timezone.utc) + end = datetime(2100, 1, 1, tzinfo=timezone.utc) + return any_datetime_between(start, end) + + def any_datetime_between(start: datetime, end: datetime) -> datetime: """ Get arbitrary datetime between start (inclusive) and end (exclusive), with second precision. 
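Reviewer note (not part of the patch): the `update_datetime` change in `scripts/stac/imagery/item.py` above swaps a wholesale replacement of the `properties` dictionary for `setdefault` plus per-key assignment. A minimal sketch of the difference, using a plain dict as a stand-in for `ImageryItem.stac`, shows why the old form clobbered any keys already present in `properties`:

```python
# Sketch only: a plain dict standing in for ImageryItem.stac.
stac = {"properties": {"created": "2024-05-07T00:00:00Z"}}

# Old behaviour: replacing the whole dict drops the pre-existing "created" key.
stac["properties"] = {"start_datetime": "2023-01-01", "end_datetime": "2023-01-01", "datetime": None}
assert "created" not in stac["properties"]

# New behaviour: add to the existing dict, leaving unrelated keys intact.
stac = {"properties": {"created": "2024-05-07T00:00:00Z"}}
stac.setdefault("properties", {})
stac["properties"]["start_datetime"] = "2023-01-01"
stac["properties"]["end_datetime"] = "2023-01-01"
stac["properties"]["datetime"] = None
assert stac["properties"]["created"] == "2024-05-07T00:00:00Z"
```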
From 759b32d61810177ec4ecc5d02eac838f5d94c0e9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 8 May 2024 22:10:55 +0000 Subject: [PATCH 3/4] release: 4.6.0 (#951) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ pyproject.toml | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a78f18ebc..7506d438d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## [4.6.0](https://github.com/linz/topo-imagery/compare/v4.5.0...v4.6.0) (2024-05-07) + + +### Features + +* add checksum to item links in collections TDE-1138 ([#953](https://github.com/linz/topo-imagery/issues/953)) ([afea8f0](https://github.com/linz/topo-imagery/commit/afea8f0f9722d0980657385e420f040e93085eef)) +* add option to create footprints ([#959](https://github.com/linz/topo-imagery/issues/959)) ([ea5c98b](https://github.com/linz/topo-imagery/commit/ea5c98baa584c5445dcbedf159d409dfeeddf7ce)) + + +### Bug Fixes + +* Make compatible with latest moto ([#949](https://github.com/linz/topo-imagery/issues/949)) ([5902df0](https://github.com/linz/topo-imagery/commit/5902df0fec6dd15bb3a89ec616a2c27136a6aec5)) +* Use correct types for S3 client ([#954](https://github.com/linz/topo-imagery/issues/954)) ([2c96c13](https://github.com/linz/topo-imagery/commit/2c96c1382d521d641e2b167e4096759af9b13dce)) + ## [4.5.0](https://github.com/linz/topo-imagery/compare/v4.4.0...v4.5.0) (2024-03-24) diff --git a/pyproject.toml b/pyproject.toml index e32c2ca24..0b682feaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore_missing_imports = true [tool.poetry] name = "topo-imagery" -version = "4.5.0" +version = "4.6.0" description = "A collection of scripts for processing imagery" authors = [ "Blayne Chard ", From 2828f14bc2bfb1bc03963a31a2c2b64ba24f6f75 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Thu, 9 May 2024 04:02:35 +0000 Subject: [PATCH 4/4] feat: timestamps when created TDE-1147 (#956) #### Motivation Give dataset users more detailed info about the datasets' life cycle. 
#### Modification Set item `created` and `updated` properties to current datetime when processing #### Checklist - [x] Tests updated - [ ] Docs updated (N/A) - [x] Issue linked in Title --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- scripts/files/fs.py | 16 +++++++++- scripts/files/fs_local.py | 8 +++++ scripts/files/fs_s3.py | 6 ++++ scripts/files/tests/fs_local_test.py | 12 ++++++- scripts/files/tests/fs_s3_test.py | 21 +++++++++++- scripts/files/tests/fs_test.py | 32 +++++++++++++++++-- scripts/stac/imagery/item.py | 5 +++ scripts/stac/imagery/tests/collection_test.py | 18 +++++++---- scripts/stac/imagery/tests/item_test.py | 4 +-- 9 files changed, 109 insertions(+), 13 deletions(-) diff --git a/scripts/files/fs.py b/scripts/files/fs.py index a037457a1..359846c75 100644 --- a/scripts/files/fs.py +++ b/scripts/files/fs.py @@ -1,6 +1,8 @@ import os from concurrent.futures import Future, ThreadPoolExecutor, as_completed -from typing import List, Optional +from datetime import datetime +from pathlib import Path +from typing import TYPE_CHECKING, List, Optional from boto3 import resource from linz_logger import get_log @@ -8,6 +10,11 @@ from scripts.aws.aws_helper import is_s3 from scripts.files import fs_local, fs_s3 +if TYPE_CHECKING: + from mypy_boto3_s3 import S3Client +else: + S3Client = dict + def write(destination: str, source: bytes, content_type: Optional[str] = None) -> str: """Write a file from its source to a destination path. @@ -79,6 +86,13 @@ def exists(path: str) -> bool: return fs_local.exists(path) +def modified(path: str, s3_client: Optional[S3Client] = None) -> datetime: + """Get modified datetime for S3 URL or local path""" + if is_s3(path): + return fs_s3.modified(fs_s3.bucket_name_from_path(path), fs_s3.prefix_from_path(path), s3_client) + return fs_local.modified(Path(path)) + + def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) -> List[str]: """Writes list of files to target destination using multithreading. 
diff --git a/scripts/files/fs_local.py b/scripts/files/fs_local.py index ca2d5bdd8..8f174983b 100644 --- a/scripts/files/fs_local.py +++ b/scripts/files/fs_local.py @@ -1,4 +1,6 @@ import os +from datetime import datetime, timezone +from pathlib import Path def write(destination: str, source: bytes) -> None: @@ -36,3 +38,9 @@ def exists(path: str) -> bool: True if the path exists """ return os.path.exists(path) + + +def modified(path: Path) -> datetime: + """Get path modified datetime as UTC""" + modified_timestamp = os.path.getmtime(path) + return datetime.fromtimestamp(modified_timestamp, tz=timezone.utc) diff --git a/scripts/files/fs_s3.py b/scripts/files/fs_s3.py index 6d2d47b2d..a5296f1f0 100644 --- a/scripts/files/fs_s3.py +++ b/scripts/files/fs_s3.py @@ -1,5 +1,6 @@ from concurrent import futures from concurrent.futures import ThreadPoolExecutor +from datetime import datetime from typing import TYPE_CHECKING, Any, Generator, List, Optional, Union from boto3 import client, resource @@ -237,3 +238,8 @@ def get_object_parallel_multithreading( yield key, future.result() else: yield key, exception + + +def modified(bucket_name: str, key: str, s3_client: Optional[S3Client]) -> datetime: + s3_client = s3_client or client("s3") + return _get_object(bucket_name, key, s3_client)["LastModified"] diff --git a/scripts/files/tests/fs_local_test.py b/scripts/files/tests/fs_local_test.py index 839846e7b..780387060 100644 --- a/scripts/files/tests/fs_local_test.py +++ b/scripts/files/tests/fs_local_test.py @@ -1,8 +1,10 @@ import os +from pathlib import Path import pytest -from scripts.files.fs_local import exists, read, write +from scripts.files.fs_local import exists, modified, read, write +from scripts.tests.datetimes_test import any_epoch_datetime @pytest.mark.dependency(name="write") @@ -43,3 +45,11 @@ def test_exists(setup: str) -> None: def test_exists_file_not_found() -> None: found = exists("/tmp/test.file") assert found is False + + +def test_should_get_modified_datetime(setup: str) -> None: + path = Path(os.path.join(setup, "modified.file")) + path.touch() + modified_datetime = any_epoch_datetime() + os.utime(path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp())) + assert modified(path) == modified_datetime diff --git a/scripts/files/tests/fs_s3_test.py b/scripts/files/tests/fs_s3_test.py index 3fbf99439..5885b35d0 100644 --- a/scripts/files/tests/fs_s3_test.py +++ b/scripts/files/tests/fs_s3_test.py @@ -3,12 +3,17 @@ from boto3 import client, resource from botocore.exceptions import ClientError from moto import mock_aws +from moto.core.models import DEFAULT_ACCOUNT_ID +from moto.s3.models import s3_backends from moto.s3.responses import DEFAULT_REGION_NAME +from moto.wafv2.models import GLOBAL_REGION +from mypy_boto3_s3 import S3Client from pytest import CaptureFixture, raises from pytest_subtests import SubTests from scripts.files.files_helper import ContentType -from scripts.files.fs_s3 import exists, list_files_in_uri, read, write +from scripts.files.fs_s3 import exists, list_files_in_uri, modified, read, write +from scripts.tests.datetimes_test import any_epoch_datetime @mock_aws @@ -156,3 +161,17 @@ def test_list_files_in_uri(subtests: SubTests) -> None: with subtests.test(): assert "data/image.tiff" not in files + + +@mock_aws +def test_should_get_modified_datetime() -> None: + bucket_name = "any-bucket-name" + key = "any-key" + modified_datetime = any_epoch_datetime() + + s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME) + 
s3_client.create_bucket(Bucket=bucket_name) + s3_client.put_object(Bucket=bucket_name, Key=key, Body=b"any body") + s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket_name].keys[key].last_modified = modified_datetime + + assert modified(bucket_name, key, s3_client) == modified_datetime diff --git a/scripts/files/tests/fs_test.py b/scripts/files/tests/fs_test.py index 4077b9740..71d55e8e9 100644 --- a/scripts/files/tests/fs_test.py +++ b/scripts/files/tests/fs_test.py @@ -1,14 +1,20 @@ import os +from pathlib import Path from shutil import rmtree from tempfile import mkdtemp -from boto3 import resource +from boto3 import client, resource from moto import mock_aws +from moto.core.models import DEFAULT_ACCOUNT_ID +from moto.s3.models import s3_backends from moto.s3.responses import DEFAULT_REGION_NAME +from moto.wafv2.models import GLOBAL_REGION +from mypy_boto3_s3 import S3Client from pytest import CaptureFixture, raises from pytest_subtests import SubTests -from scripts.files.fs import NoSuchFileError, read, write, write_all, write_sidecars +from scripts.files.fs import NoSuchFileError, modified, read, write, write_all, write_sidecars +from scripts.tests.datetimes_test import any_epoch_datetime def test_read_key_not_found_local() -> None: @@ -81,3 +87,25 @@ def test_write_sidecars_one_found(capsys: CaptureFixture[str], subtests: SubTest assert "wrote_sidecar_file" in logs rmtree(target) + + +@mock_aws +def test_should_get_s3_object_modified_datetime() -> None: + bucket_name = "any-bucket-name" + key = "any-key" + modified_datetime = any_epoch_datetime() + + s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME) + s3_client.create_bucket(Bucket=bucket_name) + s3_client.put_object(Bucket=bucket_name, Key=key, Body=b"any body") + s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket_name].keys[key].last_modified = modified_datetime + + assert modified(f"s3://{bucket_name}/{key}", s3_client) == modified_datetime + + +def test_should_get_local_file_modified_datetime(setup: str) -> None: + path = os.path.join(setup, "modified.file") + Path(path).touch() + modified_datetime = any_epoch_datetime() + os.utime(path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp())) + assert modified(path) == modified_datetime diff --git a/scripts/stac/imagery/item.py b/scripts/stac/imagery/item.py index 9bc703623..651f6b553 100644 --- a/scripts/stac/imagery/item.py +++ b/scripts/stac/imagery/item.py @@ -1,7 +1,9 @@ import os from typing import Any, Dict, Tuple +from scripts.datetimes import format_rfc_3339_datetime_string from scripts.files import fs +from scripts.files.fs import modified from scripts.stac.util import checksum from scripts.stac.util.STAC_VERSION import STAC_VERSION from scripts.stac.util.stac_extensions import StacExtensions @@ -12,6 +14,7 @@ class ImageryItem: def __init__(self, id_: str, file: str) -> None: file_content = fs.read(file) + file_modified_datetime = format_rfc_3339_datetime_string(modified(file)) self.stac = { "type": "Feature", "stac_version": STAC_VERSION, @@ -24,6 +27,8 @@ def __init__(self, id_: str, file: str) -> None: "href": os.path.join(".", os.path.basename(file)), "type": "image/tiff; application=geotiff; profile=cloud-optimized", "file:checksum": checksum.multihash_as_hex(file_content), + "created": file_modified_datetime, + "updated": file_modified_datetime, } }, "stac_extensions": [StacExtensions.file.value], diff --git a/scripts/stac/imagery/tests/collection_test.py b/scripts/stac/imagery/tests/collection_test.py 
index 453015f2b..9ad0d8125 100644 --- a/scripts/stac/imagery/tests/collection_test.py +++ b/scripts/stac/imagery/tests/collection_test.py @@ -1,14 +1,13 @@ import json import os import tempfile -from datetime import datetime +from datetime import datetime, timezone from shutil import rmtree from tempfile import mkdtemp from typing import Generator import pytest import shapely.geometry -from pytest_mock import MockerFixture from pytest_subtests import SubTests from scripts.files.fs import read @@ -17,6 +16,7 @@ from scripts.stac.imagery.metadata_constants import CollectionMetadata from scripts.stac.imagery.provider import Provider, ProviderRole from scripts.stac.util.stac_extensions import StacExtensions +from scripts.tests.datetimes_test import any_epoch_datetime # pylint: disable=duplicate-code @@ -113,10 +113,12 @@ def test_interval_updated_from_existing(metadata: CollectionMetadata) -> None: assert collection.stac["extent"]["temporal"]["interval"] == [["2021-01-27T00:00:00Z", "2021-02-20T00:00:00Z"]] -def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests: SubTests) -> None: +def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None: collection = ImageryCollection(metadata) - mocker.patch("scripts.files.fs.read", return_value=b"") - item = ImageryItem("BR34_5000_0304", "./test/BR34_5000_0304.tiff") + item_file_path = "./scripts/tests/data/empty.tiff" + modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc) + os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp())) + item = ImageryItem("BR34_5000_0304", item_file_path) geometry = { "type": "Polygon", "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]], @@ -131,7 +133,7 @@ def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests: with subtests.test(): assert { - "file:checksum": "1220a049888b3971d9ed3fd52b830cfeb379d7069d6b7a927456bcf1fabab0ec4f46", + "file:checksum": "122097b5d2b049c6ffdf608af28c4ba2744fad7f03046d1f58b2523402f30577f618", "rel": "item", "href": "./BR34_5000_0304.json", "type": "application/json", @@ -143,6 +145,10 @@ def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests: with subtests.test(): assert collection.stac["extent"]["spatial"]["bbox"] == [bbox] + for property_name in ["created", "updated"]: + with subtests.test(msg=f"{property_name} property"): + assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z" + def test_write_collection(metadata: CollectionMetadata) -> None: target = mkdtemp() diff --git a/scripts/stac/imagery/tests/item_test.py b/scripts/stac/imagery/tests/item_test.py index 575551be0..4b3be4858 100644 --- a/scripts/stac/imagery/tests/item_test.py +++ b/scripts/stac/imagery/tests/item_test.py @@ -18,7 +18,7 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None: bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0) mocker.patch("scripts.files.fs.read", return_value=b"") - path = "./test/BR34_5000_0302.tiff" + path = "./scripts/tests/data/empty.tiff" id_ = get_file_name_from_path(path) start_datetime = "2021-01-27T00:00:00Z" end_datetime = "2021-01-27T00:00:00Z" @@ -74,7 +74,7 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No ulid = "fake_ulid" collection = ImageryCollection(metadata=metadata, collection_id=ulid) - path = "./test/BR34_5000_0302.tiff" + path = "./scripts/tests/data/empty.tiff" id_ = 
get_file_name_from_path(path) mocker.patch("scripts.files.fs.read", return_value=b"") item = ImageryItem(id_, path)
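To close out the series, a hedged usage sketch (not part of the patches) of how the new timestamp plumbing from patch 4 fits together, assuming the module layout shown above; the TIFF path is the empty test fixture referenced in the updated tests:

```python
# Sketch: deriving an asset's "created"/"updated" values the way patch 4 does.
from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.files.fs import modified

# fs.modified() dispatches to fs_s3.modified() for s3:// URLs and to
# fs_local.modified() for local paths, returning a timezone-aware datetime.
tiff_path = "./scripts/tests/data/empty.tiff"
file_modified_datetime = format_rfc_3339_datetime_string(modified(tiff_path))

# ImageryItem.__init__ stores the same string under both keys, so a freshly
# standardised asset reports identical "created" and "updated" values.
visual_asset_timestamps = {
    "created": file_modified_datetime,
    "updated": file_modified_datetime,
}
print(visual_asset_timestamps)
```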