diff --git a/.github/workflows/format-tests.yml b/.github/workflows/format-tests.yml
index d8d287857..0beefb408 100644
--- a/.github/workflows/format-tests.yml
+++ b/.github/workflows/format-tests.yml
@@ -32,23 +32,23 @@ jobs:
       - name: End to end test - Aerial Imagery
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
           cmp --silent "${{ runner.temp }}/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff

       - name: End to end test - Elevation
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30m
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30m --create-footprints=true
           cmp --silent "${{ runner.temp }}/BK39_10000_0102.tiff" ./scripts/tests/data/output/BK39_10000_0102.tiff
           cmp --silent "${{ runner.temp }}/BK39_10000_0101.tiff" ./scripts/tests/data/output/BK39_10000_0101.tiff

       - name: End to end test - Historical Aerial Imagery
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60m
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60m --create-footprints=true
           cmp --silent "${{ runner.temp }}/BQ31_5000_0608.tiff" ./scripts/tests/data/output/BQ31_5000_0608.tiff

       - name: End to end test - Cutline (Aerial Imagery)
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10m
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10m --create-footprints=true
           cmp --silent "${{ runner.temp }}/cutline/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829_cut.tiff

       - name: End to end test - Thumbnails (Topo50/Topo250)
@@ -59,7 +59,7 @@ jobs:
       - name: End to end test - Restandardise Aerial Imagery
         run: |
-          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m
+          docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
           cmp --silent "${{ runner.temp }}/restandardise/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff

       - name: End to end test - Translate Ascii Files (Elevation)
@@ -69,7 +69,7 @@ jobs:
       - name: End to end test - Remove empty files
         run: |
-          docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60m
+          docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60m --create-footprints=true
           empty_target_directory="$(find "${{ runner.temp }}/tmp-empty" -maxdepth 0 -type d -empty)"
           [[ -n "$empty_target_directory" ]]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a78f18ebc..7506d438d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # Changelog

+## [4.6.0](https://github.com/linz/topo-imagery/compare/v4.5.0...v4.6.0) (2024-05-07)
+
+
+### Features
+
+* add checksum to item links in collections TDE-1138 ([#953](https://github.com/linz/topo-imagery/issues/953)) ([afea8f0](https://github.com/linz/topo-imagery/commit/afea8f0f9722d0980657385e420f040e93085eef))
+* add option to create footprints ([#959](https://github.com/linz/topo-imagery/issues/959)) ([ea5c98b](https://github.com/linz/topo-imagery/commit/ea5c98baa584c5445dcbedf159d409dfeeddf7ce))
+
+
+### Bug Fixes
+
+* Make compatible with latest moto ([#949](https://github.com/linz/topo-imagery/issues/949)) ([5902df0](https://github.com/linz/topo-imagery/commit/5902df0fec6dd15bb3a89ec616a2c27136a6aec5))
+* Use correct types for S3 client ([#954](https://github.com/linz/topo-imagery/issues/954)) ([2c96c13](https://github.com/linz/topo-imagery/commit/2c96c1382d521d641e2b167e4096759af9b13dce))
+
 ## [4.5.0](https://github.com/linz/topo-imagery/compare/v4.4.0...v4.5.0) (2024-03-24)
diff --git a/README.md b/README.md
index d8195b5f5..ee676671b 100644
--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ Run `docker run topo-imagery python standardise_validate.py --help` to get the l
 - Example of local execution. This example uses the test data available in this repo; the output will be created in `~/tmp/` on the local machine (volume shared with `Docker`):

 ```bash
-docker run -v ${HOME}/tmp/:/tmp/:rw topo-imagery python standardise_validate.py --preset webp --from-file ./tests/data/aerial.json --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --target /tmp/ --source-epsg 2193 --target-epsg 2193 --gsd 10m
+docker run -v ${HOME}/tmp/:/tmp/:rw topo-imagery python standardise_validate.py --preset webp --from-file ./tests/data/aerial.json --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --target /tmp/ --source-epsg 2193 --target-epsg 2193 --gsd 10m --create-footprints=true
 ```

 To use an AWS test dataset (input located in an AWS S3 bucket), log into the AWS account and add the following arguments to the `docker run` command:
diff --git a/pyproject.toml b/pyproject.toml
index bb5bbb2b8..56c03d3eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ ignore_missing_imports = true

 [tool.poetry]
 name = "topo-imagery"
-version = "4.5.0"
+version = "4.6.0"
 description = "A collection of scripts for processing imagery"
 authors = [
     "Blayne Chard ",
diff --git a/scripts/files/fs.py b/scripts/files/fs.py
index a037457a1..359846c75 100644
--- a/scripts/files/fs.py
+++ b/scripts/files/fs.py
@@ -1,6 +1,8 @@
 import os
 from concurrent.futures import Future, ThreadPoolExecutor, as_completed
-from typing import List, Optional
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING, List, Optional

 from boto3 import resource
 from linz_logger import get_log
@@ -8,6 +10,11 @@
 from scripts.aws.aws_helper import is_s3
 from scripts.files import fs_local, fs_s3

+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3Client
+else:
+    S3Client = dict
+

 def write(destination: str, source: bytes, content_type: Optional[str] = None) -> str:
     """Write a file from its source to a destination path.
@@ -79,6 +86,13 @@ def exists(path: str) -> bool:
     return fs_local.exists(path)


+def modified(path: str, s3_client: Optional[S3Client] = None) -> datetime:
+    """Get modified datetime for S3 URL or local path"""
+    if is_s3(path):
+        return fs_s3.modified(fs_s3.bucket_name_from_path(path), fs_s3.prefix_from_path(path), s3_client)
+    return fs_local.modified(Path(path))
+
+
 def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) -> List[str]:
     """Writes list of files to target destination using multithreading.
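The new `fs.modified` helper above dispatches on the path scheme: S3 URLs are resolved through `fs_s3.modified`, everything else through `fs_local.modified`. A minimal caller-side sketch (assuming the module paths added in this PR; the bucket and key names are hypothetical, and the S3 branch needs valid AWS credentials and an existing object):

```python
from scripts.files.fs import modified

# Local path: resolved via os.path.getmtime, returned as an aware UTC datetime.
local_mtime = modified("/tmp/BG35_1000_4829.tiff")

# S3 URL: resolved from the object's LastModified attribute; a boto3 client
# is created on demand when s3_client is not supplied.
s3_mtime = modified("s3://example-bucket/example-key.tiff")

print(local_mtime.isoformat(), s3_mtime.isoformat())
```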
diff --git a/scripts/files/fs_local.py b/scripts/files/fs_local.py
index ca2d5bdd8..8f174983b 100644
--- a/scripts/files/fs_local.py
+++ b/scripts/files/fs_local.py
@@ -1,4 +1,6 @@
 import os
+from datetime import datetime, timezone
+from pathlib import Path


 def write(destination: str, source: bytes) -> None:
@@ -36,3 +38,9 @@ def exists(path: str) -> bool:
         True if the path exists
     """
     return os.path.exists(path)
+
+
+def modified(path: Path) -> datetime:
+    """Get path modified datetime as UTC"""
+    modified_timestamp = os.path.getmtime(path)
+    return datetime.fromtimestamp(modified_timestamp, tz=timezone.utc)
diff --git a/scripts/files/fs_s3.py b/scripts/files/fs_s3.py
index 6d2d47b2d..a5296f1f0 100644
--- a/scripts/files/fs_s3.py
+++ b/scripts/files/fs_s3.py
@@ -1,5 +1,6 @@
 from concurrent import futures
 from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
 from typing import TYPE_CHECKING, Any, Generator, List, Optional, Union

 from boto3 import client, resource
@@ -237,3 +238,9 @@ def get_object_parallel_multithreading(
             yield key, future.result()
         else:
             yield key, exception
+
+
+def modified(bucket_name: str, key: str, s3_client: Optional[S3Client]) -> datetime:
+    """Get the object's LastModified datetime from S3"""
+    s3_client = s3_client or client("s3")
+    return _get_object(bucket_name, key, s3_client)["LastModified"]
diff --git a/scripts/files/tests/conftest.py b/scripts/files/tests/conftest.py
new file mode 100644
index 000000000..1fbf5d9a3
--- /dev/null
+++ b/scripts/files/tests/conftest.py
@@ -0,0 +1,17 @@
+from shutil import rmtree
+from tempfile import mkdtemp
+from typing import Generator
+
+import pytest
+
+
+@pytest.fixture(name="setup", autouse=True)
+def fixture_setup() -> Generator[str, None, None]:
+    """
+    This function creates a temporary directory and deletes it after each test.
+    See following link for details:
+    https://docs.pytest.org/en/stable/fixture.html#yield-fixtures-recommended
+    """
+    target = mkdtemp()
+    yield target
+    rmtree(target)
diff --git a/scripts/files/tests/fs_local_test.py b/scripts/files/tests/fs_local_test.py
index db08e1d84..780387060 100644
--- a/scripts/files/tests/fs_local_test.py
+++ b/scripts/files/tests/fs_local_test.py
@@ -1,23 +1,10 @@
 import os
-from shutil import rmtree
-from tempfile import mkdtemp
-from typing import Generator
+from pathlib import Path

 import pytest

-from scripts.files.fs_local import exists, read, write
-
-
-@pytest.fixture(name="setup", autouse=True)
-def fixture_setup() -> Generator[str, None, None]:
-    """
-    This function creates a temporary directory and deletes it after each test.
-    See following link for details:
-    https://docs.pytest.org/en/stable/fixture.html#yield-fixtures-recommended
-    """
-    target = mkdtemp()
-    yield target
-    rmtree(target)
+from scripts.files.fs_local import exists, modified, read, write
+from scripts.tests.datetimes_test import any_epoch_datetime


 @pytest.mark.dependency(name="write")
@@ -58,3 +45,11 @@ def test_exists(setup: str) -> None:
 def test_exists_file_not_found() -> None:
     found = exists("/tmp/test.file")
     assert found is False
+
+
+def test_should_get_modified_datetime(setup: str) -> None:
+    path = Path(os.path.join(setup, "modified.file"))
+    path.touch()
+    modified_datetime = any_epoch_datetime()
+    os.utime(path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
+    assert modified(path) == modified_datetime
diff --git a/scripts/files/tests/fs_s3_test.py b/scripts/files/tests/fs_s3_test.py
index 3fbf99439..5885b35d0 100644
--- a/scripts/files/tests/fs_s3_test.py
+++ b/scripts/files/tests/fs_s3_test.py
@@ -3,12 +3,17 @@
 from boto3 import client, resource
 from botocore.exceptions import ClientError
 from moto import mock_aws
+from moto.core.models import DEFAULT_ACCOUNT_ID
+from moto.s3.models import s3_backends
 from moto.s3.responses import DEFAULT_REGION_NAME
+from moto.wafv2.models import GLOBAL_REGION
+from mypy_boto3_s3 import S3Client
 from pytest import CaptureFixture, raises
 from pytest_subtests import SubTests

 from scripts.files.files_helper import ContentType
-from scripts.files.fs_s3 import exists, list_files_in_uri, read, write
+from scripts.files.fs_s3 import exists, list_files_in_uri, modified, read, write
+from scripts.tests.datetimes_test import any_epoch_datetime


 @mock_aws
@@ -156,3 +161,17 @@ def test_list_files_in_uri(subtests: SubTests) -> None:

     with subtests.test():
         assert "data/image.tiff" not in files
+
+
+@mock_aws
+def test_should_get_modified_datetime() -> None:
+    bucket_name = "any-bucket-name"
+    key = "any-key"
+    modified_datetime = any_epoch_datetime()
+
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket=bucket_name)
+    s3_client.put_object(Bucket=bucket_name, Key=key, Body=b"any body")
+    s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket_name].keys[key].last_modified = modified_datetime
+
+    assert modified(bucket_name, key, s3_client) == modified_datetime
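What makes the equality assertions in these tests stable is that `fs_local.modified` returns an aware UTC datetime: `os.utime` sets `(atime, mtime)` from POSIX timestamps, and the helper reads `mtime` back with an explicit `timezone.utc`. A self-contained sketch of that round trip (assuming the PR's module layout; the randomised `any_epoch_datetime` is replaced by a fixed value here):

```python
import os
from datetime import datetime, timezone
from pathlib import Path
from tempfile import TemporaryDirectory

from scripts.files.fs_local import modified

with TemporaryDirectory() as tmp:
    path = Path(tmp, "modified.file")
    path.touch()
    # The second element of `times` is mtime; the first (atime) is irrelevant here.
    mtime = datetime(2001, 2, 3, 4, 5, 6, tzinfo=timezone.utc)
    os.utime(path, times=(mtime.timestamp(), mtime.timestamp()))
    assert modified(path) == mtime  # an aware UTC datetime round-trips exactly
```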
diff --git a/scripts/files/tests/fs_test.py b/scripts/files/tests/fs_test.py
index 4077b9740..71d55e8e9 100644
--- a/scripts/files/tests/fs_test.py
+++ b/scripts/files/tests/fs_test.py
@@ -1,14 +1,20 @@
 import os
+from pathlib import Path
 from shutil import rmtree
 from tempfile import mkdtemp

-from boto3 import resource
+from boto3 import client, resource
 from moto import mock_aws
+from moto.core.models import DEFAULT_ACCOUNT_ID
+from moto.s3.models import s3_backends
 from moto.s3.responses import DEFAULT_REGION_NAME
+from moto.wafv2.models import GLOBAL_REGION
+from mypy_boto3_s3 import S3Client
 from pytest import CaptureFixture, raises
 from pytest_subtests import SubTests

-from scripts.files.fs import NoSuchFileError, read, write, write_all, write_sidecars
+from scripts.files.fs import NoSuchFileError, modified, read, write, write_all, write_sidecars
+from scripts.tests.datetimes_test import any_epoch_datetime


 def test_read_key_not_found_local() -> None:
@@ -81,3 +87,25 @@ def test_write_sidecars_one_found(capsys: CaptureFixture[str], subtests: SubTests) -> None:
     assert "wrote_sidecar_file" in logs

     rmtree(target)
+
+
+@mock_aws
+def test_should_get_s3_object_modified_datetime() -> None:
+    bucket_name = "any-bucket-name"
+    key = "any-key"
+    modified_datetime = any_epoch_datetime()
+
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket=bucket_name)
+    s3_client.put_object(Bucket=bucket_name, Key=key, Body=b"any body")
+    s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket_name].keys[key].last_modified = modified_datetime
+
+    assert modified(f"s3://{bucket_name}/{key}", s3_client) == modified_datetime
+
+
+def test_should_get_local_file_modified_datetime(setup: str) -> None:
+    path = os.path.join(setup, "modified.file")
+    Path(path).touch()
+    modified_datetime = any_epoch_datetime()
+    os.utime(path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
+    assert modified(path) == modified_datetime
diff --git a/scripts/json_codec.py b/scripts/json_codec.py
new file mode 100644
index 000000000..2318ded75
--- /dev/null
+++ b/scripts/json_codec.py
@@ -0,0 +1,17 @@
+import json
+from typing import Any, Dict
+
+
+def dict_to_json_bytes(input_dict: Dict[str, Any]) -> bytes:
+    """
+    Try to convert a `dict` into UTF-8 encoded `bytes` representing a JSON dictionary
+
+    Examples:
+    >>> dict_to_json_bytes({})
+    b'{}'
+    >>> dict_to_json_bytes({"ā": "😀"})  # Unicode code points U+0101 and U+1F600
+    b'{"\xc4\x81": "\xf0\x9f\x98\x80"}'
+    >>> json.loads(dict_to_json_bytes({"ā": "😀"}))
+    {'ā': '😀'}
+    """
+    return json.dumps(input_dict, ensure_ascii=False).encode("utf-8")
diff --git a/scripts/stac/imagery/collection.py b/scripts/stac/imagery/collection.py
index 1c16e450e..03dd8340c 100644
--- a/scripts/stac/imagery/collection.py
+++ b/scripts/stac/imagery/collection.py
@@ -1,4 +1,3 @@
-import json
 import os
 from typing import Any, Dict, List, Optional

@@ -8,6 +7,7 @@
 from scripts.datetimes import format_rfc_3339_datetime_string, parse_rfc_3339_datetime
 from scripts.files.files_helper import ContentType
 from scripts.files.fs import write
+from scripts.json_codec import dict_to_json_bytes
 from scripts.stac.imagery.capture_area import generate_capture_area, gsd_to_float
 from scripts.stac.imagery.metadata_constants import (
     DATA_CATEGORIES,
@@ -95,7 +95,7 @@ def add_capture_area(self, polygons: List[shapely.geometry.shape], target: str,
         # The GSD is measured in meters (e.g., `0.3m`)
         capture_area_document = generate_capture_area(polygons, gsd_to_float(self.metadata["gsd"]))
-        capture_area_content: bytes = json.dumps(capture_area_document).encode("utf-8")
+        capture_area_content: bytes = dict_to_json_bytes(capture_area_document)
         file_checksum = checksum.multihash_as_hex(capture_area_content)
         capture_area = {
             "href": f"./{CAPTURE_AREA_FILE_NAME}",
@@ -129,20 +129,21 @@ def add_item(self, item: Dict[Any, Any]) -> None:
             item: STAC Item to add
         """
         item_self_link = next((feat for feat in item["links"] if feat["rel"] == "self"), None)
+        file_checksum = checksum.multihash_as_hex(dict_to_json_bytes(item))
         if item_self_link:
-            self.add_link(href=item_self_link["href"])
+            self.add_link(href=item_self_link["href"], file_checksum=file_checksum)
             self.update_temporal_extent(item["properties"]["start_datetime"], item["properties"]["end_datetime"])
             self.update_spatial_extent(item["bbox"])

-    def add_link(self, href: str, rel: str = "item", file_type: str = "application/json") -> None:
+    def add_link(self, href: str, file_checksum: str) -> None:
         """Add a `link` to the existing `links` list of the Collection.

         Args:
             href: path
-            rel: type of link. Defaults to "item".
-            file_type: type of file pointed by the link. Defaults to "application/json".
+            file_checksum: checksum of the file the link points to
         """
-        self.stac["links"].append({"rel": rel, "href": href, "type": file_type})
+        link = {"rel": "item", "href": href, "type": "application/json", "file:checksum": file_checksum}
+        self.stac["links"].append(link)

     def add_providers(self, providers: List[Provider]) -> None:
         """Add a list of Providers to the existing list of `providers` of the Collection.
@@ -234,7 +235,7 @@ def write_to(self, destination: str) -> None:
         Args:
             destination: path of the destination
         """
-        write(destination, json.dumps(self.stac, ensure_ascii=False).encode("utf-8"), content_type=ContentType.JSON.value)
+        write(destination, dict_to_json_bytes(self.stac), content_type=ContentType.JSON.value)

     def _title(self) -> str:
         """Generates the title for imagery and elevation datasets.
diff --git a/scripts/stac/imagery/item.py b/scripts/stac/imagery/item.py
index 22267f5e5..651f6b553 100644
--- a/scripts/stac/imagery/item.py
+++ b/scripts/stac/imagery/item.py
@@ -1,7 +1,9 @@
 import os
 from typing import Any, Dict, Tuple

+from scripts.datetimes import format_rfc_3339_datetime_string
 from scripts.files import fs
+from scripts.files.fs import modified
 from scripts.stac.util import checksum
 from scripts.stac.util.STAC_VERSION import STAC_VERSION
 from scripts.stac.util.stac_extensions import StacExtensions
@@ -12,6 +14,7 @@ class ImageryItem:
     def __init__(self, id_: str, file: str) -> None:
         file_content = fs.read(file)
+        file_modified_datetime = format_rfc_3339_datetime_string(modified(file))
         self.stac = {
             "type": "Feature",
             "stac_version": STAC_VERSION,
@@ -24,6 +27,8 @@
                     "href": os.path.join(".", os.path.basename(file)),
                     "type": "image/tiff; application=geotiff; profile=cloud-optimized",
                     "file:checksum": checksum.multihash_as_hex(file_content),
+                    "created": file_modified_datetime,
+                    "updated": file_modified_datetime,
                 }
             },
             "stac_extensions": [StacExtensions.file.value],
@@ -36,11 +41,10 @@ def update_datetime(self, start_datetime: str, end_datetime: str) -> None:
             start_datetime: a start date in `YYYY-MM-DD` format
             end_datetime: an end date in `YYYY-MM-DD` format
         """
-        self.stac["properties"] = {
-            "start_datetime": start_datetime,
-            "end_datetime": end_datetime,
-            "datetime": None,
-        }
+        self.stac.setdefault("properties", {})
+        self.stac["properties"]["start_datetime"] = start_datetime
+        self.stac["properties"]["end_datetime"] = end_datetime
+        self.stac["properties"]["datetime"] = None

     # FIXME: redefine the 'Any'
     def update_spatial(self, geometry: Dict[str, Any], bbox: Tuple[float, ...]) -> None:
diff --git a/scripts/stac/imagery/tests/collection_test.py b/scripts/stac/imagery/tests/collection_test.py
index 189244cd8..9ad0d8125 100644
--- a/scripts/stac/imagery/tests/collection_test.py
+++ b/scripts/stac/imagery/tests/collection_test.py
@@ -1,14 +1,13 @@
 import json
 import os
 import tempfile
-from datetime import datetime
+from datetime import datetime, timezone
 from shutil import rmtree
 from tempfile import mkdtemp
 from typing import Generator

 import pytest
 import shapely.geometry
-from pytest_mock import MockerFixture
 from pytest_subtests import SubTests

 from scripts.files.fs import read
@@ -17,6 +16,7 @@
 from scripts.stac.imagery.metadata_constants import CollectionMetadata
 from scripts.stac.imagery.provider import Provider, ProviderRole
 from scripts.stac.util.stac_extensions import StacExtensions
+from scripts.tests.datetimes_test import any_epoch_datetime

 # pylint: disable=duplicate-code
@@ -113,10 +113,12 @@ def test_interval_updated_from_existing(metadata: CollectionMetadata) -> None:
     assert collection.stac["extent"]["temporal"]["interval"] == [["2021-01-27T00:00:00Z", "2021-02-20T00:00:00Z"]]


-def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests: SubTests) -> None:
+def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None:
     collection = ImageryCollection(metadata)
-    mocker.patch("scripts.files.fs.read", return_value=b"")
-    item = ImageryItem("BR34_5000_0304", "./test/BR34_5000_0304.tiff")
+    item_file_path = "./scripts/tests/data/empty.tiff"
+    modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
+    os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
+    item = ImageryItem("BR34_5000_0304", item_file_path)
     geometry = {
         "type": "Polygon",
         "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
@@ -130,7 +132,12 @@ def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests:
     collection.add_item(item.stac)

     with subtests.test():
-        assert {"rel": "item", "href": "./BR34_5000_0304.json", "type": "application/json"} in collection.stac["links"]
+        assert {
+            "file:checksum": "122097b5d2b049c6ffdf608af28c4ba2744fad7f03046d1f58b2523402f30577f618",
+            "rel": "item",
+            "href": "./BR34_5000_0304.json",
+            "type": "application/json",
+        } in collection.stac["links"]

     with subtests.test():
         assert collection.stac["extent"]["temporal"]["interval"] == [[start_datetime, end_datetime]]
@@ -138,6 +145,10 @@ def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests:
     with subtests.test():
         assert collection.stac["extent"]["spatial"]["bbox"] == [bbox]

+    for property_name in ["created", "updated"]:
+        with subtests.test(msg=f"{property_name} property"):
+            assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"
+

 def test_write_collection(metadata: CollectionMetadata) -> None:
     target = mkdtemp()
diff --git a/scripts/stac/imagery/tests/item_test.py b/scripts/stac/imagery/tests/item_test.py
index 575551be0..4b3be4858 100644
--- a/scripts/stac/imagery/tests/item_test.py
+++ b/scripts/stac/imagery/tests/item_test.py
@@ -18,7 +18,7 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
     bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0)

     mocker.patch("scripts.files.fs.read", return_value=b"")
-    path = "./test/BR34_5000_0302.tiff"
+    path = "./scripts/tests/data/empty.tiff"
     id_ = get_file_name_from_path(path)
     start_datetime = "2021-01-27T00:00:00Z"
     end_datetime = "2021-01-27T00:00:00Z"
@@ -74,7 +74,7 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No
     ulid = "fake_ulid"
     collection = ImageryCollection(metadata=metadata, collection_id=ulid)

-    path = "./test/BR34_5000_0302.tiff"
+    path = "./scripts/tests/data/empty.tiff"
     id_ = get_file_name_from_path(path)
     mocker.patch("scripts.files.fs.read", return_value=b"")
     item = ImageryItem(id_, path)
diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py
index 95b32142f..07f20dab0 100644
--- a/scripts/standardise_validate.py
+++ b/scripts/standardise_validate.py
@@ -1,5 +1,4 @@
 import argparse
-import json
 import os
 import sys
 from typing import List
@@ -11,10 +10,19 @@
 from scripts.files.files_helper import SUFFIX_JSON, ContentType
 from scripts.files.fs import exists, write
 from scripts.gdal.gdal_helper import get_srs, get_vfs_path
+from scripts.json_codec import dict_to_json_bytes
 from scripts.stac.imagery.create_stac import create_item
 from scripts.standardising import run_standardising


+def str_to_bool(value: str) -> bool:
+    if value == "true":
+        return True
+    if value == "false":
+        return False
+    raise argparse.ArgumentTypeError(f"Invalid boolean (must be exactly 'true' or 'false'): {value}")
+
+
 def main() -> None:  # pylint: disable-msg=too-many-locals
     parser = argparse.ArgumentParser()
@@ -30,6 +38,13 @@ def main() -> None:  # pylint: disable-msg=too-many-locals
         help="The target EPSG code. If different to source the imagery will be reprojected",
     )
     parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset", type=str, required=True)
+    parser.add_argument(
+        "--create-footprints",
+        dest="create_footprints",
+        help="Create footprints for each tile ('true' / 'false')",
+        type=str_to_bool,
+        required=True,
+    )
     parser.add_argument("--cutline", dest="cutline", help="Optional cutline to cut imagery to", required=False, nargs="?")
     parser.add_argument("--collection-id", dest="collection_id", help="Unique id for collection", required=True)
     parser.add_argument(
@@ -60,6 +75,7 @@ def main() -> None:  # pylint: disable-msg=too-many-locals
         arguments.source_epsg,
         arguments.target_epsg,
         arguments.gsd,
+        arguments.create_footprints,
         arguments.target,
     )
@@ -102,7 +118,7 @@ def main() -> None:  # pylint: disable-msg=too-many-locals
         item = create_item(
             file.get_path_standardised(), start_datetime, end_datetime, arguments.collection_id, file.get_gdalinfo()
         )
-        write(stac_item_path, json.dumps(item.stac).encode("utf-8"), content_type=ContentType.GEOJSON.value)
+        write(stac_item_path, dict_to_json_bytes(item.stac), content_type=ContentType.GEOJSON.value)
         get_log().info("stac_saved", path=stac_item_path)
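`str_to_bool` deliberately accepts only the exact strings `true` and `false`; combined with `type=str_to_bool` and `required=True`, the flag must always be spelled out, and argparse turns anything else into a usage error instead of a silently truthy value. A small sketch of that behaviour (assuming the repository is importable, so `str_to_bool` can be taken from the module above):

```python
import argparse

from scripts.standardise_validate import str_to_bool

parser = argparse.ArgumentParser()
parser.add_argument("--create-footprints", dest="create_footprints", type=str_to_bool, required=True)

print(parser.parse_args(["--create-footprints=true"]).create_footprints)   # True
print(parser.parse_args(["--create-footprints=false"]).create_footprints)  # False
# "--create-footprints=yes" makes argparse report the ArgumentTypeError message and exit.
```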
diff --git a/scripts/standardising.py b/scripts/standardising.py
index 0123b5a65..cfb170609 100644
--- a/scripts/standardising.py
+++ b/scripts/standardising.py
@@ -34,6 +34,7 @@ def run_standardising(
     source_epsg: str,
     target_epsg: str,
     gsd: str,
+    create_footprints: bool,
     target_output: str = "/tmp/",
 ) -> List[FileTiff]:
     """Run `standardising()` in parallel (`concurrency`).
@@ -68,6 +69,7 @@ def run_standardising(
                 target_epsg=target_epsg,
                 target_output=target_output,
                 gsd=gsd,
+                create_footprints=create_footprints,
                 cutline=cutline,
             ),
             todo,
@@ -107,6 +109,7 @@ def standardising(
     source_epsg: str,
     target_epsg: str,
     gsd: str,
+    create_footprints: bool,
     cutline: Optional[str],
     target_output: str = "/tmp/",
 ) -> Optional[FileTiff]:
@@ -212,25 +215,27 @@ def standardising(

     with TiffFile(standardized_working_path) as file_handle:
         if any(tile_byte_count != 0 for tile_byte_count in file_handle.pages.first.tags["TileByteCounts"].value):
-            # Create footprint GeoJSON
-            run_gdal(
-                [
-                    "gdal_footprint",
-                    "-t_srs",
-                    EpsgCode.EPSG_4326,
-                    "-max_points",
-                    "unlimited",
-                    "-simplify",
-                    str(get_buffer_distance(gsd_to_float(gsd))),
-                ],
-                standardized_working_path,
-                footprint_tmp_path,
-            )
-            write(
-                footprint_file_path,
-                read(footprint_tmp_path),
-                content_type=ContentType.GEOJSON.value,
-            )
+            if create_footprints:
+                # Create footprint GeoJSON
+                run_gdal(
+                    [
+                        "gdal_footprint",
+                        "-t_srs",
+                        EpsgCode.EPSG_4326,
+                        "-max_points",
+                        "unlimited",
+                        "-simplify",
+                        str(get_buffer_distance(gsd_to_float(gsd))),
+                    ],
+                    standardized_working_path,
+                    footprint_tmp_path,
+                )
+                write(
+                    footprint_file_path,
+                    read(footprint_tmp_path),
+                    content_type=ContentType.GEOJSON.value,
+                )
+
             write(standardized_file_path, read(standardized_working_path), content_type=ContentType.GEOTIFF.value)

     return tiff
diff --git a/scripts/tests/datetimes_test.py b/scripts/tests/datetimes_test.py
index 044d3d646..ffb8d3b95 100644
--- a/scripts/tests/datetimes_test.py
+++ b/scripts/tests/datetimes_test.py
@@ -40,9 +40,7 @@ def test_should_raise_error_when_formatting_a_naive_datetime(subtests: SubTests)


 def test_should_be_able_to_invert_conversion() -> None:
-    start = datetime(1800, 1, 1, tzinfo=timezone.utc)
-    end = datetime(2100, 1, 1, tzinfo=timezone.utc)
-    original_datetime = any_datetime_between(start, end)
+    original_datetime = any_epoch_datetime()

     assert parse_rfc_3339_datetime(format_rfc_3339_datetime_string(original_datetime)) == original_datetime
@@ -51,6 +49,15 @@ def test_should_format_rfc_3339_nz_midnight_datetime_string() -> None:
     assert format_rfc_3339_nz_midnight_datetime_string(datetime_object) == "2001-02-02T11:00:00Z"


+def any_epoch_datetime() -> datetime:
+    """
+    Get arbitrary datetime
+    """
+    start = datetime(1970, 1, 1, tzinfo=timezone.utc)
+    end = datetime(2100, 1, 1, tzinfo=timezone.utc)
+    return any_datetime_between(start, end)
+
+
 def any_datetime_between(start: datetime, end: datetime) -> datetime:
     """
     Get arbitrary datetime between start (inclusive) and end (exclusive), with second precision.
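The test-helper change above pins `any_datetime_between` to `[1970-01-01, 2100-01-01)` UTC, so every timestamp generated in the tests is a non-negative POSIX epoch value, presumably because the 1800 lower bound it replaces does not survive the `os.utime`/`datetime.fromtimestamp` round trip on every platform. A quick property sketch (assuming the helpers are importable from the test module):

```python
from datetime import datetime, timezone

from scripts.tests.datetimes_test import any_epoch_datetime

arbitrary = any_epoch_datetime()
assert datetime(1970, 1, 1, tzinfo=timezone.utc) <= arbitrary < datetime(2100, 1, 1, tzinfo=timezone.utc)
assert arbitrary.microsecond == 0  # second precision, per any_datetime_between's docstring
```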