From 8951012bbd31eaee020f03d4da2bdfcf949ac269 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Thu, 30 May 2024 15:37:29 +1200 Subject: [PATCH 1/3] refactor: Use idiomatic command format Options always come before other arguments, and key/value pairs should be separated by `=` for clarity. --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index d6351caa..50b751f8 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "license": "MIT", "scripts": { "format": "prettier -w template/ *.md", - "lint": "eslint . --quiet --fix --ignore-path .gitignore" + "lint": "eslint --quiet --fix --ignore-path=.gitignore ." }, "devDependencies": { "@linzjs/style": "^4.0.0" From c4934abace979ea752dcebc6b2cd6aca7536acb9 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Thu, 30 May 2024 15:38:44 +1200 Subject: [PATCH 2/3] refactor: Format entire repository --- .prettierignore | 1 + docs/naming.md | 4 +- docs/tiff-compression/README.md | 58 +++++++++++++-------------- docs/usage.md | 70 +++++++++++++++++++-------------- package.json | 2 +- 5 files changed, 72 insertions(+), 63 deletions(-) create mode 100644 .prettierignore diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..f3674da1 --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +publish-odr-parameters diff --git a/docs/naming.md b/docs/naming.md index d78366f6..d0717004 100644 --- a/docs/naming.md +++ b/docs/naming.md @@ -44,7 +44,7 @@ which can be broken down as: ### S3 Path Restrictions -The _path_ is restricted to a limited set of characters with no whitespace: lowercase "a through "z", numbers "0" through "9", hyphen ("-"), and underscore ("_"). When generating a [dataset S3 path](#imagery-dataset-s3-paths), the system will pass through these characters unchanged to the path, and will transform many others to allowed characters - see the subsections for details. 
Any characters not mentioned in this section or subsections will result in an error. +The _path_ is restricted to a limited set of characters with no whitespace: lowercase "a" through "z", numbers "0" through "9", hyphen ("-"), and underscore ("\_"). When generating a [dataset S3 path](#imagery-dataset-s3-paths), the system will pass through these characters unchanged to the path, and will transform many others to allowed characters - see the subsections for details. Any characters not mentioned in this section or subsections will result in an error. #### Uppercase characters @@ -60,7 +60,7 @@ These characters are replaced with a hyphen. For example, "Tikitapu/Blue Lake" i #### Apostrophes -These are *removed,* so "Hawke's Bay" is changed to "hawkes-bay". +These are _removed,_ so "Hawke's Bay" is changed to "hawkes-bay". #### Ampersands diff --git a/docs/tiff-compression/README.md b/docs/tiff-compression/README.md index aebd7b53..04914aff 100644 --- a/docs/tiff-compression/README.md +++ b/docs/tiff-compression/README.md @@ -17,7 +17,7 @@ LINZ is required to store the elevation data in a safe and secure manner while a ## Goals -As the data is stored in [AWS S3](https://aws.amazon.com/s3/) every GB of data stored costs LINZ [$0.0025 USD / month](https://aws.amazon.com/s3/pricing/?nc=sn&loc=4), to reduce costs for new zealand the imagery needs to be compressed to ensure cost efficient storage. +As the data is stored in [AWS S3](https://aws.amazon.com/s3/) every GB of data stored costs LINZ [$0.025 USD / month](https://aws.amazon.com/s3/pricing/?nc=sn&loc=4), to reduce costs for New Zealand the imagery needs to be compressed to ensure cost efficient storage. This data is large and will also be accessed remotely it should be stored in a cloud optimized format so users can extract only the parts of the data they need, [Cloud optimised geotiff (COG)](https://www.cogeo.org/) without downloading the entire TIFF.
@@ -35,26 +35,25 @@ Stacked lerc compression was not tested due to increased testing complexity and GDAL provides a two options for most compression type -- Compression Level +- Compression Level - Predictor (none), Predictor 2 (Integer data type) or 3 (floating-point predictor) LERC has additional options for adjusting how much error is allowed, as the datasets generally have a ~20cm vertical accuracy, to keep the accuracy high only small amounts of allowed z error were tested (<5mm); - ZSTD (66 Tests) - - Predictor NO/1/2 - - Level 1-22 + - Predictor NO/1/2 + - Level 1-22 - Deflate (27 Tests) - - Predictor NO/1/2 - - level 1-9 + - Predictor NO/1/2 + - level 1-9 - LZW (3 Tests) - - Predictor NO/1/2 + - Predictor NO/1/2 - LERC (5 Tests) - - max z error: 0.1mm, 0.5mm, 1mm, 2mm, 5mm + - max z error: 0.1mm, 0.5mm, 1mm, 2mm, 5mm Testing data was selected from [CF15](https://data.linz.govt.nz/data/?mv.basemap=Streets&mv.content=layer.104687.color:003399.opacity:100,layer.109627.opacity:100&mv.zoom=9&mv.centre=169.8493143938876,-45.98894052690091) in [Otago - Coastal Catchments LiDAR 1m DEM (2021)](https://data.linz.govt.nz/layer/109627-otago-coastal-catchments-lidar-1m-dem-2021/) as it provided a mix of flat and hilly regions. 
which resulted in 2,999 source tiff files approximately 3.4GB -to provide a somewhat reproducible result A docker container [`gdal-ubuntu-small-3.7.0`](https://github.com/OSGeo/gdal/pkgs/container/gdal/91692621?tag=ubuntu-small-3.7.0) was used - +to provide a somewhat reproducible result A docker container [`gdal-ubuntu-small-3.7.0`](https://github.com/OSGeo/gdal/pkgs/container/gdal/91692621?tag=ubuntu-small-3.7.0) was used ### Process @@ -85,46 +84,45 @@ docker run \ /output/cf15-${compression}_predictor-${precdictor}_level-${level}_error-${error}.tiff ``` - ## Results - A full table of results can be found in the [output.tsv](./compression-results.tsv) Key results -|Id|File Size (mb)|Duration for 5 runs (ms)| -|-|-|-| -|base | 3456.00| 0| -|lzw_predictor-2 | 2686.27| 190926| -|deflate_predictor-2_level-9| 1982.15| 208897| -|zstd_predictor-2_level-17 | 1858.77| 469185| -|lerc_z-error-0.001 | 1319.11| 192573| - +| Id | File Size (mb) | Duration for 5 runs (ms) | +| --------------------------- | -------------- | ------------------------ | +| base | 3456.00 | 0 | +| lzw_predictor-2 | 2686.27 | 190926 | +| deflate_predictor-2_level-9 | 1982.15 | 208897 | +| zstd_predictor-2_level-17 | 1858.77 | 469185 | +| lerc_z-error-0.001 | 1319.11 | 192573 | LERC is by far the best compression and speed for converting the DEMS into COGs. if having a minor (1mm) error is allowed it is a huge savings. - As most of this work is for cost reduction for storage and egress of data, what level of cost reduction would this have for 1TB of input data. 
With the current [AWS S3 pricing](https://aws.amazon.com/s3/pricing/) the following metrics were used -Standard Access - used for frequently accessed data +Standard Access - used for frequently accessed data + - $0.025 / GB / month = $300 / TB /Year Infrequent Access - used for infrequently accessed data (most of our DEMs would fit into this category) + - $0.0138 / GB / month = $165 / TB / Year Egress - Cost to send the data out of AWS + - $0.114 / GB = $114.00 / TB -|Id|Cost/TB/Year (Standard)| Cost/TB/Year (Infrequent)|Egress (1 copy downloaded)| -|-|-|-|-| -|base | $300.00| $165.60| $114.00 | -|lzw_predictor-2 | $233.18| $128.71| $88.61 | -|deflate_predictor-2_level-9| $172.06| $94.78| $65.38 | -|zstd_predictor-2_level-17 | $161.35| $89.06| $61.31 | -|lerc_z-error-0.001 | $114.50| $63.20| $45.51 | +| Id | Cost/TB/Year (Standard) | Cost/TB/Year (Infrequent) | Egress (1 copy downloaded) | +| --------------------------- | ----------------------- | ------------------------- | -------------------------- | +| base | $300.00 | $165.60 | $114.00 | +| lzw_predictor-2 | $233.18 | $128.71 | $88.61 | +| deflate_predictor-2_level-9 | $172.06 | $94.78 | $65.38 | +| zstd_predictor-2_level-17 | $161.35 | $89.06 | $61.31 | +| lerc_z-error-0.001 | $114.50 | $63.20 | $45.51 | So using LERC (1mm) for 1TB of input data would result in approx $190 USD / year in storage costs savings, and $70 in savings for every copy of the data that was egressed out of AWS. @@ -147,7 +145,7 @@ sudo apt-get install libwebp-dev sudo apt-get install libzstd-dev # Download GDAL for specific QGIS version you already have installed -# e.g. QGIS 3.22.4 uses GDAL 3.4.1 +# e.g. 
QGIS 3.22.4 uses GDAL 3.4.1 wget -c http://download.osgeo.org/gdal/3.4.1/gdal-3.4.1.tar.gz tar -xvzf gdal-3.4.1.tar.gz cd gdal-3.4.1 diff --git a/docs/usage.md b/docs/usage.md index 9a5614c8..465aa617 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,7 +6,7 @@ Our public New Zealand Elevation bucket can be used with a number of application The STAC Catalog, https://nz-elevation.s3-ap-southeast-2.amazonaws.com/catalog.json, is the entry point of the LINZ elevation data. It points to the different STAC Collections representing a dataset. -> **_Note:_** The files in the *nz-elevation* bucket can be accessed via `s3://nz-elevation` or `https://nz-elevation.s3-ap-southeast-2.amazonaws.com/`. +> **_Note:_** The files in the _nz-elevation_ bucket can be accessed via `s3://nz-elevation` or `https://nz-elevation.s3-ap-southeast-2.amazonaws.com/`. ### Manually navigate through the STAC Catalog to a get a TIFF @@ -14,31 +14,34 @@ This is an example using the "Taranaki LiDAR 1m DEM (2021)" dataset. 1. From `catalog.json` get the Collection link for "Taranaki LiDAR 1m DEM (2021)" in the `links` list: - ```json - { - "rel": "child", - "href": "./taranaki/taranaki_2021/dem_1m/2193/collection.json", - "title": "Taranaki LiDAR 1m DEM (2021)", - "file:checksum": "122098144561ea4de8e880fb3b857b1ca07d32400189cf48cf9cad96921efdeb3f15", - "file:size": 25426 - }, - ``` + ```json + { + "rel": "child", + "href": "./taranaki/taranaki_2021/dem_1m/2193/collection.json", + "title": "Taranaki LiDAR 1m DEM (2021)", + "file:checksum": "122098144561ea4de8e880fb3b857b1ca07d32400189cf48cf9cad96921efdeb3f15", + "file:size": 25426 + }, + ``` + 2. The Collection can be accessed from this GitHub repository, `stac/taranaki/taranaki_2021/dem_1m/2193/collection.json` or from `https://nz-elevation.s3-ap-southeast-2.amazonaws.com/taranaki/taranaki_2021/dem_1m/2193/collection.json`. 
From the Collection get an Item link in the `links` list: - ```json - { "rel": "item", "href": "./BH31_10000_0403.json", "type": "application/json" } - ``` + ```json + { "rel": "item", "href": "./BH31_10000_0403.json", "type": "application/json" } + ``` + 3. The Item can be accessed from `https://nz-elevation.s3-ap-southeast-2.amazonaws.com/taranaki/taranaki_2021/dem_1m/2193/BH31_10000_0403.json`. A list of assets can be found: - ```json - "assets": { - "visual": { - "href": "./BH31_10000_0403.tiff", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "file:checksum": "122042af9034edaf0c2ff7522e26db4fb3a4a5835015f6dfc118acbd6c3f2b011ee5" - } - } - ``` + ```json + "assets": { + "visual": { + "href": "./BH31_10000_0403.tiff", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "file:checksum": "122042af9034edaf0c2ff7522e26db4fb3a4a5835015f6dfc118acbd6c3f2b011ee5" + } + } + ``` + 4. In this case, the TIFF can be accessed from `https://nz-elevation.s3-ap-southeast-2.amazonaws.com/taranaki/taranaki_2021/dem_1m/2193/BH31_10000_0403.tiff` > **_Note:_** Our TIFF files and STAC Item files use the same base name. Having the link to the STAC Item, you can determine the TIFF link by changing its suffix from `.json` to `.tiff`. @@ -79,6 +82,7 @@ Check the total size of all elevation data held for one region. 
```shell s5cmd --no-sign-request du --humanize s3://nz-elevation/canterbury/* ``` + ``` 103.5G bytes in 7784 objects: s3://nz-elevation/canterbury/* ``` @@ -88,6 +92,7 @@ Check the total size of a specific elevation dataset and limit to just the eleva ```shell s5cmd --no-sign-request du --humanize s3://nz-elevation/canterbury/christchurch_2020-2021/dem_1m/2193/*.tiff ``` + ``` 592.5M bytes in 27 objects: s3://nz-elevation/canterbury/christchurch_2020-2021/dem_1m/2193/*.tiff ``` @@ -178,7 +183,7 @@ Upper Right ( 1746400.000, 5672400.000) (174d41'33.63"E, 39d 5' 6.88"S) Lower Right ( 1746400.000, 5665200.000) (174d41'39.22"E, 39d 9' 0.35"S) Center ( 1744000.000, 5668800.000) (174d39'56.52"E, 39d 7' 5.05"S) Band 1 Block=512x512 Type=Float32, ColorInterp=Gray - Min=130.799 Max=454.541 + Min=130.799 Max=454.541 Minimum=130.799, Maximum=454.541, Mean=289.727, StdDev=59.772 NoData Value=-9999 Overviews: 2400x3600, 1200x1800, 600x900, 300x450 @@ -202,8 +207,8 @@ Band 1 Block=512x512 Type=Float32, ColorInterp=Gray 4. Paste the HTTPS URL to the TIFF, for example `https://nz-elevation.s3-ap-southeast-2.amazonaws.com/taranaki/taranaki_2021/dem_1m/2193/BH31_10000_0403.tiff` 5. Click on the "Add" button and wait for the file to load. - ![Data Source Manager](img/usage/qgis_data-source-manager.png) - ![QGIS View TIFF](img/usage/qgis_visualisation.png) + ![Data Source Manager](img/usage/qgis_data-source-manager.png) + ![QGIS View TIFF](img/usage/qgis_visualisation.png) ## ArcGIS Pro @@ -211,14 +216,17 @@ Band 1 Block=512x512 Type=Float32, ColorInterp=Gray 1. On the "Insert" ribbon, select "Connections" then "Cloud Store" then "New Cloud Storage Connection". - ![ArcGIS Pro New Cloud Storage Connection](img/usage/arcgis_pro_new_cloud_connection.png) + ![ArcGIS Pro New Cloud Storage Connection](img/usage/arcgis_pro_new_cloud_connection.png) + 2. In the "Create Cloud Storage Connection" dialog, add a "Connection File Name" e.g. 
`New Zealand Elevation`, "Service Provider" as `AMAZON`, "Bucket Name" as `nz-elevation`, "Region" as `Asia Pacific (Sydney)` and "Service Endpoint" as `s3.ap-southeast-2.amazon.com`. 3. Because this is a Public Bucket, add a "Provider Option" with "Name" of `AWS_NO_SIGN_REQUEST` and "Value" of `YES`. This means that you won't need an Access Key ID or Secret Access Key. - ![ArcGIS Pro Cloud Storage Connection Details](img/usage/arcgis_pro_connection_details.png) + ![ArcGIS Pro Cloud Storage Connection Details](img/usage/arcgis_pro_connection_details.png) + 4. Click "OK". The cloud storage connection will appear in the "Catalog" pane, where the bucket structure can be explored. - ![ArcGIS Pro Catalog View](img/usage/arcgis_pro_catalog_view.png) + ![ArcGIS Pro Catalog View](img/usage/arcgis_pro_catalog_view.png) + 5. From here you can add individual TIFFs to the map, export them to different data formats, etc. ## Cyberduck @@ -231,8 +239,10 @@ Band 1 Block=512x512 Type=Float32, ColorInterp=Gray 2. Expand the "Connection Profiles" dropdown (that defaults to FTP) and select "More Options" at the bottom of the list. Search using "HTTPS" to find the `S3 (HTTPS)` connection profile. Select it, then close the "Preferences" window. 3. Add a "Server" of `s3-ap-southeast-2.amazonaws.com`, check the "Anonymous Login" checkbox and add a "Path" of `nz-elevation`. - ![Cyberduck Open Connection](img/usage/cyberduck_open_connection.png) + ![Cyberduck Open Connection](img/usage/cyberduck_open_connection.png) + 4. Click "Connect". The top level of the bucket will be displayed and can be explored. - ![Cyberduck Bucket View](img/usage/cyberduck_bucket_view.png) + ![Cyberduck Bucket View](img/usage/cyberduck_bucket_view.png) + 5. From here you can use "Get Info" to calculate the size of particular directories or datasets and right-click to "Download" or "Synchronize". 
diff --git a/package.json b/package.json index 50b751f8..b9b07224 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "repository": "git@github.com:linz/elevation.git", "license": "MIT", "scripts": { - "format": "prettier -w template/ *.md", + "format": "prettier --check --write .", "lint": "eslint --quiet --fix --ignore-path=.gitignore ." }, "devDependencies": { From 0d2544aa7465780bac75e380039d0e6c8219a846 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Thu, 30 May 2024 15:39:23 +1200 Subject: [PATCH 3/3] feat: Enforce Prettier formatting --- package.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index b9b07224..e442307b 100644 --- a/package.json +++ b/package.json @@ -5,8 +5,7 @@ "repository": "git@github.com:linz/elevation.git", "license": "MIT", "scripts": { - "format": "prettier --check --write .", - "lint": "eslint --quiet --fix --ignore-path=.gitignore ." + "lint": "eslint --quiet --fix --ignore-path=.gitignore . && prettier --check --write ." }, "devDependencies": { "@linzjs/style": "^4.0.0"