Skip to content

Commit

Permalink
Adding support for dataset categories (for now optional)
Browse files Browse the repository at this point in the history
Categories will be added to keywords.

PiperOrigin-RevId: 729274084
  • Loading branch information
simonff authored and copybara-github committed Feb 20, 2025
1 parent 7c3daa6 commit 3f538cd
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 0 deletions.
2 changes: 2 additions & 0 deletions checker/node/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from checker.node import feature_view
from checker.node import file_path
from checker.node import gee_bitmask
from checker.node import gee_categories
from checker.node import gee_classes
from checker.node import gee_is_derived
from checker.node import gee_provider_ids
Expand Down Expand Up @@ -69,6 +70,7 @@
version_extension.Check,
# gee extensions
gee_bitmask.Check,
gee_categories.Check,
gee_classes.Check,
gee_is_derived.Check,
gee_provider_ids.Check,
Expand Down
62 changes: 62 additions & 0 deletions checker/node/gee_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Checks that the "gee:categories" field is valid."""

from typing import Iterator

from checker import stac

# The closed vocabulary of values accepted in the "gee:categories" field.
# Multi-word names are spelled with dashes rather than underscores because
# the HTML catalog converts underscores to dashes anyway.
CATEGORIES = frozenset({
    'agriculture',
    'atmosphere',
    'climate',
    'cryosphere',
    'ecosystems',
    'elevation-topography',
    'fire',
    'forest-biomass',
    'hydrology',
    'infrastructure-boundaries',
    'landuse-landcover',
    'oceanography',
    'orthophotos',
    'plant-productivity',
    'population',
    'precipitation',
    'satellite-imagery',
    'soil',
    'vegetation-indices',
})

# Name of the STAC field that holds a dataset's categories.
GEE_CATEGORIES = 'gee:categories'


class Check(stac.NodeCheck):
  """Checks the gee:categories field.

  The field is optional for now. When present on a non-catalog node it must
  be a non-empty list of strings, each of which is a member of CATEGORIES.
  """

  name = 'gee_categories'

  @classmethod
  def run(cls, node: stac.Node) -> Iterator[stac.Issue]:
    """Yields one issue per problem found in the node's "gee:categories".

    Args:
      node: The STAC node to validate. Catalog nodes are skipped, as are
        nodes that do not have the field at all.

    Yields:
      An issue if the value is not a list of strings, an issue if the list is
      empty, or otherwise one issue per distinct unknown category, in sorted
      order.
    """
    if node.type == stac.StacType.CATALOG:
      return

    if GEE_CATEGORIES not in node.stac:
      # TODO(b/397988701): require categories when all datasets have them
      # yield cls.new_issue(node, f'Missing {GEE_CATEGORIES}')
      return

    categories = node.stac[GEE_CATEGORIES]

    # Validate the type before testing for emptiness: otherwise falsy
    # non-list values (0, '', {}, None) would be misreported as "empty".
    # Checking the elements as well keeps the set() below from raising
    # TypeError on unhashable entries such as nested lists or dicts.
    if not isinstance(categories, list) or not all(
        isinstance(category, str) for category in categories
    ):
      yield cls.new_issue(
          node, f'"{GEE_CATEGORIES}" must be a list of strings'
      )
      return

    if not categories:
      yield cls.new_issue(node, f'"{GEE_CATEGORIES}" must not be empty')
      return

    # Sort so that issues are reported in a stable order; plain set iteration
    # order for strings can differ from run to run.
    for extra in sorted(set(categories) - CATEGORIES):
      yield cls.new_issue(
          node, f'Found unknown category "{extra}" in "{GEE_CATEGORIES}"'
      )
38 changes: 38 additions & 0 deletions checker/node/gee_categories_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from absl.testing import absltest
from checker import test_utils
from checker.node import gee_categories


class GeeCategoiresTest(test_utils.NodeTest):
  """Unit tests for gee_categories.Check."""

  # NOTE(review): the class name misspells "Categories"; it is kept as-is so
  # the test's identifier does not change.
  # NOTE(review): assert_catalog / assert_collection come from test_utils and
  # are not visible here. Presumably a call with only a STAC dict expects no
  # issues and a call with a message expects that issue; confirm there.

  def setUp(self):
    """Selects the check under test for the NodeTest helpers."""
    super().setUp()
    self.check = gee_categories.Check

  def test_catalog(self):
    """Runs the check against a catalog node with an empty STAC dict."""
    catalog_stac = {}
    self.assert_catalog(catalog_stac)

  def test_not_a_list(self):
    """Runs the check against a non-list (integer) field value."""
    collection_stac = {'gee:categories': 77}
    expected_message = '"gee:categories" must be a list of strings'
    self.assert_collection(collection_stac, expected_message)

  def test_missing_ok(self):
    """Runs the check against a collection without the field."""
    collection_stac = {}
    self.assert_collection(collection_stac)

  # TODO(b/397988701): require categories when all datasets have them
  # def test_empty_categories_is_bad(self):
  #   self.assert_collection({'gee:categories': []}, 'must not be empty')

  def test_valid_category(self):
    """Runs the check against a single known category."""
    collection_stac = {'gee:categories': ['soil']}
    self.assert_collection(collection_stac)

  def test_bad_category(self):
    """Runs the check against a category outside the known vocabulary."""
    collection_stac = {'gee:categories': ['bogus']}
    expected_message = 'Found unknown category "bogus" in "gee:categories"'
    self.assert_collection(collection_stac, expected_message)


# Run this module's tests through absltest when the file is executed directly.
if __name__ == '__main__':
  absltest.main()
1 change: 1 addition & 0 deletions checker/node/top_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
COLLECTION_ALLOWED_FIELDS = [
'description',
'extent',
'gee:categories',
'gee:interval',
'gee:is_derived',
'gee:min_zoom_level',
Expand Down

0 comments on commit 3f538cd

Please sign in to comment.