From 5eec5a6de5325145c728c2e5200e79193b2bf935 Mon Sep 17 00:00:00 2001 From: davemfish Date: Tue, 16 Jul 2024 16:20:13 -0400 Subject: [PATCH 01/15] using dataclasses to define metadata schema. --- src/geometamaker/__init__.py | 1 + src/geometamaker/models.py | 195 +++++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 src/geometamaker/models.py diff --git a/src/geometamaker/__init__.py b/src/geometamaker/__init__.py index 9f56a76..3b8ae6c 100644 --- a/src/geometamaker/__init__.py +++ b/src/geometamaker/__init__.py @@ -1 +1,2 @@ from .geometamaker import MetadataControl +from .geometamaker import MCF_SCHEMA diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py new file mode 100644 index 0000000..cddc7a6 --- /dev/null +++ b/src/geometamaker/models.py @@ -0,0 +1,195 @@ +import dataclasses +from dataclasses import dataclass, field +import logging +import os +import pprint + +import frictionless +import fsspec +import yaml + + +LOGGER = logging.getLogger(__name__) + +# https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml +class _NoAliasDumper(yaml.SafeDumper): + """Keep the yaml human-readable by avoiding anchors and aliases.""" + + def ignore_aliases(self, data): + return True + + +@dataclass +class ContactSchema: + """Class for keeping track of contact info.""" + + email: str = '' + organization: str = '' + individualname: str = '' + positionname: str = '' + + +@dataclass +class FieldSchema: + """metadata for a field in a table.""" + + # https://datapackage.org/standard/table-schema/ + name: str = '' + title: str = '' + type: str = '' + format: str = '' + example: any = None + description: str = '' + units: str = '' + + +@dataclass +class TableSchema: + """Class for metadata for tables.""" + + # https://datapackage.org/standard/table-schema/ + fields: list = field(default_factory=FieldSchema) + missingValues: list = field(default_factory=list) + primaryKey: list = field(default_factory=list) + foreignKeys: list = field(default_factory=list) + + # def get_field(): + + +@dataclass +class BandSchema: + """Class for metadata for a raster band.""" + + index: int = 1 + description: str = '' + + +@dataclass +class RasterSchema: + """Class for metadata for raster bands.""" + + bands: list = field(default_factory=BandSchema) + + +@dataclass(kw_only=True) +class Resource: + """Base class for metadata for a resource. + + https://datapackage.org/standard/data-resource/ + This class should be based on Data Package - Resource + specification. But we have some additional properties + that are important to us. + """ + + path: str = '' + type: str = '' + scheme: str = '' + encoding: str = '' + format: str = '' + mediatype: str = '' + bytes: int = 0 + hash: str = '' + name: str = '' + title: str = '' + description: str = '' + sources: list = field(default_factory=list) + # schema: dict = field(init=False) + licenses: list = field(default_factory=list) + contact: ContactSchema = ContactSchema() + + # def __post_init__(self): + # self.schema = + + +@dataclass(kw_only=True) +class TableResource(Resource): + """Class for metadata for a table resource.""" + + # without post-init, schema ends up as a dict, or whatever is passed in. + schema: TableSchema = field(default_factory=TableSchema) + # type: str = 'table' + + def __post_init__(self): + # Allow init of the resource with a schema of type + # TableSchema, or type dict. Mostly because dataclasses.replace + # calls init, but the base object will have already been initialized. 
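+        # For example, a doctest-style sketch of what this coercion and
+        # dataclasses.replace() do (class names as defined above; the
+        # values are hypothetical):
+        #
+        #   >>> import dataclasses
+        #   >>> res = TableResource(schema={'fields': []})  # dict, as from YAML
+        #   >>> isinstance(res.schema, TableSchema)
+        #   True
+        #   >>> res2 = dataclasses.replace(res, title='new')  # re-runs __init__
+        #   >>> res2.title                 # overridden fields refresh
+        #   'new'
+        #   >>> res2.path == res.path      # unspecified fields carry over
+        #   True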
+ if isinstance(self.schema, TableSchema): + return + self.schema = TableSchema(**self.schema) + + +class MetadataControl(object): + + def __init__(self, source_dataset_path=None): + if source_dataset_path is not None: + self.datasource = source_dataset_path + self.data_package_path = f'{self.datasource}.dp.yml' + + # Despite naming, this does not open a resource that must be closed + of = fsspec.open(self.datasource) + if not of.fs.exists(self.datasource): + raise FileNotFoundError(f'{self.datasource} does not exist') + + # TODO: check the filetype here and create the appropriate instance + # this is nice for autodetect of field types, but sometimes + # we will know the table schema (invest MODEL_SPEC). + # Is there any benefit to passing in the known schema? Maybe not + # Can also just overwrite the schema attribute with known data after. + description = frictionless.describe(source_dataset_path).to_dict() + # schema = TableSchema(**description['schema']) + # del description['schema'] + # resource = Resource(resource_dict) + + # Load existing metadata file + try: + with fsspec.open(self.data_package_path, 'r') as file: + yaml_string = file.read() + + # This validates the existing yaml against our dataclasses. + existing_resource = TableResource(**yaml.safe_load(yaml_string)) + # overwrite properties that are intrinsic to the dataset, + # which is everything from `description` other than schema. + del description['schema'] + self.metadata = dataclasses.replace( + existing_resource, **description) + + # Common path: metadata file does not already exist + except FileNotFoundError as err: + self.metadata = TableResource(description) + + def write(self, workspace=None): + """Write datapackage yaml to disk. + + This creates sidecar files with '.yml' + appended to the full filename of the data source. For example, + + - 'myraster.tif' + - 'myraster.tif.yml' + + Args: + workspace (str): if ``None``, files write to the same location + as the source data. If not ``None``, a path to a local directory + to write files. They will still be named to match the source + filename. Use this option if the source data is not on the local + filesystem. + + """ + if workspace is None: + target_path = self.data_package_path + else: + target_path = os.path.join( + workspace, f'{os.path.basename(self.datasource)}.dp.yml') + + with open(target_path, 'w') as file: + file.write(yaml.dump( + dataclasses.asdict(self.metadata), Dumper=_NoAliasDumper)) + + +if __name__ == "__main__": + # from natcap.invest import carbon + # arg_spec = carbon.MODEL_SPEC['args']['carbon_pools_path'] + + filepath = 'C:/Users/dmf/projects/geometamaker/data/carbon_pools.csv' + mc = MetadataControl(filepath) + pprint.pprint(dataclasses.asdict(mc.metadata)) + # mc.write() From 4753341bcb32249f0a425fe2ad4f021c2cd8ab21 Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 17 Jul 2024 11:35:20 -0400 Subject: [PATCH 02/15] implemented a VectorResource class --- src/geometamaker/models.py | 115 ++++++++++++++++++++++++++++++++----- 1 file changed, 102 insertions(+), 13 deletions(-) diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index cddc7a6..de2a6ff 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -6,7 +6,9 @@ import frictionless import fsspec +import pygeoprocessing import yaml +from osgeo import gdal LOGGER = logging.getLogger(__name__) @@ -108,7 +110,7 @@ class TableResource(Resource): # without post-init, schema ends up as a dict, or whatever is passed in. 
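+    # A round-trip sketch (doctest-style; hypothetical values). Dumping with
+    # _NoAliasDumper keeps the YAML free of anchors/aliases, and passing the
+    # parsed dict back through __init__ re-validates its keys against these
+    # dataclasses:
+    #
+    #   >>> import dataclasses, yaml
+    #   >>> res = TableResource(title='carbon pools')
+    #   >>> text = yaml.dump(dataclasses.asdict(res), Dumper=_NoAliasDumper)
+    #   >>> loaded = TableResource(**yaml.safe_load(text))
+    #   >>> isinstance(loaded.schema, TableSchema)  # dict coerced back
+    #   True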
schema: TableSchema = field(default_factory=TableSchema) # type: str = 'table' - + def __post_init__(self): # Allow init of the resource with a schema of type # TableSchema, or type dict. Mostly because dataclasses.replace @@ -118,27 +120,110 @@ def __post_init__(self): self.schema = TableSchema(**self.schema) +@dataclass +class BoundingBox(): + + xmin: float + ymin: float + xmax: float + ymax: float + + +@dataclass +class SpatialSchema(): + + bounding_box: BoundingBox + crs: str + + +@dataclass(kw_only=True) +class VectorResource(TableResource): + """Class for metadata for a vector resource.""" + + spatial: SpatialSchema + + +@dataclass(kw_only=True) +class RasterResource(Resource): + """Class for metadata for a raster resource.""" + + spatial: SpatialSchema + + +def get_file_type(filepath): + # GDAL considers CSV a vector, so check against frictionless + # first + filetype = frictionless.describe(filepath).type + if filetype == 'table': + return filetype + gis_type = pygeoprocessing.get_gis_type(filepath) + if gis_type == pygeoprocessing.VECTOR_TYPE: + return 'vector' + if gis_type == pygeoprocessing.RASTER_TYPE: + return 'raster' + raise ValueError() + + +def describe_vector(source_dataset_path): + description = frictionless.describe(source_dataset_path).to_dict() + fields = [] + vector = gdal.OpenEx(source_dataset_path, gdal.OF_VECTOR) + layer = vector.GetLayer() + for fld in layer.schema: + fields.append( + FieldSchema(name=fld.name, type=fld.type)) + vector = layer = None + description['schema'] = TableSchema(fields=fields) + + info = pygeoprocessing.get_vector_info(source_dataset_path) + spatial = { + 'bounding_box': info['bounding_box'], + 'crs': info['projection_wkt'] + } + description['spatial'] = SpatialSchema(**spatial) + description['sources'] = info['file_list'] + return description + + +def describe_raster(source_dataset_path): + pass + + +def describe_table(source_dataset_path): + return frictionless.describe(source_dataset_path).to_dict() + + +DESRCIBE_FUNCS = { + 'table': describe_table, + 'vector': describe_vector, + 'raster': describe_raster +} + +RESOURCE_MODELS = { + 'table': TableResource, + 'vector': VectorResource, + 'raster': RasterResource +} + + class MetadataControl(object): - def __init__(self, source_dataset_path=None): - if source_dataset_path is not None: - self.datasource = source_dataset_path - self.data_package_path = f'{self.datasource}.dp.yml' + def __init__(self, source_dataset_path): + # if source_dataset_path is not None: + self.datasource = source_dataset_path + self.data_package_path = f'{self.datasource}.dp.yml' # Despite naming, this does not open a resource that must be closed of = fsspec.open(self.datasource) if not of.fs.exists(self.datasource): raise FileNotFoundError(f'{self.datasource} does not exist') - # TODO: check the filetype here and create the appropriate instance + resource_type = get_file_type(source_dataset_path) + description = DESRCIBE_FUNCS[resource_type](source_dataset_path) # this is nice for autodetect of field types, but sometimes # we will know the table schema (invest MODEL_SPEC). # Is there any benefit to passing in the known schema? Maybe not # Can also just overwrite the schema attribute with known data after. 
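+        # A dispatch sketch (hypothetical file paths): frictionless claims
+        # CSVs as 'table' before GDAL would call them vectors, and
+        # pygeoprocessing then distinguishes the GIS types:
+        #
+        #   >>> get_file_type('carbon_pools.csv')
+        #   'table'
+        #   >>> get_file_type('watershed.shp')
+        #   'vector'
+        #   >>> get_file_type('dem.tif')
+        #   'raster'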
- description = frictionless.describe(source_dataset_path).to_dict() - # schema = TableSchema(**description['schema']) - # del description['schema'] - # resource = Resource(resource_dict) # Load existing metadata file try: @@ -146,16 +231,19 @@ def __init__(self, source_dataset_path=None): yaml_string = file.read() # This validates the existing yaml against our dataclasses. - existing_resource = TableResource(**yaml.safe_load(yaml_string)) + existing_resource = RESOURCE_MODELS[resource_type]( + **yaml.safe_load(yaml_string)) # overwrite properties that are intrinsic to the dataset, # which is everything from `description` other than schema. + # Some parts of schema are intrinsic, but others are human-input + # so replace the whole thing for now. del description['schema'] self.metadata = dataclasses.replace( existing_resource, **description) # Common path: metadata file does not already exist except FileNotFoundError as err: - self.metadata = TableResource(description) + self.metadata = RESOURCE_MODELS[resource_type](**description) def write(self, workspace=None): """Write datapackage yaml to disk. @@ -189,7 +277,8 @@ def write(self, workspace=None): # from natcap.invest import carbon # arg_spec = carbon.MODEL_SPEC['args']['carbon_pools_path'] - filepath = 'C:/Users/dmf/projects/geometamaker/data/carbon_pools.csv' + # filepath = 'C:/Users/dmf/projects/geometamaker/data/carbon_pools.csv' + filepath = 'C:/Users/dmf/projects/geometamaker/data/watershed_gura.shp' mc = MetadataControl(filepath) pprint.pprint(dataclasses.asdict(mc.metadata)) # mc.write() From 0158a01cb757ba07028de5d206b958ebfad99346 Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 17 Jul 2024 12:51:10 -0400 Subject: [PATCH 03/15] implemented a RasterResource --- src/geometamaker/models.py | 81 ++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 21 deletions(-) diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index de2a6ff..7ed8978 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -21,6 +21,22 @@ def ignore_aliases(self, data): return True +@dataclass +class BoundingBox(): + + xmin: float + ymin: float + xmax: float + ymax: float + + +@dataclass +class SpatialSchema(): + + bounding_box: BoundingBox + crs: str + + @dataclass class ContactSchema: """Class for keeping track of contact info.""" @@ -62,7 +78,10 @@ class TableSchema: class BandSchema: """Class for metadata for a raster band.""" - index: int = 1 + index: int + gdal_type: int + numpy_type: str + nodata: int | float description: str = '' @@ -70,7 +89,9 @@ class BandSchema: class RasterSchema: """Class for metadata for raster bands.""" - bands: list = field(default_factory=BandSchema) + bands: list + pixel_size: list + raster_size: list @dataclass(kw_only=True) @@ -120,22 +141,6 @@ def __post_init__(self): self.schema = TableSchema(**self.schema) -@dataclass -class BoundingBox(): - - xmin: float - ymin: float - xmax: float - ymax: float - - -@dataclass -class SpatialSchema(): - - bounding_box: BoundingBox - crs: str - - @dataclass(kw_only=True) class VectorResource(TableResource): """Class for metadata for a vector resource.""" @@ -147,12 +152,24 @@ class VectorResource(TableResource): class RasterResource(Resource): """Class for metadata for a raster resource.""" + schema: RasterSchema spatial: SpatialSchema + def __post_init__(self): + # Allow init of the resource with a schema of type + # RasterSchema, or type dict. 
Mostly because dataclasses.replace + # calls init, but the base object will have already been initialized. + if isinstance(self.schema, RasterSchema): + return + self.schema = RasterSchema(**self.schema) + def get_file_type(filepath): + # TODO: guard against classifying netCDF, HDF5, etc as GDAL rasters, + # we'll want a different data model for multi-dimensional arrays. + # GDAL considers CSV a vector, so check against frictionless - # first + # first. filetype = frictionless.describe(filepath).type if filetype == 'table': return filetype @@ -186,7 +203,28 @@ def describe_vector(source_dataset_path): def describe_raster(source_dataset_path): - pass + description = frictionless.describe(source_dataset_path).to_dict() + + bands = [] + info = pygeoprocessing.get_raster_info(source_dataset_path) + for i in range(info['n_bands']): + b = i + 1 + # band = raster.GetRasterBand(b) + # datatype = 'integer' if band.DataType < 6 else 'number' + bands.append(BandSchema( + index=b, + gdal_type=info['datatype'], + numpy_type=info['numpy_type'], + nodata=info['nodata'][i])) + description['schema'] = RasterSchema( + bands=bands, + pixel_size=info['pixel_size'], + raster_size=info['raster_size']) + description['spatial'] = SpatialSchema( + bounding_box=info['bounding_box'], + crs=info['projection_wkt']) + description['sources'] = info['file_list'] + return description def describe_table(source_dataset_path): @@ -278,7 +316,8 @@ def write(self, workspace=None): # arg_spec = carbon.MODEL_SPEC['args']['carbon_pools_path'] # filepath = 'C:/Users/dmf/projects/geometamaker/data/carbon_pools.csv' - filepath = 'C:/Users/dmf/projects/geometamaker/data/watershed_gura.shp' + # filepath = 'C:/Users/dmf/projects/geometamaker/data/watershed_gura.shp' + filepath = 'C:/Users/dmf/projects/geometamaker/data/DEM_gura.tif' mc = MetadataControl(filepath) pprint.pprint(dataclasses.asdict(mc.metadata)) # mc.write() From 0f37522abc62677a9020fc97022aa808adfc9981 Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 17 Jul 2024 16:01:54 -0400 Subject: [PATCH 04/15] use frictionless to get file stats --- src/geometamaker/models.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index 7ed8978..f4cbf27 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -182,7 +182,8 @@ def get_file_type(filepath): def describe_vector(source_dataset_path): - description = frictionless.describe(source_dataset_path).to_dict() + description = frictionless.describe( + source_dataset_path, stats=True).to_dict() fields = [] vector = gdal.OpenEx(source_dataset_path, gdal.OF_VECTOR) layer = vector.GetLayer() @@ -203,7 +204,8 @@ def describe_vector(source_dataset_path): def describe_raster(source_dataset_path): - description = frictionless.describe(source_dataset_path).to_dict() + description = frictionless.describe( + source_dataset_path, stats=True).to_dict() bands = [] info = pygeoprocessing.get_raster_info(source_dataset_path) @@ -228,7 +230,9 @@ def describe_raster(source_dataset_path): def describe_table(source_dataset_path): - return frictionless.describe(source_dataset_path).to_dict() + # frictionless.describe works + return frictionless.describe( + source_dataset_path, stats=True).to_dict() DESRCIBE_FUNCS = { From 242c93153ce687703c86584dd45152bd37cc2792 Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 17 Jul 2024 16:57:38 -0400 Subject: [PATCH 05/15] integrating models with existing MetadataControl class --- src/geometamaker/__init__.py | 1 - 
src/geometamaker/geometamaker.py | 584 ++++++++++--------------------- src/geometamaker/models.py | 197 ++--------- 3 files changed, 207 insertions(+), 575 deletions(-) diff --git a/src/geometamaker/__init__.py b/src/geometamaker/__init__.py index 3b8ae6c..9f56a76 100644 --- a/src/geometamaker/__init__.py +++ b/src/geometamaker/__init__.py @@ -1,2 +1 @@ from .geometamaker import MetadataControl -from .geometamaker import MCF_SCHEMA diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 99b45fa..695a40b 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -1,19 +1,19 @@ +import dataclasses import logging import os import uuid from datetime import datetime +import frictionless import fsspec -import jsonschema -from jsonschema.exceptions import ValidationError -import pygeometa.core -from pygeometa.schemas import load_schema import pygeoprocessing from osgeo import gdal from osgeo import ogr from osgeo import osr import yaml +from . import models + # https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml class _NoAliasDumper(yaml.SafeDumper): @@ -25,151 +25,92 @@ def ignore_aliases(self, data): LOGGER = logging.getLogger(__name__) -MCF_SCHEMA_FILE = os.path.join( - pygeometa.core.SCHEMAS, 'mcf', 'core.yaml') -with open(MCF_SCHEMA_FILE, 'r') as schema_file: - MCF_SCHEMA = pygeometa.core.yaml_load(schema_file) - -# modify the core MCF schema so that our default -# template MCFs have all the properties we expect -# users to use. -MCF_SCHEMA['required'].append('content_info') -MCF_SCHEMA['required'].append('dataquality') -MCF_SCHEMA['properties']['identification']['properties'][ - 'citation'] = { - 'type': 'string', - 'description': 'a biobliographic citation for the dataset' - } -MCF_SCHEMA['properties']['identification']['required'].append('citation') -MCF_SCHEMA['properties']['identification']['properties'][ - 'keywords']['patternProperties']['^.*'][ - 'required'] = ['keywords', 'keywords_type'] +# MCF_SCHEMA['properties']['identification']['properties'][ +# 'keywords']['patternProperties']['^.*'][ +# 'required'] = ['keywords', 'keywords_type'] # to accomodate tables that do not represent spatial content: -NO_GEOM_TYPE = 'none' -MCF_SCHEMA['properties']['spatial']['properties'][ - 'geomtype']['enum'].append(NO_GEOM_TYPE) -TABLE_CONTENT_TYPE = 'table' -MCF_SCHEMA['properties']['content_info']['properties'][ - 'type']['enum'].append(TABLE_CONTENT_TYPE) - -OGR_MCF_ATTR_TYPE_MAP = { - ogr.OFTInteger: 'integer', - ogr.OFTInteger64: 'integer', - ogr.OFTReal: 'number', - ogr.OFTString: 'string' -} - - -def _get_default(item): - """Return a default value for a property. - - Args: - item (dict): a jsonschema definition of a property with no children. - Return: - a value from DEFAULT_VALUES - - Raises: - KeyError if ``item`` does not include an - 'enum', 'type', or '$ref' property. 
- - """ - # TODO: read types from the #/definitions found in MCF_SCHEMA - # instead of hardcoding values here - # TODO: support i18n properly by using objects - # keyed by country codes to contain the array of strings - default_values = { - 'string': str(), - 'int': int(), - 'integer': int(), - 'number': float(), - 'boolean': False, - '#/definitions/date_or_datetime_string': str(), - '#/definitions/i18n_string': str(), - '#/definitions/i18n_array': list(), - '#/definitions/any_type': str(), +def get_file_type(filepath): + # TODO: guard against classifying netCDF, HDF5, etc as GDAL rasters, + # we'll want a different data model for multi-dimensional arrays. + + # GDAL considers CSV a vector, so check against frictionless + # first. + filetype = frictionless.describe(filepath).type + if filetype == 'table': + return filetype + gis_type = pygeoprocessing.get_gis_type(filepath) + if gis_type == pygeoprocessing.VECTOR_TYPE: + return 'vector' + if gis_type == pygeoprocessing.RASTER_TYPE: + return 'raster' + raise ValueError() + + +def describe_vector(source_dataset_path): + description = frictionless.describe( + source_dataset_path, stats=True).to_dict() + fields = [] + vector = gdal.OpenEx(source_dataset_path, gdal.OF_VECTOR) + layer = vector.GetLayer() + for fld in layer.schema: + fields.append( + models.FieldSchema(name=fld.name, type=fld.type)) + vector = layer = None + description['schema'] = models.TableSchema(fields=fields) + + info = pygeoprocessing.get_vector_info(source_dataset_path) + spatial = { + 'bounding_box': info['bounding_box'], + 'crs': info['projection_wkt'] } + description['spatial'] = models.SpatialSchema(**spatial) + description['sources'] = info['file_list'] + return description + + +def describe_raster(source_dataset_path): + description = frictionless.describe( + source_dataset_path, stats=True).to_dict() + + bands = [] + info = pygeoprocessing.get_raster_info(source_dataset_path) + for i in range(info['n_bands']): + b = i + 1 + # band = raster.GetRasterBand(b) + # datatype = 'integer' if band.DataType < 6 else 'number' + bands.append(models.BandSchema( + index=b, + gdal_type=info['datatype'], + numpy_type=info['numpy_type'], + nodata=info['nodata'][i])) + description['schema'] = models.RasterSchema( + bands=bands, + pixel_size=info['pixel_size'], + raster_size=info['raster_size']) + description['spatial'] = models.SpatialSchema( + bounding_box=info['bounding_box'], + crs=info['projection_wkt']) + description['sources'] = info['file_list'] + return description + + +def describe_table(source_dataset_path): + return frictionless.describe( + source_dataset_path, stats=True).to_dict() + + +DESRCIBE_FUNCS = { + 'table': describe_table, + 'vector': describe_vector, + 'raster': describe_raster +} - # If there are enumerated values which must be used - try: - fixed_values = item['enum'] - # TODO: find a better way to choose the default - return fixed_values[0] - except KeyError: - pass - - # If no enumerated values, get a default value based on type - try: - t = item['type'] - except KeyError: - # When 'type' is missing, a $ref to another schema is present - try: - t = item['$ref'] - except KeyError: - raise KeyError( - f'schema has no type and no reference to a type definition\n' - f'{item}') - - return default_values[t] - - -def _get_template(schema): - """Create a minimal dictionary that is valid against ``schema``. - - The dict will ontain only the 'required' properties. - - Args: - schema (dict): a jsonschema definition. 
- - Return: - dict that is valid against ``schema`` - - Raises: - KeyError if a penultimate property in a schema branch - does not include an 'enum', 'type', or '$ref' property. - - """ - template = {} - if 'type' in schema and schema['type'] == 'object': - for prop, sch in schema['properties'].items(): - if 'required' in schema and prop not in schema['required']: - continue - if 'patternProperties' in sch: - # this item's properties can have any name matching the pattern. - # assign the name 'default' and overwite the current schema - # with a new one that explicitly includes the 'default' property. - example_sch = { - 'type': 'object', - 'required': ['default'], - 'properties': { - 'default': sch['patternProperties']['^.*'] - } - } - sch = example_sch - - if 'properties' in sch and 'anyOf' in sch['properties']: - # if 'anyOf' is a property, then we effectively want to - # treat the children of 'anyOf' as the properties instead. - template[prop] = { - p: _get_template(s) - for p, s in sch['properties']['anyOf'].items() - } - else: - template[prop] = _get_template(sch) - return template - - elif 'type' in schema and schema['type'] == 'array': - if 'properties' in schema: - # for the weird case where identification.extents.spatial - # is type: array but contains 'properties' instead of 'items' - return [{ - p: _get_template(s) - for p, s in schema['properties'].items() - if p in schema['required'] - }] - return [_get_template(schema['items'])] - else: - return _get_default(schema) +RESOURCE_MODELS = { + 'table': models.TableResource, + 'vector': models.VectorResource, + 'raster': models.RasterResource +} class MetadataControl(object): @@ -200,51 +141,42 @@ def __init__(self, source_dataset_path=None): metadata applies """ - self.mcf = None - if source_dataset_path is not None: - self.datasource = source_dataset_path - self.mcf_path = f'{self.datasource}.yml' - - # Despite naming, this does not open a resource that must be closed - of = fsspec.open(self.datasource) - if not of.fs.exists(self.datasource): - raise FileNotFoundError(f'{self.datasource} does not exist') - - try: - with fsspec.open(self.mcf_path, 'r') as file: - yaml_string = file.read() - - # pygeometa.core.read_mcf can parse nested MCF documents, - # where one MCF refers to another - self.mcf = pygeometa.core.read_mcf(yaml_string) - LOGGER.info(f'loaded existing metadata from {self.mcf_path}') - self.validate() - - # Common path: MCF often does not already exist - except FileNotFoundError as err: - LOGGER.debug(err) - - # Uncommon path: MCF already exists but cannot be used - except (pygeometa.core.MCFReadError, - ValidationError, AttributeError) as err: - # AttributeError in read_mcf not caught by pygeometa - LOGGER.warning(err) - self.mcf = None - - if self.mcf is None: - self.mcf = _get_template(MCF_SCHEMA) - self.mcf['metadata']['identifier'] = str(uuid.uuid4()) - - # fill all values that can be derived from the dataset - LOGGER.debug(f'getting properties from {source_dataset_path}') - self._set_spatial_info() - else: - self.mcf = _get_template(MCF_SCHEMA) + # if source_dataset_path is not None: + self.datasource = source_dataset_path + self.data_package_path = f'{self.datasource}.yml' + + # Despite naming, this does not open a resource that must be closed + of = fsspec.open(self.datasource) + if not of.fs.exists(self.datasource): + raise FileNotFoundError(f'{self.datasource} does not exist') - self.mcf['mcf']['version'] = \ - MCF_SCHEMA['properties']['mcf'][ - 'properties']['version']['const'] + resource_type = 
get_file_type(source_dataset_path) + description = DESRCIBE_FUNCS[resource_type](source_dataset_path) + # this is nice for autodetect of field types, but sometimes + # we will know the table schema (invest MODEL_SPEC). + # Is there any benefit to passing in the known schema? Maybe not + # Can also just overwrite the schema attribute with known data after. + + # Load existing metadata file + try: + with fsspec.open(self.data_package_path, 'r') as file: + yaml_string = file.read() + + # This validates the existing yaml against our dataclasses. + existing_resource = RESOURCE_MODELS[resource_type]( + **yaml.safe_load(yaml_string)) + # overwrite properties that are intrinsic to the dataset, + # which is everything from `description` other than schema. + # Some parts of schema are intrinsic, but others are human-input + # so replace the whole thing for now. + del description['schema'] + self.metadata = dataclasses.replace( + existing_resource, **description) + + # Common path: metadata file does not already exist + except FileNotFoundError as err: + self.metadata = RESOURCE_MODELS[resource_type](**description) def set_title(self, title): """Add a title for the dataset. @@ -253,24 +185,24 @@ def set_title(self, title): title (str) """ - self.mcf['identification']['title'] = title + self.metadata.title = title def get_title(self): """Get the title for the dataset.""" - return self.mcf['identification']['title'] + return self.metadata.title - def set_abstract(self, abstract): - """Add an abstract for the dataset. + def set_description(self, description): + """Add an description for the dataset. Args: - abstract (str) + description (str) """ - self.mcf['identification']['abstract'] = abstract + self.metadata.description = description - def get_abstract(self): - """Get the abstract for the dataset.""" - return self.mcf['identification']['abstract'] + def get_description(self): + """Get the description for the dataset.""" + return self.metadata.description def set_citation(self, citation): """Add a citation string for the dataset. @@ -279,53 +211,41 @@ def set_citation(self, citation): citation (str) """ - self.mcf['identification']['citation'] = citation + self.metadata.citation = citation def get_citation(self): """Get the citation for the dataset.""" - return self.mcf['identification']['citation'] + return self.metadata.citation - def set_contact(self, organization=None, individualname=None, positionname=None, - email=None, section='default', **kwargs): + def set_contact(self, organization=None, individual_name=None, + position_name=None, email=None): """Add a contact section. Args: organization (str): name of the responsible organization - individualname (str): name of the responsible person - positionname (str): role or position of the responsible person - email (str): email address of the responsible organization or individual - section (str): a header for the contact section under which to - apply the other args, since there can be more than one. - kwargs (dict): key-value pairs for any other properties listed in - the contact section of the core MCF schema. 
+ individual_name (str): name of the responsible person + position_name (str): role or position of the responsible person + email (str): address of the responsible organization or individual """ if organization: - self.mcf['contact'][section]['organization'] = organization - if individualname: - self.mcf['contact'][section]['individualname'] = individualname - if positionname: - self.mcf['contact'][section]['positionname'] = positionname + self.metadata.contact.organization = organization + if individual_name: + self.metadata.contact.individualname = individual_name + if position_name: + self.metadata.contact.positionname = position_name if email: - self.mcf['contact'][section]['email'] = email - if kwargs: - for k, v in kwargs.items(): - self.mcf['contact'][section][k] = v - - self.validate() + self.metadata.contact.email = email - def get_contact(self, section='default'): + def get_contact(self): """Get metadata from a contact section. - Args: - section (str): a header for the contact section under which to - apply the other args, since there can be more than one. Returns: - A dict or ``None`` if ``section`` does not exist. + ContactSchema """ - return self.mcf['contact'].get(section) + return self.metadata.contact def set_doi(self, doi): """Add a doi string for the dataset. @@ -334,11 +254,11 @@ def set_doi(self, doi): doi (str) """ - self.mcf['identification']['doi'] = doi + self.metadata.doi = doi def get_doi(self): """Get the doi for the dataset.""" - return self.mcf['identification']['doi'] + return self.metadata.doi def set_edition(self, edition): """Set the edition for the dataset. @@ -347,8 +267,7 @@ def set_edition(self, edition): edition (str): version of the cited resource """ - self.mcf['identification']['edition'] = edition - self.validate() + self.metadata.edition = edition def get_edition(self): """Get the edition of the dataset. @@ -357,7 +276,7 @@ def get_edition(self): str or ``None`` if ``edition`` does not exist. """ - return self.mcf['identification'].get('edition') + return self.metadata.edition def set_keywords(self, keywords, section='default', keywords_type='theme', vocabulary=None): @@ -393,58 +312,56 @@ def set_keywords(self, keywords, section='default', keywords_type='theme', def get_keywords(self, section='default'): return self.mcf['identification']['keywords'][section] - def set_license(self, name=None, url=None): + def set_license(self, title=None, path=None): """Add a license for the dataset. - Either or both name and url are required if there is a license. + Either or both title and path are required if there is a license. Call with no arguments to remove access constraints and license info. Args: - name (str): name of the license of the source dataset - url (str): url for the license + title (str): human-readable title of the license + path (str): url for the license """ - # MCF spec says use 'otherRestrictions' to mean no restrictions - constraints = 'otherRestrictions' - if name or url: - constraints = 'license' - license_dict = {} - license_dict['name'] = name if name else '' - license_dict['url'] = url if url else '' - self.mcf['identification']['license'] = license_dict - self.mcf['identification']['accessconstraints'] = constraints - self.validate() + license_dict['title'] = title if title else '' + license_dict['path'] = path if path else '' + + # TODO: DataPackage/Resource allows for a list of licenses. + # So far we only support one license per resource. 
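+        # Usage sketch, on a MetadataControl instance ``mc`` (hypothetical
+        # values):
+        #
+        #   >>> mc.set_license(
+        #   ...     title='CC-BY-4.0',
+        #   ...     path='https://creativecommons.org/licenses/by/4.0/')
+        #   >>> mc.get_license()
+        #   License(path='https://creativecommons.org/licenses/by/4.0/', title='CC-BY-4.0')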
+ self.licenses = [models.License(**license_dict)] def get_license(self): """Get ``license`` for the dataset. Returns: - dict or ``None`` if ``license`` does not exist. + models.License """ - return self.mcf['identification'].get('license') + # TODO: DataPackage/Resource allows for a list of licenses. + # So far we only support one license per resource. + if self.licenses: + return self.licenses[0] def set_lineage(self, statement): """Set the lineage statement for the dataset. Args: - statement (str): general explanation describing the lineage or provenance - of the dataset + statement (str): general explanation describing the lineage or + provenance of the dataset """ - self.mcf['dataquality']['lineage']['statement'] = statement - self.validate() + self.metadata.lineage = statement def get_lineage(self): """Get the lineage statement of the dataset. Returns: - str or ``None`` if ``lineage`` does not exist. + str """ - return self.mcf['dataquality']['lineage'].get('statement') + return self.metadata.lineage def set_purpose(self, purpose): """Add a purpose for the dataset. @@ -453,21 +370,16 @@ def set_purpose(self, purpose): purpose (str): description of the purpose of the source dataset """ - # 'Purpose' is not supported in the core MCF spec, probably because - # `` was added to ISO-19115 in 2014, and MCF still only - # supports 2015. For now, we can add `purpose` in `identification`. - # Later we can move it elsewhere if it becomes formally supported. - self.mcf['identification']['purpose'] = purpose - self.validate() + self.metadata.purpose = purpose def get_purpose(self): """Get ``purpose`` for the dataset. Returns: - str or ``None`` if ``purpose`` does not exist. + str """ - return self.mcf['identification'].get('purpose') + return self.metadata.purpose def set_url(self, url): """Add a url for the dataset. @@ -476,11 +388,11 @@ def set_url(self, url): url (str) """ - self.mcf['identification']['url'] = url + self.metadata.url = url def get_url(self): """Get the url for the dataset.""" - return self.mcf['identification']['url'] + return self.metadata.url def set_band_description(self, band_number, name=None, title=None, abstract=None, units=None, type=None): @@ -582,19 +494,14 @@ def get_field_description(self, name): idx, attribute = self._get_attr(name) return attribute - def _write_mcf(self, target_path): - with open(target_path, 'w') as file: - file.write(yaml.dump(self.mcf, Dumper=_NoAliasDumper)) - def write(self, workspace=None): - """Write MCF and ISO-19139 XML to disk. + """Write datapackage yaml to disk. - This creates sidecar files with '.yml' and '.xml' extensions + This creates sidecar files with '.yml' appended to the full filename of the data source. 
For example, - 'myraster.tif' - 'myraster.tif.yml' - - 'myraster.tif.xml' Args: workspace (str): if ``None``, files write to the same location @@ -605,141 +512,14 @@ def write(self, workspace=None): """ if workspace is None: - target_mcf_path = self.mcf_path - target_xml_path = f'{self.datasource}.xml' + target_path = self.data_package_path else: - target_mcf_path = os.path.join( + target_path = os.path.join( workspace, f'{os.path.basename(self.datasource)}.yml') - target_xml_path = os.path.join( - workspace, f'{os.path.basename(self.datasource)}.xml') - - self.mcf['metadata']['datestamp'] = datetime.utcnow().strftime( - '%Y-%m-%d') - self._write_mcf(target_mcf_path) - - schema_obj = load_schema('iso19139') - xml_string = schema_obj.write(self.mcf) - with open(target_xml_path, 'w') as xmlfile: - xmlfile.write(xml_string) - - def validate(self): - """Validate MCF against a jsonschema object.""" - # validate against our own schema, which could - # be a superset of the core MCF schema. - # If we wanted to validate against core MCF, - # we could use pygeometa.core.validate_mcf - jsonschema.validate(self.mcf, MCF_SCHEMA) + + with open(target_path, 'w') as file: + file.write(yaml.dump( + dataclasses.asdict(self.metadata), Dumper=_NoAliasDumper)) def to_string(self): pass - - def _set_spatial_info(self): - """Populate the MCF using spatial properties of the dataset.""" - gis_type = pygeoprocessing.get_gis_type(self.datasource) - self.mcf['metadata']['hierarchylevel'] = 'dataset' - - if gis_type == pygeoprocessing.VECTOR_TYPE: - LOGGER.debug('opening as GDAL vector') - self.mcf['content_info']['type'] = 'coverage' - self.mcf['spatial']['datatype'] = 'vector' - open_options = [] - - if os.path.splitext(self.datasource)[1] == '.csv': - self.mcf['spatial']['datatype'] = 'textTable' - open_options.append('AUTODETECT_TYPE=YES') - - vector = gdal.OpenEx(self.datasource, gdal.OF_VECTOR, - open_options=open_options) - layer = vector.GetLayer() - layer_defn = layer.GetLayerDefn() - geomname = ogr.GeometryTypeToName(layer_defn.GetGeomType()) - geomtype = NO_GEOM_TYPE - # https://www.fgdc.gov/nap/metadata/register/codelists.html - if 'Point' in geomname: - geomtype = 'point' - if 'Polygon' in geomname: - geomtype = 'surface' - if 'Line' in geomname: - geomtype = 'curve' - if 'Collection' in geomname: - geomtype = 'complex' - self.mcf['spatial']['geomtype'] = geomtype - - if len(layer.schema) and 'attributes' not in self.mcf['content_info']: - self.mcf['content_info']['attributes'] = [] - - for field in layer.schema: - try: - idx, attribute = self._get_attr(field.name) - except KeyError: - attribute = _get_template( - MCF_SCHEMA['properties']['content_info']['properties'][ - 'attributes'])[0] - attribute['name'] = field.name - self.mcf['content_info']['attributes'].append( - attribute) - - try: - datatype = OGR_MCF_ATTR_TYPE_MAP[field.type] - except KeyError: - LOGGER.warning( - f'{field.type} is missing in the OGR-to-MCF ' - f'attribute type map; attribute type for field ' - f'{field.name} will be "object".') - datatype = 'object' - self.set_field_description(field.name, type=datatype) - - vector = None - layer = None - - gis_info = pygeoprocessing.get_vector_info(self.datasource) - - if gis_type == pygeoprocessing.RASTER_TYPE: - LOGGER.debug('opening as GDAL raster') - self.mcf['spatial']['datatype'] = 'grid' - self.mcf['spatial']['geomtype'] = 'surface' - self.mcf['content_info']['type'] = 'image' - - raster = gdal.OpenEx(self.datasource, gdal.OF_RASTER) - - attr = _get_template( - 
MCF_SCHEMA['properties']['content_info']['properties'][ - 'attributes'])[0] - - if 'attributes' not in self.mcf['content_info']: - self.mcf['content_info']['attributes'] = [attr]*raster.RasterCount - else: - n_attrs = len(self.mcf['content_info']['attributes']) - if n_attrs < raster.RasterCount: - extend_n = raster.RasterCount - n_attrs - self.mcf['content_info']['attributes'].extend( - [attr]*extend_n) - - for i in range(raster.RasterCount): - b = i + 1 - band = raster.GetRasterBand(b) - datatype = 'integer' if band.DataType < 6 else 'number' - self.set_band_description(b, type=datatype) - band = None - raster = None - - gis_info = pygeoprocessing.get_raster_info(self.datasource) - - if gis_info['projection_wkt']: - try: - srs = osr.SpatialReference() - srs.ImportFromWkt(gis_info['projection_wkt']) - epsg = srs.GetAttrValue('AUTHORITY', 1) - except TypeError: - LOGGER.warning( - f'could not import a spatial reference system from ' - f'"projection_wkt" in {gis_info}') - epsg = '' - # for human-readable values after yaml dump, use python types - # instead of numpy types - bbox = [float(x) for x in gis_info['bounding_box']] - spatial_info = [{ - 'bbox': bbox, - 'crs': epsg # MCF does not support WKT here - }] - self.mcf['identification']['extents']['spatial'] = spatial_info diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index f4cbf27..23a0039 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -1,14 +1,8 @@ -import dataclasses from dataclasses import dataclass, field import logging -import os import pprint -import frictionless -import fsspec -import pygeoprocessing import yaml -from osgeo import gdal LOGGER = logging.getLogger(__name__) @@ -43,8 +37,21 @@ class ContactSchema: email: str = '' organization: str = '' - individualname: str = '' - positionname: str = '' + individual_name: str = '' + position_name: str = '' + + +@dataclass +class License: + """Class for storing license info.""" + + # https://datapackage.org/profiles/2.0/dataresource.json + # This profile also includes `name`, described as: + # "MUST be an Open Definition license identifier", + # see http://licenses.opendefinition.org/" + # I don't think that's useful to us yet. + path: str + title: str @dataclass @@ -104,6 +111,10 @@ class Resource: that are important to us. """ + # TODO: DP includes `sources` as list of source files + # with some amount of metadata for each item. For our + # use-case, I think a list of filenames is good enough. + path: str = '' type: str = '' scheme: str = '' @@ -116,8 +127,13 @@ class Resource: title: str = '' description: str = '' sources: list = field(default_factory=list) - # schema: dict = field(init=False) licenses: list = field(default_factory=list) + citation: str = '' + doi: str = '' + url: str = '' + edition: str = '' + lineage: str = '' + purpose: str = '' contact: ContactSchema = ContactSchema() # def __post_init__(self): @@ -162,166 +178,3 @@ def __post_init__(self): if isinstance(self.schema, RasterSchema): return self.schema = RasterSchema(**self.schema) - - -def get_file_type(filepath): - # TODO: guard against classifying netCDF, HDF5, etc as GDAL rasters, - # we'll want a different data model for multi-dimensional arrays. - - # GDAL considers CSV a vector, so check against frictionless - # first. 
- filetype = frictionless.describe(filepath).type - if filetype == 'table': - return filetype - gis_type = pygeoprocessing.get_gis_type(filepath) - if gis_type == pygeoprocessing.VECTOR_TYPE: - return 'vector' - if gis_type == pygeoprocessing.RASTER_TYPE: - return 'raster' - raise ValueError() - - -def describe_vector(source_dataset_path): - description = frictionless.describe( - source_dataset_path, stats=True).to_dict() - fields = [] - vector = gdal.OpenEx(source_dataset_path, gdal.OF_VECTOR) - layer = vector.GetLayer() - for fld in layer.schema: - fields.append( - FieldSchema(name=fld.name, type=fld.type)) - vector = layer = None - description['schema'] = TableSchema(fields=fields) - - info = pygeoprocessing.get_vector_info(source_dataset_path) - spatial = { - 'bounding_box': info['bounding_box'], - 'crs': info['projection_wkt'] - } - description['spatial'] = SpatialSchema(**spatial) - description['sources'] = info['file_list'] - return description - - -def describe_raster(source_dataset_path): - description = frictionless.describe( - source_dataset_path, stats=True).to_dict() - - bands = [] - info = pygeoprocessing.get_raster_info(source_dataset_path) - for i in range(info['n_bands']): - b = i + 1 - # band = raster.GetRasterBand(b) - # datatype = 'integer' if band.DataType < 6 else 'number' - bands.append(BandSchema( - index=b, - gdal_type=info['datatype'], - numpy_type=info['numpy_type'], - nodata=info['nodata'][i])) - description['schema'] = RasterSchema( - bands=bands, - pixel_size=info['pixel_size'], - raster_size=info['raster_size']) - description['spatial'] = SpatialSchema( - bounding_box=info['bounding_box'], - crs=info['projection_wkt']) - description['sources'] = info['file_list'] - return description - - -def describe_table(source_dataset_path): - # frictionless.describe works - return frictionless.describe( - source_dataset_path, stats=True).to_dict() - - -DESRCIBE_FUNCS = { - 'table': describe_table, - 'vector': describe_vector, - 'raster': describe_raster -} - -RESOURCE_MODELS = { - 'table': TableResource, - 'vector': VectorResource, - 'raster': RasterResource -} - - -class MetadataControl(object): - - def __init__(self, source_dataset_path): - # if source_dataset_path is not None: - self.datasource = source_dataset_path - self.data_package_path = f'{self.datasource}.dp.yml' - - # Despite naming, this does not open a resource that must be closed - of = fsspec.open(self.datasource) - if not of.fs.exists(self.datasource): - raise FileNotFoundError(f'{self.datasource} does not exist') - - resource_type = get_file_type(source_dataset_path) - description = DESRCIBE_FUNCS[resource_type](source_dataset_path) - # this is nice for autodetect of field types, but sometimes - # we will know the table schema (invest MODEL_SPEC). - # Is there any benefit to passing in the known schema? Maybe not - # Can also just overwrite the schema attribute with known data after. - - # Load existing metadata file - try: - with fsspec.open(self.data_package_path, 'r') as file: - yaml_string = file.read() - - # This validates the existing yaml against our dataclasses. - existing_resource = RESOURCE_MODELS[resource_type]( - **yaml.safe_load(yaml_string)) - # overwrite properties that are intrinsic to the dataset, - # which is everything from `description` other than schema. - # Some parts of schema are intrinsic, but others are human-input - # so replace the whole thing for now. 
- del description['schema'] - self.metadata = dataclasses.replace( - existing_resource, **description) - - # Common path: metadata file does not already exist - except FileNotFoundError as err: - self.metadata = RESOURCE_MODELS[resource_type](**description) - - def write(self, workspace=None): - """Write datapackage yaml to disk. - - This creates sidecar files with '.yml' - appended to the full filename of the data source. For example, - - - 'myraster.tif' - - 'myraster.tif.yml' - - Args: - workspace (str): if ``None``, files write to the same location - as the source data. If not ``None``, a path to a local directory - to write files. They will still be named to match the source - filename. Use this option if the source data is not on the local - filesystem. - - """ - if workspace is None: - target_path = self.data_package_path - else: - target_path = os.path.join( - workspace, f'{os.path.basename(self.datasource)}.dp.yml') - - with open(target_path, 'w') as file: - file.write(yaml.dump( - dataclasses.asdict(self.metadata), Dumper=_NoAliasDumper)) - - -if __name__ == "__main__": - # from natcap.invest import carbon - # arg_spec = carbon.MODEL_SPEC['args']['carbon_pools_path'] - - # filepath = 'C:/Users/dmf/projects/geometamaker/data/carbon_pools.csv' - # filepath = 'C:/Users/dmf/projects/geometamaker/data/watershed_gura.shp' - filepath = 'C:/Users/dmf/projects/geometamaker/data/DEM_gura.tif' - mc = MetadataControl(filepath) - pprint.pprint(dataclasses.asdict(mc.metadata)) - # mc.write() From 539fc5163987fe28ae46938242352f5542bd4cb9 Mon Sep 17 00:00:00 2001 From: davemfish Date: Thu, 18 Jul 2024 14:40:56 -0400 Subject: [PATCH 06/15] more integration, moving methods onto the Resource classes. --- src/geometamaker/geometamaker.py | 352 +------------------------------ src/geometamaker/models.py | 348 ++++++++++++++++++++++++++++-- 2 files changed, 338 insertions(+), 362 deletions(-) diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 695a40b..e3a6c7b 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -25,12 +25,11 @@ def ignore_aliases(self, data): LOGGER = logging.getLogger(__name__) -# MCF_SCHEMA['properties']['identification']['properties'][ -# 'keywords']['patternProperties']['^.*'][ -# 'required'] = ['keywords', 'keywords_type'] -# to accomodate tables that do not represent spatial content: def get_file_type(filepath): + # TODO: zip, or other archives. Can they be represented as a Resource? + # or do they need to be a Package? + # TODO: guard against classifying netCDF, HDF5, etc as GDAL rasters, # we'll want a different data model for multi-dimensional arrays. @@ -178,348 +177,3 @@ def __init__(self, source_dataset_path=None): except FileNotFoundError as err: self.metadata = RESOURCE_MODELS[resource_type](**description) - def set_title(self, title): - """Add a title for the dataset. - - Args: - title (str) - - """ - self.metadata.title = title - - def get_title(self): - """Get the title for the dataset.""" - return self.metadata.title - - def set_description(self, description): - """Add an description for the dataset. - - Args: - description (str) - - """ - self.metadata.description = description - - def get_description(self): - """Get the description for the dataset.""" - return self.metadata.description - - def set_citation(self, citation): - """Add a citation string for the dataset. 
- - Args: - citation (str) - - """ - self.metadata.citation = citation - - def get_citation(self): - """Get the citation for the dataset.""" - return self.metadata.citation - - def set_contact(self, organization=None, individual_name=None, - position_name=None, email=None): - """Add a contact section. - - Args: - organization (str): name of the responsible organization - individual_name (str): name of the responsible person - position_name (str): role or position of the responsible person - email (str): address of the responsible organization or individual - - """ - - if organization: - self.metadata.contact.organization = organization - if individual_name: - self.metadata.contact.individualname = individual_name - if position_name: - self.metadata.contact.positionname = position_name - if email: - self.metadata.contact.email = email - - def get_contact(self): - """Get metadata from a contact section. - - Returns: - ContactSchema - - """ - return self.metadata.contact - - def set_doi(self, doi): - """Add a doi string for the dataset. - - Args: - doi (str) - - """ - self.metadata.doi = doi - - def get_doi(self): - """Get the doi for the dataset.""" - return self.metadata.doi - - def set_edition(self, edition): - """Set the edition for the dataset. - - Args: - edition (str): version of the cited resource - - """ - self.metadata.edition = edition - - def get_edition(self): - """Get the edition of the dataset. - - Returns: - str or ``None`` if ``edition`` does not exist. - - """ - return self.metadata.edition - - def set_keywords(self, keywords, section='default', keywords_type='theme', - vocabulary=None): - """Describe a dataset with a list of keywords. - - Keywords are grouped into sections for the purpose of complying with - pre-exising keyword schema. A section will be overwritten if it - already exists. - - Args: - keywords (list): sequence of strings - section (string): the name of a keywords section - keywords_type (string): subject matter used to group similar - keywords. Must be one of, - ('discipline', 'place', 'stratum', 'temporal', 'theme') - vocabulary (dict): a dictionary with 'name' and 'url' (optional) - keys. Used to describe the source (thesaurus) of keywords - - Raises: - ValidationError - - """ - section_dict = { - 'keywords': keywords, - 'keywords_type': keywords_type - } - - if vocabulary: - section_dict['vocabulary'] = vocabulary - self.mcf['identification']['keywords'][section] = section_dict - self.validate() - - def get_keywords(self, section='default'): - return self.mcf['identification']['keywords'][section] - - def set_license(self, title=None, path=None): - """Add a license for the dataset. - - Either or both title and path are required if there is a license. - Call with no arguments to remove access constraints and license - info. - - Args: - title (str): human-readable title of the license - path (str): url for the license - - """ - license_dict = {} - license_dict['title'] = title if title else '' - license_dict['path'] = path if path else '' - - # TODO: DataPackage/Resource allows for a list of licenses. - # So far we only support one license per resource. - self.licenses = [models.License(**license_dict)] - - def get_license(self): - """Get ``license`` for the dataset. - - Returns: - models.License - - """ - # TODO: DataPackage/Resource allows for a list of licenses. - # So far we only support one license per resource. - if self.licenses: - return self.licenses[0] - - def set_lineage(self, statement): - """Set the lineage statement for the dataset. 
- - Args: - statement (str): general explanation describing the lineage or - provenance of the dataset - - """ - self.metadata.lineage = statement - - def get_lineage(self): - """Get the lineage statement of the dataset. - - Returns: - str - - """ - return self.metadata.lineage - - def set_purpose(self, purpose): - """Add a purpose for the dataset. - - Args: - purpose (str): description of the purpose of the source dataset - - """ - self.metadata.purpose = purpose - - def get_purpose(self): - """Get ``purpose`` for the dataset. - - Returns: - str - - """ - return self.metadata.purpose - - def set_url(self, url): - """Add a url for the dataset. - - Args: - url (str) - - """ - self.metadata.url = url - - def get_url(self): - """Get the url for the dataset.""" - return self.metadata.url - - def set_band_description(self, band_number, name=None, title=None, - abstract=None, units=None, type=None): - """Define metadata for a raster band. - - Args: - band_number (int): a raster band index, starting at 1 - name (str): name for the raster band - title (str): title for the raster band - abstract (str): description of the raster band - units (str): unit of measurement for the band's pixel values - type (str): of the band's values, either 'integer' or 'number' - - """ - idx = band_number - 1 - attribute = self.mcf['content_info']['attributes'][idx] - - if name is not None: - attribute['name'] = name - if title is not None: - attribute['title'] = title - if abstract is not None: - attribute['abstract'] = abstract - if units is not None: - attribute['units'] = units - if type is not None: - attribute['type'] = type - - self.mcf['content_info']['attributes'][idx] = attribute - - def get_band_description(self, band_number): - """Get the attribute metadata for a band. - - Args: - band_number (int): a raster band index, starting at 1 - - Returns: - dict - """ - return self.mcf['content_info']['attributes'][band_number - 1] - - def _get_attr(self, name): - """Get an attribute by its name property. - - Args: - name (string): to match the value of the 'name' key in a dict - - Returns: - tuple of (list index of the matching attribute, the attribute - dict) - - Raises: - KeyError if no attributes exist in the MCF or if the named - attribute does not exist. - - """ - if len(self.mcf['content_info']['attributes']) == 0: - raise KeyError( - f'{self.datasource} MCF has not attributes') - for idx, attr in enumerate(self.mcf['content_info']['attributes']): - if attr['name'] == name: - return idx, attr - raise KeyError( - f'{self.datasource} has no attribute named {name}') - - def set_field_description(self, name, title=None, abstract=None, - units=None, type=None): - """Define metadata for a tabular field. - - Args: - name (str): name and unique identifier of the field - title (str): title for the field - abstract (str): description of the field - units (str): unit of measurement for the field's values - - """ - idx, attribute = self._get_attr(name) - - if title is not None: - attribute['title'] = title - if abstract is not None: - attribute['abstract'] = abstract - if units is not None: - attribute['units'] = units - if type is not None: - attribute['type'] = type - - self.mcf['content_info']['attributes'][idx] = attribute - - def get_field_description(self, name): - """Get the attribute metadata for a field. 
- - Args: - name (str): name and unique identifier of the field - - Returns: - dict - """ - idx, attribute = self._get_attr(name) - return attribute - - def write(self, workspace=None): - """Write datapackage yaml to disk. - - This creates sidecar files with '.yml' - appended to the full filename of the data source. For example, - - - 'myraster.tif' - - 'myraster.tif.yml' - - Args: - workspace (str): if ``None``, files write to the same location - as the source data. If not ``None``, a path to a local directory - to write files. They will still be named to match the source - filename. Use this option if the source data is not on the local - filesystem. - - """ - if workspace is None: - target_path = self.data_package_path - else: - target_path = os.path.join( - workspace, f'{os.path.basename(self.datasource)}.yml') - - with open(target_path, 'w') as file: - file.write(yaml.dump( - dataclasses.asdict(self.metadata), Dumper=_NoAliasDumper)) - - def to_string(self): - pass diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index 23a0039..fde63db 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -1,5 +1,7 @@ -from dataclasses import dataclass, field +import dataclasses +from dataclasses import dataclass import logging +import os import pprint import yaml @@ -73,12 +75,10 @@ class TableSchema: """Class for metadata for tables.""" # https://datapackage.org/standard/table-schema/ - fields: list = field(default_factory=FieldSchema) - missingValues: list = field(default_factory=list) - primaryKey: list = field(default_factory=list) - foreignKeys: list = field(default_factory=list) - - # def get_field(): + fields: list = dataclasses.field(default_factory=FieldSchema) + missingValues: list = dataclasses.field(default_factory=list) + primaryKey: list = dataclasses.field(default_factory=list) + foreignKeys: list = dataclasses.field(default_factory=list) @dataclass @@ -126,8 +126,9 @@ class Resource: name: str = '' title: str = '' description: str = '' - sources: list = field(default_factory=list) - licenses: list = field(default_factory=list) + keywords: list = [] + sources: list = dataclasses.field(default_factory=list) + licenses: list = dataclasses.field(default_factory=list) citation: str = '' doi: str = '' url: str = '' @@ -136,8 +137,229 @@ class Resource: purpose: str = '' contact: ContactSchema = ContactSchema() - # def __post_init__(self): - # self.schema = + def set_title(self, title): + """Add a title for the dataset. + + Args: + title (str) + + """ + self.metadata.title = title + + def get_title(self): + """Get the title for the dataset.""" + return self.metadata.title + + def set_description(self, description): + """Add an description for the dataset. + + Args: + description (str) + + """ + self.metadata.description = description + + def get_description(self): + """Get the description for the dataset.""" + return self.metadata.description + + def set_citation(self, citation): + """Add a citation string for the dataset. + + Args: + citation (str) + + """ + self.metadata.citation = citation + + def get_citation(self): + """Get the citation for the dataset.""" + return self.metadata.citation + + def set_contact(self, organization=None, individual_name=None, + position_name=None, email=None): + """Add a contact section. 
+ + Args: + organization (str): name of the responsible organization + individual_name (str): name of the responsible person + position_name (str): role or position of the responsible person + email (str): address of the responsible organization or individual + + """ + + if organization is not None: + self.metadata.contact.organization = organization + if individual_name is not None: + self.metadata.contact.individualname = individual_name + if position_name is not None: + self.metadata.contact.positionname = position_name + if email is not None: + self.metadata.contact.email = email + + def get_contact(self): + """Get metadata from a contact section. + + Returns: + ContactSchema + + """ + return self.metadata.contact + + def set_doi(self, doi): + """Add a doi string for the dataset. + + Args: + doi (str) + + """ + self.metadata.doi = doi + + def get_doi(self): + """Get the doi for the dataset.""" + return self.metadata.doi + + def set_edition(self, edition): + """Set the edition for the dataset. + + Args: + edition (str): version of the cited resource + + """ + self.metadata.edition = edition + + def get_edition(self): + """Get the edition of the dataset. + + Returns: + str or ``None`` if ``edition`` does not exist. + + """ + return self.metadata.edition + + def set_keywords(self, keywords): + """Describe a dataset with a list of keywords. + + Args: + keywords (list): sequence of strings + + """ + self.metadata.keywords = keywords + + def get_keywords(self): + return self.metadata.keywords + + def set_license(self, title=None, path=None): + """Add a license for the dataset. + + Either or both title and path are required if there is a license. + Call with no arguments to remove access constraints and license + info. + + Args: + title (str): human-readable title of the license + path (str): url for the license + + """ + license_dict = {} + license_dict['title'] = title if title else '' + license_dict['path'] = path if path else '' + + # TODO: DataPackage/Resource allows for a list of licenses. + # So far we only support one license per resource. + self.licenses = [License(**license_dict)] + + def get_license(self): + """Get ``license`` for the dataset. + + Returns: + models.License + + """ + # TODO: DataPackage/Resource allows for a list of licenses. + # So far we only support one license per resource. + if self.licenses: + return self.licenses[0] + + def set_lineage(self, statement): + """Set the lineage statement for the dataset. + + Args: + statement (str): general explanation describing the lineage or + provenance of the dataset + + """ + self.metadata.lineage = statement + + def get_lineage(self): + """Get the lineage statement of the dataset. + + Returns: + str + + """ + return self.metadata.lineage + + def set_purpose(self, purpose): + """Add a purpose for the dataset. + + Args: + purpose (str): description of the purpose of the source dataset + + """ + self.metadata.purpose = purpose + + def get_purpose(self): + """Get ``purpose`` for the dataset. + + Returns: + str + + """ + return self.metadata.purpose + + def set_url(self, url): + """Add a url for the dataset. + + Args: + url (str) + + """ + self.metadata.url = url + + def get_url(self): + """Get the url for the dataset.""" + return self.metadata.url + + def write(self, workspace=None): + """Write datapackage yaml to disk. + + This creates sidecar files with '.yml' + appended to the full filename of the data source. 
For example, + + - 'myraster.tif' + - 'myraster.tif.yml' + + Args: + workspace (str): if ``None``, files write to the same location + as the source data. If not ``None``, a path to a local directory + to write files. They will still be named to match the source + filename. Use this option if the source data is not on the local + filesystem. + + """ + if workspace is None: + target_path = self.data_package_path + else: + target_path = os.path.join( + workspace, f'{os.path.basename(self.datasource)}.yml') + + with open(target_path, 'w') as file: + file.write(yaml.dump( + dataclasses.asdict(self.metadata), Dumper=_NoAliasDumper)) + + def to_string(self): + pass @dataclass(kw_only=True) @@ -145,8 +367,7 @@ class TableResource(Resource): """Class for metadata for a table resource.""" # without post-init, schema ends up as a dict, or whatever is passed in. - schema: TableSchema = field(default_factory=TableSchema) - # type: str = 'table' + schema: TableSchema = dataclasses.field(default_factory=TableSchema) def __post_init__(self): # Allow init of the resource with a schema of type @@ -156,6 +377,71 @@ def __post_init__(self): return self.schema = TableSchema(**self.schema) + def _get_field(self, name): + """Get an attribute by its name property. + + Args: + name (string): to match the value of the 'name' key in a dict + + Returns: + tuple of (list index of the matching attribute, the attribute + dict) + + Raises: + KeyError if no attributes exist in the MCF or if the named + attribute does not exist. + + """ + if len(self.schema.fields) == 0: + raise KeyError( + f'{self.schema} has no fields') + for idx, field in enumerate(self.schema.fields): + if field['name'] == name: + return idx, field + raise KeyError( + f'{self.schema} has no field named {name}') + + def set_field_description(self, name, title=None, description=None, + units=None, type=None, format=None, + example=None): + """Define metadata for a tabular field. + + Args: + name (str): name and unique identifier of the field + title (str): title for the field + abstract (str): description of the field + units (str): unit of measurement for the field's values + + """ + idx, field = self._get_field(name) + + if title is not None: + field.title = title + if description is not None: + field.description = description + if units is not None: + field.units = units + if type is not None: + field.type = type + if format is not None: + field.format = format + if example is not None: + field.example = example + + self.schema.fields[idx] = field + + def get_field_description(self, name): + """Get the attribute metadata for a field. + + Args: + name (str): name and unique identifier of the field + + Returns: + dict + """ + idx, field = self._get_field(name) + return field + @dataclass(kw_only=True) class VectorResource(TableResource): @@ -178,3 +464,39 @@ def __post_init__(self): if isinstance(self.schema, RasterSchema): return self.schema = RasterSchema(**self.schema) + + def set_band_description(self, band_number, title=None, + description=None, units=None): + """Define metadata for a raster band. 
+ + Args: + band_number (int): a raster band index, starting at 1 + name (str): name for the raster band + title (str): title for the raster band + abstract (str): description of the raster band + units (str): unit of measurement for the band's pixel values + type (str): of the band's values, either 'integer' or 'number' + + """ + idx = band_number - 1 + band = self.schema.bands[idx] + + if title is not None: + band.title = title + if description is not None: + band.description = description + if units is not None: + band.units = units + + self.schema.bands[idx] = band + + def get_band_description(self, band_number): + """Get the attribute metadata for a band. + + Args: + band_number (int): a raster band index, starting at 1 + + Returns: + dict + """ + return self.schema.bands[band_number - 1] From 1aa4c103a5214a83b65dcca82d8029f6f78f94e0 Mon Sep 17 00:00:00 2001 From: davemfish Date: Fri, 19 Jul 2024 11:35:37 -0400 Subject: [PATCH 07/15] added support for compressed files; updated some tests --- src/geometamaker/__init__.py | 2 +- src/geometamaker/geometamaker.py | 145 +++++------ src/geometamaker/models.py | 83 ++++--- tests/test_geometamaker.py | 397 ++++++++++++------------------- 4 files changed, 284 insertions(+), 343 deletions(-) diff --git a/src/geometamaker/__init__.py b/src/geometamaker/__init__.py index 9f56a76..af30c26 100644 --- a/src/geometamaker/__init__.py +++ b/src/geometamaker/__init__.py @@ -1 +1 @@ -from .geometamaker import MetadataControl +from .geometamaker import describe diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index e3a6c7b..ae9bb71 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -6,27 +6,20 @@ import frictionless import fsspec -import pygeoprocessing +import numpy from osgeo import gdal from osgeo import ogr from osgeo import osr +import pygeoprocessing import yaml from . import models -# https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml -class _NoAliasDumper(yaml.SafeDumper): - """Keep the yaml human-readable by avoiding anchors and aliases.""" - - def ignore_aliases(self, data): - return True - - LOGGER = logging.getLogger(__name__) -def get_file_type(filepath): +def detect_file_type(filepath): # TODO: zip, or other archives. Can they be represented as a Resource? # or do they need to be a Package? @@ -35,9 +28,11 @@ def get_file_type(filepath): # GDAL considers CSV a vector, so check against frictionless # first. 
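    # For example (an illustrative mapping, per the checks below):
    # a CSV is reported by frictionless as type 'table'; a compressed
    # file that is not itself a readable table reports a compression
    # and is treated as an 'archive'; GDAL-readable datasets map to
    # 'vector' or 'raster'; anything else raises ValueError.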
- filetype = frictionless.describe(filepath).type - if filetype == 'table': - return filetype + desc = frictionless.describe(filepath) + if desc.type == 'table': + return 'table' + if desc.compression: + return 'archive' gis_type = pygeoprocessing.get_gis_type(filepath) if gis_type == pygeoprocessing.VECTOR_TYPE: return 'vector' @@ -46,17 +41,25 @@ def get_file_type(filepath): raise ValueError() +def describe_archive(source_dataset_path): + description = frictionless.describe( + source_dataset_path, stats=True).to_dict() + return description + + def describe_vector(source_dataset_path): description = frictionless.describe( source_dataset_path, stats=True).to_dict() fields = [] vector = gdal.OpenEx(source_dataset_path, gdal.OF_VECTOR) layer = vector.GetLayer() + description['rows'] = layer.GetFeatureCount() for fld in layer.schema: fields.append( models.FieldSchema(name=fld.name, type=fld.type)) vector = layer = None description['schema'] = models.TableSchema(fields=fields) + description['fields'] = len(fields) info = pygeoprocessing.get_vector_info(source_dataset_path) spatial = { @@ -74,38 +77,42 @@ def describe_raster(source_dataset_path): bands = [] info = pygeoprocessing.get_raster_info(source_dataset_path) + # Some values of raster info are numpy types, which the + # yaml dumper doesn't know how to represent. for i in range(info['n_bands']): b = i + 1 - # band = raster.GetRasterBand(b) - # datatype = 'integer' if band.DataType < 6 else 'number' bands.append(models.BandSchema( index=b, gdal_type=info['datatype'], - numpy_type=info['numpy_type'], + numpy_type=numpy.dtype(info['numpy_type']).name, nodata=info['nodata'][i])) description['schema'] = models.RasterSchema( bands=bands, pixel_size=info['pixel_size'], raster_size=info['raster_size']) description['spatial'] = models.SpatialSchema( - bounding_box=info['bounding_box'], + bounding_box=[float(x) for x in info['bounding_box']], crs=info['projection_wkt']) description['sources'] = info['file_list'] return description def describe_table(source_dataset_path): - return frictionless.describe( + description = frictionless.describe( source_dataset_path, stats=True).to_dict() + description['schema'] = models.TableSchema(**description['schema']) + return description DESRCIBE_FUNCS = { + 'archive': describe_archive, 'table': describe_table, 'vector': describe_vector, 'raster': describe_raster } RESOURCE_MODELS = { + 'archive': models.ArchiveResource, 'table': models.TableResource, 'vector': models.VectorResource, 'raster': models.RasterResource @@ -125,55 +132,55 @@ class MetadataControl(object): """ - def __init__(self, source_dataset_path=None): - """Create an MCF instance, populated with properties of the dataset. - - The MCF will be valid according to the pygeometa schema. It has - all required properties. Properties of the dataset are used to - populate as many MCF properties as possible. Default/placeholder - values are used for properties that require user input. - - Instantiating without a ``source_dataset_path`` creates an MCF template. 
- - Args: - source_dataset_path (string): path or URL to dataset to which the - metadata applies - - """ - - # if source_dataset_path is not None: - self.datasource = source_dataset_path - self.data_package_path = f'{self.datasource}.yml' - - # Despite naming, this does not open a resource that must be closed - of = fsspec.open(self.datasource) - if not of.fs.exists(self.datasource): - raise FileNotFoundError(f'{self.datasource} does not exist') - - resource_type = get_file_type(source_dataset_path) - description = DESRCIBE_FUNCS[resource_type](source_dataset_path) - # this is nice for autodetect of field types, but sometimes - # we will know the table schema (invest MODEL_SPEC). - # Is there any benefit to passing in the known schema? Maybe not - # Can also just overwrite the schema attribute with known data after. - - # Load existing metadata file - try: - with fsspec.open(self.data_package_path, 'r') as file: - yaml_string = file.read() - - # This validates the existing yaml against our dataclasses. - existing_resource = RESOURCE_MODELS[resource_type]( - **yaml.safe_load(yaml_string)) - # overwrite properties that are intrinsic to the dataset, - # which is everything from `description` other than schema. - # Some parts of schema are intrinsic, but others are human-input - # so replace the whole thing for now. - del description['schema'] - self.metadata = dataclasses.replace( - existing_resource, **description) - - # Common path: metadata file does not already exist - except FileNotFoundError as err: - self.metadata = RESOURCE_MODELS[resource_type](**description) + +def describe(source_dataset_path): + """Create a metadata resource instance with properties of the dataset. + + Properties of the dataset are used to populate as many metadata + properties as possible. Default/placeholder + values are used for properties that require user input. + + Args: + source_dataset_path (string): path or URL to dataset to which the + metadata applies + + Returns + one of TableResource, VectorResource, RasterResource + """ + + data_package_path = f'{source_dataset_path}.yml' + + # Despite naming, this does not open a resource that must be closed + of = fsspec.open(source_dataset_path) + if not of.fs.exists(source_dataset_path): + raise FileNotFoundError(f'{source_dataset_path} does not exist') + + resource_type = detect_file_type(source_dataset_path) + description = DESRCIBE_FUNCS[resource_type](source_dataset_path) + # this is nice for autodetect of field types, but sometimes + # we will know the table schema (invest MODEL_SPEC). + # Is there any benefit to passing in the known schema? Maybe not + # Can also just overwrite the schema attribute with known data after. + + # Load existing metadata file + try: + with fsspec.open(data_package_path, 'r') as file: + yaml_string = file.read() + + # This validates the existing yaml against our dataclasses. + existing_resource = RESOURCE_MODELS[resource_type]( + **yaml.safe_load(yaml_string)) + # overwrite properties that are intrinsic to the dataset, + # which is everything from `description` other than schema. + # Some parts of schema are intrinsic, but others are human-input + # so replace the whole thing for now. 
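+        # e.g. (an illustrative note on dataclasses.replace semantics):
+        # dataclasses.replace(existing_resource, bytes=123) returns a new
+        # resource with only `bytes` overwritten; all other fields,
+        # including user-edited ones, carry over from existing_resource.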
+ del description['schema'] + resource = dataclasses.replace( + existing_resource, **description) + + # Common path: metadata file does not already exist + except FileNotFoundError as err: + resource = RESOURCE_MODELS[resource_type](**description) + + return resource diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index fde63db..42e0ee2 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -2,13 +2,13 @@ from dataclasses import dataclass import logging import os -import pprint import yaml LOGGER = logging.getLogger(__name__) + # https://stackoverflow.com/questions/13518819/avoid-references-in-pyyaml class _NoAliasDumper(yaml.SafeDumper): """Keep the yaml human-readable by avoiding anchors and aliases.""" @@ -80,6 +80,18 @@ class TableSchema: primaryKey: list = dataclasses.field(default_factory=list) foreignKeys: list = dataclasses.field(default_factory=list) + def __post_init__(self): + field_schemas = [] + for field in self.fields: + # Allow init of the resource with a schema of type + # FieldSchema, or type dict. Mostly because dataclasses.replace + # calls init, but the base object will have already been initialized. + if isinstance(field, FieldSchema): + field_schemas.append(field) + else: + field_schemas.append(FieldSchema(**field)) + self.fields = field_schemas + @dataclass class BandSchema: @@ -126,7 +138,7 @@ class Resource: name: str = '' title: str = '' description: str = '' - keywords: list = [] + keywords: list = dataclasses.field(default_factory=list) sources: list = dataclasses.field(default_factory=list) licenses: list = dataclasses.field(default_factory=list) citation: str = '' @@ -137,6 +149,9 @@ class Resource: purpose: str = '' contact: ContactSchema = ContactSchema() + def __post_init__(self): + self.metadata_path = f'{self.path}.yml' + def set_title(self, title): """Add a title for the dataset. @@ -144,11 +159,11 @@ def set_title(self, title): title (str) """ - self.metadata.title = title + self.title = title def get_title(self): """Get the title for the dataset.""" - return self.metadata.title + return self.title def set_description(self, description): """Add an description for the dataset. @@ -157,11 +172,11 @@ def set_description(self, description): description (str) """ - self.metadata.description = description + self.description = description def get_description(self): """Get the description for the dataset.""" - return self.metadata.description + return self.description def set_citation(self, citation): """Add a citation string for the dataset. @@ -170,11 +185,11 @@ def set_citation(self, citation): citation (str) """ - self.metadata.citation = citation + self.citation = citation def get_citation(self): """Get the citation for the dataset.""" - return self.metadata.citation + return self.citation def set_contact(self, organization=None, individual_name=None, position_name=None, email=None): @@ -189,13 +204,13 @@ def set_contact(self, organization=None, individual_name=None, """ if organization is not None: - self.metadata.contact.organization = organization + self.contact.organization = organization if individual_name is not None: - self.metadata.contact.individualname = individual_name + self.contact.individual_name = individual_name if position_name is not None: - self.metadata.contact.positionname = position_name + self.contact.position_name = position_name if email is not None: - self.metadata.contact.email = email + self.contact.email = email def get_contact(self): """Get metadata from a contact section. 
@@ -204,7 +219,7 @@ def get_contact(self): ContactSchema """ - return self.metadata.contact + return self.contact def set_doi(self, doi): """Add a doi string for the dataset. @@ -213,11 +228,11 @@ def set_doi(self, doi): doi (str) """ - self.metadata.doi = doi + self.doi = doi def get_doi(self): """Get the doi for the dataset.""" - return self.metadata.doi + return self.doi def set_edition(self, edition): """Set the edition for the dataset. @@ -226,7 +241,7 @@ def set_edition(self, edition): edition (str): version of the cited resource """ - self.metadata.edition = edition + self.edition = edition def get_edition(self): """Get the edition of the dataset. @@ -235,7 +250,7 @@ def get_edition(self): str or ``None`` if ``edition`` does not exist. """ - return self.metadata.edition + return self.edition def set_keywords(self, keywords): """Describe a dataset with a list of keywords. @@ -244,10 +259,10 @@ def set_keywords(self, keywords): keywords (list): sequence of strings """ - self.metadata.keywords = keywords + self.keywords = keywords def get_keywords(self): - return self.metadata.keywords + return self.keywords def set_license(self, title=None, path=None): """Add a license for the dataset. @@ -289,7 +304,7 @@ def set_lineage(self, statement): provenance of the dataset """ - self.metadata.lineage = statement + self.lineage = statement def get_lineage(self): """Get the lineage statement of the dataset. @@ -298,7 +313,7 @@ def get_lineage(self): str """ - return self.metadata.lineage + return self.lineage def set_purpose(self, purpose): """Add a purpose for the dataset. @@ -307,7 +322,7 @@ def set_purpose(self, purpose): purpose (str): description of the purpose of the source dataset """ - self.metadata.purpose = purpose + self.purpose = purpose def get_purpose(self): """Get ``purpose`` for the dataset. @@ -316,7 +331,7 @@ def get_purpose(self): str """ - return self.metadata.purpose + return self.purpose def set_url(self, url): """Add a url for the dataset. @@ -325,11 +340,11 @@ def set_url(self, url): url (str) """ - self.metadata.url = url + self.url = url def get_url(self): """Get the url for the dataset.""" - return self.metadata.url + return self.url def write(self, workspace=None): """Write datapackage yaml to disk. @@ -349,14 +364,14 @@ def write(self, workspace=None): """ if workspace is None: - target_path = self.data_package_path + target_path = self.metadata_path else: target_path = os.path.join( workspace, f'{os.path.basename(self.datasource)}.yml') with open(target_path, 'w') as file: file.write(yaml.dump( - dataclasses.asdict(self.metadata), Dumper=_NoAliasDumper)) + dataclasses.asdict(self), Dumper=_NoAliasDumper)) def to_string(self): pass @@ -366,10 +381,13 @@ def to_string(self): class TableResource(Resource): """Class for metadata for a table resource.""" + fields: int + rows: int # without post-init, schema ends up as a dict, or whatever is passed in. schema: TableSchema = dataclasses.field(default_factory=TableSchema) def __post_init__(self): + super().__post_init__() # Allow init of the resource with a schema of type # TableSchema, or type dict. Mostly because dataclasses.replace # calls init, but the base object will have already been initialized. 
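(A minimal sketch of the dict-or-dataclass handling that this __post_init__
pattern enables, assuming the dataclasses defined in this patch series; the
`fields` and `rows` counts below are placeholder values:)

    import dataclasses
    from geometamaker import models

    # When loaded from a yaml document, schema arrives as a plain dict
    resource = models.TableResource(
        fields=1, rows=1,
        schema={'fields': [{'name': 'a', 'type': 'integer'}]})
    assert isinstance(resource.schema, models.TableSchema)

    # dataclasses.replace calls __init__ again, this time passing the
    # already-built TableSchema, which __post_init__ passes through unchanged
    updated = dataclasses.replace(resource, title='A title')
    assert isinstance(updated.schema, models.TableSchema)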
@@ -396,7 +414,7 @@ def _get_field(self, name): raise KeyError( f'{self.schema} has no fields') for idx, field in enumerate(self.schema.fields): - if field['name'] == name: + if field.name == name: return idx, field raise KeyError( f'{self.schema} has no field named {name}') @@ -443,6 +461,14 @@ def get_field_description(self, name): return field +@dataclass(kw_only=True) +class ArchiveResource(Resource): + """Class for metadata for an archive resource.""" + + compression: str + innerpath: str + + @dataclass(kw_only=True) class VectorResource(TableResource): """Class for metadata for a vector resource.""" @@ -458,6 +484,7 @@ class RasterResource(Resource): spatial: SpatialSchema def __post_init__(self): + super().__post_init__() # Allow init of the resource with a schema of type # RasterSchema, or type dict. Mostly because dataclasses.replace # calls init, but the base object will have already been initialized. diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py index 0a3d455..5670de9 100644 --- a/tests/test_geometamaker.py +++ b/tests/test_geometamaker.py @@ -98,31 +98,31 @@ def tearDown(self): def test_file_does_not_exist(self): """MetadataControl: raises exception if given file does not exist.""" - from geometamaker import MetadataControl + import geometamaker with self.assertRaises(FileNotFoundError): - _ = MetadataControl('foo.tif') + _ = geometamaker.describe('foo.tif') - def test_blank_MetadataControl(self): - """MetadataControl: template has expected properties.""" - from geometamaker import MetadataControl + # def test_blank_geometamaker.describe(self): + # """MetadataControl: template has expected properties.""" + # import geometamaker - target_filepath = os.path.join(self.workspace_dir, 'mcf.yml') + # target_filepath = os.path.join(self.workspace_dir, 'mcf.yml') - mc = MetadataControl() - mc.validate() - mc._write_mcf(target_filepath) + # mc = geometamaker.describe() + # mc.validate() + # mc._write_mcf(target_filepath) - with open(target_filepath, 'r') as file: - actual = yaml.safe_load(file) - with open(os.path.join(REGRESSION_DATA, 'template.yml'), 'r') as file: - expected = yaml.safe_load(file) + # with open(target_filepath, 'r') as file: + # actual = yaml.safe_load(file) + # with open(os.path.join(REGRESSION_DATA, 'template.yml'), 'r') as file: + # expected = yaml.safe_load(file) - self.assertEqual(actual, expected) + # self.assertEqual(actual, expected) - def test_csv_MetadataControl(self): - """MetadataControl: validate basic csv MetadataControl.""" - from geometamaker import MetadataControl + def test_describe_csv(self): + """Test setting properties on csv.""" + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'data.csv') field_names = ['Strings', 'Ints', 'Reals'] @@ -132,49 +132,37 @@ def test_csv_MetadataControl(self): writer.writerow(field_names) writer.writerow(field_values) - mc = MetadataControl(datasource_path) - try: - mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') + resource = geometamaker.describe(datasource_path) self.assertEqual( - len(mc.mcf['content_info']['attributes']), + len(resource.schema.fields), len(field_names)) - self.assertEqual(mc.get_field_description('Strings')['type'], 'string') - self.assertEqual(mc.get_field_description('Ints')['type'], 'integer') - self.assertEqual(mc.get_field_description('Reals')['type'], 'number') + self.assertEqual(resource.get_field_description('Strings').type, 'string') + 
self.assertEqual(resource.get_field_description('Ints').type, 'integer') + self.assertEqual(resource.get_field_description('Reals').type, 'number') title = 'title' - abstract = 'some abstract' + description = 'some abstract' units = 'mm' - mc.set_field_description( + resource.set_field_description( field_names[1], title=title, - abstract=abstract) + description=description) # To demonstrate that properties can be added while preserving others - mc.set_field_description( + resource.set_field_description( field_names[1], units=units) - try: - mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') - attr = [attr for attr in mc.mcf['content_info']['attributes'] - if attr['name'] == field_names[1]][0] - self.assertEqual(attr['title'], title) - self.assertEqual(attr['abstract'], abstract) - self.assertEqual(attr['units'], units) + field = [field for field in resource.schema.fields + if field.name == field_names[1]][0] + self.assertEqual(field.title, title) + self.assertEqual(field.description, description) + self.assertEqual(field.units, units) - def test_bad_csv_MetadataControl(self): + def test_describe_bad_csv(self): """MetadataControl: CSV with extra item in row does not fail.""" - from geometamaker import MetadataControl + import geometamaker - datasource_path = os.path.join('data.csv') + datasource_path = os.path.join(self.workspace_dir, 'data.csv') field_names = ['Strings', 'Ints', 'Reals'] field_values = ['foo', 1, 0.9, 'extra'] with open(datasource_path, 'w') as file: @@ -182,24 +170,19 @@ def test_bad_csv_MetadataControl(self): writer.writerow(field_names) writer.writerow(field_values) - mc = MetadataControl(datasource_path) - try: - mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') - mc.write() + resource = geometamaker.describe(datasource_path) + + resource.write() self.assertEqual( - len(mc.mcf['content_info']['attributes']), + len(resource.schema.fields), len(field_names)) - self.assertEqual(mc.get_field_description('Strings')['type'], 'string') - self.assertEqual(mc.get_field_description('Ints')['type'], 'integer') - self.assertEqual(mc.get_field_description('Reals')['type'], 'number') + self.assertEqual(resource.get_field_description('Strings').type, 'string') + self.assertEqual(resource.get_field_description('Ints').type, 'integer') + self.assertEqual(resource.get_field_description('Reals').type, 'number') - def test_vector_MetadataControl(self): - """MetadataControl: validate basic vector MetadataControl.""" - from geometamaker import MetadataControl + def test_describe_vector(self): + """Test basic vector.""" + import geometamaker field_map = { f'field_{k}': k @@ -213,217 +196,141 @@ def test_vector_MetadataControl(self): self.workspace_dir, f'vector.{ext}') create_vector(datasource_path, field_map, driver) - mc = MetadataControl(datasource_path) - try: - mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') - mc.write() + resource = geometamaker.describe(datasource_path) + self.assertTrue(isinstance( + resource.spatial, geometamaker.models.SpatialSchema)) + + resource.write() self.assertTrue(os.path.exists(f'{datasource_path}.yml')) - def test_vector_no_fields(self): - """MetadataControl: validate MetadataControl for basic vector with no fields.""" - from geometamaker import MetadataControl + def test_describe_vector_no_fields(self): + """Test metadata 
for basic vector with no fields.""" + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'vector.geojson') create_vector(datasource_path, None) - mc = MetadataControl(datasource_path) - try: - mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') - mc.write() + resource = geometamaker.describe(datasource_path) + self.assertEqual(len(resource.schema.fields), 0) - def test_raster_MetadataControl(self): - """MetadataControl: validate basic raster MetadataControl.""" - from geometamaker import MetadataControl + def test_describe_raster(self): + """Test metadata for basic raster.""" + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) - try: - mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') - mc.write() - - def test_vector_attributes(self): - """MetadataControl: validate vector with extra attribute metadata.""" - from geometamaker import MetadataControl + resource = geometamaker.describe(datasource_path) + self.assertTrue(isinstance( + resource.spatial, geometamaker.models.SpatialSchema)) - datasource_path = os.path.join(self.workspace_dir, 'vector.geojson') - field_name = 'foo' - field_map = { - field_name: list(_OGR_TYPES_VALUES_MAP)[0]} - create_vector(datasource_path, field_map) - - mc = MetadataControl(datasource_path) - title = 'title' - abstract = 'some abstract' - units = 'mm' - mc.set_field_description( - field_name, - title=title, - abstract=abstract) - # To demonstrate that properties can be added while preserving others - mc.set_field_description( - field_name, - units=units) - try: - mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') - - self.assertEqual( - len(mc.mcf['content_info']['attributes']), - len(field_map)) - attr = [attr for attr in mc.mcf['content_info']['attributes'] - if attr['name'] == field_name][0] - self.assertEqual(attr['title'], title) - self.assertEqual(attr['abstract'], abstract) - self.assertEqual(attr['units'], units) + resource.write() + self.assertTrue(os.path.exists(f'{datasource_path}.yml')) def test_raster_attributes(self): - """MetadataControl: validate raster with extra attribute metadata.""" - from geometamaker import MetadataControl + """Test adding extra attribute metadata to raster.""" + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) + numpy_type = numpy.int16 + create_raster(numpy_type, datasource_path) band_number = 1 - mc = MetadataControl(datasource_path) - name = 'name' + resource = geometamaker.describe(datasource_path) title = 'title' - abstract = 'some abstract' + description = 'some abstract' units = 'mm' - mc.set_band_description( + resource.set_band_description( band_number, - name=name, title=title, - abstract=abstract) + description=description) # To demonstrate that properties can be added while preserving others - mc.set_band_description( + resource.set_band_description( band_number, units=units) - try: - mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') + raster_info = pygeoprocessing.get_raster_info(datasource_path) self.assertEqual( - len(mc.mcf['content_info']['attributes']), - 
pygeoprocessing.get_raster_info(datasource_path)['n_bands']) - attr = mc.mcf['content_info']['attributes'][band_number - 1] - self.assertEqual(attr['name'], name) - self.assertEqual(attr['title'], title) - self.assertEqual(attr['abstract'], abstract) - self.assertEqual(attr['units'], units) - - def test_set_abstract(self): - """MetadataControl: set and get an abstract.""" - - from geometamaker import MetadataControl - - abstract = 'foo bar' - mc = MetadataControl() - mc.set_abstract(abstract) - self.assertEqual(mc.get_abstract(), abstract) + len(resource.schema.bands), raster_info['n_bands']) + band_idx = band_number - 1 + band = resource.schema.bands[band_idx] + self.assertEqual(band.title, title) + self.assertEqual(band.description, description) + self.assertEqual(band.gdal_type, raster_info['datatype']) + self.assertEqual(band.numpy_type, numpy.dtype(numpy_type).name) + self.assertEqual(band.nodata, raster_info['nodata'][band_idx]) + self.assertEqual(band.units, units) + + def test_set_description(self): + """Test set and get a description for a resource.""" + + import geometamaker + + description = 'foo bar' + resource = geometamaker.models.Resource() + resource.set_description(description) + self.assertEqual(resource.get_description(), description) def test_set_citation(self): - """MetadataControl: set and get a citation.""" + """Test set and get a citation for resource.""" - from geometamaker import MetadataControl + import geometamaker citation = 'foo bar' - mc = MetadataControl() - mc.set_citation(citation) - self.assertEqual(mc.get_citation(), citation) + resource = geometamaker.models.Resource() + resource.set_citation(citation) + self.assertEqual(resource.get_citation(), citation) def test_set_contact(self): - """MetadataControl: set and get a contact section.""" + """Test set and get a contact section for a resource.""" - from geometamaker import MetadataControl + import geometamaker org = 'natcap' name = 'nat' position = 'boss' email = 'abc@def' - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) - mc.set_contact( - organization=org, individualname=name, - positionname=position, email=email) - contact_dict = mc.get_contact() - self.assertEqual(contact_dict['organization'], org) - self.assertEqual(contact_dict['individualname'], name) - self.assertEqual(contact_dict['positionname'], position) - self.assertEqual(contact_dict['email'], email) - - def test_set_contact_from_dict(self): - """MetadataControl: set a contact section from a dict.""" - - from geometamaker import MetadataControl - - contact_dict = { - 'organization': 'natcap', - 'individualname': 'nat', - 'positionname': 'boss', - 'email': 'abc@def', - 'fax': '555-1234', - 'postalcode': '01234' - } - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) - mc.set_contact(**contact_dict) - actual = mc.get_contact() - for k, v in contact_dict.items(): - self.assertEqual(actual[k], v) + resource = geometamaker.models.Resource() + resource.set_contact( + organization=org, individual_name=name, + position_name=position, email=email) + contact = resource.get_contact() + self.assertEqual(contact.organization, org) + self.assertEqual(contact.individual_name, name) + self.assertEqual(contact.position_name, position) + self.assertEqual(contact.email, email) def test_set_contact_validates(self): """MetadataControl: invalid type raises 
ValidationError.""" - from geometamaker import MetadataControl + import geometamaker postalcode = 55555 # should be a string datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) with self.assertRaises(ValidationError): mc.set_contact(postalcode=postalcode) def test_set_doi(self): """MetadataControl: set and get a doi.""" - from geometamaker import MetadataControl + import geometamaker doi = '10.foo/bar' - mc = MetadataControl() + mc = geometamaker.describe() mc.set_doi(doi) self.assertEqual(mc.get_doi(), doi) def test_set_get_edition(self): """MetadataControl: set and get dataset edition.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) version = '3.14' mc.set_edition(version) self.assertEqual(mc.get_edition(), version) @@ -431,11 +338,11 @@ def test_set_get_edition(self): def test_set_edition_validates(self): """MetadataControl: test set edition raises ValidationError.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) version = 3.14 # should be a string with self.assertRaises(ValidationError): mc.set_edition(version) @@ -443,11 +350,11 @@ def test_set_edition_validates(self): def test_set_keywords(self): """MetadataControl: set keywords to default section.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) mc.set_keywords(['foo', 'bar']) self.assertEqual( @@ -457,11 +364,11 @@ def test_set_keywords(self): def test_set_keywords_to_section(self): """MetadataControl: set keywords to named section.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) mc.set_keywords(['foo', 'bar'], section='first') mc.set_keywords(['baz'], section='second') @@ -475,11 +382,11 @@ def test_set_keywords_to_section(self): def test_overwrite_keywords(self): """MetadataControl: overwrite keywords in existing section.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) mc.set_keywords(['foo', 'bar']) mc.set_keywords(['baz']) @@ -489,21 +396,21 @@ def test_overwrite_keywords(self): def test_keywords_raises_validation_error(self): """MetadataControl: set keywords validates.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) with self.assertRaises(ValidationError): mc.set_keywords('foo', 'bar') def test_set_and_get_license(self): 
"""MetadataControl: set purpose of dataset.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) name = 'CC-BY-4.0' url = 'https://creativecommons.org/licenses/by/4.0/' @@ -528,11 +435,11 @@ def test_set_and_get_license(self): def test_set_license_validates(self): """MetadataControl: test set license raises ValidationError.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) name = 4.0 # should be a string with self.assertRaises(ValidationError): mc.set_license(name=name) @@ -541,11 +448,11 @@ def test_set_license_validates(self): def test_set_and_get_lineage(self): """MetadataControl: set lineage of dataset.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) statement = 'a lineage statment' mc.set_lineage(statement) @@ -554,22 +461,22 @@ def test_set_and_get_lineage(self): def test_set_lineage_validates(self): """MetadataControl: test set lineage raises ValidationError.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) lineage = ['some statement'] # should be a string with self.assertRaises(ValidationError): mc.set_lineage(lineage) def test_set_and_get_purpose(self): """MetadataControl: set purpose of dataset.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) purpose = 'foo' mc.set_purpose(purpose) self.assertEqual(mc.get_purpose(), purpose) @@ -577,28 +484,28 @@ def test_set_and_get_purpose(self): def test_set_url(self): """MetadataControl: set and get a url.""" - from geometamaker import MetadataControl + import geometamaker url = 'http://foo/bar' - mc = MetadataControl() + mc = geometamaker.describe() mc.set_url(url) self.assertEqual(mc.get_url(), url) def test_preexisting_mc_raster(self): """MetadataControl: test reading and ammending an existing MCF raster.""" - from geometamaker import MetadataControl + import geometamaker title = 'Title' keyword = 'foo' band_name = 'The Band' datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) mc.set_title(title) mc.set_band_description(1, name=band_name) mc.write() - new_mc = MetadataControl(datasource_path) + new_mc = geometamaker.describe(datasource_path) new_mc.set_keywords([keyword]) self.assertEqual(new_mc.mcf['metadata']['hierarchylevel'], 'dataset') @@ -611,12 +518,12 @@ def test_preexisting_mc_raster(self): def test_preexisting_mc_raster_new_bands(self): """MetadataControl: test existing MCF when the raster has new bands.""" - from geometamaker import 
MetadataControl + import geometamaker band_name = 'The Band' datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path, n_bands=1) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) mc.set_band_description(1, name=band_name) self.assertEqual(mc.get_band_description(1)['type'], 'integer') mc.write() @@ -625,7 +532,7 @@ def test_preexisting_mc_raster_new_bands(self): # There's an extra band, and the datatype has changed create_raster(numpy.float32, datasource_path, n_bands=2) - new_mc = MetadataControl(datasource_path) + new_mc = geometamaker.describe(datasource_path) band1 = new_mc.get_band_description(1) self.assertEqual(band1['name'], band_name) @@ -636,7 +543,7 @@ def test_preexisting_mc_raster_new_bands(self): def test_preexisting_mc_vector(self): """MetadataControl: test reading and ammending an existing MCF vector.""" - from geometamaker import MetadataControl + import geometamaker title = 'Title' datasource_path = os.path.join(self.workspace_dir, 'vector.geojson') @@ -645,12 +552,12 @@ def test_preexisting_mc_vector(self): field_map = { field_name: list(_OGR_TYPES_VALUES_MAP)[0]} create_vector(datasource_path, field_map) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) mc.set_title(title) mc.set_field_description(field_name, abstract=description) mc.write() - new_mc = MetadataControl(datasource_path) + new_mc = geometamaker.describe(datasource_path) self.assertEqual(new_mc.mcf['metadata']['hierarchylevel'], 'dataset') self.assertEqual( @@ -660,7 +567,7 @@ def test_preexisting_mc_vector(self): def test_preexisting_mc_vector_new_fields(self): """MetadataControl: test an existing MCF for vector with new fields.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'vector.geojson') field1_name = 'foo' @@ -668,7 +575,7 @@ def test_preexisting_mc_vector_new_fields(self): field_map = { field1_name: list(_OGR_TYPES_VALUES_MAP)[0]} create_vector(datasource_path, field_map) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) mc.set_field_description(field1_name, abstract=description) self.assertEqual( mc.get_field_description(field1_name)['type'], 'integer') @@ -681,7 +588,7 @@ def test_preexisting_mc_vector_new_fields(self): field1_name: list(_OGR_TYPES_VALUES_MAP)[2], field2_name: list(_OGR_TYPES_VALUES_MAP)[3]} create_vector(datasource_path, new_field_map) - new_mc = MetadataControl(datasource_path) + new_mc = geometamaker.describe(datasource_path) field1 = new_mc.get_field_description(field1_name) self.assertEqual(field1['abstract'], description) @@ -691,11 +598,11 @@ def test_preexisting_mc_vector_new_fields(self): def test_invalid_preexisting_mcf(self): """MetadataControl: test overwriting an existing invalid MetadataControl.""" - from geometamaker import MetadataControl + import geometamaker title = 'Title' datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) mc.set_title(title) # delete a required property and ensure invalid MetadataControl @@ -704,7 +611,7 @@ def test_invalid_preexisting_mcf(self): mc.validate() mc.write() # intentionally writing an invalid MetadataControl - new_mc = MetadataControl(datasource_path) + new_mc = geometamaker.describe(datasource_path) # The new MetadataControl should not have values from the 
invalid MetadataControl self.assertEqual( @@ -725,11 +632,11 @@ def test_invalid_preexisting_mcf(self): def test_write_to_local_workspace(self): """MetadataControl: test write metadata to a different location.""" - from geometamaker import MetadataControl + import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = MetadataControl(datasource_path) + mc = geometamaker.describe(datasource_path) temp_dir = tempfile.mkdtemp(dir=self.workspace_dir) mc.write(workspace=temp_dir) From 2e0b00eb50ee7ded46aedacfe4f68ba92ecf8364 Mon Sep 17 00:00:00 2001 From: davemfish Date: Mon, 22 Jul 2024 11:54:23 -0400 Subject: [PATCH 08/15] tests passing --- src/geometamaker/geometamaker.py | 67 +++--- src/geometamaker/models.py | 16 +- tests/test_geometamaker.py | 358 ++++++++++--------------------- 3 files changed, 169 insertions(+), 272 deletions(-) diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index ae9bb71..dfd3324 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -56,7 +56,7 @@ def describe_vector(source_dataset_path): description['rows'] = layer.GetFeatureCount() for fld in layer.schema: fields.append( - models.FieldSchema(name=fld.name, type=fld.type)) + models.FieldSchema(name=fld.name, type=fld.GetTypeName())) vector = layer = None description['schema'] = models.TableSchema(fields=fields) description['fields'] = len(fields) @@ -119,20 +119,6 @@ def describe_table(source_dataset_path): } -class MetadataControl(object): - """Encapsulates the Metadata Control File and methods for populating it. - - A Metadata Control File (MCF) is a YAML file that complies with the - MCF specification defined by pygeometa. - https://github.com/geopython/pygeometa - - Attributes: - datasource (string): path to dataset to which the metadata applies - mcf (dict): dict representation of the Metadata Control File - - """ - - def describe(source_dataset_path): """Create a metadata resource instance with properties of the dataset. @@ -145,36 +131,63 @@ def describe(source_dataset_path): metadata applies Returns - one of TableResource, VectorResource, RasterResource + instance of + ArchiveResource, TableResource, + VectorResource, RasterResource """ data_package_path = f'{source_dataset_path}.yml' - # Despite naming, this does not open a resource that must be closed + # Despite naming, this does not open a file that must be closed of = fsspec.open(source_dataset_path) if not of.fs.exists(source_dataset_path): raise FileNotFoundError(f'{source_dataset_path} does not exist') resource_type = detect_file_type(source_dataset_path) description = DESRCIBE_FUNCS[resource_type](source_dataset_path) - # this is nice for autodetect of field types, but sometimes - # we will know the table schema (invest MODEL_SPEC). - # Is there any benefit to passing in the known schema? Maybe not - # Can also just overwrite the schema attribute with known data after. # Load existing metadata file try: with fsspec.open(data_package_path, 'r') as file: yaml_string = file.read() - # This validates the existing yaml against our dataclasses. existing_resource = RESOURCE_MODELS[resource_type]( **yaml.safe_load(yaml_string)) - # overwrite properties that are intrinsic to the dataset, - # which is everything from `description` other than schema. - # Some parts of schema are intrinsic, but others are human-input - # so replace the whole thing for now. 
-        del description['schema']
+        if 'schema' in description:
+            if isinstance(description['schema'], models.RasterSchema):
+                # If existing band metadata still matches the schema of the
+                # file, carry over metadata from the existing file because it
+                # could include human-defined properties.
+                new_bands = []
+                for band in description['schema'].bands:
+                    try:
+                        eband = existing_resource.get_band_description(band.index)
+                        # TODO: rewrite this as __eq__ of BandSchema?
+                        if (band.numpy_type, band.gdal_type, band.nodata) == (
+                                eband.numpy_type, eband.gdal_type, eband.nodata):
+                            band = dataclasses.replace(band, **eband.__dict__)
+                    except IndexError:
+                        pass
+                    new_bands.append(band)
+                description['schema'].bands = new_bands
+            if isinstance(description['schema'], models.TableSchema):
+                # If existing field metadata still matches the schema of the
+                # file, carry over metadata from the existing file because it
+                # could include human-defined properties.
+                new_fields = []
+                for field in description['schema'].fields:
+                    try:
+                        efield = existing_resource.get_field_description(
+                            field.name)
+                        # TODO: rewrite this as __eq__ of FieldSchema?
+                        if field.type == efield.type:
+                            field = dataclasses.replace(field, **efield.__dict__)
+                    except KeyError:
+                        pass
+                    new_fields.append(field)
+                description['schema'].fields = new_fields
+        # overwrite properties that are intrinsic to the dataset
+        # TODO: any other checks that the resources represent the same data?
         resource = dataclasses.replace(
             existing_resource, **description)
 
     # Common path: metadata file does not already exist
     except FileNotFoundError as err:
         resource = RESOURCE_MODELS[resource_type](**description)
 
     return resource
diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py
index 42e0ee2..c8b52cc 100644
--- a/src/geometamaker/models.py
+++ b/src/geometamaker/models.py
@@ -101,6 +101,7 @@ class BandSchema:
     gdal_type: int
     numpy_type: str
     nodata: int | float
+    title: str = ''
     description: str = ''
 
 
@@ -112,6 +113,19 @@ class RasterSchema:
     bands: list
     pixel_size: list
     raster_size: list
 
+    def __post_init__(self):
+        bands = []
+        for band in self.bands:
+            # When loading an existing document
+            # from serialized data, we need to init a BandSchema for
+            # each band dict. But it's also okay to init a RasterSchema
+            # with bands as list of BandSchema.
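+            # e.g. (illustrative) these two are equivalent after init:
+            #   RasterSchema(bands=[BandSchema(index=1, ...)], ...)
+            #   RasterSchema(bands=[{'index': 1, ...}], ...)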
+ if isinstance(band, BandSchema): + bands.append(band) + else: + bands.append(BandSchema(**band)) + self.bands = bands + @dataclass(kw_only=True) class Resource: @@ -367,7 +381,7 @@ def write(self, workspace=None): target_path = self.metadata_path else: target_path = os.path.join( - workspace, f'{os.path.basename(self.datasource)}.yml') + workspace, os.path.basename(self.metadata_path)) with open(target_path, 'w') as file: file.write(yaml.dump( diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py index 5670de9..74405a3 100644 --- a/tests/test_geometamaker.py +++ b/tests/test_geometamaker.py @@ -301,198 +301,94 @@ def test_set_contact(self): self.assertEqual(contact.position_name, position) self.assertEqual(contact.email, email) - def test_set_contact_validates(self): - """MetadataControl: invalid type raises ValidationError.""" - - import geometamaker - - postalcode = 55555 # should be a string - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - with self.assertRaises(ValidationError): - mc.set_contact(postalcode=postalcode) - def test_set_doi(self): - """MetadataControl: set and get a doi.""" + """Test set and get a doi.""" import geometamaker doi = '10.foo/bar' - mc = geometamaker.describe() - mc.set_doi(doi) - self.assertEqual(mc.get_doi(), doi) + resource = geometamaker.models.Resource() + resource.set_doi(doi) + self.assertEqual(resource.get_doi(), doi) def test_set_get_edition(self): - """MetadataControl: set and get dataset edition.""" + """Test set and get dataset edition.""" import geometamaker - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) + resource = geometamaker.models.Resource() version = '3.14' - mc.set_edition(version) - self.assertEqual(mc.get_edition(), version) - - def test_set_edition_validates(self): - """MetadataControl: test set edition raises ValidationError.""" - - import geometamaker - - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - version = 3.14 # should be a string - with self.assertRaises(ValidationError): - mc.set_edition(version) + resource.set_edition(version) + self.assertEqual(resource.get_edition(), version) def test_set_keywords(self): - """MetadataControl: set keywords to default section.""" + """Test set and get keywords.""" import geometamaker - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - mc.set_keywords(['foo', 'bar']) + resource = geometamaker.models.Resource() + resource.set_keywords(['foo', 'bar']) self.assertEqual( - mc.mcf['identification']['keywords']['default']['keywords'], + resource.get_keywords(), ['foo', 'bar']) - def test_set_keywords_to_section(self): - """MetadataControl: set keywords to named section.""" - + def test_set_and_get_license(self): + """Test set and get license for resource.""" import geometamaker - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - mc.set_keywords(['foo', 'bar'], section='first') - mc.set_keywords(['baz'], section='second') - - self.assertEqual( - mc.mcf['identification']['keywords']['first']['keywords'], - ['foo', 'bar']) - self.assertEqual( 
- mc.mcf['identification']['keywords']['second']['keywords'], - ['baz']) - - def test_overwrite_keywords(self): - """MetadataControl: overwrite keywords in existing section.""" - - import geometamaker + resource = geometamaker.models.Resource() + title = 'CC-BY-4.0' + path = 'https://creativecommons.org/licenses/by/4.0/' - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - mc.set_keywords(['foo', 'bar']) - mc.set_keywords(['baz']) + resource.set_license(title=title) self.assertEqual( - mc.mcf['identification']['keywords']['default']['keywords'], - ['baz']) - - def test_keywords_raises_validation_error(self): - """MetadataControl: set keywords validates.""" - import geometamaker - - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - with self.assertRaises(ValidationError): - mc.set_keywords('foo', 'bar') - - def test_set_and_get_license(self): - """MetadataControl: set purpose of dataset.""" - import geometamaker - - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - name = 'CC-BY-4.0' - url = 'https://creativecommons.org/licenses/by/4.0/' + resource.get_license().__dict__, {'title': title, 'path': ''}) - mc.set_license(name=name) + resource.set_license(path=path) self.assertEqual( - mc.mcf['identification']['accessconstraints'], - 'license') - self.assertEqual(mc.get_license(), {'name': name, 'url': ''}) - - mc.set_license(url=url) - self.assertEqual(mc.get_license(), {'name': '', 'url': url}) - - mc.set_license(name=name, url=url) - self.assertEqual(mc.get_license(), {'name': name, 'url': url}) + resource.get_license().__dict__, {'title': '', 'path': path}) - mc.set_license() - self.assertEqual(mc.get_license(), {'name': '', 'url': ''}) + resource.set_license(title=title, path=path) self.assertEqual( - mc.mcf['identification']['accessconstraints'], - 'otherRestrictions') - - def test_set_license_validates(self): - """MetadataControl: test set license raises ValidationError.""" - - import geometamaker + resource.get_license().__dict__, {'title': title, 'path': path}) - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - name = 4.0 # should be a string - with self.assertRaises(ValidationError): - mc.set_license(name=name) - with self.assertRaises(ValidationError): - mc.set_license(url=name) + resource.set_license() + self.assertEqual( + resource.get_license().__dict__, {'title': '', 'path': ''}) def test_set_and_get_lineage(self): - """MetadataControl: set lineage of dataset.""" + """Test set and get lineage of a resource.""" import geometamaker - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) + resource = geometamaker.models.Resource() statement = 'a lineage statment' - mc.set_lineage(statement) - self.assertEqual(mc.get_lineage(), statement) - - def test_set_lineage_validates(self): - """MetadataControl: test set lineage raises ValidationError.""" - - import geometamaker - - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - lineage = ['some 
statement']  # should be a string
-        with self.assertRaises(ValidationError):
-            mc.set_lineage(lineage)
+        resource.set_lineage(statement)
+        self.assertEqual(resource.get_lineage(), statement)
 
     def test_set_and_get_purpose(self):
-        """MetadataControl: set purpose of dataset."""
+        """Test set and get purpose of a resource."""
 
         import geometamaker
 
-        datasource_path = os.path.join(self.workspace_dir, 'raster.tif')
-        create_raster(numpy.int16, datasource_path)
-        mc = geometamaker.describe(datasource_path)
+        resource = geometamaker.models.Resource()
         purpose = 'foo'
-        mc.set_purpose(purpose)
-        self.assertEqual(mc.get_purpose(), purpose)
+        resource.set_purpose(purpose)
+        self.assertEqual(resource.get_purpose(), purpose)
 
     def test_set_url(self):
-        """MetadataControl: set and get a url."""
+        """Test set and get a url."""
 
         import geometamaker
 
         url = 'http://foo/bar'
-        mc = geometamaker.describe()
-        mc.set_url(url)
-        self.assertEqual(mc.get_url(), url)
+        resource = geometamaker.models.Resource()
+        resource.set_url(url)
+        self.assertEqual(resource.get_url(), url)
 
-    def test_preexisting_mc_raster(self):
-        """MetadataControl: test reading and ammending an existing MCF raster."""
+    def test_preexisting_metadata_document(self):
+        """Test reading and amending an existing metadata document."""
         import geometamaker
 
         title = 'Title'
@@ -500,73 +396,48 @@ def test_preexisting_mc_raster(self):
         band_name = 'The Band'
         datasource_path = os.path.join(self.workspace_dir, 'raster.tif')
         create_raster(numpy.int16, datasource_path)
-        mc = geometamaker.describe(datasource_path)
-        mc.set_title(title)
-        mc.set_band_description(1, name=band_name)
-        mc.write()
+        resource = geometamaker.describe(datasource_path)
+        resource.set_title(title)
+        resource.set_band_description(1, title=band_name)
+        resource.write()
 
-        new_mc = geometamaker.describe(datasource_path)
-        new_mc.set_keywords([keyword])
+        new_resource = geometamaker.describe(datasource_path)
+        new_resource.set_keywords([keyword])
 
-        self.assertEqual(new_mc.mcf['metadata']['hierarchylevel'], 'dataset')
         self.assertEqual(
-            new_mc.get_title(), title)
+            new_resource.get_title(), title)
         self.assertEqual(
-            new_mc.get_band_description(1)['name'], band_name)
+            new_resource.get_band_description(1).title, band_name)
         self.assertEqual(
-            new_mc.get_keywords()['keywords'], [keyword])
+            new_resource.get_keywords(), [keyword])
 
-    def test_preexisting_mc_raster_new_bands(self):
-        """MetadataControl: test existing MCF when the raster has new bands."""
+    def test_preexisting_doc_new_bands(self):
+        """Test existing metadata document when the raster has new bands."""
         import geometamaker
 
         band_name = 'The Band'
         datasource_path = os.path.join(self.workspace_dir, 'raster.tif')
         create_raster(numpy.int16, datasource_path, n_bands=1)
-        mc = geometamaker.describe(datasource_path)
-        mc.set_band_description(1, name=band_name)
-        self.assertEqual(mc.get_band_description(1)['type'], 'integer')
-        mc.write()
+        resource = geometamaker.describe(datasource_path)
+        resource.set_band_description(1, title=band_name)
+        self.assertEqual(resource.get_band_description(1).numpy_type, 'int16')
+        resource.write()
 
         # The raster is modified after its original metadata was written
         # There's an extra band, and the datatype has changed
         create_raster(numpy.float32, datasource_path, n_bands=2)
-        new_mc = geometamaker.describe(datasource_path)
-
-        band1 = new_mc.get_band_description(1)
-        self.assertEqual(band1['name'], band_name)
-        self.assertEqual(band1['type'], 'number')
-        band2 = new_mc.get_band_description(2)
-        
self.assertEqual(band2['name'], '') - self.assertEqual(band2['type'], 'number') - - def test_preexisting_mc_vector(self): - """MetadataControl: test reading and ammending an existing MCF vector.""" - import geometamaker - - title = 'Title' - datasource_path = os.path.join(self.workspace_dir, 'vector.geojson') - field_name = 'foo' - description = 'description' - field_map = { - field_name: list(_OGR_TYPES_VALUES_MAP)[0]} - create_vector(datasource_path, field_map) - mc = geometamaker.describe(datasource_path) - mc.set_title(title) - mc.set_field_description(field_name, abstract=description) - mc.write() - - new_mc = geometamaker.describe(datasource_path) + new_resource = geometamaker.describe(datasource_path) - self.assertEqual(new_mc.mcf['metadata']['hierarchylevel'], 'dataset') - self.assertEqual( - new_mc.get_title(), title) - self.assertEqual( - new_mc.get_field_description(field_name)['abstract'], description) + band1 = new_resource.get_band_description(1) + self.assertEqual(band1.title, '') + self.assertEqual(band1.numpy_type, 'float32') + band2 = new_resource.get_band_description(2) + self.assertEqual(band2.title, '') + self.assertEqual(band2.numpy_type, 'float32') - def test_preexisting_mc_vector_new_fields(self): - """MetadataControl: test an existing MCF for vector with new fields.""" + def test_preexisting_doc_new_fields(self): + """Test an existing metadata document for vector with new fields.""" import geometamaker datasource_path = os.path.join(self.workspace_dir, 'vector.geojson') @@ -575,11 +446,11 @@ def test_preexisting_mc_vector_new_fields(self): field_map = { field1_name: list(_OGR_TYPES_VALUES_MAP)[0]} create_vector(datasource_path, field_map) - mc = geometamaker.describe(datasource_path) - mc.set_field_description(field1_name, abstract=description) + resource = geometamaker.describe(datasource_path) + resource.set_field_description(field1_name, description=description) self.assertEqual( - mc.get_field_description(field1_name)['type'], 'integer') - mc.write() + resource.get_field_description(field1_name).type, 'Integer') + resource.write() # Modify the dataset by changing the field type of the # existing field. And add a second field. 
@@ -588,62 +459,61 @@ def test_preexisting_mc_vector_new_fields(self): field1_name: list(_OGR_TYPES_VALUES_MAP)[2], field2_name: list(_OGR_TYPES_VALUES_MAP)[3]} create_vector(datasource_path, new_field_map) - new_mc = geometamaker.describe(datasource_path) - - field1 = new_mc.get_field_description(field1_name) - self.assertEqual(field1['abstract'], description) - self.assertEqual(field1['type'], 'number') - field2 = new_mc.get_field_description(field2_name) - self.assertEqual(field2['type'], 'string') - - def test_invalid_preexisting_mcf(self): - """MetadataControl: test overwriting an existing invalid MetadataControl.""" - import geometamaker - title = 'Title' - datasource_path = os.path.join(self.workspace_dir, 'raster.tif') - create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) - mc.set_title(title) - - # delete a required property and ensure invalid MetadataControl - del mc.mcf['mcf'] - with self.assertRaises(ValidationError): - mc.validate() - mc.write() # intentionally writing an invalid MetadataControl - - new_mc = geometamaker.describe(datasource_path) - - # The new MetadataControl should not have values from the invalid MetadataControl - self.assertEqual( - new_mc.mcf['identification']['title'], '') - - try: - new_mc.validate() - except (MCFValidationError, SchemaError) as e: - self.fail( - 'unexpected validation error occurred\n' - f'{e}') - try: - new_mc.write() - except Exception as e: - self.fail( - 'unexpected write error occurred\n' - f'{e}') + new_resource = geometamaker.describe(datasource_path) + + field1 = new_resource.get_field_description(field1_name) + # The field type changed, so the description does not carry over + self.assertEqual(field1.description, '') + self.assertEqual(field1.type, 'Real') + field2 = new_resource.get_field_description(field2_name) + self.assertEqual(field2.type, 'String') + + # TODO: this is important, still need to design for it. 
+ # def test_invalid_preexisting_mcf(self): + # """Test overwriting an existing invalid metadata document.""" + # import geometamaker + # title = 'Title' + # datasource_path = os.path.join(self.workspace_dir, 'raster.tif') + # create_raster(numpy.int16, datasource_path) + # mc = geometamaker.describe(datasource_path) + # mc.set_title(title) + + # # delete a required property and ensure invalid MetadataControl + # del mc.mcf['mcf'] + # with self.assertRaises(ValidationError): + # mc.validate() + # mc.write() # intentionally writing an invalid MetadataControl + + # new_mc = geometamaker.describe(datasource_path) + + # # The new MetadataControl should not have values from the invalid MetadataControl + # self.assertEqual( + # new_mc.mcf['identification']['title'], '') + + # try: + # new_mc.validate() + # except (MCFValidationError, SchemaError) as e: + # self.fail( + # 'unexpected validation error occurred\n' + # f'{e}') + # try: + # new_mc.write() + # except Exception as e: + # self.fail( + # 'unexpected write error occurred\n' + # f'{e}') def test_write_to_local_workspace(self): - """MetadataControl: test write metadata to a different location.""" + """Test write metadata to a different location.""" import geometamaker datasource_path = os.path.join(self.workspace_dir, 'raster.tif') create_raster(numpy.int16, datasource_path) - mc = geometamaker.describe(datasource_path) + resource = geometamaker.describe(datasource_path) temp_dir = tempfile.mkdtemp(dir=self.workspace_dir) - mc.write(workspace=temp_dir) + resource.write(workspace=temp_dir) self.assertTrue( os.path.exists(os.path.join( temp_dir, f'{os.path.basename(datasource_path)}.yml'))) - self.assertTrue( - os.path.exists(os.path.join( - temp_dir, f'{os.path.basename(datasource_path)}.xml'))) From 323b792e0bf356743ddaa20ef8f3681dc8f3d118 Mon Sep 17 00:00:00 2001 From: davemfish Date: Mon, 22 Jul 2024 12:55:02 -0400 Subject: [PATCH 09/15] cleanup in tests --- tests/test_geometamaker.py | 56 -------------------------------------- 1 file changed, 56 deletions(-) diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py index 74405a3..5cb5019 100644 --- a/tests/test_geometamaker.py +++ b/tests/test_geometamaker.py @@ -4,18 +4,14 @@ import tempfile import unittest -from jsonschema.exceptions import SchemaError -from jsonschema.exceptions import ValidationError import numpy from osgeo import gdal from osgeo import gdal_array from osgeo import ogr from osgeo import osr -from pygeometa.core import MCFValidationError import pygeoprocessing from pygeoprocessing.geoprocessing_core import DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS import shapely -import yaml REGRESSION_DATA = os.path.join( os.path.dirname(__file__), 'data') @@ -103,23 +99,6 @@ def test_file_does_not_exist(self): with self.assertRaises(FileNotFoundError): _ = geometamaker.describe('foo.tif') - # def test_blank_geometamaker.describe(self): - # """MetadataControl: template has expected properties.""" - # import geometamaker - - # target_filepath = os.path.join(self.workspace_dir, 'mcf.yml') - - # mc = geometamaker.describe() - # mc.validate() - # mc._write_mcf(target_filepath) - - # with open(target_filepath, 'r') as file: - # actual = yaml.safe_load(file) - # with open(os.path.join(REGRESSION_DATA, 'template.yml'), 'r') as file: - # expected = yaml.safe_load(file) - - # self.assertEqual(actual, expected) - def test_describe_csv(self): """Test setting properties on csv.""" import geometamaker @@ -468,41 +447,6 @@ def test_preexisting_doc_new_fields(self): field2 = 
new_resource.get_field_description(field2_name)
         self.assertEqual(field2.type, 'String')
 
-    # TODO: this is important, still need to design for it.
-    # def test_invalid_preexisting_mcf(self):
-    #     """Test overwriting an existing invalid metadata document."""
-    #     import geometamaker
-    #     title = 'Title'
-    #     datasource_path = os.path.join(self.workspace_dir, 'raster.tif')
-    #     create_raster(numpy.int16, datasource_path)
-    #     mc = geometamaker.describe(datasource_path)
-    #     mc.set_title(title)
-
-    #     # delete a required property and ensure invalid MetadataControl
-    #     del mc.mcf['mcf']
-    #     with self.assertRaises(ValidationError):
-    #         mc.validate()
-    #     mc.write()  # intentionally writing an invalid MetadataControl
-
-    #     new_mc = geometamaker.describe(datasource_path)
-
-    #     # The new MetadataControl should not have values from the invalid MetadataControl
-    #     self.assertEqual(
-    #         new_mc.mcf['identification']['title'], '')
-
-    #     try:
-    #         new_mc.validate()
-    #     except (MCFValidationError, SchemaError) as e:
-    #         self.fail(
-    #             'unexpected validation error occurred\n'
-    #             f'{e}')
-    #     try:
-    #         new_mc.write()
-    #     except Exception as e:
-    #         self.fail(
-    #             'unexpected write error occurred\n'
-    #             f'{e}')
-
     def test_write_to_local_workspace(self):
         """Test write metadata to a different location."""
         import geometamaker

From 3b9d36a5ca500099435d05d10bb537db279a923e Mon Sep 17 00:00:00 2001
From: davemfish
Date: Mon, 22 Jul 2024 13:31:09 -0400
Subject: [PATCH 10/15] cleanup docstrings

---
 src/geometamaker/geometamaker.py | 65 +++++++++++++++++++++------
 src/geometamaker/models.py       | 77 +++++++++++++++++---------------
 2 files changed, 91 insertions(+), 51 deletions(-)

diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py
index dfd3324..22d352c 100644
--- a/src/geometamaker/geometamaker.py
+++ b/src/geometamaker/geometamaker.py
@@ -2,14 +2,11 @@
 import logging
 import os
 import uuid
-from datetime import datetime
 
 import frictionless
 import fsspec
 import numpy
 from osgeo import gdal
-from osgeo import ogr
-from osgeo import osr
 import pygeoprocessing
 import yaml
 
@@ -20,11 +17,17 @@
 
 
 def detect_file_type(filepath):
-    # TODO: zip, or other archives. Can they be represented as a Resource?
-    # or do they need to be a Package?
+    """Detect the type of resource contained in the file.
 
-    # TODO: guard against classifying netCDF, HDF5, etc as GDAL rasters,
-    # we'll want a different data model for multi-dimensional arrays.
+    Args:
+        filepath (str): path to a file to be opened by GDAL or frictionless
+
+    Returns:
+        str
+
+    """
+    # TODO: guard against classifying netCDF, HDF5, etc as GDAL rasters.
+    # We'll likely want a different data model for multi-dimensional arrays.
 
     # GDAL considers CSV a vector, so check against frictionless
     # first.
@@ -38,16 +41,35 @@ def detect_file_type(filepath):
         return 'vector'
     if gis_type == pygeoprocessing.RASTER_TYPE:
         return 'raster'
-    raise ValueError()
+    raise ValueError(
+        f'{filepath} does not appear to be one of (archive, table, raster, vector)')
 
 
 def describe_archive(source_dataset_path):
+    """Describe file properties of a compressed file.
+
+    Args:
+        source_dataset_path (str): path to a file.
+
+    Returns:
+        dict
+
+    """
     description = frictionless.describe(
         source_dataset_path, stats=True).to_dict()
     return description
 
 
 def describe_vector(source_dataset_path):
+    """Describe properties of a GDAL vector file.
+
+    Args:
+        source_dataset_path (str): path to a GDAL vector. 
+ + Returns: + dict + + """ description = frictionless.describe( source_dataset_path, stats=True).to_dict() fields = [] @@ -72,6 +94,15 @@ def describe_vector(source_dataset_path): def describe_raster(source_dataset_path): + """Describe properties of a GDAL raster file. + + Args: + source_dataset_path (str): path to a GDAL raster. + + Returns: + dict + + """ description = frictionless.describe( source_dataset_path, stats=True).to_dict() @@ -98,6 +129,15 @@ def describe_raster(source_dataset_path): def describe_table(source_dataset_path): + """Describe properties of a tabular dataset. + + Args: + source_dataset_path (str): path to a file representing a table. + + Returns: + dict + + """ description = frictionless.describe( source_dataset_path, stats=True).to_dict() description['schema'] = models.TableSchema(**description['schema']) @@ -131,11 +171,10 @@ def describe(source_dataset_path): metadata applies Returns - instance of - ArchiveResource, TableResource, - VectorResource, RasterResource - """ + instance of ArchiveResource, TableResource, VectorResource, + or RasterResource + """ data_package_path = f'{source_dataset_path}.yml' # Despite naming, this does not open a file that must be closed @@ -187,7 +226,6 @@ def describe(source_dataset_path): new_fields.append(field) description['schema'].fields = new_fields # overwrite properties that are intrinsic to the dataset - # TODO: any other checks that the resources represent the same data? resource = dataclasses.replace( existing_resource, **description) @@ -196,4 +234,3 @@ def describe(source_dataset_path): resource = RESOURCE_MODELS[resource_type](**description) return resource - diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index c8b52cc..93e0584 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -19,6 +19,7 @@ def ignore_aliases(self, data): @dataclass class BoundingBox(): + """Class for a spatial bounding box.""" xmin: float ymin: float @@ -28,6 +29,7 @@ class BoundingBox(): @dataclass class SpatialSchema(): + """Class for keeping track of spatial info.""" bounding_box: BoundingBox crs: str @@ -58,15 +60,13 @@ class License: @dataclass class FieldSchema: - """metadata for a field in a table.""" + """Metadata for a field in a table.""" # https://datapackage.org/standard/table-schema/ - name: str = '' - title: str = '' - type: str = '' - format: str = '' - example: any = None + name: str + type: str description: str = '' + title: str = '' units: str = '' @@ -101,8 +101,8 @@ class BandSchema: gdal_type: int numpy_type: str nodata: int | float - title: str = '' description: str = '' + title: str = '' @dataclass @@ -132,36 +132,44 @@ class Resource: """Base class for metadata for a resource. https://datapackage.org/standard/data-resource/ - This class should be based on Data Package - Resource + This class borrows from the Data Package - Resource specification. But we have some additional properties that are important to us. - """ - # TODO: DP includes `sources` as list of source files - # with some amount of metadata for each item. For our - # use-case, I think a list of filenames is good enough. + All attributes are keyword-only so that we can init + with default values, allowing the user to get a template + with which to complete later. 
- path: str = '' - type: str = '' - scheme: str = '' + """ + + # These are populated by `frictionless.describe()` + bytes: int = 0 encoding: str = '' format: str = '' - mediatype: str = '' - bytes: int = 0 hash: str = '' + mediatype: str = '' name: str = '' - title: str = '' - description: str = '' - keywords: list = dataclasses.field(default_factory=list) + path: str = '' + scheme: str = '' + type: str = '' + + # DataPackage includes `sources` as a list of source files + # with some amount of metadata for each item. For our + # use-case, I think a list of filenames is good enough. sources: list = dataclasses.field(default_factory=list) - licenses: list = dataclasses.field(default_factory=list) + + # These are not populated by geometamaker citation: str = '' + contact: ContactSchema = ContactSchema() + description: str = '' doi: str = '' - url: str = '' edition: str = '' + keywords: list = dataclasses.field(default_factory=list) + licenses: list = dataclasses.field(default_factory=list) lineage: str = '' purpose: str = '' - contact: ContactSchema = ContactSchema() + title: str = '' + url: str = '' def __post_init__(self): self.metadata_path = f'{self.path}.yml' @@ -180,7 +188,7 @@ def get_title(self): return self.title def set_description(self, description): - """Add an description for the dataset. + """Add a description for the dataset. Args: description (str) @@ -420,7 +428,7 @@ def _get_field(self, name): dict) Raises: - KeyError if no attributes exist in the MCF or if the named + KeyError if no attributes exist in the resource or if the named attribute does not exist. """ @@ -434,15 +442,15 @@ def _get_field(self, name): f'{self.schema} has no field named {name}') def set_field_description(self, name, title=None, description=None, - units=None, type=None, format=None, - example=None): + units=None, type=None): """Define metadata for a tabular field. 
Args:
             name (str): name and unique identifier of the field
             title (str): title for the field
-            abstract (str): description of the field
+            description (str): description of the field
             units (str): unit of measurement for the field's values
+            type (str): datatype of values in the field
 
         """
         idx, field = self._get_field(name)
@@ -455,10 +463,6 @@ def set_field_description(self, name, title=None, description=None,
             field.units = units
         if type is not None:
             field.type = type
-        if format is not None:
-            field.format = format
-        if example is not None:
-            field.example = example
 
         self.schema.fields[idx] = field
 
@@ -469,7 +473,7 @@ def get_field_description(self, name):
             name (str): name and unique identifier of the field
 
         Returns:
-            dict
+            FieldSchema
         """
         idx, field = self._get_field(name)
         return field
@@ -512,11 +516,9 @@ def set_band_description(self, band_number, title=None,
 
         Args:
             band_number (int): a raster band index, starting at 1
-            name (str): name for the raster band
             title (str): title for the raster band
-            abstract (str): description of the raster band
+            description (str): description of the raster band
             units (str): unit of measurement for the band's pixel values
-            type (str): of the band's values, either 'integer' or 'number'
 
         """
         idx = band_number - 1
@@ -538,6 +540,7 @@ def get_band_description(self, band_number):
             band_number (int): a raster band index, starting at 1
 
         Returns:
-            dict
+            BandSchema
+
         """
         return self.schema.bands[band_number - 1]

From d62f92a017fdbbe444e50a24d31f3184fca5347f Mon Sep 17 00:00:00 2001
From: davemfish
Date: Mon, 22 Jul 2024 13:40:58 -0400
Subject: [PATCH 11/15] updates to readme, requirements, etc

---
 README.md                        | 49 +++++++++++++++++++------------
 docs/environment-rtd.yml         |  4 +--
 docs/source/conf.py              |  1 -
 requirements.txt                 |  4 +--
 src/geometamaker/geometamaker.py |  2 --
 5 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 404db44..8384e47 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,10 @@
-A Python library for creating [Metadata Control Files](https://geopython.github.io/pygeometa/reference/mcf/)
+A Python library for creating human- and machine-readable metadata for geospatial data.
+
+Supported datatypes include:
+* everything supported by GDAL
+* tabular formats supported by `frictionless`
+* compressed formats supported by `frictionless`
+
 
 See `requirements.txt` for dependencies
 
@@ -7,48 +13,51 @@ See `requirements.txt` for dependencies
 #### Creating & adding metadata to file:
 
 ```python
-from geometamaker import MetadataControl
+import geometamaker
 
 data_path = 'data/watershed_gura.shp'
-mc = MetadataControl(data_path)
+resource = geometamaker.describe(data_path)
 
-mc.set_title('My Dataset')
-mc.set_abstract('all about my dataset')
-mc.set_keywords(['hydrology', 'watersheds'])
+resource.set_title('My Dataset')
+resource.set_description('all about my dataset')
+resource.set_keywords(['hydrology', 'watersheds'])
 
 # For a vector:
-mc.set_field_description(
+resource.set_field_description(
     'field_name',  # the name of an actual field in the vector's table
-    abstract='something about the field',
+    description='something about the field',
     units='mm')
 
 # or for a raster:
-mc.set_band_description(
+data_path = 'data/dem.tif'
+resource = geometamaker.describe(data_path)
+resource.set_band_description(
     1,  # a raster band index, starting at 1
-    name='band name',
-    abstract='something about the band',
+    title='band name',
+    description='something about the band',
     units='mm')
 
-mc.validate()
-mc.write()
+resource.write()
 ```
 
 #### Creating metadata for a batch of files:
 ```python
 import os
 
-from geometamaker import MetadataControl
+import geometamaker
 
 data_dir = 'C:/Users/dmf/projects/invest/data/invest-sample-data'
 for path, dirs, files in os.walk(data_dir):
     for file in files:
-        if file.endswith(('.shp', '.gpkg', '.tif')):
-            filepath = os.path.join(path, file)
-            print(filepath)
-            mc = MetadataControl(filepath)
-            mc.validate()
-            mc.write()
+        filepath = os.path.join(path, file)
+        print(filepath)
+        try:
+            resource = geometamaker.describe(filepath)
+        except ValueError as err:
+            print(err)
+            continue
+        resource.write()
 ```
 
 #### For a complete list of methods:
diff --git a/docs/environment-rtd.yml b/docs/environment-rtd.yml
index 897d210..8e2bb54 100644
--- a/docs/environment-rtd.yml
+++ b/docs/environment-rtd.yml
@@ -9,12 +9,10 @@ channels:
   - conda-forge
 dependencies:
   - python=3.8
+  - frictionless
   - fsspec
   - gdal>=3
-  - jsonschema
   - numpy
-  - pygeometa
   - pygeoprocessing>=2.4.2
-  - shapely
   - pyyaml
   - sphinx_rtd_theme
diff --git a/docs/source/conf.py b/docs/source/conf.py
index b260b05..f75e375 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -3,7 +3,6 @@
 # For the full list of built-in configuration values, see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
 
-import datetime
 import os
 import sys
 import sphinx.ext.apidoc
diff --git a/requirements.txt b/requirements.txt
index a0a431d..a9efdbd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,10 +5,8 @@
 aiohttp
 fsspec
 GDAL
-jsonschema
+frictionless
 numpy
-pygeometa
 pygeoprocessing>=2.4.3
 pyyaml
 requests
-shapely
\ No newline at end of file
diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py
index 22d352c..84559b4 100644
--- a/src/geometamaker/geometamaker.py
+++ b/src/geometamaker/geometamaker.py
@@ -1,7 +1,5 @@
 import dataclasses
 import logging
-import os
-import uuid
 
 import frictionless
 import fsspec

From 2e37827045baf112b34f211e05a38df1c67aace7 Mon Sep 17 00:00:00 2001
From: davemfish
Date: Mon, 22 Jul 2024 15:02:43 -0400
Subject: [PATCH 12/15] python version compatibility issues

---
 src/geometamaker/models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git 
a/src/geometamaker/models.py b/src/geometamaker/models.py index 93e0584..07a666c 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -1,3 +1,4 @@ +from __future__ import annotations import dataclasses from dataclasses import dataclass import logging @@ -160,7 +161,7 @@ class Resource: # These are not populated by geometamaker citation: str = '' - contact: ContactSchema = ContactSchema() + contact: ContactSchema = dataclasses.field(default_factory=ContactSchema) description: str = '' doi: str = '' edition: str = '' From e2418d85ff457c0af4f00618977c83add0085adc Mon Sep 17 00:00:00 2001 From: davemfish Date: Tue, 23 Jul 2024 16:34:31 -0400 Subject: [PATCH 13/15] fixing package path for rtd --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index f75e375..f46bf8d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -8,7 +8,7 @@ import sphinx.ext.apidoc from pkg_resources import get_distribution -sys.path.insert(0, os.path.abspath('../../src/geometamaker')) +sys.path.insert(0, os.path.abspath('../../src')) # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information From 80fb55c8198c9e554ba827f69d67a1858586ec29 Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 24 Jul 2024 08:39:14 -0400 Subject: [PATCH 14/15] added classmethod for loading existing metadata, checking compatibility. --- src/geometamaker/__init__.py | 5 ++++ src/geometamaker/geometamaker.py | 11 ++++----- src/geometamaker/models.py | 39 +++++++++++++++++++++++++++++++- tests/test_geometamaker.py | 28 ++++++++++++++++++++++- 4 files changed, 74 insertions(+), 9 deletions(-) diff --git a/src/geometamaker/__init__.py b/src/geometamaker/__init__.py index af30c26..739417f 100644 --- a/src/geometamaker/__init__.py +++ b/src/geometamaker/__init__.py @@ -1 +1,6 @@ +import importlib.metadata + from .geometamaker import describe + + +__version__ = importlib.metadata.version('geometamaker') diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 84559b4..e419cb8 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -173,7 +173,7 @@ def describe(source_dataset_path): or RasterResource """ - data_package_path = f'{source_dataset_path}.yml' + metadata_path = f'{source_dataset_path}.yml' # Despite naming, this does not open a file that must be closed of = fsspec.open(source_dataset_path) @@ -185,11 +185,7 @@ def describe(source_dataset_path): # Load existing metadata file try: - with fsspec.open(data_package_path, 'r') as file: - yaml_string = file.read() - - existing_resource = RESOURCE_MODELS[resource_type]( - **yaml.safe_load(yaml_string)) + existing_resource = RESOURCE_MODELS[resource_type].load(metadata_path) if 'schema' in description: if isinstance(description['schema'], models.RasterSchema): # If existing band metadata still matches schema of the file @@ -228,7 +224,8 @@ def describe(source_dataset_path): existing_resource, **description) # Common path: metadata file does not already exist - except FileNotFoundError as err: + # Or less common, ValueError if it exists but is incompatible + except (FileNotFoundError, ValueError) as err: resource = RESOURCE_MODELS[resource_type](**description) return resource diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index 07a666c..a50cc2f 100644 --- a/src/geometamaker/models.py +++ 
b/src/geometamaker/models.py @@ -4,8 +4,11 @@ import logging import os +import fsspec import yaml +import geometamaker + LOGGER = logging.getLogger(__name__) @@ -128,7 +131,7 @@ def __post_init__(self): self.bands = bands -@dataclass(kw_only=True) +@dataclass() class Resource: """Base class for metadata for a resource. @@ -142,6 +145,8 @@ class Resource: with which to complete later. """ + # A version string we can use to identify geometamaker compliant documents + metadata_version: str = dataclasses.field(init=False) # These are populated by `frictionless.describe()` bytes: int = 0 @@ -174,6 +179,38 @@ class Resource: def __post_init__(self): self.metadata_path = f'{self.path}.yml' + self.metadata_version: str = f'geometamaker.{geometamaker.__version__}' + + @classmethod + def load(cls, filepath): + """Load metadata document from a yaml file. + + Args: + filepath (str): path to yaml file + + Returns: + instance of the class + + Raises: + FileNotFoundError if filepath does not exist + ValueError if the metadata is found to be incompatible with + geometamaker. + + """ + with fsspec.open(filepath, 'r') as file: + yaml_string = file.read() + yaml_dict = yaml.safe_load(yaml_string) + if 'metadata_version' not in yaml_dict \ + or not yaml_dict['metadata_version'].startswith('geometamaker'): + message = (f'{filepath} exists but is not compatible with ' + f'geometamaker. It will be overwritten if write() is ' + f'called for this resource.') + LOGGER.warning(message) + raise ValueError(message) + # delete this property so that geometamaker can initialize it itself + # with the current version info. + del yaml_dict['metadata_version'] + return cls(**yaml_dict) def set_title(self, title): """Add a title for the dataset. diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py index 5cb5019..19c9804 100644 --- a/tests/test_geometamaker.py +++ b/tests/test_geometamaker.py @@ -12,6 +12,7 @@ import pygeoprocessing from pygeoprocessing.geoprocessing_core import DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS import shapely +import yaml REGRESSION_DATA = os.path.join( os.path.dirname(__file__), 'data') @@ -93,7 +94,7 @@ def tearDown(self): shutil.rmtree(self.workspace_dir) def test_file_does_not_exist(self): - """MetadataControl: raises exception if given file does not exist.""" + """Raises exception if given file does not exist.""" import geometamaker with self.assertRaises(FileNotFoundError): @@ -447,6 +448,31 @@ def test_preexisting_doc_new_fields(self): field2 = new_resource.get_field_description(field2_name) self.assertEqual(field2.type, 'String') + def test_preexisting_incompatible_doc(self): + """Test when yaml file not created by geometamaker already exists.""" + import geometamaker + + datasource_path = os.path.join(self.workspace_dir, 'raster.tif') + target_path = f'{datasource_path}.yml' + with open(target_path, 'w') as file: + file.write(yaml.dump({'foo': 'bar'})) + create_raster(numpy.int16, datasource_path) + + # Describing a dataset that already has an incompatible yaml + # sidecar file should log a warning. 
+ with self.assertLogs('geometamaker', level='WARNING') as cm: + resource = geometamaker.describe(datasource_path) + expected_message = 'exists but is not compatible with' + self.assertIn(expected_message, ''.join(cm.output)) + + # After writing new doc, check it has expected property + resource.write() + with open(target_path, 'r') as file: + yaml_string = file.read() + yaml_dict = yaml.safe_load(yaml_string) + self.assertIn('metadata_version', yaml_dict) + self.assertIn('geometamaker', yaml_dict['metadata_version']) + def test_write_to_local_workspace(self): """Test write metadata to a different location.""" import geometamaker From d95f0254db1d4b84da4bda9d267088be0d91c13e Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 24 Jul 2024 09:13:37 -0400 Subject: [PATCH 15/15] bump python version for RTD build --- docs/environment-rtd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/environment-rtd.yml b/docs/environment-rtd.yml index 8e2bb54..dde4957 100644 --- a/docs/environment-rtd.yml +++ b/docs/environment-rtd.yml @@ -8,7 +8,7 @@ name: env-readthedocs channels: - conda-forge dependencies: - - python=3.8 + - python=3.10 - frictionless - fsspec - gdal>=3
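
Taken together, the last patches in this series establish a round-trip workflow: `describe()` builds a resource dataclass from a dataset, `write()` serializes it to a `.yml` sidecar stamped with `metadata_version`, and the new `load()` classmethod rejects yaml documents that lack that stamp. A minimal sketch of that round trip, assuming a GeoTIFF at the illustrative path `raster.tif` and calling the inherited `load()` directly on `RasterResource` (the patches themselves only exercise `load()` through `describe()`):

```python
import geometamaker
from geometamaker import models

# Build a resource by inspecting the dataset; intrinsic properties
# (format, bytes, hash, band schema) are populated automatically.
resource = geometamaker.describe('raster.tif')

# Add the human-authored properties.
resource.set_title('My Raster')
resource.set_keywords(['foo', 'bar'])

# Write the sidecar document 'raster.tif.yml'. It now carries a
# 'metadata_version' stamp such as 'geometamaker.<package version>'.
resource.write()

# load() checks the stamp and rebuilds the dataclass; a yaml file
# that was not written by geometamaker raises ValueError instead.
reloaded = models.RasterResource.load('raster.tif.yml')
assert reloaded.get_title() == 'My Raster'
assert reloaded.get_keywords() == ['foo', 'bar']
```

The stamp is what lets `describe()` distinguish a document it can safely amend from an unrelated yaml file that happens to share the sidecar naming convention.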