Skip to content

Commit

Permalink
Have MopperBuilder working to basic test standard - now to check it's doing the right things
Browse files Browse the repository at this point in the history
  • Loading branch information
marc-white committed Jan 7, 2025
1 parent 624ac29 commit 142acec
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 12 deletions.
3 changes: 3 additions & 0 deletions src/access_nri_intake/data/metadata_schema_experiment.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@
{
"pattern": "^subhr$"
},
{
"pattern": "^subhrPt$"
},
{
"pattern": "^\\d+hr$"
},
Expand Down
3 changes: 3 additions & 0 deletions src/access_nri_intake/data/metadata_schema_file.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@
{
"pattern": "^subhr$"
},
{
"pattern": "^subhrPt$"
},
{
"pattern": "^\\d+hr$"
},
Expand Down
33 changes: 21 additions & 12 deletions src/access_nri_intake/source/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def parse_filename(
patterns: list[str] | None = None,
frequencies: dict = FREQUENCIES,
redaction_fill: str = "X",
) -> tuple[str, str | None, str | None, dict | None]:
) -> tuple[str, str | None, str | None, dict]:
"""
Parse an ACCESS model filename and return a file id and any time information
Expand Down Expand Up @@ -290,7 +290,7 @@ def parse_filename(
# Parse file id
file_id = filename
timestamp = None
exargs = None
exargs = {}
for pattern in patterns:
match = re.match(pattern, file_id)
if match:
Expand Down Expand Up @@ -764,7 +764,7 @@ def parser(cls, fpath, to_select=None):
# TODO work out if more appropriate to override parse_ncfile
# FIXME self --> cls
@classmethod
def parse_ncfile(self, fpath, exargs={}):
def parse_ncfile(cls, file: str, time_dim: str = "time"):
"""
Get Intake-ESM datastore entry info from an ACCESS netcdf file
CMOR has its own base date format, length depends on frequency
Expand All @@ -784,12 +784,19 @@ def parse_ncfile(self, fpath, exargs={}):
Stores extra arguments as frequency, date_range, variable etc, derived from fpattern
"""

file_path = Path(file)

file_id, filename_timestamp, filename_frequency, exargs = cls.parse_filename(
cls._get_relevant_filepath(file_path)
)

time_format = "%Y-%m-%d, %H:%M:%S"
# get the format for dates based on the date string's length
# dformat is the longest possible datetime format for cmor
dformat = "%Y%m%d%H%M%S"
date_range = exargs.get("date_range", "")
if date_range == "":
date_range = exargs.get("date_range", None)
if date_range is None:
start_date = "none"
end_date = "none"
else:
Expand All @@ -800,9 +807,11 @@ def parse_ncfile(self, fpath, exargs={}):
te = datetime.strptime(te, cmor_format)
end_date = te.strftime(time_format)

variable = exargs.get("variable", "")
variable = exargs.get("variable", None)
if variable is None:
raise RuntimeError(f"Unable to parse variable name from {file}")
with xr.open_dataset(
fpath,
file,
chunks={},
decode_cf=False,
decode_times=False,
Expand All @@ -813,12 +822,12 @@ def parse_ncfile(self, fpath, exargs={}):
variable_standard_name = attrs.get("standard_name", "unknown")
variable_cell_methods = attrs.get("cell_methods", "unknown")
variable_units = attrs.get("units", "unknown")
tracking_id = ds.attrs.get("tracking_id", "unknown")
# tracking_id = ds.attrs.get("tracking_id", "unknown")

output_nc_info = _NCFileInfo(
filename=Path(fpath).name,
path=fpath,
file_id=tracking_id,
filename=Path(file).name,
path=file,
file_id=file_id,
filename_timestamp=date_range,
frequency=exargs.get("frequency", ""),
start_date=start_date,
Expand All @@ -830,4 +839,4 @@ def parse_ncfile(self, fpath, exargs={}):
variable_cell_methods=[variable_cell_methods],
)

return output_nc_info, exargs
return output_nc_info

0 comments on commit 142acec

Please sign in to comment.