Skip to content

Commit

Permalink
Add new exargs output to parse_filename
Browse files: browse the repository at this point in the history
  • Loading branch information
marc-white committed Jan 7, 2025
1 parent b63ea7a commit 624ac29
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 26 deletions.
11 changes: 8 additions & 3 deletions src/access_nri_intake/source/builders.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -246,7 +246,7 @@ def parse_filename(
patterns: list[str] | None = None,
frequencies: dict = FREQUENCIES,
redaction_fill: str = "X",
) -> tuple[str, str | None, str | None]:
) -> tuple[str, str | None, str | None, dict | None]:
"""
Parse an ACCESS model filename and return a file id and any time information
Expand All @@ -273,6 +273,9 @@ def parse_filename(
A string of the redacted time information (e.g. "1990-01") if available, otherwise None
frequency: str | None
The frequency of the file if available in the filename, otherwise None
exargs: dict | None
The dictionary of named groups found in the filename regexp match. Includes the initial
values of any groups that have been redacted to create the `file_id`.
"""
if patterns is None:
patterns = cls.PATTERNS
Expand All @@ -287,6 +290,7 @@ def parse_filename(
# Parse file id
file_id = filename
timestamp = None
exargs = None
for pattern in patterns:
match = re.match(pattern, file_id)
if match:
Expand All @@ -299,13 +303,14 @@ def parse_filename(
+ redaction
+ file_id[match.end(grp) :]
)
exargs = match.groupdict()
break

# Remove non-python characters from file ids
file_id = re.sub(r"[-.]", "_", file_id)
file_id = re.sub(r"_+", "_", file_id).strip("_")

return file_id, timestamp, frequency
return file_id, timestamp, frequency, exargs

@classmethod
def parse_ncfile(cls, file: str, time_dim: str = "time") -> _NCFileInfo:
Expand All @@ -331,7 +336,7 @@ def parse_ncfile(cls, file: str, time_dim: str = "time") -> _NCFileInfo:

file_path = Path(file)

file_id, filename_timestamp, filename_frequency = cls.parse_filename(
file_id, filename_timestamp, filename_frequency, _ = cls.parse_filename(
file_path.stem
)

Expand Down
Loading

0 comments on commit 624ac29

Please sign in to comment.