Skip to content

Commit

Permalink
Remove NaN, Inf, and -Inf from netcdf_header before writing into the …
Browse files Browse the repository at this point in the history
…database
  • Loading branch information
jochenklar committed Oct 26, 2023
1 parent 434be29 commit 4712af6
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 3 deletions.
4 changes: 2 additions & 2 deletions isimip_publisher/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def insert_datasets():

for file in dataset.files:
database.insert_file(session, settings.VERSION, file.dataset.path, file.uuid, file.name, file.path,
file.size, file.checksum, file.checksum_type, file.netcdf_header, file.specifiers)
file.size, file.checksum, file.checksum_type, file.cleaned_header, file.specifiers)

session.commit()

Expand Down Expand Up @@ -210,7 +210,7 @@ def link_datasets():
target_file_path = str(settings.TARGET_PATH / Path(file.path).relative_to(settings.PATH))
database.insert_file_link(session, settings.VERSION, target_file_path, file.dataset.path,
file.name, file.path, file.size, file.checksum, file.checksum_type,
file.netcdf_header, file.specifiers)
file.cleaned_header, file.specifiers)

session.commit()
database.update_tree(session, settings.PATH, settings.TREE)
Expand Down
6 changes: 5 additions & 1 deletion isimip_publisher/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from isimip_utils.decorators import cached_property
from isimip_utils.netcdf import get_dimensions, get_global_attributes, get_variables, open_dataset_read

from .utils.files import get_size
from .utils.files import get_size, clean_header

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -77,6 +77,10 @@ def netcdf_header(self):
'global_attributes': get_global_attributes(dataset, convert=True)
}

@cached_property
def cleaned_header(self):
    """Return ``self.netcdf_header`` after passing it through ``clean_header``.

    This is the variant of the header that is handed to the database layer,
    since ``clean_header`` drops values a JSONB field cannot store.
    """
    raw_header = self.netcdf_header
    return clean_header(raw_header)

@cached_property
def size(self):
    """Return the size of the file at ``self.abspath`` as given by ``get_size``."""
    file_abspath = self.abspath
    return get_size(file_abspath)
Expand Down
18 changes: 18 additions & 0 deletions isimip_publisher/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,21 @@ def mock_file(mock_path):

def get_size(file_abspath):
    """Return the size in bytes (``st_size``) of the file at ``file_abspath``.

    Accepts anything ``pathlib.Path`` accepts (``str`` or path-like object).
    """
    file_stat = Path(file_abspath).stat()
    return file_stat.st_size


def clean_header(header):
    """Return a copy of ``header`` without NaN, Inf or -Inf values.

    Such values cannot be stored in a JSONB database field, so the affected
    key/value pairs are removed:

    * a nested dict is cleaned recursively (its key is always kept),
    * a list is dropped as a whole if it contains a non-finite number
      anywhere, including inside nested lists or dicts,
    * any other value is dropped if it is a non-finite number.

    The input dict itself is not modified.
    """
    # local imports so that the module-level import block stays untouched
    import math
    import numbers

    def is_non_finite(value):
        # NaN never compares equal to itself, so looking it up in a set (as
        # done previously) silently misses it; math.isfinite reliably covers
        # NaN, Inf and -Inf. Integral types (including bool) are always finite
        # and are skipped to avoid an OverflowError on very large ints.
        if isinstance(value, numbers.Real) and not isinstance(value, numbers.Integral):
            return not math.isfinite(value)
        # no hashing involved here, so nested (unhashable) containers are fine
        if isinstance(value, (list, tuple)):
            return any(is_non_finite(item) for item in value)
        if isinstance(value, dict):
            return any(is_non_finite(item) for item in value.values())
        return False

    cleaned_header = {}
    for key, value in header.items():
        if isinstance(value, dict):
            cleaned_header[key] = clean_header(value)
        elif not is_non_finite(value):
            cleaned_header[key] = value

    return cleaned_header

0 comments on commit 4712af6

Please sign in to comment.