"""Write NetCDF profile files from BIO ODF CTD casts converted to JSON.

Provenance: this module is ``odf_transform/test/test_bio_odf.py`` as added by
commit c2ea5c03102963bbc1aae9347256a14554d64715 (Jeff Cullis, 2021-02-19):
"#56 added a new test odf function specifically for populating the NetCDF
globals from a BIO ODF file."  The same commit deleted
``odf_transform/BioCtdNc.py``; its ``BioCtdNcFile.map_odf_header`` is replaced
by ``BioCtdNcFile.odf_to_globals`` defined here.

The module is a script: the statements after the function definitions run at
import time and read ``./config.json`` and ``./test_files/`` relative to the
current working directory, writing results to ``./temp/``.
"""
import glob
import json
import os
import sys
from datetime import datetime

import numpy as np
from pytz import timezone

# Make the repository packages importable when run from this directory.
sys.path.insert(0, "../../")

from odf_transform.odfCls import CtdNcFile, NcVar
from odf_transform.utils.utils import get_geo_code, read_geojson
from ios_data_transform import is_in

# The geographic_area variable is switched off.  When switched on,
# create_ncvar() needs a ``polygons_dict`` keyword argument.
INCLUDE_GEOGRAPHIC_AREA = False

# (keyword looked for in the ODF variable name, units written to NetCDF).
# The keyword doubles as the NcVar type and name.  Entries are tried in this
# order and the first match wins.
_PROFILE_VARIABLES = (
    ("depth", "meters"),
    ("pressure", "dbar"),
    ("temperature", "IPTS-68"),
    ("salinity", "PSS-78"),
)


class BioCtdNcFile(CtdNcFile):
    """CtdNcFile whose global attributes are filled from a BIO ODF record."""

    def __init__(self, odf_json):
        """Set institution, project, header, record count and profile id.

        Args:
            odf_json: dict with a "metadata" mapping (keys read here:
                "institute", "cruise", "header", "cruiseNumber",
                "eventNumber", "eventQualifier") and a "data" mapping of
                variable name to a sized collection of values.

        Raises:
            KeyError: if one of the keys listed above is missing.
            IndexError: if "data" contains no variables.
        """
        super().__init__()
        metadata = odf_json["metadata"]
        self.institution = metadata["institute"]
        self.project = metadata["cruise"]
        # Keep the full original header as a JSON string.  indent=0 inserts
        # newlines between items without any leading spaces.
        self.header = json.dumps(
            metadata["header"], ensure_ascii=False, indent=0
        )
        # The length of the first data variable defines the profile length.
        self.nrec = len(list(odf_json["data"].values())[0])

        # Unique id for the profile: cruise number, event number, qualifier.
        profile_id = "{}-{}-{}".format(
            metadata["cruiseNumber"],
            metadata["eventNumber"],
            metadata["eventQualifier"],
        )
        print("Profile ID:", profile_id)
        self.id = profile_id

    def nc_config(self, **kwargs):
        """Copy global attributes from the configuration onto this object.

        Args:
            **kwargs: configuration values.  Required keys: "summary",
                "title", "infoUrl", "description", "keywords",
                "acknowledgement", "creator_name", "creator_email",
                "creator_url", "license", "keywords_vocabulary" and
                "Conventions".  Other keys are ignored.

        Raises:
            Exception: if a required key is missing.  The triggering KeyError
                is attached as ``__cause__``.
        """
        self.featureType = "profile"
        try:
            self.summary = kwargs["summary"]
            self.title = kwargs["title"]
            self.infoUrl = kwargs["infoUrl"]
            self.description = kwargs["description"]
            self.keywords = kwargs["keywords"]
            self.acknowledgement = kwargs["acknowledgement"]
            self.naming_authority = "COARDS"
            self.creator_name = kwargs["creator_name"]
            self.creator_email = kwargs["creator_email"]
            self.creator_url = kwargs["creator_url"]
            self.license = kwargs["license"]

            self.keywords_vocabulary = kwargs["keywords_vocabulary"]
            self.Conventions = kwargs["Conventions"]
        except KeyError as e:
            raise Exception(
                f"Unable to find following value for {e} in the config file..."
            ) from e
        self.cdm_profile_variables = "time"

    @staticmethod
    def str_clean(odf_val):
        """Return ``odf_val`` with every single-quote character removed."""
        return odf_val.replace("'", "")

    def odf_to_globals(self, json_header):
        """Map values from the ODF header into the NetCDF globals.

        Overwrites ``title`` and ``summary``, so call it after nc_config()
        when the ODF-derived values should win.

        Args:
            json_header: ODF header dict.  Reads
                ["ODF_HEADER"]["FILE_SPECIFICATION_1"],
                ["CRUISE_HEADER"]["CRUISE_DESCRIPTION_1"] and
                ["CRUISE_HEADER"]["CRUISE_NUMBER_1"].

        Raises:
            KeyError: if one of those entries is missing.
        """
        # NOTE(review): the title is not passed through str_clean() although
        # the cruise fields are -- confirm whether that is intentional.
        self.title = json_header["ODF_HEADER"]["FILE_SPECIFICATION_1"]
        cruise = json_header["CRUISE_HEADER"]
        self.summary = " ".join([
            "CTD cast from",
            self.str_clean(cruise["CRUISE_DESCRIPTION_1"]),
            "cruise number",
            self.str_clean(cruise["CRUISE_NUMBER_1"]),
        ])


def create_ncvar(data, profile_id, **kwargs):
    """Build the list of NcVar objects describing one profile.

    Args:
        data: ODF record as a dict with "metadata" and "data" mappings.
        profile_id: value stored in the "profile" variable.
        **kwargs: only consulted when INCLUDE_GEOGRAPHIC_AREA is true, in
            which case "polygons_dict" is required.

    Returns:
        list of NcVar: metadata variables first, then one entry for every
        data variable whose name matches a keyword in _PROFILE_VARIABLES.
        Data variables that match nothing are skipped silently.

    Raises:
        KeyError: if an expected metadata entry is missing.
    """
    metadata = data["metadata"]
    null_value = np.nan

    ncfile_var_list = [
        NcVar(
            "str_id",
            "filename",
            None,
            metadata["filename"].split("/")[-1],
        ),
        # administration variables
        NcVar("str_id", "country", None, "Canada"),
        NcVar("str_id", "cruise_id", None, metadata["cruiseNumber"]),
        NcVar("str_id", "scientist", None, metadata["scientist"]),
        NcVar("str_id", "platform", None, metadata["ship"]),
        NcVar(
            "str_id",
            "instrument_type",
            None,
            metadata["type"] + " " + metadata["model"],
        ),
        NcVar(
            "str_id",
            "instrument_serial_number",
            None,
            metadata["serialNumber"],
        ),
        # location variables
        NcVar("lat", "latitude", "degrees_north", metadata["latitude"]),
        NcVar("lon", "longitude", "degrees_east", metadata["longitude"]),
    ]

    if INCLUDE_GEOGRAPHIC_AREA:
        position = [
            float(metadata["longitude"]),
            float(metadata["latitude"]),
        ]
        ncfile_var_list.append(
            NcVar(
                "str_id",
                "geographic_area",
                None,
                get_geo_code(position, kwargs["polygons_dict"]),
            )
        )

    # NOTE(review): the event id is "qualifier-number" while the profile id
    # built in BioCtdNcFile is "cruise-number-qualifier" -- confirm the order.
    event_id = "{}-{}".format(
        metadata["eventQualifier"], metadata["eventNumber"]
    )
    ncfile_var_list.append(NcVar("str_id", "event_number", None, event_id))
    ncfile_var_list.append(NcVar("profile", "profile", None, profile_id))

    # Build the timezone-aware UTC datetime directly from the timestamp.
    # Calling astimezone() on the naive result of utcfromtimestamp() would
    # treat the value as system local time and shift it by the machine's UTC
    # offset.  Assumes startTime is seconds since the Unix epoch -- TODO
    # confirm against the oce JSON export.
    date_obj = datetime.fromtimestamp(
        metadata["startTime"], tz=timezone("UTC")
    )
    ncfile_var_list.append(NcVar("time", "time", None, [date_obj]))

    # TODO: create a function to convert units from dictionary format to a
    #       plain string.
    # TODO: determine the BODC/GF3 code from the units and variable name in
    #       the ODF file.
    for var, values in data["data"].items():
        for keyword, units in _PROFILE_VARIABLES:
            if is_in([keyword], var):
                ncfile_var_list.append(
                    NcVar(
                        vartype=keyword,
                        varname=keyword,
                        varunits=units,
                        varval=values,
                        # NcVar is handed the list built so far.
                        varclslist=ncfile_var_list,
                        vardim="z",  # a plain string, not a 1-tuple
                        varnull=null_value,
                    )
                )
                break
    return ncfile_var_list


def write_ctd_ncfile(outfile, odf_data, **kwargs):
    """Write one ODF CTD profile to a NetCDF file.

    Args:
        outfile: name of the NetCDF file to create.
        odf_data: dict with data from an ODF file converted to JSON using
            the oce package.
        **kwargs: configuration values forwarded to
            BioCtdNcFile.nc_config().  They are not forwarded to
            create_ncvar().

    Returns:
        None.
    """
    ncf = BioCtdNcFile(odf_data)
    ncf.nc_config(**kwargs)
    # ODF-derived title and summary override the configured ones.
    ncf.odf_to_globals(odf_data["metadata"]["header"])

    # Attach the variables, then write everything held by the object to disk.
    ncf.varlist = create_ncvar(odf_data, ncf.id)
    ncf.write_ncfile(outfile)


# Read the JSON file with information on the dataset etc.
with open("./config.json", "r") as fid:
    info = json.load(fid)

# Read the geojson files and pass the merged polygons along with the config.
polygons_dict = {}
for fname in info["geojsonFileList"]:
    polygons_dict.update(read_geojson(fname))
info["polygons_dict"] = polygons_dict

# To process every test file instead, use glob.glob("./test_files/*.json").
flist = ['./test_files/CTD_HUD2018004_001_01_DN.ODF.json']
os.makedirs("./temp/", exist_ok=True)

for f in flist:
    with open(f, "r") as fid:
        data = json.load(fid)
    # Best effort: report a failing file and carry on with the next one.
    try:
        print(f)
        write_ctd_ncfile(
            outfile="./temp/{}.nc".format(f.split("/")[-1]),
            odf_data=data,
            **info,
        )
    except Exception as e:
        print("***** ERROR***", f)
        # Include the exception type: a bare KeyError prints only the key.
        print(f"{type(e).__name__}: {e}")