From 1794799fb9a28fc426894a9709d52d679a482d43 Mon Sep 17 00:00:00 2001 From: JP Swinski Date: Tue, 28 Jan 2025 20:21:19 +0000 Subject: [PATCH] passing atl03 dataframe self test --- datasets/bathy/docker/oceaneyes/runner.py | 1 - datasets/icesat2/package/Atl03DataFrame.cpp | 29 ++-- datasets/icesat2/package/Atl03DataFrame.h | 10 +- datasets/icesat2/package/Icesat2Fields.h | 8 +- datasets/icesat2/package/icesat2.cpp | 3 + .../icesat2/selftests/atl03_dataframe.lua | 151 ++++++++++++++++++ packages/core/GeoDataFrame.cpp | 45 +++++- packages/core/GeoDataFrame.h | 2 + packages/h5/H5VarSet.h | 1 + 9 files changed, 227 insertions(+), 23 deletions(-) create mode 100644 datasets/icesat2/selftests/atl03_dataframe.lua diff --git a/datasets/bathy/docker/oceaneyes/runner.py b/datasets/bathy/docker/oceaneyes/runner.py index 5b5421ef..08e027b7 100644 --- a/datasets/bathy/docker/oceaneyes/runner.py +++ b/datasets/bathy/docker/oceaneyes/runner.py @@ -426,7 +426,6 @@ def runClassifier(classifier, classifier_func, num_processes=6): gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:7912") # get CMR-compatible bounding polygon - bounding_polygon = settings['bounding_polygon'] cmr_polygon = ' '.join([f'{coord[0]} {coord[1]}' for coord in zip(bounding_polygon["lat"], bounding_polygon["lon"])]) # lat1 lon1 lat2 lon2 ... 
# get detailed spatial-temporal query information diff --git a/datasets/icesat2/package/Atl03DataFrame.cpp b/datasets/icesat2/package/Atl03DataFrame.cpp index ae97f373..8498d023 100644 --- a/datasets/icesat2/package/Atl03DataFrame.cpp +++ b/datasets/icesat2/package/Atl03DataFrame.cpp @@ -115,13 +115,15 @@ Atl03DataFrame::Atl03DataFrame (lua_State* L, const char* beam_str, Icesat2Field { {"spot", &spot}, {"cycle", &cycle}, - {"spacecraft_orientation", &spacecraft_orientation}, - {"reference_ground_track", &reference_ground_track} + {"region", &region}, + {"reference_ground_track", &reference_ground_track}, + {"spacecraft_orientation", &spacecraft_orientation} }), spot(0, META_COLUMN), cycle(0, META_COLUMN), - spacecraft_orientation(Icesat2Fields::SC_TRANSITION, META_COLUMN), + region(0, META_COLUMN), reference_ground_track(0, META_COLUMN), + spacecraft_orientation(Icesat2Fields::SC_TRANSITION, META_COLUMN), active(false), readerPid(NULL), readTimeoutMs(_parms->readTimeout.value * 1000), @@ -141,6 +143,11 @@ Atl03DataFrame::Atl03DataFrame (lua_State* L, const char* beam_str, Icesat2Field signalConfColIndex = static_cast(parms->surfaceType.value); } + /* Set MetaData from Parameters */ + cycle = parms->cycle.value; + region = parms->region.value; + reference_ground_track = parms->rgt.value; + /* Setup Output Queue (for messages) */ if(outq_name) outQ = new Publisher(outq_name); @@ -999,7 +1006,6 @@ void* Atl03DataFrame::subsettingThread (void* parm) /* Set MetaData */ df->spot = Icesat2Fields::getSpotNumber((Icesat2Fields::sc_orient_t)atl03.sc_orient[0], df->beam); df->spacecraft_orientation = atl03.sc_orient[0]; - df->reference_ground_track = parms.rgt.value; /* Perform YAPC Scoring (if requested) */ const YapcScore yapc(df, region, atl03); @@ -1014,11 +1020,8 @@ void* Atl03DataFrame::subsettingThread (void* parm) int32_t background_index = 0; /* Traverse All Photons In Dataset */ - while(df->active && (current_photon < atl03.dist_ph_along.size)) + while(df->active && 
(++current_photon < atl03.dist_ph_along.size)) { - /* Go to Next Photon */ - current_photon++; - /* Go to Photon's Segment */ current_count++; while((current_segment < region.segment_ph_cnt.size) && @@ -1031,7 +1034,7 @@ void* Atl03DataFrame::subsettingThread (void* parm) /* Check Current Segment */ if(current_segment >= atl03.segment_dist_x.size) { - throw RunTimeException(ERROR, RTE_ERROR, "Photons with no segments are detected is %s/%s (%d %ld %ld)!", df->hdf03->name, df->beam, current_segment, atl03.segment_dist_x.size, region.num_segments); + throw RunTimeException(ERROR, RTE_ERROR, "Photons with no segments are detected in %s/%s (%d %ld %ld) (%d %d)", df->hdf03->name, df->beam, current_segment, atl03.segment_dist_x.size, region.num_segments, current_photon, current_count); } /* Check Region Mask */ @@ -1202,10 +1205,10 @@ void* Atl03DataFrame::subsettingThread (void* parm) df->segment_id.append(atl03.segment_id[current_segment]); /* Add Ancillary Elements */ - atl03.anc_geo_data.addToGDF(df, current_segment); - atl03.anc_corr_data.addToGDF(df, current_segment); - atl03.anc_ph_data.addToGDF(df, current_photon); - atl08.anc_seg_data.addToGDF(df, atl08.anc_seg_indices[current_photon]); + if(atl03.anc_geo_data.length() > 0) atl03.anc_geo_data.addToGDF(df, current_segment); + if(atl03.anc_corr_data.length() > 0) atl03.anc_corr_data.addToGDF(df, current_segment); + if(atl03.anc_ph_data.length() > 0) atl03.anc_ph_data.addToGDF(df, current_photon); + if(atl08.anc_seg_indices) atl08.anc_seg_data.addToGDF(df, atl08.anc_seg_indices[current_photon]); } } catch(const RunTimeException& e) diff --git a/datasets/icesat2/package/Atl03DataFrame.h b/datasets/icesat2/package/Atl03DataFrame.h index 7efe90cb..86581256 100644 --- a/datasets/icesat2/package/Atl03DataFrame.h +++ b/datasets/icesat2/package/Atl03DataFrame.h @@ -29,8 +29,8 @@ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef __atl03_reader__ -#define __atl03_reader__ +#ifndef __atl03_dataframe__ +#define __atl03_dataframe__ /****************************************************************************** * INCLUDES @@ -49,6 +49,7 @@ #include "H5Array.h" #include "H5DArray.h" #include "H5VarSet.h" +#include "H5Object.h" #include "Icesat2Fields.h" /****************************************************************************** @@ -92,8 +93,9 @@ class Atl03DataFrame: public GeoDataFrame /* DataFrame MetaData */ FieldElement spot; // 1, 2, 3, 4, 5, 6 FieldElement cycle; - FieldElement spacecraft_orientation; // 0 (backwards), 1 (forward) + FieldElement region; FieldElement reference_ground_track; + FieldElement spacecraft_orientation; // 0 (backwards), 1 (forward) /*-------------------------------------------------------------------- * Methods @@ -256,4 +258,4 @@ class Atl03DataFrame: public GeoDataFrame static void* subsettingThread (void* parm); }; -#endif /* __atl03_reader__ */ +#endif /* __atl03_dataframe__ */ diff --git a/datasets/icesat2/package/Icesat2Fields.h b/datasets/icesat2/package/Icesat2Fields.h index 34030c53..3306a19b 100644 --- a/datasets/icesat2/package/Icesat2Fields.h +++ b/datasets/icesat2/package/Icesat2Fields.h @@ -365,10 +365,10 @@ class Icesat2Fields: public RequestFields FieldElement year; // ATL03 granule observation date - year FieldElement month; // ATL03 granule observation date - month FieldElement day; // ATL03 granule observation date - day - FieldElement rgt; // ATL03 granule reference ground track - FieldElement cycle; // ATL03 granule cycle - FieldElement region; // ATL03 granule region - FieldElement version; // ATL03 granule version + FieldElement rgt {-1}; // ATL03 granule reference ground track + FieldElement cycle {-1}; // ATL03 granule cycle + FieldElement region {-1}; // ATL03 granule region + FieldElement version {-1}; // ATL03 granule version bool stages[NUM_STAGES] = {true, false, false, false}; diff --git 
a/datasets/icesat2/package/icesat2.cpp b/datasets/icesat2/package/icesat2.cpp index fefeca5e..bd86eaaf 100644 --- a/datasets/icesat2/package/icesat2.cpp +++ b/datasets/icesat2/package/icesat2.cpp @@ -37,6 +37,7 @@ #include "RasterObject.h" #include "Asset.h" #include "Icesat2Fields.h" +#include "Atl03DataFrame.h" #include "Atl03Reader.h" #include "Atl03Viewer.h" #include "Atl03Indexer.h" @@ -75,6 +76,8 @@ int icesat2_open (lua_State *L) {"atl06s", Atl06Reader::luaCreate}, {"atl08", Atl08Dispatch::luaCreate}, {"atl13s", Atl13Reader::luaCreate}, + // dataframes + {"atl03x", Atl03DataFrame::luaCreate}, #ifdef __unittesting__ {"ut_atl06", UT_Atl06Dispatch::luaCreate}, #endif diff --git a/datasets/icesat2/selftests/atl03_dataframe.lua b/datasets/icesat2/selftests/atl03_dataframe.lua new file mode 100644 index 00000000..b0a3a1f8 --- /dev/null +++ b/datasets/icesat2/selftests/atl03_dataframe.lua @@ -0,0 +1,151 @@ +local runner = require("test_executive") +local asset = require("asset") +local json = require("json") +local prettyprint = require("prettyprint") + +-- Setup Logging -- + +local console = require("console") +console.monitor:config(core.LOG, core.INFO) +sys.setlvl(core.LOG, core.INFO) + +-- Setup Credentials -- + +local assets = asset.loaddir() +local asset_name = "icesat2" +local nsidc_s3 = core.getbyname(asset_name) +local name, identity, driver = nsidc_s3:info() +local creds = aws.csget(identity) +if not creds then + local earthdata_url = "https://data.nsidc.earthdatacloud.nasa.gov/s3credentials" + local response, _ = core.get(earthdata_url) + local _, credential = pcall(json.decode, response) + aws.csput(identity, credential) +end + +-- Helper Function -- + +local function check_expected(exp, df, index, t) + for key,value in pairs(exp) do + if index then + runner.check(math.abs(df[key][index] - value) <= t, string.format("%s[%d] => %f", key, index, df[key][index])) + else + runner.check(math.abs(df:meta(key) - value) <= t, string.format("%s => %f", key, 
df:meta(key))) + end + end +end + +-- Unit Test -- + +runner.unittest("ATL03 DataFrame", function() + + local parms = icesat2.parms({ + cnf = 4, + resource = "ATL03_20200304065203_10470605_006_01.h5" + }) + + local atl03h5 = h5.object(asset_name, parms["resource"]) + local atl03df = icesat2.atl03x("gt1l", parms, atl03h5, nil, core.EVENTQ) + + runner.check(atl03df:waiton(10000), "failed to create dataframe", true) + runner.check(atl03df:inerror() == false, "dataframe encountered error") + + runner.check(atl03df:numrows() == 5912939, string.format("incorrect number of rows: %d", atl03df:numrows())) + runner.check(atl03df:numcols() == 17, string.format("incorrect number of columns: %d", atl03df:numcols())) + + check_expected({ + time_ns = 1583304724130344448, + latitude = 79.993572, + longitude = -40.942408, + x_atc = 11132842.088085, + y_atc = 3271.814941, + height = 2178.863281, + relief = 0.0, + solar_elevation = -11.243111, + background_rate = 33019.825791, + spacecraft_velocity = 7096.781738, + landcover = 255, + snowcover = 255, + atl08_class = 4, + atl03_cnf = 4, + quality_ph = 0, + yapc_score = 0, + segment_id = 555765 + }, atl03df, 100, 0.00001) + + check_expected({ + spot = 6, + cycle = 6, + region = 5, + reference_ground_track = 1047, + spacecraft_orientation = 1 + }, atl03df, nil, 0) + +end) + +-- Unit Test -- + +runner.unittest("ATL03 DataFrame - Ancillary Data", function() + + local parms = icesat2.parms({ + cnf = 4, + resource = "ATL03_20200304065203_10470605_006_01.h5", + atl03_geo_fields = {"knn", "pitch"}, + atl03_corr_fields = {"geoid"}, + atl03_ph_fields = {"ph_id_channel", "ph_id_pulse"}, + atl08_fields = {"h_dif_ref", "rgt", "sigma_atlas_land", "cloud_flag_atm"} + }) + + local atl03h5 = h5.object(asset_name, parms["resource"]) + local atl08h5 = h5.object(asset_name, "ATL08_20200304065203_10470605_006_01.h5") + local atl03df = icesat2.atl03x("gt2r", parms, atl03h5, atl08h5, core.EVENTQ) + + runner.check(atl03df:waiton(30000), "failed to create 
dataframe", true) + runner.check(atl03df:inerror() == false, "dataframe encountered error") + + runner.check(atl03df:numrows() == 19522774, string.format("incorrect number of rows: %d", atl03df:numrows())) + runner.check(atl03df:numcols() == 26, string.format("incorrect number of columns: %d", atl03df:numcols())) + + check_expected({ + time_ns = 1583304724455644416, + latitude = 80.000077, + longitude = -41.109609, + x_atc = 11132821.369912, + y_atc = -52.097466, + height = 2180.452148, + relief = 0.0, + solar_elevation = -11.265012, + background_rate = 10853.832031, + spacecraft_velocity = 7096.785645, + landcover = 255, + snowcover = 255, + atl08_class = 1, + atl03_cnf = 4, + quality_ph = 0, + yapc_score = 0, + segment_id = 555764, + knn = 14, + pitch = -0.049935, + geoid = 33.014797, + ph_id_channel = 92, + ph_id_pulse = 83, + h_dif_ref = 0.676025, + rgt = 1047, + sigma_atlas_land = 0.130923, + cloud_flag_atm = 0 + }, atl03df, 100, 0.00001) + + check_expected({ + spot = 3, + cycle = 6, + region = 5, + reference_ground_track = 1047, + spacecraft_orientation = 1 + }, atl03df, nil, 0) + +end) + +-- Report Results -- + +runner.report() + diff --git a/packages/core/GeoDataFrame.cpp b/packages/core/GeoDataFrame.cpp index a3b1da07..97d5515b 100644 --- a/packages/core/GeoDataFrame.cpp +++ b/packages/core/GeoDataFrame.cpp @@ -96,6 +96,9 @@ int GeoDataFrame::FrameColumn::luaGetData (lua_State* L) GeoDataFrame::FrameColumn* lua_obj = dynamic_cast(getLuaSelf(L, 1)); const long index = getLuaInteger(L, 2) - 1; // lua indexing starts at 1, convert to c indexing that starts at 0 + // check index + if(index < 0) throw RunTimeException(CRITICAL, RTE_ERROR, "invalid index: %ld", index + 1); + // check the metatable for the key (to support functions) luaL_getmetatable(L, lua_obj->LuaMetaName); lua_pushinteger(L, index); @@ -108,7 +111,7 @@ int GeoDataFrame::FrameColumn::luaGetData (lua_State* L) } catch(const RunTimeException& e) { - mlog(e.level(), "Error exporting %s: %s", 
OBJECT_TYPE, e.what()); + mlog(e.level(), "Error indexing frame column %s: %s", OBJECT_TYPE, e.what()); lua_pushnil(L); } @@ -625,6 +628,8 @@ GeoDataFrame::GeoDataFrame( lua_State* L, { // set lua functions LuaEngine::setAttrFunc(L, "inerror", luaInError); + LuaEngine::setAttrFunc(L, "numrows", luaNumRows); + LuaEngine::setAttrFunc(L, "numcols", luaNumColumns); LuaEngine::setAttrFunc(L, "export", luaExport); LuaEngine::setAttrFunc(L, "send", luaSend); LuaEngine::setAttrFunc(L, "receive", luaReceive); @@ -1059,6 +1064,44 @@ int GeoDataFrame::luaInError (lua_State* L) return 1; } +/*---------------------------------------------------------------------------- + * luaNumRows - numrows() + *----------------------------------------------------------------------------*/ +int GeoDataFrame::luaNumRows (lua_State* L) +{ + try + { + const GeoDataFrame* lua_obj = dynamic_cast<GeoDataFrame*>(getLuaSelf(L, 1)); + lua_pushinteger(L, lua_obj->numRows); + } + catch(const RunTimeException& e) + { + mlog(e.level(), "Error determining number of rows in dataframe: %s", e.what()); + lua_pushnil(L); + } + + return 1; +} + +/*---------------------------------------------------------------------------- + * luaNumColumns - numcols() + *----------------------------------------------------------------------------*/ +int GeoDataFrame::luaNumColumns (lua_State* L) +{ + try + { + const GeoDataFrame* lua_obj = dynamic_cast<GeoDataFrame*>(getLuaSelf(L, 1)); + lua_pushinteger(L, lua_obj->columnFields.length()); + } + catch(const RunTimeException& e) + { + mlog(e.level(), "Error determining number of columns in dataframe: %s", e.what()); + lua_pushnil(L); + } + + return 1; +} + /*---------------------------------------------------------------------------- * luaExport - export() --> lua table *----------------------------------------------------------------------------*/ diff --git a/packages/core/GeoDataFrame.h b/packages/core/GeoDataFrame.h index 8315fbbf..32fc4162 100644 --- a/packages/core/GeoDataFrame.h +++ 
b/packages/core/GeoDataFrame.h @@ -214,6 +214,8 @@ class GeoDataFrame: public LuaObject, public Field void fromLua (lua_State* L, int index) override; static int luaInError (lua_State* L); + static int luaNumRows (lua_State* L); + static int luaNumColumns (lua_State* L); static int luaExport (lua_State* L); static int luaSend (lua_State* L); static int luaReceive (lua_State* L); diff --git a/packages/h5/H5VarSet.h b/packages/h5/H5VarSet.h index 7af594dc..80dc06c7 100644 --- a/packages/h5/H5VarSet.h +++ b/packages/h5/H5VarSet.h @@ -57,6 +57,7 @@ class H5VarSet *--------------------------------------------------------------------*/ H5VarSet (const FieldList& variable_list, H5Coro::Context* context, const char* group=NULL, long col=0, long startrow=0, long numrows=H5Coro::ALL_ROWS); virtual ~H5VarSet (void) = default; + long length (void) const { return variables.length(); } void joinToGDF (GeoDataFrame* gdf, int timeout_ms, bool throw_exception=true); void addToGDF (GeoDataFrame* gdf, long element) const; static int getDictSize (int list_size) { return list_size * 2 + 1; };