Skip to content

Commit 2b4e9d3

Browse files
committed Oct 28, 2024
Merge branch 'main' of github.com:SlideRuleEarth/sliderule
2 parents 7a82459 + 0577149 commit 2b4e9d3

22 files changed

+602
-312
lines changed
 

‎clients/python/tests/test_arcticdem.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -113,8 +113,9 @@ def test_indexed_raster(self, init):
113113
"samples": {"strips": {"asset": "arcticdem-strips", "with_flags": True}} }
114114
gdf = icesat2.atl06p(parms, resources=['ATL03_20191108234307_06580503_005_01.h5'])
115115
assert init
116-
assert len(gdf.attrs['file_directory']) == 32
117-
for file_id in range(16):
116+
assert len(gdf.attrs['file_directory']) == 16
117+
for file_id in range(0, 16, 2):
118118
assert file_id in gdf.attrs['file_directory'].keys()
119119
assert '/pgc-opendata-dems/arcticdem/strips/' in gdf.attrs['file_directory'][file_id]
120+
assert '_dem.tif' in gdf.attrs['file_directory'][file_id] # only dems, no flags
120121

‎datasets/gebco/package/GebcoBathyRaster.cpp

+5-6
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,6 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder)
8686
if (!rastergeo->Intersects(geo)) continue;
8787

8888
rasters_group_t* rgroup = new rasters_group_t;
89-
rgroup->featureId = feature->GetFieldAsString("id");
9089
rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate);
9190

9291
const char* dataFile = feature->GetFieldAsString("data_raster");
@@ -95,7 +94,7 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder)
9594
raster_info_t rinfo;
9695
rinfo.dataIsElevation = true;
9796
rinfo.tag = VALUE_TAG;
98-
rinfo.fileName = filePath + "/" + dataFile;
97+
rinfo.fileId = finder->fileDict.add(filePath + "/" + dataFile);
9998
rgroup->infovect.push_back(rinfo);
10099
}
101100

@@ -106,16 +105,16 @@ bool GebcoBathyRaster::findRasters(raster_finder_t* finder)
106105
{
107106
raster_info_t rinfo;
108107
rinfo.dataIsElevation = false;
109-
rinfo.tag = FLAGS_TAG;
110-
rinfo.fileName = filePath + "/" + flagsFile;
108+
rinfo.tag = FLAGS_TAG;
109+
rinfo.fileId = finder->fileDict.add(filePath + "/" + flagsFile);
111110
rgroup->infovect.push_back(rinfo);
112111
}
113112
}
114113
rgroup->infovect.shrink_to_fit();
115114

116-
mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size());
115+
mlog(DEBUG, "Added group with %ld rasters", rgroup->infovect.size());
117116
for(unsigned j = 0; j < rgroup->infovect.size(); j++)
118-
mlog(DEBUG, " %s", rgroup->infovect[j].fileName.c_str());
117+
mlog(DEBUG, " %s", finder->fileDict.get(rgroup->infovect[j].fileId));
119118

120119
// Add the group
121120
finder->rasterGroups.push_back(rgroup);

‎datasets/landsat/package/LandsatHlsRaster.cpp

+11-10
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder)
168168

169169
/* Set raster group time and group featureId */
170170
rasters_group_t* rgroup = new rasters_group_t;
171-
rgroup->featureId = feature->GetFieldAsString("id");
171+
rgroup->featureId = StringLib::duplicate(feature->GetFieldAsString("id"));
172172
rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate);
173173

174174
/* Find each requested band in the index file */
@@ -188,7 +188,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder)
188188

189189
raster_info_t rinfo;
190190
rinfo.dataIsElevation = false; /* All bands are not elevation */
191-
rinfo.fileName = filePath + fileName.substr(pos);
191+
rinfo.fileId = finder->fileDict.add(filePath + fileName.substr(pos));
192192

193193
if(strcmp(bandName, "Fmask") == 0)
194194
{
@@ -207,7 +207,7 @@ bool LandsatHlsRaster::findRasters(raster_finder_t* finder)
207207
}
208208
}
209209

210-
// mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size());
210+
// mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId, rgroup->infovect.size());
211211
finder->rasterGroups.push_back(rgroup);
212212
}
213213
// mlog(DEBUG, "Found %ld raster groups", finder->rasterGroups.size());
@@ -274,10 +274,11 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
274274
bool isS2 = false;
275275
std::size_t pos;
276276

277-
pos = rgroup->featureId.find("HLS.L30");
277+
const std::string featureId = rgroup->featureId;
278+
pos = featureId.find("HLS.L30");
278279
if(pos != std::string::npos) isL8 = true;
279280

280-
pos = rgroup->featureId.find("HLS.S30");
281+
pos = featureId.find("HLS.S30");
281282
if(pos != std::string::npos) isS2 = true;
282283

283284
if(!isL8 && !isS2)
@@ -295,7 +296,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
295296
{
296297
for(const auto& rinfo : rgroup->infovect)
297298
{
298-
const char* key = rinfo.fileName.c_str();
299+
const char* key = fileDictGet(rinfo.fileId);
299300
cacheitem_t* item;
300301
if(cache.find(key, &item))
301302
{
@@ -407,12 +408,12 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
407408
}
408409

409410
const double groupTime = rgroup->gpsTime / 1000;
410-
const std::string groupName = rgroup->featureId + " {\"algo\": \"";
411+
const std::string groupName = featureId + " {\"algo\": \"";
411412

412413
/* Calculate algos - make sure that all the necessary bands were read */
413414
if(ndsi)
414415
{
415-
RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDSI\"}"));
416+
RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDSI\"}"));
416417
if((green != invalid) && (swir16 != invalid))
417418
sample->value = (green - swir16) / (green + swir16);
418419
else sample->value = invalid;
@@ -421,7 +422,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
421422

422423
if(ndvi)
423424
{
424-
RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDVI\"}"));
425+
RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDVI\"}"));
425426
if((red != invalid) && (nir08 != invalid))
426427
sample->value = (nir08 - red) / (nir08 + red);
427428
else sample->value = invalid;
@@ -430,7 +431,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
430431

431432
if(ndwi)
432433
{
433-
RasterSample* sample = new RasterSample(groupTime, fileDictAdd(groupName + "NDWI\"}"));
434+
RasterSample* sample = new RasterSample(groupTime, fileDict.add(groupName + "NDWI\"}"));
434435
if((nir08 != invalid) && (swir16 != invalid))
435436
sample->value = (nir08 - swir16) / (nir08 + swir16);
436437
else sample->value = invalid;

‎datasets/pgc/package/PgcDemStripsRaster.cpp

+6-7
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ bool PgcDemStripsRaster::findRasters(raster_finder_t* finder)
228228
raster_info_t demRinfo;
229229
demRinfo.dataIsElevation = true;
230230
demRinfo.tag = VALUE_TAG;
231-
demRinfo.fileName = fileName;
231+
demRinfo.fileId = finder->fileDict.add(fileName);
232232

233233
/* bitmask raster, ie flags_file */
234234
if(parms->flags_file)
@@ -242,13 +242,12 @@ bool PgcDemStripsRaster::findRasters(raster_finder_t* finder)
242242
}
243243
else fileName.clear();
244244

245-
raster_info_t flagsRinfo;
246-
flagsRinfo.dataIsElevation = false;
247-
flagsRinfo.tag = FLAGS_TAG;
248-
flagsRinfo.fileName = fileName;
249-
250-
if(!flagsRinfo.fileName.empty())
245+
if(!fileName.empty())
251246
{
247+
raster_info_t flagsRinfo;
248+
flagsRinfo.dataIsElevation = false;
249+
flagsRinfo.tag = FLAGS_TAG;
250+
flagsRinfo.fileId = finder->fileDict.add(fileName);
252251
rgroup->infovect.push_back(flagsRinfo);
253252
}
254253
}

‎datasets/usgs3dep/package/Usgs3dep1meterDemRaster.cpp

+2-3
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,6 @@ bool Usgs3dep1meterDemRaster::findRasters(raster_finder_t* finder)
112112
if (!rastergeo->Intersects(geo)) continue;
113113

114114
rasters_group_t* rgroup = new rasters_group_t;
115-
rgroup->featureId = feature->GetFieldAsString("id");
116115
rgroup->gpsTime = getGmtDate(feature, DATE_TAG, rgroup->gmtDate);
117116

118117
const char* fname = feature->GetFieldAsString("url");
@@ -124,11 +123,11 @@ bool Usgs3dep1meterDemRaster::findRasters(raster_finder_t* finder)
124123
raster_info_t rinfo;
125124
rinfo.dataIsElevation = true;
126125
rinfo.tag = VALUE_TAG;
127-
rinfo.fileName = filePath + fileName.substr(pos);
126+
rinfo.fileId = finder->fileDict.add(filePath + fileName.substr(pos));
128127
rgroup->infovect.push_back(rinfo);
129128
}
130129

131-
// mlog(DEBUG, "Added group: %s with %ld rasters", rgroup->featureId.c_str(), rgroup->infovect.size());
130+
// mlog(DEBUG, "Added group with %ld rasters", rgroup->infovect.size());
132131
finder->rasterGroups.push_back(rgroup);
133132
}
134133
// mlog(DEBUG, "Found %ld raster groups", finder->rasterGroups.size());

‎packages/arrow/ArrowSampler.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,6 @@ void* ArrowSampler::mainThread(void* parm)
182182

183183
/* Release since not needed anymore */
184184
sampler->samples.clear();
185-
sampler->file_ids.clear();
186185
}
187186

188187
try

‎packages/arrow/ArrowSampler.h

-1
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,6 @@ class ArrowSampler: public LuaObject
9191
RasterObject* robj;
9292
ArrowSampler* obj;
9393
List<sample_list_t*> samples;
94-
std::set<uint64_t> file_ids;
9594
std::vector<std::pair<uint64_t, const char*>> filemap;
9695

9796
explicit BatchSampler (const char* _rkey, RasterObject* _robj, ArrowSampler* _obj);

‎packages/arrow/ArrowSamplerImpl.cpp

+5-24
Original file line number | Diff line number | Diff line change
@@ -135,26 +135,13 @@ bool ArrowSamplerImpl::processSamples(ArrowSampler::batch_sampler_t* sampler)
135135
if(status)
136136
{
137137
/* Create raster file map <id, filename> */
138-
Dictionary<uint64_t>::Iterator iterator(sampler->robj->fileDictGet());
139-
for(int i = 0; i < iterator.length; i++)
138+
const std::set<uint64_t> &sampleIds = sampler->robj->fileDictGetSampleIds();
139+
for(std::set<uint64_t>::const_iterator it = sampleIds.begin(); it != sampleIds.end(); it++)
140140
{
141-
const char* name = iterator[i].key;
142-
const uint64_t id = iterator[i].value;
143-
144-
/* For some data sets, dictionary contains quality mask rasters in addition to data rasters.
145-
* Only add rasters with id present in the samples
146-
*/
147-
if(sampler->file_ids.find(id) != sampler->file_ids.end())
148-
{
149-
sampler->filemap.emplace_back(id, name);
150-
}
141+
const uint64_t fileId = *it;
142+
const char* name = sampler->robj->fileDictGet(fileId);
143+
sampler->filemap.emplace_back(fileId, name);
151144
}
152-
153-
/* Sort the map with increasing file id */
154-
std::sort(sampler->filemap.begin(), sampler->filemap.end(),
155-
[](const std::pair<uint64_t, std::string>& a, const std::pair<uint64_t, std::string>& b)
156-
{ return a.first < b.first; });
157-
158145
}
159146
else
160147
{
@@ -534,9 +521,6 @@ bool ArrowSamplerImpl::makeColumnsWithLists(ArrowSampler::batch_sampler_t* sampl
534521
PARQUET_THROW_NOT_OK(stdev_builder->Append(sample->stats.stdev));
535522
PARQUET_THROW_NOT_OK(mad_builder->Append(sample->stats.mad));
536523
}
537-
538-
/* Collect all fileIds used by samples - duplicates are ignored */
539-
sampler->file_ids.insert(sample->fileId);
540524
}
541525
}
542526

@@ -686,9 +670,6 @@ bool ArrowSamplerImpl::makeColumnsWithOneSample(ArrowSampler::batch_sampler_t* s
686670
PARQUET_THROW_NOT_OK(stdev_builder.Append(sample->stats.stdev));
687671
PARQUET_THROW_NOT_OK(mad_builder.Append(sample->stats.mad));
688672
}
689-
690-
/* Collect all fileIds used by samples - duplicates are ignored */
691-
sampler->file_ids.insert(sample->fileId);
692673
}
693674

694675
/* Finish the builders */

‎packages/geo/CMakeLists.txt

+2
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ if (GDAL_FOUND AND PROJ_FOUND AND TIFF_FOUND)
3636
${CMAKE_CURRENT_LIST_DIR}/RasterObject.cpp
3737
${CMAKE_CURRENT_LIST_DIR}/RasterSampler.cpp
3838
${CMAKE_CURRENT_LIST_DIR}/RasterSubset.cpp
39+
${CMAKE_CURRENT_LIST_DIR}/RasterFileDictionary.cpp
3940
${CMAKE_CURRENT_LIST_DIR}/GeoFields.cpp
4041
${CMAKE_CURRENT_LIST_DIR}/GeoLib.cpp
4142
${CMAKE_CURRENT_LIST_DIR}/GeoRtree.cpp
@@ -61,6 +62,7 @@ if (GDAL_FOUND AND PROJ_FOUND AND TIFF_FOUND)
6162
${CMAKE_CURRENT_LIST_DIR}/RasterSampler.h
6263
${CMAKE_CURRENT_LIST_DIR}/RasterSample.h
6364
${CMAKE_CURRENT_LIST_DIR}/RasterSubset.h
65+
${CMAKE_CURRENT_LIST_DIR}/RasterFileDictionary.h
6466
${CMAKE_CURRENT_LIST_DIR}/GeoFields.cpp
6567
${CMAKE_CURRENT_LIST_DIR}/GeoLib.h
6668
${CMAKE_CURRENT_LIST_DIR}/GeoRtree.h

0 commit comments

Comments
 (0)