From 6dbe670d630617b0616609c17dec43b11a2a2b1f Mon Sep 17 00:00:00 2001
From: Patrick Brosi
Date: Mon, 25 Nov 2024 16:19:50 +0100
Subject: [PATCH] intermediate commit

---
 Note: the hunk line counts below predate review fixes made directly in this
 patch text; apply with `git apply --recount`.

 src/pfaedle/osm/source/PBFSource.cpp | 392 +++++++++++++++++++++++++--
 src/pfaedle/osm/source/PBFSource.h   |  66 +++++
 2 files changed, 439 insertions(+), 19 deletions(-)

diff --git a/src/pfaedle/osm/source/PBFSource.cpp b/src/pfaedle/osm/source/PBFSource.cpp
index 28f0a62..3024648 100644
--- a/src/pfaedle/osm/source/PBFSource.cpp
+++ b/src/pfaedle/osm/source/PBFSource.cpp
@@ -2,71 +2,425 @@
 // Chair of Algorithms and Data Structures.
 // Authors: Patrick Brosi
 
+#include <fcntl.h>
+#include <unistd.h>
+
 #include "pfaedle/osm/source/PBFSource.h"
+#include "protozero/pbf_reader.hpp"
 #include "util/Misc.h"
+#ifndef PFXML_NO_ZLIB
+#include <zlib.h>
+#endif
 
-using pfaedle::osm::source::PBFSource;
+using pfaedle::osm::source::OsmSourceAttr;
 using pfaedle::osm::source::OsmSourceNode;
-using pfaedle::osm::source::OsmSourceWay;
 using pfaedle::osm::source::OsmSourceRelation;
 using pfaedle::osm::source::OsmSourceRelationMember;
-using pfaedle::osm::source::OsmSourceAttr;
+using pfaedle::osm::source::OsmSourceWay;
+using pfaedle::osm::source::PBFSource;
+
+// Size in bytes of each read buffer and of the decompression buffer.
+static const size_t BUFFER_S = 32 * 1024 * 1024;
 
 // _____________________________________________________________________________
+// Opens the PBF file at `path`, allocates the two read buffers and the
+// decompression buffer (BUFFER_S + 1 bytes each), reads the first chunk of the
+// file and parses the first two blobs.
+//
+// Throws std::runtime_error if the file cannot be opened or read. Everything
+// acquired up to that point is released before the exception leaves, because
+// the destructor does not run for a partially constructed object.
 PBFSource::PBFSource(const std::string& path) : _path(path) {
   std::cout << "Init PBF source" << std::endl;
+
+  // start from a known state so the cleanup path below is always safe
+  _buf = nullptr;
+  _blockbuf = nullptr;
+  _c = nullptr;
+
+  _file = open(_path.c_str(), O_RDONLY);
+  if (_file < 0) throw std::runtime_error(std::string("could not open file"));
+
+  try {
+    // value-initialise the slot array so slots not yet filled are null
+    _buf = new unsigned char*[2]();
+    _buf[0] = new unsigned char[BUFFER_S + 1];
+    _buf[1] = new unsigned char[BUFFER_S + 1];
+    _blockbuf = new unsigned char[BUFFER_S + 1];
+
+#ifdef __unix__
+    posix_fadvise(_file, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+
+    // every blob starts with a 4-byte length prefix, so a read error (-1) or
+    // anything shorter than that prefix cannot be parsed
+    const ssize_t numRead = read(_file, _buf[_which], BUFFER_S);
+    if (numRead < 4) throw std::runtime_error("could not read file");
+    _c = _buf[_which];
+
+    getNextBlock();
+
+    getNextBlock();
+  } catch (...) {
+    if (_buf) {
+      delete[] _buf[0];
+      delete[] _buf[1];
+      delete[] _buf;
+    }
+    delete[] _blockbuf;
+    close(_file);
+    throw;
+  }
+
+  // TODO(review): debugging scaffold - this terminates the process as soon as
+  // the first two blobs have been parsed. Remove once the node/way/relation
+  // accessors are implemented.
+  exit(1);
 }
 
 // _____________________________________________________________________________
-const OsmSourceNode* PBFSource::nextNode() {
-  return 0;
+// Consumes one blob from the read buffer at _c: the 4-byte big-endian
+// BlobHeader length, the BlobHeader, and the Blob it announces. "OSMHeader"
+// and "OSMData" blobs are decoded; a blob of any other type is stepped over so
+// that _c is left on the next length prefix.
+// NOTE(review): _c is never checked against the end of the read buffer and
+// the buffer is never refilled here - TODO before reading whole files.
+void PBFSource::getNextBlock() {
+  // block begins by big-endian 32bit length of BlobHeader; widen each byte
+  // before shifting so a set top bit cannot overflow a signed int
+  const uint32_t blobHeaderLength = (static_cast<uint32_t>(_c[0]) << 24) |
+                                    (static_cast<uint32_t>(_c[1]) << 16) |
+                                    (static_cast<uint32_t>(_c[2]) << 8) |
+                                    static_cast<uint32_t>(_c[3]);
+  std::cout << "blob header length: " << blobHeaderLength << " bytes"
+            << std::endl;
+  _c += 4;
+
+  const auto header = parseBlobHeader(blobHeaderLength);
+
+  std::cout << "BLOB: <" << header.type << ">, len " << header.datasize
+            << std::endl;
+
+  if (header.type == "OSMHeader") {
+    parseOSMHeader(parseBlob(header.datasize));
+  } else if (header.type == "OSMData") {
+    parseOSMData(parseBlob(header.datasize));
+  } else {
+    // unknown blob type: skip its payload instead of leaving _c inside it
+    _c += header.datasize;
+  }
 }
 
 // _____________________________________________________________________________
-void PBFSource::seekNodes() {
+// Parses the length-delimited HeaderBBox message at c and advances c past it.
+// Field 1 is read as the lower-left x, field 4 as the lower-left y, field 2 as
+// the upper-right x and field 3 as the upper-right y; every other field is
+// skipped. Coordinates missing from the message are reported as 0.
+// NOTE(review): the values are returned unscaled. The OSM PBF format stores
+// them in nanodegrees - confirm which unit callers expect.
+util::geo::Box<double> PBFSource::parseHeaderBBox(unsigned char*& c) {
+  const auto len = parseVarUInt(c);
+  const auto start = c;
+
+  int64_t llx = 0;
+  int64_t lly = 0;
+  int64_t urx = 0;
+  int64_t ury = 0;
+
+  while (static_cast<size_t>(c - start) < len) {
+    const auto typeId = nextTypeAndId(c);
+
+    if (typeId.second == 1) {
+      llx = parseVarInt(c);
+    } else if (typeId.second == 4) {
+      lly = parseVarInt(c);
+    } else if (typeId.second == 2) {
+      urx = parseVarInt(c);
+    } else if (typeId.second == 3) {
+      ury = parseVarInt(c);
+    } else {
+      // without this, the payload of an unknown field would be read as keys
+      skipType(typeId.first, c);
+    }
+  }
+
+  return {{llx * 1.0, lly * 1.0}, {urx * 1.0, ury * 1.0}};
 }
 
 // _____________________________________________________________________________
-void PBFSource::seekWays() {
+// Decodes the PrimitiveBlock message stored in `blob`: the string table
+// (field 1), the start of each primitive group (field 2), granularity (17),
+// date granularity (18) and the lat/lon offsets (19/20). Unknown fields are
+// skipped.
+// The recorded group pointers point into blob.content and are only valid
+// while that memory is.
+PBFSource::PrimitiveBlock PBFSource::parseOSMData(const Blob& blob) {
+  auto c = reinterpret_cast<unsigned char*>(const_cast<char*>(blob.content));
+  const auto start = c;
+
+  PBFSource::PrimitiveBlock block;
+
+  while (static_cast<size_t>(c - start) < blob.datasize) {
+    const auto typeId = nextTypeAndId(c);
+
+    if (typeId.second == 1) {
+      block.stringTable = parseStringTable(c);
+    } else if (typeId.second == 2) {
+      // Remember where the group starts (at its length prefix), then step
+      // over its payload so the loop continues with the next field key.
+      // NOTE(review): primitiveGroups is a std::vector whose element type was
+      // lost in this patch text; push_back assumes a vector of byte pointers.
+      block.primitiveGroups.push_back(c);
+      skipType(typeId.first, c);
+    } else if (typeId.second == 17) {
+      block.granularity = parseVarUInt(c);
+    } else if (typeId.second == 19) {
+      block.latOffset = parseVarUInt(c);
+    } else if (typeId.second == 20) {
+      block.lonOffset = parseVarUInt(c);
+    } else if (typeId.second == 18) {
+      block.dateGranularity = parseVarUInt(c);
+    } else {
+      skipType(typeId.first, c);
+    }
+  }
+
+  return block;
 }
 
 // _____________________________________________________________________________
-void PBFSource::seekRels() {
+// Parses the length-delimited StringTable message at c and returns its
+// entries (field 1) in file order; c is advanced past the message.
+std::vector<std::string> PBFSource::parseStringTable(unsigned char*& c) {
+  const size_t len = parseVarUInt(c);
+  // measure consumed bytes from the first payload byte; counting the length
+  // prefix as well would end the loop before the last entries are read
+  const auto start = c;
+
+  std::cout << len << std::endl;
+
+  std::vector<std::string> table;
+  table.reserve(len / 10);
+
+  while (static_cast<size_t>(c - start) < len) {
+    const auto typeId = nextTypeAndId(c);
+
+    if (typeId.second == 1) {
+      table.push_back(parseString(c));
+    } else {
+      skipType(typeId.first, c);
+    }
+  }
+
+  return table;
 }
 
 // _____________________________________________________________________________
-void PBFSource::cont() {
+// Decodes the HeaderBlock message stored in `blob`: bounding box (field 1),
+// required features (4), optional features (5) and writing program (16).
+// Unknown fields are skipped.
+PBFSource::OSMHeader PBFSource::parseOSMHeader(const Blob& blob) {
+  auto c = reinterpret_cast<unsigned char*>(const_cast<char*>(blob.content));
+  const auto start = c;
+
+  PBFSource::OSMHeader header;
+
+  while (static_cast<size_t>(c - start) < blob.datasize) {
+    const auto typeId = nextTypeAndId(c);
+
+    std::cout << "TYPE " << (int)typeId.first << " " << "ID " << (int)typeId.second << std::endl;
+
+    if (typeId.second == 1) {
+      // keep the box instead of parsing it into a discarded local
+      header.bbox = parseHeaderBBox(c);
+    } else if (typeId.second == 4) {
+      header.requiredFeatures.push_back(parseString(c));
+    } else if (typeId.second == 5) {
+      header.optionalFeatures.push_back(parseString(c));
+    } else if (typeId.second == 16) {
+      header.writingProgram = parseString(c);
+    } else {
+      skipType(typeId.first, c);
+    }
+  }
+
+  return header;
 }
 
 // _____________________________________________________________________________
-const OsmSourceWay* PBFSource::nextWay() {
-  return 0;
+// Parses the Blob message of `len` bytes at _c and returns a view of its
+// uncompressed content. Raw content (field 1) is returned in place; zlib
+// content (field 3) is inflated into _blockbuf, so the returned view is only
+// valid until the next zlib blob is parsed.
+// Throws std::runtime_error for unsupported compression schemes, for a
+// payload field that is not length-delimited, and if inflating fails.
+PBFSource::Blob PBFSource::parseBlob(size_t len) {
+  const auto start = _c;
+
+  PBFSource::Blob ret;
+  ret.content = nullptr;
+  ret.datasize = 0;
+
+  while (static_cast<size_t>(_c - start) < len) {
+    const auto typeId = nextTypeAndId();
+
+    if (typeId.second == 1) {
+      // raw, no compression. The key has already been consumed above, the
+      // next bytes are the length prefix - reading a second key here would
+      // swallow it.
+      if (typeId.first != PBFSource::VarType::S) {
+        throw std::runtime_error("expected byte array value");
+      }
+      ret.datasize = parseVarUInt();
+      ret.content = reinterpret_cast<const char*>(_c);
+      _c += ret.datasize;
+    } else if (typeId.second == 4) {
+      throw std::runtime_error("LZMA compression not supported");
+    } else if (typeId.second == 5) {
+      throw std::runtime_error("BZIP2 compression not supported");
+    } else if (typeId.second == 6) {
+      throw std::runtime_error("LZ4 compression not supported");
+    } else if (typeId.second == 7) {
+      throw std::runtime_error("ZSTD compression not supported");
+    } else if (typeId.second == 3) {
+      // ZLIB compression
+#ifndef PFXML_NO_ZLIB
+      if (typeId.first != PBFSource::VarType::S) {
+        throw std::runtime_error("expected byte array value");
+      }
+      const size_t size = parseVarUInt();
+      // in: capacity of _blockbuf, out: number of bytes actually written
+      uLongf uncompressedSize = BUFFER_S;
+      if (uncompress(_blockbuf, &uncompressedSize, _c, size) != Z_OK) {
+        throw std::runtime_error("could not decompress ZLIB blob");
+      }
+
+      ret.content = reinterpret_cast<const char*>(_blockbuf);
+      ret.datasize = uncompressedSize;
+
+      _c += size;
+#else
+      throw std::runtime_error("ZLIB compression not supported");
+#endif
+    } else if (typeId.second == 2) {
+      // announced uncompressed size; replaced by the real size once the
+      // payload itself has been read
+      ret.datasize = parseUInt(typeId);
+    } else {
+      skipType(typeId.first);
+    }
+  }
+
+  return ret;
 }
 
 // _____________________________________________________________________________
-const OsmSourceRelationMember* PBFSource::nextMember() {
-  return 0;
+// Parses the BlobHeader message of `len` bytes at _c: the blob type (field 1)
+// and the size of the following Blob (field 3). Other fields are skipped; a
+// missing size is reported as 0.
+PBFSource::BlobHeader PBFSource::parseBlobHeader(size_t len) {
+  const auto start = _c;
+
+  BlobHeader ret;
+  ret.datasize = 0;
+
+  while (static_cast<size_t>(_c - start) < len) {
+    const auto typeId = nextTypeAndId();
+
+    if (typeId.second == 1) {
+      ret.type = parseString();
+    } else if (typeId.second == 3) {
+      ret.datasize = parseUInt(typeId);
+    } else {
+      skipType(typeId.first);
+    }
+  }
+
+  return ret;
 }
 
 // _____________________________________________________________________________
-uint64_t PBFSource::nextMemberNode() {
-  return 0;
+// Reads a fixed 32 bit value at the member cursor _c.
+uint32_t PBFSource::parseFixedUInt32() { return parseFixedUInt32(_c); }
+
+// _____________________________________________________________________________
+// Reads a little-endian fixed 32 bit value at c and advances c by 4. The
+// value is assembled byte by byte, which needs no particular alignment of c
+// and gives the same result on any host byte order.
+uint32_t PBFSource::parseFixedUInt32(unsigned char*& c) {
+  uint32_t ret = 0;
+  for (int i = 0; i < 4; i++) ret |= static_cast<uint32_t>(c[i]) << (8 * i);
+  c += 4;
+  return ret;
 }
 
 // _____________________________________________________________________________
-const OsmSourceRelation* PBFSource::nextRel() {
-  return 0;
+// Reads a fixed 64 bit value at the member cursor _c.
+uint64_t PBFSource::parseFixedUInt64() { return parseFixedUInt64(_c); }
+
+// _____________________________________________________________________________
+// Reads a little-endian fixed 64 bit value at c and advances c by 8; see
+// parseFixedUInt32 for why the bytes are assembled by hand.
+uint64_t PBFSource::parseFixedUInt64(unsigned char*& c) {
+  uint64_t ret = 0;
+  for (int i = 0; i < 8; i++) ret |= static_cast<uint64_t>(c[i]) << (8 * i);
+  c += 8;
+  return ret;
 }
 
 // _____________________________________________________________________________
-const OsmSourceAttr PBFSource::nextAttr() {
+// Reads a length-prefixed string at the member cursor _c.
+std::string PBFSource::parseString() { return parseString(_c); }
+
+// _____________________________________________________________________________
+// Reads a varint length followed by that many bytes at c, returns a copy of
+// the bytes and advances c past them.
+std::string PBFSource::parseString(unsigned char*& c) {
+  const auto len = parseVarUInt(c);
+  std::string ret(reinterpret_cast<const char*>(c), len);
+  c += len;
+
+  return ret;
 }
 
+// _____________________________________________________________________________
+// Reads an unsigned integer at _c whose encoding is given by the wire type in
+// typeId.first (fixed 32 bit, fixed 64 bit or varint). Throws
+// std::runtime_error for any other wire type.
+uint64_t PBFSource::parseUInt(std::pair<VarType, uint8_t> typeId) {
+  if (typeId.first == PBFSource::VarType::I) {
+    return parseFixedUInt32();
+  }
+
+  if (typeId.first == PBFSource::VarType::D) {
+    return parseFixedUInt64();
+  }
+
+  if (typeId.first == PBFSource::VarType::V) {
+    return parseVarUInt();
+  }
+
+  throw std::runtime_error(std::string("expected integer value"));
+}
+
+// _____________________________________________________________________________
+// Reads a zigzag-encoded signed varint at c and advances c past it.
+int64_t PBFSource::parseVarInt(unsigned char*& c) {
+  const uint64_t raw = parseVarUInt(c);
+  // zigzag decode: the lowest bit carries the sign, the remaining bits the
+  // magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...)
+  return static_cast<int64_t>((raw >> 1) ^ (0 - (raw & 1)));
+}
+
+// _____________________________________________________________________________
+// Reads a zigzag-encoded signed varint at the member cursor _c.
+int64_t PBFSource::parseVarInt() { return parseVarInt(_c); }
+
+// _____________________________________________________________________________
+// Reads a base-128 varint at c (7 payload bits per byte, least significant
+// group first, high bit set on every byte but the last) and advances c past
+// it. Throws std::runtime_error if the value does not end within 10 bytes,
+// the most a 64 bit value can occupy.
+uint64_t PBFSource::parseVarUInt(unsigned char*& c) {
+  uint64_t ret = 0;
+
+  for (int shift = 0; shift < 64; shift += 7) {
+    const uint64_t cur = *c;
+    c++;
+    ret |= (cur & 0x7F) << shift;
+    if (!(cur & 0x80)) return ret;
+  }
+
+  throw std::runtime_error("parse error");
+}
+
+// _____________________________________________________________________________
+// Reads a varint at the member cursor _c.
+uint64_t PBFSource::parseVarUInt() { return parseVarUInt(_c); }
+
+// _____________________________________________________________________________
+// Reads a field key at the member cursor _c.
+std::pair<PBFSource::VarType, uint8_t> PBFSource::nextTypeAndId() {
+  return nextTypeAndId(_c);
+}
+
+// _____________________________________________________________________________
+// Reads a field key at c and advances c past it. A key is a varint whose low
+// three bits are the wire type and whose remaining bits are the field number.
+// NOTE(review): the template arguments of the returned pair were lost in this
+// patch text; <VarType, uint8_t> is reconstructed from how the result is used
+// - confirm against PBFSource.h.
+std::pair<PBFSource::VarType, uint8_t> PBFSource::nextTypeAndId(
+    unsigned char*& c) {
+  const uint64_t key = parseVarUInt(c);
+
+  return {PBFSource::VarType(key & 7), static_cast<uint8_t>(key >> 3)};
+}
 
 // _____________________________________________________________________________
-util::geo::Box<double> PBFSource::getBounds() {
+// Skips one value of the given wire type at the member cursor _c.
+void PBFSource::skipType(PBFSource::VarType type) { skipType(type, _c); }
+
+// _____________________________________________________________________________
+// Advances c past one value of the given wire type without decoding it.
+// Throws std::runtime_error for a wire type this reader does not know.
+void PBFSource::skipType(PBFSource::VarType type, unsigned char*& c) {
+  if (type == PBFSource::VarType::V) {
+    parseVarUInt(c);
+    return;
+  }
+  if (type == PBFSource::VarType::D) {
+    c += 8;
+    return;
+  }
+  if (type == PBFSource::VarType::S) {
+    // step over the payload; no need to copy it into a string first
+    const auto len = parseVarUInt(c);
+    c += len;
+    return;
+  }
+  if (type == PBFSource::VarType::I) {
+    c += 4;
+    return;
+  }
+
+  throw std::runtime_error("parse error");
 }
 
+// _____________________________________________________________________________
+// Releases the buffers and closes the input file.
+PBFSource::~PBFSource() {
+  delete[] _buf[0];
+  delete[] _buf[1];
+  delete[] _buf;
+  delete[] _blockbuf;
+  if (_file >= 0) close(_file);
+}
+
+// _____________________________________________________________________________
+// Not implemented yet: always returns a null pointer.
+const OsmSourceNode* PBFSource::nextNode() { return nullptr; }
+
+// _____________________________________________________________________________
+// The accessors below are placeholders until the PBF reader is wired up: the
+// seek/cont functions do nothing and the next* functions report "no element".
+void PBFSource::seekNodes() {}
+
+// _____________________________________________________________________________
+void PBFSource::seekWays() {}
+
+// _____________________________________________________________________________
+void PBFSource::seekRels() {}
+
+// _____________________________________________________________________________
+void PBFSource::cont() {}
+
+// _____________________________________________________________________________
+const OsmSourceWay* PBFSource::nextWay() { return nullptr; }
+
+// _____________________________________________________________________________
+const OsmSourceRelationMember* PBFSource::nextMember() { return nullptr; }
+
+// _____________________________________________________________________________
+uint64_t PBFSource::nextMemberNode() { return 0; }
+
+// _____________________________________________________________________________
+const OsmSourceRelation* PBFSource::nextRel() { return nullptr; }
+
+// _____________________________________________________________________________
+// Placeholder: returns a value-initialised attribute, so that control never
+// runs off the end of a non-void function (undefined behaviour).
+const OsmSourceAttr PBFSource::nextAttr() { return {}; }
+
+// _____________________________________________________________________________
+// Placeholder: returns a default-constructed box, for the same reason.
+util::geo::Box<double> PBFSource::getBounds() { return {}; }
+
 // _____________________________________________________________________________
 std::string PBFSource::decode(const char* str) const {
   return str;  // TODO
diff --git a/src/pfaedle/osm/source/PBFSource.h b/src/pfaedle/osm/source/PBFSource.h
index 457ce44..4de323c 100644
--- a/src/pfaedle/osm/source/PBFSource.h
+++ b/src/pfaedle/osm/source/PBFSource.h
@@ -6,6 +6,7 @@
 #define PFAEDLE_OSM_SOURCE_PBFSOURCE_H_
 
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
 #include "pfaedle/osm/source/OsmSource.h"
+#include "util/geo/Geo.h"
 
 namespace pfaedle {
@@ -13,8 +14,42 @@ namespace osm {
 namespace source {
 
 class PBFSource : public OsmSource {
+  // Wire type of an encoded field, taken from the low three bits of its key.
+  enum VarType : uint8_t {
+    V = 0,  // varint
+    D = 1,  // fixed 64 bit
+    S = 2,  // length-delimited (strings, bytes, nested messages)
+    I = 5,  // fixed 32 bit
+  };
+
+  // Contents of an "OSMHeader" blob.
+  struct OSMHeader {
+    util::geo::Box<double> bbox;
+    std::vector<std::string> requiredFeatures;
+    std::vector<std::string> optionalFeatures;
+    std::string writingProgram;
+  };
+
+  // Contents of an "OSMData" blob.
+  struct PrimitiveBlock {
+    std::vector<std::string> stringTable;
+    // NOTE(review): the element type was lost in this patch text; byte
+    // pointers to the start of each group are assumed - confirm.
+    std::vector<const unsigned char*> primitiveGroups;
+    uint32_t granularity = 100;
+    uint64_t latOffset = 0;
+    uint64_t lonOffset = 0;
+    uint32_t dateGranularity = 1000;
+  };
+
+  // Type and payload size of the blob that follows a blob header.
+  struct BlobHeader {
+    std::string type;
+    uint32_t datasize = 0;
+  };
+
+  // Non-owning view of a blob's uncompressed content.
+  struct Blob {
+    const char* content = nullptr;
+    uint32_t datasize = 0;
+  };
+
   public:
   PBFSource(const std::string& path);
+  virtual ~PBFSource();
+  // the object owns a file descriptor and raw buffers; a copy would release
+  // them a second time
+  PBFSource(const PBFSource&) = delete;
+  PBFSource& operator=(const PBFSource&) = delete;
   virtual const OsmSourceNode* nextNode();
   virtual const OsmSourceAttr nextAttr();
   virtual const OsmSourceWay* nextWay();
@@ -33,6 +68,37 @@ class PBFSource : public OsmSource {
   virtual std::string decode(const std::string& str) const;
   private:
   std::string _path;
+  // descriptor of the input file, -1 while no file is open
+  int _file = -1;
+
+  // the two read buffers and the read cursor into the active one
+  unsigned char** _buf = nullptr;
+  unsigned char* _c = nullptr;
+
+  // target buffer for decompressed blob content
+  unsigned char* _blockbuf = nullptr;
+
+  // index of the active read buffer in _buf
+  uint8_t _which = 0;
+
+  // Parsing helpers. Overloads without a cursor argument read at _c, the
+  // others read at the given cursor; all of them advance the cursor they use.
+  void getNextBlock();
+  OSMHeader parseOSMHeader(const Blob& blob);
+  PrimitiveBlock parseOSMData(const Blob& blob);
+  BlobHeader parseBlobHeader(size_t len);
+  std::vector<std::string> parseStringTable(unsigned char*& c);
+  Blob parseBlob(size_t len);
+  // NOTE(review): the pair's template arguments were lost in this patch text;
+  // <VarType, uint8_t> is reconstructed from usage in PBFSource.cpp - confirm.
+  std::pair<VarType, uint8_t> nextTypeAndId();
+  std::pair<VarType, uint8_t> nextTypeAndId(unsigned char*& c);
+  std::string parseString();
+  std::string parseString(unsigned char*& c);
+  uint32_t parseFixedUInt32();
+  uint32_t parseFixedUInt32(unsigned char*& c);
+  uint64_t parseFixedUInt64();
+  uint64_t parseFixedUInt64(unsigned char*& c);
+  int64_t parseVarInt();
+  int64_t parseVarInt(unsigned char*& c);
+  uint64_t parseVarUInt();
+  uint64_t parseVarUInt(unsigned char*& c);
+  uint64_t parseUInt(std::pair<VarType, uint8_t> typeId);
+  void skipType(VarType type, unsigned char*& c);
+  void skipType(VarType type);
+  util::geo::Box<double> parseHeaderBBox(unsigned char*& c);
 };
 
 }  // namespace source