2 * Copyright (c) 2018, Bertold Van den Bergh (vandenbergh@bertold.org)
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the author nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR DISTRIBUTOR BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <unordered_map>
39 const double Inf = std::numeric_limits<float>::infinity();
41 std::unordered_map<std::string, std::string> alpha2ToName;
42 std::unordered_map<std::string, std::string> tzidToAlpha2;
44 void errorFatal(std::string what)
46 std::cerr<<what<<"\n";
50 int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool handleNeg = true)
52 uint64_t value = valueIn * 2;
54 value = -valueIn * 2 + 1;
63 uint8_t byteOut = value & 0x7F;
67 output.push_back(byteOut);
75 int64_t doubleToFixedPoint(double input, double scale, unsigned int precision = 32)
77 double inputScaled = input / scale;
78 return inputScaled * pow(2, precision-1);
83 Point(double lat = 0, double lon = 0)
89 Point operator-(const Point& p)
91 Point result(lat_ - p.lat_, lon_ - p.lon_);
95 std::tuple<int64_t, int64_t> toFixedPoint(unsigned int precision = 32)
97 int64_t latFixedPoint = doubleToFixedPoint(lat_, 90, precision);
98 int64_t lonFixedPoint = doubleToFixedPoint(lon_, 180, precision);
100 return std::make_tuple(latFixedPoint, lonFixedPoint);
103 int encodePointBinary(std::vector<uint8_t>& output, unsigned int precision = 32)
105 int64_t latFixedPoint, lonFixedPoint;
106 std::tie(latFixedPoint, lonFixedPoint) = toFixedPoint(precision);
108 int bytesUsed = encodeVariableLength(output, latFixedPoint);
109 bytesUsed += encodeVariableLength(output, lonFixedPoint);
121 std::vector<Point> points_;
122 unsigned long fileIndex_ = 0;
123 unsigned long metadataId_;
125 void processPoint(const Point& p)
127 if(p.lat_ < boundingMin.lat_) {
128 boundingMin.lat_ = p.lat_;
130 if(p.lon_ < boundingMin.lon_) {
131 boundingMin.lon_ = p.lon_;
133 if(p.lat_ > boundingMax.lat_) {
134 boundingMax.lat_ = p.lat_;
136 if(p.lon_ > boundingMax.lon_) {
137 boundingMax.lon_ = p.lon_;
140 points_.push_back(p);
143 PolygonData(unsigned long id):
144 boundingMin(Inf, Inf),
145 boundingMax(-Inf, -Inf),
150 long encodeBinaryData(std::vector<uint8_t>& output, unsigned int precision = 20)
152 long bytesEncoded = 0;
154 int64_t latFixedPoint = 0, lonFixedPoint = 0;
155 int64_t latFixedPointPrev, lonFixedPointPrev;
156 uint64_t vertices = 0;
158 std::vector<uint8_t> tmp;
160 int64_t diffLatAcc = 0, diffLonAcc = 0, diffLatPrev = 0, diffLonPrev = 0;
162 for(Point& point: points_) {
163 /* The points should first be rounded, and then the integer value is differentiated */
164 latFixedPointPrev = latFixedPoint;
165 lonFixedPointPrev = lonFixedPoint;
166 std::tie(latFixedPoint, lonFixedPoint) = point.toFixedPoint(precision);
168 int64_t diffLat = latFixedPoint - latFixedPointPrev;
169 int64_t diffLon = lonFixedPoint - lonFixedPointPrev;
172 /* First point is always encoded */
174 encodeVariableLength(tmp, latFixedPoint);
175 encodeVariableLength(tmp, lonFixedPoint);
178 /* Ignore points that are not different */
179 if(!diffLon && !diffLat) {
183 if(diffLat != diffLatPrev || diffLon != diffLonPrev) {
184 /* Encode accumulator */
186 encodeVariableLength(tmp, diffLatAcc);
187 encodeVariableLength(tmp, diffLonAcc);
193 diffLatAcc += diffLat;
194 diffLonAcc += diffLon;
197 diffLatPrev = diffLat;
198 diffLonPrev = diffLon;
201 /* Encode final point */
203 encodeVariableLength(tmp, diffLatAcc);
204 encodeVariableLength(tmp, diffLonAcc);
206 encodeVariableLength(output, vertices, false);
207 std::copy(tmp.begin(), tmp.end(), std::back_inserter(output));
213 void encodeStringToBinary(std::vector<uint8_t>& output, std::string& input)
215 encodeVariableLength(output, input.size(), false);
216 for(unsigned int i=0; i<input.size(); i++) {
217 output.push_back(input[i] ^ 0x80);
222 std::unordered_map<std::string, uint64_t> usedStrings_;
225 void encodeBinaryData(std::vector<uint8_t>& output)
227 for(std::string& str: data_) {
228 if(str.length() >= 256) {
229 std::cout << "Metadata string is too long\n";
233 if(!usedStrings_.count(str)) {
234 usedStrings_[str] = output.size();
235 encodeStringToBinary(output, str);
237 encodeVariableLength(output, usedStrings_[str] + 256, false);
242 std::vector<std::string> data_;
244 unsigned long fileIndex_;
248 std::vector<PolygonData*> polygons_;
249 std::vector<MetaData> metadata_;
250 std::vector<std::string> fieldNames_;
253 unsigned int decodeVariableLength(uint8_t* buffer, int64_t* result, bool handleNeg = true)
256 unsigned int i=0, shift = 0;
259 value |= (buffer[i] & 0x7F) << shift;
261 } while(buffer[i++] & 0x80);
266 *result = (value & 1)?-(value/2):(value/2);
271 void readMetaDataTimezone(DBFHandle dataHandle)
273 /* Specify field names */
274 fieldNames_.push_back("TimezoneIdPrefix");
275 fieldNames_.push_back("TimezoneId");
276 fieldNames_.push_back("CountryAlpha2");
277 fieldNames_.push_back("CountryName");
279 /* Parse attribute names */
280 for(int i = 0; i < DBFGetRecordCount(dataHandle); i++) {
281 metadata_[i].data_.resize(4);
282 for(int j = 0; j < DBFGetFieldCount(dataHandle); j++) {
284 int fieldWidth, fieldDecimals;
285 DBFFieldType eType = DBFGetFieldInfo(dataHandle, j, fieldTitle, &fieldWidth, &fieldDecimals);
288 std::string fieldTitleStr(fieldTitle);
290 if( eType == FTString ) {
291 if(fieldTitleStr == "tzid") {
292 std::string data = DBFReadStringAttribute(dataHandle, i, j);
293 size_t pos = data.find('/');
294 if (pos == std::string::npos) {
295 metadata_[i].data_.at(0) = data;
297 metadata_[i].data_.at(0) = data.substr(0, pos) + "/";
298 metadata_[i].data_.at(1) = data.substr(pos + 1, std::string::npos);
300 if(tzidToAlpha2.count(data)) {
301 metadata_[i].data_.at(2) = tzidToAlpha2[data];
302 if(alpha2ToName.count(metadata_[i].data_.at(2))) {
303 metadata_[i].data_.at(3) = alpha2ToName[metadata_[i].data_.at(2)];
305 std::cout<<metadata_[i].data_.at(2)<< " not found in alpha2ToName! ("<<data<<")\n";
308 std::cout<<data<<" not found in zoneToAlpha2!\n";
316 void readMetaDataNaturalEarthCountry(DBFHandle dataHandle)
318 /* Specify field names */
319 fieldNames_.push_back("Alpha2");
320 fieldNames_.push_back("Alpha3");
321 fieldNames_.push_back("Name");
323 /* Parse attribute names */
324 for(int i = 0; i < DBFGetRecordCount(dataHandle); i++) {
325 metadata_[i].data_.resize(3);
326 for(int j = 0; j < DBFGetFieldCount(dataHandle); j++) {
328 int fieldWidth, fieldDecimals;
329 DBFFieldType eType = DBFGetFieldInfo(dataHandle, j, fieldTitle, &fieldWidth, &fieldDecimals);
332 std::string fieldTitleStr(fieldTitle);
334 if( eType == FTString ) {
335 if(fieldTitleStr == "ISO_A2" || fieldTitleStr == "WB_A2") {
336 std::string tmp = DBFReadStringAttribute(dataHandle, i, j);
338 metadata_[i].data_.at(0) = tmp;
340 } else if(fieldTitleStr == "ISO_A3" || fieldTitleStr == "WB_A3" || fieldTitleStr == "BRK_A3") {
341 std::string tmp = DBFReadStringAttribute(dataHandle, i, j);
343 metadata_[i].data_.at(1) = tmp;
345 } else if(fieldTitleStr == "NAME_LONG") {
346 metadata_[i].data_.at(2) = DBFReadStringAttribute(dataHandle, i, j);
354 std::unordered_map<std::string, std::string> parseAlpha2ToName(DBFHandle dataHandle)
356 std::unordered_map<std::string, std::string> result;
358 for(int i = 0; i < DBFGetRecordCount(dataHandle); i++) {
359 std::string alpha2, name;
360 for(int j = 0; j < DBFGetFieldCount(dataHandle); j++) {
362 int fieldWidth, fieldDecimals;
363 DBFFieldType eType = DBFGetFieldInfo(dataHandle, j, fieldTitle, &fieldWidth, &fieldDecimals);
366 std::string fieldTitleStr(fieldTitle);
368 if( eType == FTString ) {
369 if(fieldTitleStr == "ISO_A2" || fieldTitleStr == "WB_A2") {
370 std::string tmp = DBFReadStringAttribute(dataHandle, i, j);
371 if(tmp != "-99" && alpha2 == "") {
374 } else if(fieldTitleStr == "NAME_LONG") {
375 name = DBFReadStringAttribute(dataHandle, i, j);
384 result["GF"]="French Guiana";
385 result["GP"]="Guadeloupe";
386 result["BQ"]="Bonaire";
387 result["MQ"]="Martinique";
388 result["SJ"]="Svalbard and Jan Mayen Islands";
389 result["NO"]="Norway";
390 result["CX"]="Christmas Island";
391 result["CC"]="Cocos Islands";
392 result["YT"]="Mayotte";
393 result["RE"]="RĂ©union";
394 result["TK"]="Tokelau";
399 std::unordered_map<std::string, std::string> parseTimezoneToAlpha2(std::string path)
401 std::unordered_map<std::string, std::string> result;
402 //TODO: Clean solution...
403 #include "zoneToAlpha.h"
408 int main(int argc, char ** argv )
411 std::cout << "Wrong number of parameters\n";
415 tzidToAlpha2 = parseTimezoneToAlpha2("TODO");
417 char tableType = argv[1][0];
418 std::string path = argv[2];
419 std::string outPath = argv[3];
420 unsigned int precision = strtol(argv[4], NULL, 10);
421 std::string notice = argv[5];
423 DBFHandle dataHandle = DBFOpen("naturalearth/ne_10m_admin_0_countries_lakes", "rb" );
424 alpha2ToName = parseAlpha2ToName(dataHandle);
425 DBFClose(dataHandle);
427 dataHandle = DBFOpen(path.c_str(), "rb" );
428 if( dataHandle == NULL ) {
429 errorFatal("Could not open attribute file\n");
432 metadata_.resize(DBFGetRecordCount(dataHandle));
433 std::cout << "Reading "<<metadata_.size()<<" metadata records.\n";
435 if(tableType == 'C') {
436 readMetaDataNaturalEarthCountry(dataHandle);
437 } else if(tableType == 'T') {
438 readMetaDataTimezone(dataHandle);
440 std::cout << "Unknown table type\n";
444 DBFClose(dataHandle);
446 SHPHandle shapeHandle = SHPOpen(path.c_str(), "rb");
447 if( shapeHandle == NULL ) {
448 errorFatal("Could not open shapefile\n");
451 int numEntities, shapeType, totalPolygons = 0;
452 SHPGetInfo(shapeHandle, &numEntities, &shapeType, NULL, NULL);
454 std::cout<<"Opened "<<SHPTypeName( shapeType )<< " file with "<<numEntities<<" entries.\n";
456 for(int i = 0; i < numEntities; i++ ) {
457 SHPObject *shapeObject;
459 shapeObject = SHPReadObject( shapeHandle, i );
461 if(shapeObject->nSHPType != 3 && shapeObject->nSHPType != 5 &&
462 shapeObject->nSHPType != 13 && shapeObject->nSHPType != 15) {
463 std::cout<<"Unsupported shape object ("<< SHPTypeName(shapeObject->nSHPType) <<")\n";
469 PolygonData* polygonData = nullptr;
471 for(int j = 0; j < shapeObject->nVertices; j++ ) {
472 if(j == 0 || j == shapeObject->panPartStart[partIndex]) {
477 polygons_.push_back(polygonData);
479 polygonData = new PolygonData(i);
481 if(partIndex + 1 < shapeObject->nParts) {
486 Point p(shapeObject->padfY[j], shapeObject->padfX[j]);
487 polygonData->processPoint(p);
493 polygons_.push_back(polygonData);
496 SHPDestroyObject(shapeObject);
500 SHPClose(shapeHandle);
502 std::cout<<"Parsed "<<totalPolygons<<" polygons.\n";
504 /* Sort according to bounding box */
505 std::sort(polygons_.begin(), polygons_.end(), [](PolygonData* a, PolygonData* b) {
506 return a->boundingMin.lat_ < b->boundingMin.lat_;
509 /* Encode data section and store pointers */
510 std::vector<uint8_t> outputData;
511 for(PolygonData* polygon: polygons_) {
512 polygon->fileIndex_ = outputData.size();
513 polygon->encodeBinaryData(outputData, precision);
515 std::cout << "Encoded data section into "<<outputData.size()<<" bytes.\n";
517 /* Encode metadata */
518 std::vector<uint8_t> outputMeta;
519 for(MetaData& metadata: metadata_) {
520 metadata.fileIndex_ = outputMeta.size();
521 metadata.encodeBinaryData(outputMeta);
523 std::cout << "Encoded metadata into "<<outputMeta.size()<<" bytes.\n";
525 /* Encode bounding boxes */
526 std::vector<uint8_t> outputBBox;
527 int64_t prevFileIndex = 0;
528 int64_t prevMetaIndex = 0;
529 for(PolygonData* polygon: polygons_) {
530 polygon->boundingMin.encodePointBinary(outputBBox, precision);
531 polygon->boundingMax.encodePointBinary(outputBBox, precision);
533 encodeVariableLength(outputBBox, metadata_.at(polygon->metadataId_).fileIndex_ - prevMetaIndex);
534 prevMetaIndex = metadata_[polygon->metadataId_].fileIndex_;
536 encodeVariableLength(outputBBox, polygon->fileIndex_ - prevFileIndex, false);
537 prevFileIndex = polygon->fileIndex_;
539 std::cout << "Encoded bounding box section into "<<outputBBox.size()<<" bytes.\n";
542 std::vector<uint8_t> outputHeader;
543 outputHeader.push_back('P');
544 outputHeader.push_back('L');
545 outputHeader.push_back('B');
546 outputHeader.push_back(tableType);
547 outputHeader.push_back(0);
548 outputHeader.push_back(precision);
549 outputHeader.push_back(fieldNames_.size());
550 for(unsigned int i=0; i<fieldNames_.size(); i++) {
551 encodeStringToBinary(outputHeader, fieldNames_[i]);
553 encodeStringToBinary(outputHeader, notice);
554 encodeVariableLength(outputHeader, outputBBox.size(), false);
555 encodeVariableLength(outputHeader, outputMeta.size(), false);
556 encodeVariableLength(outputHeader, outputData.size(), false);
557 std::cout << "Encoded header into "<<outputHeader.size()<<" bytes.\n";
559 FILE* outputFile = fopen(outPath.c_str(), "wb");
560 fwrite(outputHeader.data(), 1, outputHeader.size(), outputFile);
561 fwrite(outputBBox.data(), 1, outputBBox.size(), outputFile);
562 fwrite(outputMeta.data(), 1, outputMeta.size(), outputFile);
563 fwrite(outputData.data(), 1, outputData.size(), outputFile);