Revise timezone function
[geeqie.git] / scripts / zonedetect / builder.cpp
diff --git a/scripts/zonedetect/builder.cpp b/scripts/zonedetect/builder.cpp
deleted file mode 100644 (file)
index af6e65e..0000000
+++ /dev/null
@@ -1,566 +0,0 @@
-/*
- * Copyright (c) 2018, Bertold Van den Bergh (vandenbergh@bertold.org)
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in the
- *       documentation and/or other materials provided with the distribution.
- *     * Neither the name of the author nor the
- *       names of its contributors may be used to endorse or promote products
- *       derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR DISTRIBUTOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <shapefil.h>
-#include <iostream>
-#include <limits>
-#include <fstream>
-#include <vector>
-#include <algorithm>
-#include <unordered_map>
-#include <functional>
-#include <math.h>
-#include <tuple>
-
-const double Inf = std::numeric_limits<float>::infinity();
-
-std::unordered_map<std::string, std::string> alpha2ToName;
-std::unordered_map<std::string, std::string> tzidToAlpha2;
-
-void errorFatal(std::string what)
-{
-    std::cerr<<what<<"\n";
-    exit(1);
-}
-
-int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool handleNeg = true)
-{
-    uint64_t value = valueIn * 2;
-    if(valueIn < 0) {
-        value = -valueIn * 2 + 1;
-    }
-
-    if(!handleNeg) {
-        value = valueIn;
-    }
-
-    int bytesUsed = 0;
-    do {
-        uint8_t byteOut = value & 0x7F;
-        if(value >= 128) {
-            byteOut |= 0x80;
-        }
-        output.push_back(byteOut);
-        bytesUsed ++;
-        value >>= 7;
-    } while(value);
-
-    return bytesUsed;
-}
-
-int64_t doubleToFixedPoint(double input, double scale, unsigned int precision = 32)
-{
-    double inputScaled = input / scale;
-    return inputScaled * pow(2, precision-1);
-
-}
-
-struct Point {
-    Point(double lat = 0, double lon = 0)
-    {
-        lat_ = lat;
-        lon_ = lon;
-    }
-
-    Point operator-(const Point& p)
-    {
-        Point result(lat_ - p.lat_, lon_ - p.lon_);
-        return result;
-    }
-
-    std::tuple<int64_t, int64_t> toFixedPoint(unsigned int precision = 32)
-    {
-        int64_t latFixedPoint = doubleToFixedPoint(lat_, 90, precision);
-        int64_t lonFixedPoint = doubleToFixedPoint(lon_, 180, precision);
-
-        return std::make_tuple(latFixedPoint, lonFixedPoint);
-    }
-
-    int encodePointBinary(std::vector<uint8_t>& output, unsigned int precision = 32)
-    {
-        int64_t latFixedPoint, lonFixedPoint;
-        std::tie(latFixedPoint, lonFixedPoint) = toFixedPoint(precision);
-
-        int bytesUsed = encodeVariableLength(output, latFixedPoint);
-        bytesUsed += encodeVariableLength(output, lonFixedPoint);
-
-        return bytesUsed;
-    }
-
-    double lat_;
-    double lon_;
-};
-
-struct PolygonData {
-    Point boundingMin;
-    Point boundingMax;
-    std::vector<Point> points_;
-    unsigned long fileIndex_ = 0;
-    unsigned long metadataId_;
-
-    void processPoint(const Point& p)
-    {
-        if(p.lat_ < boundingMin.lat_) {
-            boundingMin.lat_ = p.lat_;
-        }
-        if(p.lon_ < boundingMin.lon_) {
-            boundingMin.lon_ = p.lon_;
-        }
-        if(p.lat_ > boundingMax.lat_) {
-            boundingMax.lat_ = p.lat_;
-        }
-        if(p.lon_ > boundingMax.lon_) {
-            boundingMax.lon_ = p.lon_;
-        }
-
-        points_.push_back(p);
-    }
-
-    PolygonData(unsigned long id):
-        boundingMin(Inf, Inf),
-        boundingMax(-Inf, -Inf),
-        metadataId_(id)
-    {
-    }
-
-    long encodeBinaryData(std::vector<uint8_t>& output, unsigned int precision = 20)
-    {
-        long bytesEncoded = 0;
-        bool first = true;
-        int64_t latFixedPoint = 0, lonFixedPoint = 0;
-        int64_t latFixedPointPrev, lonFixedPointPrev;
-        uint64_t vertices = 0;
-
-        std::vector<uint8_t> tmp;
-
-        int64_t diffLatAcc = 0, diffLonAcc = 0, diffLatPrev = 0, diffLonPrev = 0;
-
-        for(Point& point: points_) {
-            /* The points should first be rounded, and then the integer value is differentiated */
-            latFixedPointPrev = latFixedPoint;
-            lonFixedPointPrev = lonFixedPoint;
-            std::tie(latFixedPoint, lonFixedPoint) = point.toFixedPoint(precision);
-
-            int64_t diffLat = latFixedPoint - latFixedPointPrev;
-            int64_t diffLon = lonFixedPoint - lonFixedPointPrev;
-
-            if(first) {
-                /* First point is always encoded */
-                vertices++;
-                encodeVariableLength(tmp, latFixedPoint);
-                encodeVariableLength(tmp, lonFixedPoint);
-                first = false;
-            } else {
-                /* Ignore points that are not different */
-                if(!diffLon && !diffLat) {
-                    continue;
-                }
-
-                if(diffLat != diffLatPrev || diffLon != diffLonPrev) {
-                    /* Encode accumulator */
-                    vertices++;
-                    encodeVariableLength(tmp, diffLatAcc);
-                    encodeVariableLength(tmp, diffLonAcc);
-
-                    diffLatAcc = 0;
-                    diffLonAcc = 0;
-                }
-
-                diffLatAcc += diffLat;
-                diffLonAcc += diffLon;
-            }
-
-            diffLatPrev = diffLat;
-            diffLonPrev = diffLon;
-        }
-
-        /* Encode final point */
-        vertices++;
-        encodeVariableLength(tmp, diffLatAcc);
-        encodeVariableLength(tmp, diffLonAcc);
-
-        encodeVariableLength(output, vertices, false);
-        std::copy(tmp.begin(), tmp.end(), std::back_inserter(output));
-
-        return bytesEncoded;
-    }
-};
-
-void encodeStringToBinary(std::vector<uint8_t>& output, std::string& input)
-{
-    encodeVariableLength(output, input.size(), false);
-    for(unsigned int i=0; i<input.size(); i++) {
-        output.push_back(input[i] ^ 0x80);
-    }
-}
-
-
-std::unordered_map<std::string, uint64_t> usedStrings_;
-
-struct MetaData {
-    void encodeBinaryData(std::vector<uint8_t>& output)
-    {
-        for(std::string& str: data_) {
-            if(str.length() >= 256) {
-                std::cout << "Metadata string is too long\n";
-                exit(1);
-            }
-
-            if(!usedStrings_.count(str)) {
-                usedStrings_[str] = output.size();
-                encodeStringToBinary(output, str);
-            } else {
-                encodeVariableLength(output, usedStrings_[str] + 256, false);
-            }
-        }
-    }
-
-    std::vector<std::string> data_;
-
-    unsigned long fileIndex_;
-};
-
-
-std::vector<PolygonData*> polygons_;
-std::vector<MetaData> metadata_;
-std::vector<std::string> fieldNames_;
-
-
-unsigned int decodeVariableLength(uint8_t* buffer, int64_t* result, bool handleNeg = true)
-{
-    int64_t value = 0;
-    unsigned int i=0, shift = 0;
-
-    do {
-        value |= (buffer[i] & 0x7F) << shift;
-        shift += 7;
-    } while(buffer[i++] & 0x80);
-
-    if(!handleNeg) {
-        *result = value;
-    } else {
-        *result = (value & 1)?-(value/2):(value/2);
-    }
-    return i;
-}
-
-void readMetaDataTimezone(DBFHandle dataHandle)
-{
-    /* Specify field names */
-    fieldNames_.push_back("TimezoneIdPrefix");
-    fieldNames_.push_back("TimezoneId");
-    fieldNames_.push_back("CountryAlpha2");
-    fieldNames_.push_back("CountryName");
-
-    /* Parse attribute names */
-    for(int i = 0; i < DBFGetRecordCount(dataHandle); i++) {
-        metadata_[i].data_.resize(4);
-        for(int j = 0; j < DBFGetFieldCount(dataHandle); j++) {
-            char fieldTitle[12];
-            int fieldWidth, fieldDecimals;
-            DBFFieldType eType = DBFGetFieldInfo(dataHandle, j, fieldTitle, &fieldWidth, &fieldDecimals);
-
-            fieldTitle[11] = 0;
-            std::string fieldTitleStr(fieldTitle);
-
-            if( eType == FTString ) {
-                if(fieldTitleStr == "tzid") {
-                    std::string data = DBFReadStringAttribute(dataHandle, i, j);
-                    size_t pos = data.find('/');
-                    if (pos == std::string::npos) {
-                        metadata_[i].data_.at(0) = data;
-                    } else {
-                        metadata_[i].data_.at(0) = data.substr(0, pos) + "/";
-                        metadata_[i].data_.at(1) = data.substr(pos + 1, std::string::npos);
-                    }
-                    if(tzidToAlpha2.count(data)) {
-                        metadata_[i].data_.at(2) = tzidToAlpha2[data];
-                        if(alpha2ToName.count(metadata_[i].data_.at(2))) {
-                            metadata_[i].data_.at(3) = alpha2ToName[metadata_[i].data_.at(2)];
-                        } else {
-                            std::cout<<metadata_[i].data_.at(2)<< " not found in alpha2ToName! ("<<data<<")\n";
-                        }
-                    } else {
-                        std::cout<<data<<" not found in zoneToAlpha2!\n";
-                    }
-                }
-            }
-        }
-    }
-}
-
-void readMetaDataNaturalEarthCountry(DBFHandle dataHandle)
-{
-    /* Specify field names */
-    fieldNames_.push_back("Alpha2");
-    fieldNames_.push_back("Alpha3");
-    fieldNames_.push_back("Name");
-
-    /* Parse attribute names */
-    for(int i = 0; i < DBFGetRecordCount(dataHandle); i++) {
-        metadata_[i].data_.resize(3);
-        for(int j = 0; j < DBFGetFieldCount(dataHandle); j++) {
-            char fieldTitle[12];
-            int fieldWidth, fieldDecimals;
-            DBFFieldType eType = DBFGetFieldInfo(dataHandle, j, fieldTitle, &fieldWidth, &fieldDecimals);
-
-            fieldTitle[11] = 0;
-            std::string fieldTitleStr(fieldTitle);
-
-            if( eType == FTString ) {
-                if(fieldTitleStr == "ISO_A2" || fieldTitleStr == "WB_A2") {
-                    std::string tmp = DBFReadStringAttribute(dataHandle, i, j);
-                    if(tmp != "-99") {
-                        metadata_[i].data_.at(0) = tmp;
-                    }
-                } else if(fieldTitleStr == "ISO_A3" || fieldTitleStr == "WB_A3" || fieldTitleStr == "BRK_A3") {
-                    std::string tmp = DBFReadStringAttribute(dataHandle, i, j);
-                    if(tmp != "-99") {
-                        metadata_[i].data_.at(1) = tmp;
-                    }
-                } else if(fieldTitleStr == "NAME_LONG") {
-                    metadata_[i].data_.at(2) = DBFReadStringAttribute(dataHandle, i, j);
-                }
-            }
-
-        }
-    }
-}
-
-std::unordered_map<std::string, std::string> parseAlpha2ToName(DBFHandle dataHandle)
-{
-    std::unordered_map<std::string, std::string> result;
-
-    for(int i = 0; i < DBFGetRecordCount(dataHandle); i++) {
-        std::string alpha2, name;
-        for(int j = 0; j < DBFGetFieldCount(dataHandle); j++) {
-            char fieldTitle[12];
-            int fieldWidth, fieldDecimals;
-            DBFFieldType eType = DBFGetFieldInfo(dataHandle, j, fieldTitle, &fieldWidth, &fieldDecimals);
-
-            fieldTitle[11] = 0;
-            std::string fieldTitleStr(fieldTitle);
-
-            if( eType == FTString ) {
-                if(fieldTitleStr == "ISO_A2" || fieldTitleStr == "WB_A2") {
-                    std::string tmp = DBFReadStringAttribute(dataHandle, i, j);
-                    if(tmp != "-99" && alpha2 == "") {
-                        alpha2 = tmp;
-                    }
-                } else if(fieldTitleStr == "NAME_LONG") {
-                    name = DBFReadStringAttribute(dataHandle, i, j);
-                }
-            }
-        }
-        if(alpha2 != "") {
-            result[alpha2]=name;
-        }
-    }
-
-    result["GF"]="French Guiana";
-    result["GP"]="Guadeloupe";
-    result["BQ"]="Bonaire";
-    result["MQ"]="Martinique";
-    result["SJ"]="Svalbard and Jan Mayen Islands";
-    result["NO"]="Norway";
-    result["CX"]="Christmas Island";
-    result["CC"]="Cocos Islands";
-    result["YT"]="Mayotte";
-    result["RE"]="RĂ©union";
-    result["TK"]="Tokelau";
-
-    return result;
-}
-
-std::unordered_map<std::string, std::string> parseTimezoneToAlpha2(std::string path)
-{
-    std::unordered_map<std::string, std::string> result;
-    //TODO: Clean solution...
-#include "zoneToAlpha.h"
-
-    return result;
-}
-
-int main(int argc, char ** argv )
-{
-    if(argc != 6) {
-        std::cout << "Wrong number of parameters\n";
-        return 1;
-    }
-
-    tzidToAlpha2 = parseTimezoneToAlpha2("TODO");
-
-    char tableType = argv[1][0];
-    std::string path = argv[2];
-    std::string outPath = argv[3];
-    unsigned int precision = strtol(argv[4], NULL, 10);
-    std::string notice = argv[5];
-
-    DBFHandle dataHandle = DBFOpen("naturalearth/ne_10m_admin_0_countries_lakes", "rb" );
-    alpha2ToName = parseAlpha2ToName(dataHandle);
-    DBFClose(dataHandle);
-
-    dataHandle = DBFOpen(path.c_str(), "rb" );
-    if( dataHandle == NULL ) {
-        errorFatal("Could not open attribute file\n");
-    }
-
-    metadata_.resize(DBFGetRecordCount(dataHandle));
-    std::cout << "Reading "<<metadata_.size()<<" metadata records.\n";
-
-    if(tableType == 'C') {
-        readMetaDataNaturalEarthCountry(dataHandle);
-    } else if(tableType == 'T') {
-        readMetaDataTimezone(dataHandle);
-    } else {
-        std::cout << "Unknown table type\n";
-        return 1;
-    }
-
-    DBFClose(dataHandle);
-
-    SHPHandle shapeHandle = SHPOpen(path.c_str(), "rb");
-    if( shapeHandle == NULL ) {
-        errorFatal("Could not open shapefile\n");
-    }
-
-    int numEntities, shapeType, totalPolygons = 0;
-    SHPGetInfo(shapeHandle, &numEntities, &shapeType, NULL, NULL);
-
-    std::cout<<"Opened "<<SHPTypeName( shapeType )<< " file with "<<numEntities<<" entries.\n";
-
-    for(int i = 0; i < numEntities; i++ ) {
-        SHPObject *shapeObject;
-
-        shapeObject = SHPReadObject( shapeHandle, i );
-        if(shapeObject) {
-            if(shapeObject->nSHPType != 3 && shapeObject->nSHPType != 5 &&
-                    shapeObject->nSHPType != 13 && shapeObject->nSHPType != 15) {
-                std::cout<<"Unsupported shape object ("<< SHPTypeName(shapeObject->nSHPType) <<")\n";
-                continue;
-            }
-
-            int partIndex = 0;
-
-            PolygonData* polygonData = nullptr;
-
-            for(int j = 0; j < shapeObject->nVertices; j++ ) {
-                if(j == 0 || j == shapeObject->panPartStart[partIndex]) {
-                    totalPolygons++;
-
-                    if(polygonData) {
-                        /* Commit it */
-                        polygons_.push_back(polygonData);
-                    }
-                    polygonData =  new PolygonData(i);
-
-                    if(partIndex + 1 < shapeObject->nParts) {
-                        partIndex++;
-                    }
-                }
-
-                Point p(shapeObject->padfY[j], shapeObject->padfX[j]);
-                polygonData->processPoint(p);
-
-            }
-
-            if(polygonData) {
-                /* Commit it */
-                polygons_.push_back(polygonData);
-            }
-
-            SHPDestroyObject(shapeObject);
-        }
-    }
-
-    SHPClose(shapeHandle);
-
-    std::cout<<"Parsed "<<totalPolygons<<" polygons.\n";
-
-    /* Sort according to bounding box */
-    std::sort(polygons_.begin(), polygons_.end(), [](PolygonData* a, PolygonData* b) {
-        return a->boundingMin.lat_ < b->boundingMin.lat_;
-    });
-
-    /* Encode data section and store pointers */
-    std::vector<uint8_t> outputData;
-    for(PolygonData* polygon: polygons_) {
-        polygon->fileIndex_ = outputData.size();
-        polygon->encodeBinaryData(outputData, precision);
-    }
-    std::cout << "Encoded data section into "<<outputData.size()<<" bytes.\n";
-
-    /* Encode metadata */
-    std::vector<uint8_t> outputMeta;
-    for(MetaData& metadata: metadata_) {
-        metadata.fileIndex_ = outputMeta.size();
-        metadata.encodeBinaryData(outputMeta);
-    }
-    std::cout << "Encoded metadata into "<<outputMeta.size()<<" bytes.\n";
-
-    /* Encode bounding boxes */
-    std::vector<uint8_t> outputBBox;
-    int64_t prevFileIndex = 0;
-    int64_t prevMetaIndex = 0;
-    for(PolygonData* polygon: polygons_) {
-        polygon->boundingMin.encodePointBinary(outputBBox, precision);
-        polygon->boundingMax.encodePointBinary(outputBBox, precision);
-
-        encodeVariableLength(outputBBox, metadata_.at(polygon->metadataId_).fileIndex_ - prevMetaIndex);
-        prevMetaIndex = metadata_[polygon->metadataId_].fileIndex_;
-
-        encodeVariableLength(outputBBox, polygon->fileIndex_ - prevFileIndex, false);
-        prevFileIndex = polygon->fileIndex_;
-    }
-    std::cout << "Encoded bounding box section into "<<outputBBox.size()<<" bytes.\n";
-
-    /* Encode header */
-    std::vector<uint8_t> outputHeader;
-    outputHeader.push_back('P');
-    outputHeader.push_back('L');
-    outputHeader.push_back('B');
-    outputHeader.push_back(tableType);
-    outputHeader.push_back(0);
-    outputHeader.push_back(precision);
-    outputHeader.push_back(fieldNames_.size());
-    for(unsigned int i=0; i<fieldNames_.size(); i++) {
-        encodeStringToBinary(outputHeader, fieldNames_[i]);
-    }
-    encodeStringToBinary(outputHeader, notice);
-    encodeVariableLength(outputHeader, outputBBox.size(), false);
-    encodeVariableLength(outputHeader, outputMeta.size(), false);
-    encodeVariableLength(outputHeader, outputData.size(), false);
-    std::cout << "Encoded header into "<<outputHeader.size()<<" bytes.\n";
-
-    FILE* outputFile = fopen(outPath.c_str(), "wb");
-    fwrite(outputHeader.data(), 1, outputHeader.size(), outputFile);
-    fwrite(outputBBox.data(), 1, outputBBox.size(), outputFile);
-    fwrite(outputMeta.data(), 1, outputMeta.size(), outputFile);
-    fwrite(outputData.data(), 1, outputData.size(), outputFile);
-    fclose(outputFile);
-
-}