/**
* Relational pipes
* Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <sstream>
#include <codecvt>
#include <arpa/inet.h>
#include "UnescapingProcessor.h"
using namespace std;
using namespace relpipe::writer;
namespace relpipe {
namespace in {
namespace ini {
namespace lib {
/**
 * Decodes Unicode escapes (backslash, 'u', four hexadecimal digits) into UTF-8
 * and leaves every other escape sequence in place for a later unescaping phase.
 *
 * Should work according to <https://docs.oracle.com/javase/specs/jls/se15/html/jls-3.html#jls-3.3> 3.3. Unicode Escapes
 *
 * One escape denotes one UTF-16 code unit. A high surrogate escape that is immediately
 * followed by a low surrogate escape is combined into a single supplementary code point,
 * so the output is valid UTF-8 even for characters outside the Basic Multilingual Plane.
 *
 * ESC and put() are used as inherited from UnescapingProcessor (declared in UnescapingProcessor.h).
 */
class JavaPropertiesUnescapingProcessor : public UnescapingProcessor {
private:
	std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8

	/**
	 * @param digit single character
	 * @return value 0-15 of the hexadecimal digit (both cases accepted) or -1 if it is not a hexadecimal digit
	 */
	static int hexDigitValue(char digit) {
		if (digit >= '0' && digit <= '9') return digit - '0';
		if (digit >= 'a' && digit <= 'f') return digit - 'a' + 10;
		if (digit >= 'A' && digit <= 'F') return digit - 'A' + 10;
		return -1;
	}

	/** @return true if the UTF-16 code unit is the first half of a surrogate pair */
	static bool isHighSurrogate(uint16_t unit) {
		return unit >= 0xD800 && unit <= 0xDBFF;
	}

	/** @return true if the UTF-16 code unit is the second half of a surrogate pair */
	static bool isLowSurrogate(uint16_t unit) {
		return unit >= 0xDC00 && unit <= 0xDFFF;
	}

	/**
	 * Combines a surrogate pair into a code point (U+10000 and above) and writes it as four UTF-8 bytes.
	 * Encoded by hand so that the result does not depend on the width of wchar_t.
	 *
	 * @param out stream that receives the UTF-8 bytes
	 * @param high high surrogate (0xD800-0xDBFF)
	 * @param low low surrogate (0xDC00-0xDFFF)
	 */
	static void putSupplementary(std::stringstream& out, uint16_t high, uint16_t low) {
		const uint32_t codePoint = 0x10000u
				+ ((static_cast<uint32_t> (high) - 0xD800u) << 10)
				+ (static_cast<uint32_t> (low) - 0xDC00u);
		out.put(static_cast<char> (0xF0u | (codePoint >> 18)));
		out.put(static_cast<char> (0x80u | ((codePoint >> 12) & 0x3Fu)));
		out.put(static_cast<char> (0x80u | ((codePoint >> 6) & 0x3Fu)));
		out.put(static_cast<char> (0x80u | (codePoint & 0x3Fu)));
	}

	/**
	 * Converts pairs of hexadecimal digits to bytes.
	 *
	 * @param hexadecimal pointer to at least hexLength readable characters
	 * @param hexLength number of hexadecimal digits to read; must be exactly binLength * 2
	 * @param resultBuffer receives binLength bytes in the order in which they were written; may be null (then the input is only validated)
	 * @param binLength number of bytes to produce
	 * @return false if the lengths do not match or a non-hexadecimal character was found (bytes decoded before the invalid one may already be stored)
	 */
	bool readHex(const char* hexadecimal, size_t hexLength, uint8_t* resultBuffer, size_t binLength) {
		if (hexLength != binLength * 2) return false;

		for (size_t i = 0; i < binLength; i++) {
			const int high = hexDigitValue(hexadecimal[i * 2]);
			const int low = hexDigitValue(hexadecimal[i * 2 + 1]);
			if (high < 0 || low < 0) return false;
			if (resultBuffer) resultBuffer[i] = static_cast<uint8_t> (high * 16 + low);
		}

		return true;
	}

	/**
	 * Reads one UTF-16 code unit written as four hexadecimal digits (most significant digit first).
	 *
	 * @param hexadecimal pointer to at least four readable characters
	 * @param codeUnit receives the value; untouched if false is returned
	 * @return false if any of the four characters is not a hexadecimal digit
	 */
	bool readCodeUnit(const char* hexadecimal, uint16_t& codeUnit) {
		uint8_t bytes[2];
		if (!readHex(hexadecimal, 4, bytes, sizeof (bytes))) return false;
		codeUnit = static_cast<uint16_t> ((bytes[0] << 8) | bytes[1]); // the digits are big-endian
		return true;
	}

public:

	/**
	 * @param s escaped text in UTF-8
	 * @param type not used by this processor
	 * @return text with the Unicode escapes replaced by UTF-8 bytes; a doubled ESC and all other escape sequences are copied unchanged
	 * @throws std::logic_error if a Unicode escape is too short or contains a non-hexadecimal character, or if the text ends with a single ESC
	 */
	std::string unescape(const std::string& s, const TextType type) override {
		const int hexLength = 4; // digits of one Unicode escape
		const int escapeLength = hexLength + 2; // the digits plus ESC and 'u'

		std::stringstream result;

		// The index stays int because it is handed over to the inherited put().
		for (int i = 0, length = static_cast<int> (s.size()); i < length; i++) {
			char ch = s[i];
			if (i + 1 < length && ch == ESC) {
				ch = s[i + 1];
				if (ch == 'u') {
					i++; // i now points to the 'u', the digits are at i + 1 … i + hexLength
					if (i + hexLength >= length) throw std::logic_error(std::string("Invalid unicode escape sequence: missing characters"));

					uint16_t unit = 0;
					if (!readCodeUnit(s.c_str() + i + 1, unit)) throw std::logic_error(std::string("Invalid unicode escape sequence: invalid HEX"));

					// A supplementary character is written as two consecutive escapes (surrogate pair).
					uint16_t low = 0;
					const bool surrogatePair = isHighSurrogate(unit)
							&& i + hexLength + escapeLength < length
							&& s[i + hexLength + 1] == ESC
							&& s[i + hexLength + 2] == 'u'
							&& readCodeUnit(s.c_str() + i + hexLength + 3, low)
							&& isLowSurrogate(low);

					if (surrogatePair) {
						putSupplementary(result, unit, low);
						i += hexLength + escapeLength; // skip the rest of the first escape and the whole second one
					} else {
						result << convertor.to_bytes(static_cast<wchar_t> (unit));
						i += hexLength;
					}
				} else if (ch == ESC) {
					put(result, ESC, i).put(ESC); // copy and skip even the second \ to avoid its misinterpretation in the next cycle
				} else {
					result.put(ESC); // keep the escape sequence for later unescaping phase
				}
			} else if (ch == ESC) {
				throw std::logic_error(std::string("Missing escape sequence")); // this should not happen
			} else {
				result.put(ch);
			}
		}

		return result.str();
	}
};
}
}
}
}