# HG changeset patch # User František Kučera # Date 1574258201 -3600 # Node ID 9d566568d37c8e012b3303a6904ac6f07f09a044 # Parent 46151cd23815ef3e58606cc9438997fdae926a77 Iconv and TextCodec classes for converting text encodings diff -r 46151cd23815 -r 9d566568d37c include/relpipe/common/text/Iconv.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/relpipe/common/text/Iconv.h Wed Nov 20 14:56:41 2019 +0100 @@ -0,0 +1,55 @@ +/** + * Relational pipes (library) + * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the: + * - GNU Lesser General Public License as published by the Free Software Foundation; + * version 3 of the License or (at your option) + * - GNU General Public License as published by the Free Software Foundation; + * version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace relpipe { +namespace common { +namespace text { + +/** + * Iconv class is a simple wrapper for iconv() functions. + * It converts text from one encoding to another. + * + * List of all encodings: iconv --list + * + * n.b. 
if the default (platform) encoding "" is used, it is required to initialize locales: setlocale(LC_ALL, "");
+ *
+ * Errors are reported by throwing a std::string that contains the error message.
+ */
+class Iconv {
+private:
+	class IconvInternal;
+	/** Implementation object: created by the constructors, deleted by the destructor. */
+	IconvInternal* internal;
+public:
+	/**
+	 * @param to name of the target encoding
+	 * @param from name of the source encoding
+	 */
+	Iconv(std::string to, std::string from);
+	/**
+	 * @param to name of the target encoding
+	 * @param from name of the source encoding
+	 * @param bufferSize requested size (in bytes) of the intermediate output buffer
+	 */
+	Iconv(std::string to, std::string from, size_t bufferSize);
+	// This object is the only owner of "internal": a copy would share the pointer
+	// and both destructors would delete it, so copying is disabled.
+	Iconv(const Iconv&) = delete;
+	Iconv& operator=(const Iconv&) = delete;
+	virtual ~Iconv();
+	/**
+	 * @param originalText text in the "from" encoding
+	 * @return the same text in the "to" encoding
+	 */
+	std::string convert(std::string originalText);
+};
+
+}
+}
+}
diff -r 46151cd23815 -r 9d566568d37c include/relpipe/common/text/TextCodec.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/include/relpipe/common/text/TextCodec.h Wed Nov 20 14:56:41 2019 +0100
@@ -0,0 +1,53 @@
+/**
+ * Relational pipes (library)
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the:
+ * - GNU Lesser General Public License as published by the Free Software Foundation;
+ * version 3 of the License or (at your option)
+ * - GNU General Public License as published by the Free Software Foundation;
+ * version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+#pragma once
+
+#include
+
+#include "Iconv.h"
+
+namespace relpipe {
+namespace common {
+namespace text {
+
+/**
+ * TextCodec converts text from an encoding (usually the platform default)
+ * to our internal string representation (which is an octet string consisting of UTF-8 encoded text).
+ *
+ * bytes = platform encoding
+ * text = our encoding
+ *
+ * n.b.
if the default (platform) encoding "" is used, it is required to initialize locales: setlocale(LC_ALL, "");
+ */
+class TextCodec {
+private:
+	class TextCodecInternal;
+	/** Implementation object: created by the constructors, deleted by the destructor. */
+	TextCodecInternal* internal;
+public:
+	/** Uses "UTF-8" as our (text) encoding. */
+	TextCodec();
+	/**
+	 * @param encoding name of our (text) encoding; the other side is always the platform default ""
+	 */
+	TextCodec(const std::string& encoding);
+	// This object is the only owner of "internal": a copy would share the pointer
+	// and both destructors would delete it, so copying is disabled.
+	TextCodec(const TextCodec&) = delete;
+	TextCodec& operator=(const TextCodec&) = delete;
+	virtual ~TextCodec();
+	/**
+	 * @param bytes octets in the platform encoding
+	 * @return the same text in our encoding
+	 */
+	std::string fromBytes(std::string bytes);
+	/**
+	 * @param text text in our encoding
+	 * @return the same text as octets in the platform encoding
+	 */
+	std::string toBytes(std::string text);
+};
+
+}
+}
+}
diff -r 46151cd23815 -r 9d566568d37c nbproject/configurations.xml --- a/nbproject/configurations.xml Sun Nov 17 12:14:18 2019 +0100 +++ b/nbproject/configurations.xml Wed Nov 20 14:56:41 2019 +0100 @@ -41,7 +41,18 @@ + + + + + TextCodec.h + + + + + Iconv.cpp + TextCodec.cpp relpipe-lib-common.cpp @@ -76,7 +87,7 @@ build/Debug ${MAKE} -f Makefile ${MAKE} -f Makefile clean - build/Debug/src/welcome + build/Debug/src/relpipe-lib-common build/Debug/src @@ -92,8 +103,19 @@ true + + + + + + + + + + relpipe_lib_common_cpp_EXPORTS + @@ -126,6 +148,15 @@ true + + + + + +
diff -r 46151cd23815 -r 9d566568d37c src/CMakeLists.txt --- a/src/CMakeLists.txt Sun Nov 17 12:14:18 2019 +0100 +++ b/src/CMakeLists.txt Wed Nov 20 14:56:41 2019 +0100 @@ -23,6 +23,8 @@ # Library output: add_library( ${LIBRARY_FILE} SHARED + Iconv.cpp + TextCodec.cpp ${PROJECT_CODE}.cpp )
diff -r 46151cd23815 -r 9d566568d37c src/Iconv.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Iconv.cpp Wed Nov 20 14:56:41 2019 +0100
@@ -0,0 +1,82 @@
+/**
+ * Relational pipes (library)
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the:
+ * - GNU Lesser General Public License as published by the Free Software Foundation;
+ * version 3 of the License or (at your option)
+ * - GNU General Public License as published by the Free Software Foundation;
+ * version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include "../include/relpipe/common/text/Iconv.h"
+
+namespace relpipe {
+namespace common {
+namespace text {
+
+/**
+ * Holds the iconv conversion descriptor and the size of the output buffer.
+ * The descriptor is opened in the constructor and closed in the destructor.
+ */
+class Iconv::IconvInternal {
+public:
+	static const size_t BUFFER_SIZE_MINIMUM;
+	static const size_t BUFFER_SIZE_DEFAULT;
+	/** Requested buffer size, but never less than BUFFER_SIZE_MINIMUM. */
+	const size_t bufferSize;
+	iconv_t cd;
+
+	/**
+	 * @param to name of the target encoding (first argument of iconv_open())
+	 * @param from name of the source encoding (second argument of iconv_open())
+	 * @param bufferSize requested size of the output buffer in bytes
+	 * @throws std::string with the error message if the descriptor can not be opened
+	 */
+	IconvInternal(std::string to, std::string from, const size_t bufferSize) : bufferSize(std::max(bufferSize, BUFFER_SIZE_MINIMUM)) {
+		cd = iconv_open(to.c_str(), from.c_str());
+		// Failure is signalled by the return value only. errno may hold a stale value
+		// from an earlier call, so testing errno alone would throw (and leak the
+		// descriptor) even after a successful iconv_open().
+		if (cd == (iconv_t) -1) throw std::string("iconv_open() error: ") + strerror(errno); // TODO: custom exception
+	}
+
+	virtual ~IconvInternal() {
+		iconv_close(cd);
+	}
+
+};
+
+const size_t Iconv::IconvInternal::BUFFER_SIZE_MINIMUM = 2; // TODO: enough?
+const size_t Iconv::IconvInternal::BUFFER_SIZE_DEFAULT = 20;
+
+/** Creates a converter that uses the default size of the output buffer. */
+Iconv::Iconv(std::string to, std::string from) : Iconv(to, from, IconvInternal::BUFFER_SIZE_DEFAULT) {
+}
+
+/** Creates a converter with the given (requested) size of the output buffer. */
+Iconv::Iconv(std::string to, std::string from, size_t bufferSize) {
+	internal = new Iconv::IconvInternal(to, from, bufferSize);
+}
+
+Iconv::~Iconv() {
+	delete internal;
+}
+
+/**
+ * Converts the whole text in one call.
+ *
+ * @param originalText text in the source encoding
+ * @return text in the target encoding
+ * @throws std::string with the error message if iconv() fails for another reason
+ *         than a full output buffer (e.g. invalid or incomplete input sequence)
+ */
+std::string Iconv::convert(std::string originalText) {
+	std::string result;
+	// Buffer on the heap instead of a variable-length array, which is not standard C++.
+	std::string outBuffer(internal->bufferSize, '\0');
+	// originalText is our own copy, so iconv() may get a non-const pointer into it.
+	char* inBuf = &originalText[0];
+	size_t inBytesLeft = originalText.size();
+
+	while (true) {
+		char* outBuf = &outBuffer[0];
+		size_t outBytesLeft = outBuffer.size();
+		const size_t nconv = iconv(internal->cd, &inBuf, &inBytesLeft, &outBuf, &outBytesLeft);
+		// size_t is unsigned, so "nconv < 0" is never true; failure is (size_t) -1.
+		// Store errno immediately, before another call can overwrite it.
+		const int error = (nconv == (size_t) -1) ? errno : 0;
+		const size_t written = outBuffer.size() - outBytesLeft;
+		result.append(outBuffer, 0, written);
+
+		if (error == 0) break; // the whole input was converted
+
+		if (error == E2BIG) {
+			// Output buffer is full: continue with the rest of the input.
+			// If not even one character fitted, enlarge the buffer to avoid an infinite loop.
+			if (written == 0) outBuffer.resize(outBuffer.size() * 2);
+			continue;
+		}
+
+		// Any other error can not be recovered by retrying (this also avoids the infinite loop).
+		// Return the descriptor to its initial state so this object can be used again.
+		iconv(internal->cd, nullptr, nullptr, nullptr, nullptr);
+		throw std::string("iconv() error: ") + strerror(error); // TODO: custom exception
+	}
+
+	return result;
+}
+
+}
+}
+}
diff -r 46151cd23815 -r 9d566568d37c src/TextCodec.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/TextCodec.cpp Wed Nov 20 14:56:41 2019 +0100
@@ -0,0 +1,80 @@
+/**
+ * Relational pipes (library)
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the:
+ * - GNU Lesser General Public License as published by the Free Software Foundation;
+ * version 3 of the License or (at your option)
+ * - GNU General Public License as published by the Free Software Foundation;
+ * version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+
+#include "../include/relpipe/common/text/TextCodec.h"
+
+namespace relpipe {
+namespace common {
+namespace text {
+
+/**
+ * Holds one converter for each direction:
+ * bytesToText converts from the platform default encoding ("") to the given encoding,
+ * textToBytes converts back.
+ */
+class TextCodec::TextCodecInternal {
+public:
+	static const std::string UTF8;
+	static const std::string DEFAULT;
+	Iconv* bytesToText = nullptr;
+	Iconv* textToBytes = nullptr;
+	/** If true, fromBytes() and toBytes() return their input unchanged. Currently never set to true. */
+	bool sameEncoding = false;
+
+	/**
+	 * @param encoding name of our (text) encoding
+	 */
+	TextCodecInternal(const std::string& encoding) {
+		// TODO: set sameEncoding = true if $LANG or current locale is same as encoding
+		bytesToText = new Iconv(encoding, DEFAULT);
+		try {
+			textToBytes = new Iconv(DEFAULT, encoding);
+		} catch (...) {
+			// The destructor does not run for a partially constructed object,
+			// so the first converter has to be released here.
+			delete bytesToText;
+			throw;
+		}
+	}
+
+	virtual ~TextCodecInternal() {
+		delete bytesToText;
+		delete textToBytes;
+	}
+};
+
+const std::string TextCodec::TextCodecInternal::UTF8 = "UTF-8";
+const std::string TextCodec::TextCodecInternal::DEFAULT = "";
+
+/** Creates a codec between the platform default encoding and the given encoding. */
+TextCodec::TextCodec(const std::string& encoding) {
+	internal = new TextCodec::TextCodecInternal(encoding);
+}
+
+/** Creates a codec between the platform default encoding and UTF-8. */
+TextCodec::TextCodec() : TextCodec(TextCodec::TextCodecInternal::UTF8) {
+}
+
+TextCodec::~TextCodec() {
+	delete internal;
+}
+
+/** Converts octets in the platform encoding to text in our encoding. */
+std::string TextCodec::fromBytes(std::string bytes) {
+	if (internal->sameEncoding) return bytes;
+	return internal->bytesToText->convert(bytes);
+}
+
+/** Converts text in our encoding to octets in the platform encoding. */
+std::string TextCodec::toBytes(std::string text) {
+	if (internal->sameEncoding) return text;
+	return internal->textToBytes->convert(text);
+}
+
+}
+}
+}
diff -r 46151cd23815 -r 9d566568d37c src/relpipe-lib-common.cpp
--- a/src/relpipe-lib-common.cpp Sun Nov 17 12:14:18 2019 +0100
+++ b/src/relpipe-lib-common.cpp Wed Nov 20 14:56:41 2019 +0100
@@ -19,9 +19,7 @@ */ /*
- * The relpipe-lib-protocol.cpp is a header-only C++ library.
- * Generated .so file is quite useless and it is not needed to compile this library. - * Compilation just validates that the .h file has no severe bugs. - * In future, the binary might contain some code for testing purposes. + * The header-only parts of this library should be included from here. + * Compilation validates that the .h file has no severe bugs. */ #include "../include/relpipe/protocol/constants.h" \ No newline at end of file