Iconv and TextCodec classes for converting text encodings v_0
author František Kučera <franta-hg@frantovo.cz>
Wed, 20 Nov 2019 14:56:41 +0100
branch v_0
changeset 18 9d566568d37c
parent 17 46151cd23815
child 19 2999fca726f3
Iconv and TextCodec classes for converting text encodings
include/relpipe/common/text/Iconv.h
include/relpipe/common/text/TextCodec.h
nbproject/configurations.xml
src/CMakeLists.txt
src/Iconv.cpp
src/TextCodec.cpp
src/relpipe-lib-common.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/relpipe/common/text/Iconv.h	Wed Nov 20 14:56:41 2019 +0100
@@ -0,0 +1,55 @@
+/**
+ * Relational pipes (library)
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the:
+ *  - GNU Lesser General Public License as published by the Free Software Foundation;
+ *    version 3 of the License or (at your option)
+ *  - GNU General Public License as published by the Free Software Foundation;
+ *    version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <string.h>
+#include <iconv.h>
+#include <string>
+#include <array>
+#include <assert.h>
+#include <ostream>
+#include <sstream>
+
+namespace relpipe {
+namespace common {
+namespace text {
+
+/**
+ * Iconv class is a simple wrapper for iconv() functions.
+ * It converts text from one encoding to another.
+ * 
+ * List of all encodings: iconv --list
+ * 
+ * n.b. if the default (platform) encoding "" is used, it is required to initialize locales: setlocale(LC_ALL, "");
+ */
+class Iconv {
+private:
+	class IconvInternal; // implementation details; defined in src/Iconv.cpp
+	IconvInternal* internal; // owned: allocated in the constructor, deleted in the destructor
+public:
+	Iconv(std::string to, std::string from); // to/from = encoding names passed to iconv_open(); uses the default buffer size
+	Iconv(std::string to, std::string from, size_t bufferSize); // bufferSize = size of the output chunk buffer (raised to an internal minimum if smaller)
+	virtual ~Iconv(); // NOTE(review): no copy constructor/assignment is declared, so a copied Iconv would share "internal" and delete it twice
+	std::string convert(std::string originalText); // returns originalText converted from the "from" encoding to the "to" encoding
+};
+
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/relpipe/common/text/TextCodec.h	Wed Nov 20 14:56:41 2019 +0100
@@ -0,0 +1,53 @@
+/**
+ * Relational pipes (library)
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the:
+ *  - GNU Lesser General Public License as published by the Free Software Foundation;
+ *    version 3 of the License or (at your option)
+ *  - GNU General Public License as published by the Free Software Foundation;
+ *    version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <memory>
+
+#include "Iconv.h"
+
+namespace relpipe {
+namespace common {
+namespace text {
+
+/**
+ * TextCodec converts text from an encoding (usually the platform default)
+ * to our internal string representation (which is an octet string consisting of UTF-8 encoded text).
+ * 
+ * bytes = platform encoding
+ * text = our encoding
+ * 
+ * n.b. if the default (platform) encoding "" is used, it is required to initialize locales: setlocale(LC_ALL, "");
+ */
+class TextCodec {
+private:
+	class TextCodecInternal; // implementation details; defined in src/TextCodec.cpp
+	TextCodecInternal* internal; // owned: allocated in the constructor, deleted in the destructor
+public:
+	TextCodec(); // same as TextCodec("UTF-8")
+	TextCodec(const std::string& encoding); // encoding = iconv name of the "text" side; the "bytes" side is always the platform default ""
+	virtual ~TextCodec(); // NOTE(review): no copy constructor/assignment is declared, so a copied TextCodec would share "internal" and delete it twice
+	std::string fromBytes(std::string bytes); // platform encoding -> text encoding
+	std::string toBytes(std::string text); // text encoding -> platform encoding
+};
+
+}
+}
+}
--- a/nbproject/configurations.xml	Sun Nov 17 12:14:18 2019 +0100
+++ b/nbproject/configurations.xml	Wed Nov 20 14:56:41 2019 +0100
@@ -41,7 +41,18 @@
 <configurationDescriptor version="100">
   <logicalFolder name="root" displayName="root" projectFiles="true" kind="ROOT">
     <df root="." name="0">
+      <df name="include">
+        <df name="relpipe">
+          <df name="common">
+            <df name="text">
+              <in>TextCodec.h</in>
+            </df>
+          </df>
+        </df>
+      </df>
       <df name="src">
+        <in>Iconv.cpp</in>
+        <in>TextCodec.cpp</in>
         <in>relpipe-lib-common.cpp</in>
       </df>
     </df>
@@ -76,7 +87,7 @@
           <buildCommandWorkingDir>build/Debug</buildCommandWorkingDir>
           <buildCommand>${MAKE} -f Makefile</buildCommand>
           <cleanCommand>${MAKE} -f Makefile clean</cleanCommand>
-          <executablePath>build/Debug/src/welcome</executablePath>
+          <executablePath>build/Debug/src/relpipe-lib-common</executablePath>
           <ccTool>
             <incDir>
               <pElem>build/Debug/src</pElem>
@@ -92,8 +103,19 @@
           <preBuildFirst>true</preBuildFirst>
         </preBuild>
       </makefileType>
+      <item path="src/Iconv.cpp" ex="false" tool="1" flavor2="11">
+        <ccTool flags="0">
+        </ccTool>
+      </item>
+      <item path="src/TextCodec.cpp" ex="false" tool="1" flavor2="0">
+        <ccTool flags="0">
+        </ccTool>
+      </item>
       <item path="src/relpipe-lib-common.cpp" ex="false" tool="1" flavor2="0">
         <ccTool flags="0">
+          <preprocessorList>
+            <Elem>relpipe_lib_common_cpp_EXPORTS</Elem>
+          </preprocessorList>
         </ccTool>
       </item>
     </conf>
@@ -126,6 +148,15 @@
           <preBuildFirst>true</preBuildFirst>
         </preBuild>
       </makefileType>
+      <item path="include/relpipe/common/text/TextCodec.h"
+            ex="false"
+            tool="3"
+            flavor2="0">
+      </item>
+      <item path="src/Iconv.cpp" ex="false" tool="1" flavor2="0">
+      </item>
+      <item path="src/TextCodec.cpp" ex="false" tool="1" flavor2="0">
+      </item>
     </conf>
   </confs>
 </configurationDescriptor>
--- a/src/CMakeLists.txt	Sun Nov 17 12:14:18 2019 +0100
+++ b/src/CMakeLists.txt	Wed Nov 20 14:56:41 2019 +0100
@@ -23,6 +23,8 @@
 # Library output:
 add_library(
 	${LIBRARY_FILE} SHARED
+	Iconv.cpp
+	TextCodec.cpp
 	${PROJECT_CODE}.cpp
 )
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Iconv.cpp	Wed Nov 20 14:56:41 2019 +0100
@@ -0,0 +1,82 @@
+/**
+ * Relational pipes (library)
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the:
+ *  - GNU Lesser General Public License as published by the Free Software Foundation;
+ *    version 3 of the License or (at your option)
+ *  - GNU General Public License as published by the Free Software Foundation;
+ *    version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../include/relpipe/common/text/Iconv.h"
+
+namespace relpipe {
+namespace common {
+namespace text {
+
+/** Private state of Iconv: the iconv conversion descriptor and the size of the output chunk buffer. */
+class Iconv::IconvInternal {
+public:
+	static const size_t BUFFER_SIZE_MINIMUM;
+	static const size_t BUFFER_SIZE_DEFAULT;
+	const size_t bufferSize; // never smaller than BUFFER_SIZE_MINIMUM
+	iconv_t cd;
+
+	/** Opens a descriptor converting from encoding "from" to encoding "to"; throws std::string if iconv_open() fails. */
+	IconvInternal(std::string to, std::string from, const size_t bufferSize) : bufferSize(std::max(bufferSize, BUFFER_SIZE_MINIMUM)) {
+		cd = iconv_open(to.c_str(), from.c_str());
+		// iconv_open() signals failure by returning (iconv_t) -1; errno is meaningful only then (it may be stale from an earlier call)
+		if (cd == (iconv_t) -1) throw std::string("iconv_open() error: ") + strerror(errno); // TODO: custom exception
+	}
+
+	virtual ~IconvInternal() { iconv_close(cd); }
+};
+
+const size_t Iconv::IconvInternal::BUFFER_SIZE_MINIMUM = 16; // must hold at least one whole output character (UTF-8 alone needs up to 4 bytes), otherwise iconv() cannot make progress
+const size_t Iconv::IconvInternal::BUFFER_SIZE_DEFAULT = 20; // used by the two-argument Iconv constructor
+
+Iconv::Iconv(std::string to, std::string from) : Iconv(to, from, IconvInternal::BUFFER_SIZE_DEFAULT) { // delegates to the three-argument constructor with the default buffer size
+}
+
+/** Creates the private state, which opens the iconv descriptor for the conversion "from" -> "to". */
+Iconv::Iconv(std::string to, std::string from, size_t bufferSize)
+		: internal(new IconvInternal(to, from, bufferSize)) {
+}
+
+Iconv::~Iconv() {
+	delete internal; // the IconvInternal destructor closes the iconv descriptor
+}
+
+/** Converts originalText chunk by chunk; throws std::string on invalid or truncated input, or when the chunk buffer cannot hold a single character. */
+std::string Iconv::convert(std::string originalText) {
+	std::stringstream result;
+	std::string outBuffer(internal->bufferSize, '\0'); // heap buffer instead of a non-standard variable-length array
+	char* inBuf = &originalText[0]; // iconv() wants a non-const pointer; originalText is our private copy
+	size_t inBytesLeft = originalText.size();
+
+	do {
+		char* outBuf = &outBuffer[0];
+		size_t outBytesLeft = outBuffer.size();
+		const size_t nconv = iconv(internal->cd, &inBuf, &inBytesLeft, &outBuf, &outBytesLeft);
+		// iconv() reports failure as (size_t) -1 (size_t is unsigned, so it is never negative). E2BIG only means
+		// "chunk full, call again", unless not a single byte fit into the empty chunk: then retrying would loop forever.
+		const bool failed = nconv == (size_t) -1 && (errno != E2BIG || outBytesLeft == outBuffer.size());
+		if (failed) throw std::string("iconv() error: ") + strerror(errno); // TODO: custom exception
+		result.write(outBuffer.data(), outBuffer.size() - outBytesLeft);
+	} while (inBytesLeft > 0);
+	return result.str();
+}
+
+}
+}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/TextCodec.cpp	Wed Nov 20 14:56:41 2019 +0100
@@ -0,0 +1,80 @@
+/**
+ * Relational pipes (library)
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the:
+ *  - GNU Lesser General Public License as published by the Free Software Foundation;
+ *    version 3 of the License or (at your option)
+ *  - GNU General Public License as published by the Free Software Foundation;
+ *    version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <memory>
+
+#include "../include/relpipe/common/text/TextCodec.h"
+
+namespace relpipe {
+namespace common {
+namespace text {
+
+/** Private state of TextCodec: one Iconv converter for each direction. */
+class TextCodec::TextCodecInternal {
+public:
+	static const std::string UTF8;
+	static const std::string DEFAULT;
+	// Owning smart pointers: if creating the second converter throws, the first one is still released
+	// (with raw pointers it leaked, because the destructor of a half-constructed object never runs).
+	std::unique_ptr<Iconv> bytesToText; // DEFAULT (platform) encoding -> given encoding
+	std::unique_ptr<Iconv> textToBytes; // given encoding -> DEFAULT (platform) encoding
+	bool sameEncoding = false; // when true, TextCodec returns its input without converting it
+
+	/**
+	 * @param encoding iconv name of the text-side encoding; the bytes side is always DEFAULT
+	 * Propagates whatever the Iconv constructor throws if a converter cannot be opened.
+	 */
+	TextCodecInternal(const std::string& encoding) {
+		// TODO: set sameEncoding = true if $LANG or current locale is same as encoding
+		bytesToText.reset(new Iconv(encoding, DEFAULT));
+		textToBytes.reset(new Iconv(DEFAULT, encoding));
+	}
+
+	virtual ~TextCodecInternal() {
+	}
+};
+
+const std::string TextCodec::TextCodecInternal::UTF8 = "UTF-8"; // text-side encoding used by the no-argument TextCodec constructor
+const std::string TextCodec::TextCodecInternal::DEFAULT = ""; // empty name = platform default encoding; requires setlocale(LC_ALL, "") to have been called
+
+TextCodec::TextCodec(const std::string& encoding)
+		: internal(new TextCodecInternal(encoding)) { // private state holding both Iconv converters
+}
+
+TextCodec::TextCodec() : TextCodec(TextCodec::TextCodecInternal::UTF8) { // delegates to the one-argument constructor with "UTF-8" as the text-side encoding
+}
+
+TextCodec::~TextCodec() {
+	delete internal; // releases the private state together with the converters it holds
+}
+
+std::string TextCodec::fromBytes(std::string bytes) {
+	if (!internal->sameEncoding) return internal->bytesToText->convert(bytes); // platform bytes to text encoding
+	return bytes; // encodings match: hand the input back unchanged
+}
+
+std::string TextCodec::toBytes(std::string text) {
+	if (!internal->sameEncoding) return internal->textToBytes->convert(text); // text encoding to platform bytes
+	return text; // encodings match: hand the input back unchanged
+}
+
+}
+}
+}
--- a/src/relpipe-lib-common.cpp	Sun Nov 17 12:14:18 2019 +0100
+++ b/src/relpipe-lib-common.cpp	Wed Nov 20 14:56:41 2019 +0100
@@ -19,9 +19,7 @@
  */
 
 /*
- * The relpipe-lib-protocol.cpp is a header-only C++ library.
- * Generated .so file is quite useless and it is not needed to compile this library.
- * Compilation just validates that the .h file has no severe bugs.
- * In future, the binary might contain some code for testing purposes.
+ * The header-only parts of this library should be included from here.
+ * Compilation validates that the .h file has no severe bugs.
  */
 #include "../include/relpipe/protocol/constants.h"
\ No newline at end of file