author | František Kučera <franta-hg@frantovo.cz> |
Wed, 20 Nov 2019 14:56:41 +0100 | |
branch | v_0 |
changeset 18 | 9d566568d37c |
permissions | -rw-r--r-- |
18
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
1 |
/** |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
2 |
* Relational pipes (library) |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
3 |
* Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
4 |
* |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
5 |
* This program is free software: you can redistribute it and/or modify |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
6 |
* it under the terms of the: |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
7 |
* - GNU Lesser General Public License as published by the Free Software Foundation; |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
8 |
* version 3 of the License or (at your option) |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
9 |
* - GNU General Public License as published by the Free Software Foundation; |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
10 |
* version 2 of the License. |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
11 |
* |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
12 |
* This program is distributed in the hope that it will be useful, |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
15 |
* GNU General Public License for more details. |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
16 |
* |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
17 |
* You should have received a copy of the GNU General Public License |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
18 |
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
19 |
*/ |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
20 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
21 |
#include "../include/relpipe/common/text/Iconv.h" |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
22 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
23 |
namespace relpipe { |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
24 |
namespace common { |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
25 |
namespace text { |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
26 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
27 |
/**
 * Private implementation part of Iconv.
 * Holds the iconv conversion descriptor and the size of the output buffer
 * that Iconv::convert() uses for each iconv() call.
 */
class Iconv::IconvInternal {
public:
	static const size_t BUFFER_SIZE_MINIMUM;
	static const size_t BUFFER_SIZE_DEFAULT;
	/** Output buffer size in bytes; the constructor never lets it drop below BUFFER_SIZE_MINIMUM. */
	const size_t bufferSize;
	/** Conversion descriptor returned by iconv_open(); closed in the destructor. */
	iconv_t cd;

	/**
	 * Opens the conversion descriptor.
	 *
	 * @param to name of the target encoding, passed to iconv_open()
	 * @param from name of the source encoding, passed to iconv_open()
	 * @param bufferSize requested output buffer size; raised to BUFFER_SIZE_MINIMUM if smaller
	 * @throws std::string with the strerror() text if iconv_open() fails
	 */
	IconvInternal(std::string to, std::string from, const size_t bufferSize) : bufferSize(std::max(bufferSize, BUFFER_SIZE_MINIMUM)) {
		cd = iconv_open(to.c_str(), from.c_str());
		// iconv_open() reports failure by returning (iconv_t) -1 and only then sets errno.
		// Testing errno alone is wrong: it may still hold a value left by some earlier, unrelated call.
		if (cd == (iconv_t) -1) throw std::string("iconv_open() error: ") + strerror(errno); // TODO: custom exception
	}

	// The descriptor has a single owner; a copy would call iconv_close() on it twice.
	IconvInternal(const IconvInternal&) = delete;
	IconvInternal& operator=(const IconvInternal&) = delete;

	/** Releases the conversion descriptor. */
	virtual ~IconvInternal() {
		iconv_close(cd);
	}

};
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
44 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
45 |
/** Smallest output buffer size (in bytes) that IconvInternal accepts. */
const size_t Iconv::IconvInternal::BUFFER_SIZE_MINIMUM{2}; // TODO: enough?
/** Output buffer size (in bytes) used by the Iconv constructor that takes no explicit size. */
const size_t Iconv::IconvInternal::BUFFER_SIZE_DEFAULT{20};
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
47 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
48 |
/**
 * Creates a converter that uses the default output buffer size.
 * Delegates to the three-argument constructor.
 *
 * @param to name of the target encoding
 * @param from name of the source encoding
 */
Iconv::Iconv(std::string to, std::string from)
	: Iconv(to, from, IconvInternal::BUFFER_SIZE_DEFAULT) {}
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
50 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
51 |
/**
 * Creates a converter with an explicitly chosen output buffer size.
 *
 * @param to name of the target encoding
 * @param from name of the source encoding
 * @param bufferSize requested size of the output buffer in bytes
 */
Iconv::Iconv(std::string to, std::string from, size_t bufferSize)
	: internal(new Iconv::IconvInternal(to, from, bufferSize)) {}
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
55 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
56 |
/** Destroys the private implementation object created by the constructor. */
Iconv::~Iconv() { delete internal; }
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
59 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
60 |
std::string Iconv::convert(std::string originalText) { |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
61 |
std::stringstream result; |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
62 |
size_t inBytesLeft = originalText.size(); |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
63 |
size_t outBytesLeft = internal->bufferSize; |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
64 |
char outBuffer[internal->bufferSize]; |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
65 |
char* outBuf = (char*) outBuffer; |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
66 |
char* inBuf = (char*) originalText.c_str(); |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
67 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
68 |
do { |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
69 |
size_t nconv = iconv(internal->cd, &inBuf, &inBytesLeft, &outBuf, &outBytesLeft); |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
70 |
if (nconv < 0) throw std::string("iconv() error: ") + strerror(errno); // TODO: custom exception |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
71 |
// TODO: throw exception if locale is not initialized and platform default "" is used (avoid infinite loop) |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
72 |
result.write(outBuffer, internal->bufferSize - outBytesLeft); |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
73 |
outBytesLeft = internal->bufferSize; |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
74 |
outBuf = (char*) outBuffer; |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
75 |
} while (inBytesLeft > 0); |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
76 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
77 |
return result.str(); |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
78 |
} |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
79 |
|
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
80 |
} |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
81 |
} |
9d566568d37c
Iconv and TextCodec classes for converting text encodings
František Kučera <franta-hg@frantovo.cz>
parents:
diff
changeset
|
82 |
} |