src/XmlHandler.h
author František Kučera <franta-hg@frantovo.cz>
Wed, 19 Dec 2018 23:10:19 +0100
branchv_0
changeset 10 2099260a8164
parent 8 0fb84b194a8f
child 12 b8274181b061
permissions -rw-r--r--
builds with cmake 3.7.2 (e.g. on Debian GNU/Linux 9.6)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
3
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     1
/**
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     2
 * Relational pipes
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     3
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     4
 *
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     7
 * the Free Software Foundation, either version 3 of the License, or
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     8
 * (at your option) any later version.
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
     9
 *
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    10
 * This program is distributed in the hope that it will be useful,
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    13
 * GNU General Public License for more details.
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    14
 *
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    15
 * You should have received a copy of the GNU General Public License
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    16
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
878648aa663f license: GNU GPLv3+
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    17
 */
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
#pragma once
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    20
#include <string>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <vector>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <iostream>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <sstream>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
#include <locale>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <codecvt>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <regex>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <relpipe/reader/typedefs.h>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
#include <relpipe/reader/TypeId.h>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
#include <relpipe/reader/handlers/AttributeMetadata.h>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
namespace relpipe {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
namespace out {
4
7e0211f00413 fix names
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    35
namespace xml {
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
using namespace relpipe::reader;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    39
class XmlHandler : public handlers::RelationalReaderStringHadler {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    40
private:
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    41
	std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // XML output will be always in UTF-8
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    42
	const char* INDENT = "\t";
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    43
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    44
	std::ostream &output;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    45
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    46
	std::vector<TypeId> columnTypes;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    47
	std::vector<string_t> columnTypeCodes;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    48
	std::vector<string_t> columnNames;
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    49
	integer_t valueCount = 0;
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    50
	integer_t columnCount = 0;
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    51
	integer_t relationCount = 0;
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    52
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    53
	const std::string escapeXmlText(const string_t &value) {
8
0fb84b194a8f todo: performance
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    54
		// TODO: really bad performance → rewrite
0fb84b194a8f todo: performance
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    55
		// 72 % of whole relpipe-out-xml according to valgrind/callgrind
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    56
		std::wstringstream result;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    57
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    58
		for (auto & ch : value) {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    59
			switch (ch) {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    60
				case L'&': result << L"&amp;";
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    61
					break;
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    62
				case L'<': result << L"&lt;";
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    63
					break;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    64
				case L'>': result << L"&gt;";
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    65
					break;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    66
				case L'\'': result << L"&apos;"; // TODO: escape ' and " only in attributes
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    67
					break;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    68
				case L'"': result << L"&quot;"; // TODO: escape ' and " only in attributes
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    69
					break;
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    70
				default: result << ch;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    71
			}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    72
		}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    73
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    74
		return convertor.to_bytes(result.str());
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    75
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    76
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    77
public:
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    78
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    79
	XmlHandler(std::ostream& output) : output(output) {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    80
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    81
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    82
	void startRelation(string_t name, std::vector<handlers::AttributeMetadata> attributes) override {
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    83
		// TODO: refactor and move common XML functions to relpipe-lib-xml
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    84
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    85
		valueCount = 0;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    86
		columnCount = 0;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    87
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    88
		if (relationCount == 0) {
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    89
			output << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    90
			output << "<pipe>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    91
			// TODO: xmlns
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    92
		} else {
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    93
			output << INDENT << INDENT << "</record>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    94
			output << INDENT << "</relation>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    95
		}
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    96
		relationCount++;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    97
		output << INDENT << "<relation>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    98
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    99
		output << INDENT << INDENT << "<name>" << escapeXmlText(name) << "</name>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   100
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   101
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   102
		columnCount = attributes.size();
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   103
		columnTypes.resize(columnCount);
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   104
		columnTypeCodes.resize(columnCount);
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   105
		columnNames.resize(columnCount);
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   106
		for (int i = 0; i < attributes.size(); i++) {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   107
			columnNames[i] = attributes[i].getAttributeName();
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   108
			columnTypes[i] = attributes[i].getTypeId();
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   109
			columnTypeCodes[i] = attributes[i].getTypeName();
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   110
		}
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   111
		
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   112
		// TODO: print attribute metadata
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   113
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   114
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   115
	void attribute(const string_t& value) override {
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   116
		integer_t i = valueCount % columnCount;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   117
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   118
		if (i == 0 && valueCount) output << INDENT << INDENT << "</record>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   119
		if (i == 0) output << INDENT << INDENT << "<record>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   120
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   121
		valueCount++;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   122
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   123
		// TODO: print attribute metadata (optional)
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   124
		output << INDENT << INDENT << INDENT << "<attribute>";
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   125
		output << escapeXmlText(value);
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   126
		output << "</attribute>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   127
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   128
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   129
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   130
	void endOfPipe() {
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   131
		if (valueCount) output << INDENT << INDENT << "</record>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   132
		if (relationCount) output << INDENT << "</relation>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   133
		output << "</pipe>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   134
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   135
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   136
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   137
};
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   138
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   139
}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   140
}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   141
}