src/XmlHandler.h
author František Kučera <franta-hg@frantovo.cz>
Sun, 16 Sep 2018 18:12:31 +0200
branchv_0
changeset 2 13a41e435ea0
parent 1 82ba555a97d1
child 3 878648aa663f
permissions -rw-r--r--
quick and dirty XML output
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
#pragma once
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include <locale>
#include <codecvt>
#include <regex>
#include <stdexcept>

#include <relpipe/reader/typedefs.h>
#include <relpipe/reader/TypeId.h>
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
#include <relpipe/reader/handlers/AttributeMetadata.h>
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
namespace relpipe {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
namespace out {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
namespace tabular {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    20
using namespace relpipe::reader;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
class XmlHandler : public handlers::RelationalReaderStringHadler {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
private:
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
	std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // XML output will be always in UTF-8
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
	const char* INDENT = "\t";
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
	std::ostream &output;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
	std::vector<TypeId> columnTypes;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
	std::vector<string_t> columnTypeCodes;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
	std::vector<string_t> columnNames;
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    32
	integer_t valueCount = 0;
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
	integer_t columnCount = 0;
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    34
	integer_t relationCount = 0;
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    36
	const std::string escapeXmlText(const string_t &value) {
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
		std::wstringstream result;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    39
		for (auto & ch : value) {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    40
			switch (ch) {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    41
				case L'&': result << L"&amp;";
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    42
					break;
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    43
				case L'<': result << L"&lt;";
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    44
					break;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    45
				case L'>': result << L"&gt;";
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    46
					break;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    47
				case L'\'': result << L"&apos;"; // TODO: escape ' and " only in attributes
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    48
					break;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    49
				case L'"': result << L"&quot;"; // TODO: escape ' and " only in attributes
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    50
					break;
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    51
				default: result << ch;
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    52
			}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    53
		}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    54
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    55
		return convertor.to_bytes(result.str());
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    56
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    57
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    58
public:
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    59
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    60
	XmlHandler(std::ostream& output) : output(output) {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    61
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    62
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    63
	void startRelation(string_t name, std::vector<handlers::AttributeMetadata> attributes) override {
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    64
		// TODO: refactor and move common XML functions to relpipe-lib-xml
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    65
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    66
		valueCount = 0;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    67
		columnCount = 0;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    68
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    69
		if (relationCount == 0) {
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    70
			output << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    71
			output << "<pipe>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    72
			// TODO: xmlns
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    73
		} else {
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    74
			output << INDENT << INDENT << "</record>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    75
			output << INDENT << "</relation>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    76
		}
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    77
		relationCount++;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    78
		output << INDENT << "<relation>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    79
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    80
		output << INDENT << INDENT << "<name>" << escapeXmlText(name) << "</name>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    81
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    82
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    83
		columnCount = attributes.size();
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    84
		columnTypes.resize(columnCount);
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    85
		columnTypeCodes.resize(columnCount);
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    86
		columnNames.resize(columnCount);
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    87
		for (int i = 0; i < attributes.size(); i++) {
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    88
			columnNames[i] = attributes[i].getAttributeName();
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    89
			columnTypes[i] = attributes[i].getTypeId();
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    90
			columnTypeCodes[i] = attributes[i].getTypeName();
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    91
		}
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    92
		
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    93
		// TODO: print attribute metadata
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    94
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    95
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    96
	void attribute(const string_t& value) override {
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    97
		integer_t i = valueCount % columnCount;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    98
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    99
		if (i == 0 && valueCount) output << INDENT << INDENT << "</record>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   100
		if (i == 0) output << INDENT << INDENT << "<record>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   101
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   102
		valueCount++;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   103
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   104
		// TODO: print attribute metadata (optional)
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   105
		output << INDENT << INDENT << INDENT << "<attribute>";
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   106
		output << escapeXmlText(value);
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   107
		output << "</attribute>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   108
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   109
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   110
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   111
	void endOfPipe() {
2
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   112
		if (valueCount) output << INDENT << INDENT << "</record>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   113
		if (relationCount) output << INDENT << "</relation>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   114
		output << "</pipe>" << std::endl;
13a41e435ea0 quick and dirty XML output
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   115
1
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   116
	}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   117
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   118
};
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   119
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   120
}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   121
}
82ba555a97d1 relpipe-out-xml skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   122
}