include/relpipe/xmlwriter/XMLWriter.h
branchv_0
changeset 2 f21d6ae71bf0
parent 1 7d3081e51970
child 3 edbebc6163e3
equal deleted inserted replaced
1:7d3081e51970 2:f21d6ae71bf0
    16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
    16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
    17  */
    17  */
    18 #pragma once
    18 #pragma once
    19 
    19 
    20 #include <iostream>
    20 #include <iostream>
       
    21 #include <sstream>
    21 #include <string>
    22 #include <string>
    22 #include <vector>
    23 #include <vector>
    23 #include <codecvt>
    24 #include <codecvt>
       
    25 #include <locale>
    24 
    26 
    25 #include "RelpipeXMLWriterException.h"
    27 #include "RelpipeXMLWriterException.h"
    26 
    28 
    27 using namespace std;
    29 using namespace std;
    28 
    30 
    29 namespace relpipe {
    31 namespace relpipe {
    30 namespace xmlwriter {
    32 namespace xmlwriter {
    31 
    33 
       
    34 /**
       
    35  * A simple library for writing XML streams.
       
    36  * 
       
    37  * Does:
       
    38  *  - convert wstring values to UTF-8
       
    39  *  - escape values of text nodes and attributes
       
    40  *  - indent elements
       
    41  *  - preserve whitespace of text nodes
       
    42  *  - in comments: replace - with –
       
    43  * Does not:
       
    44  *  - understand XML namespaces:
       
    45  *     - handles xmlns:x="y" as ordinary attributes
       
    46  *     - thus can not check whether elements/attributes belong to an declared namespace
       
    47  *     - thus can generate invalid XML – namespaces and their prefixes must be checked on the caller side
       
    48  */
    32 class XMLWriter {
    49 class XMLWriter {
    33 private:
    50 private:
    34 	std::ostream &output;
    51 	const char* INDENT = "\t";
       
    52 
       
    53 	ostream& output;
       
    54 	wstring_convert<codecvt_utf8<wchar_t>> convertor; // XML output will be always in UTF-8
       
    55 	vector<wstring> treePosition;
       
    56 
       
    57 	const string escapeXmlText(const wstring& value) {
       
    58 		// TODO: really bad performance → rewrite
       
    59 		// 72 % of whole relpipe-out-xml according to valgrind/callgrind
       
    60 		// Moved here from XmlHandler.h (relpipe-out-xml).
       
    61 		std::wstringstream result;
       
    62 
       
    63 		for (auto & ch : value) {
       
    64 			switch (ch) {
       
    65 				case L'&': result << L"&amp;";
       
    66 					break;
       
    67 				case L'<': result << L"&lt;";
       
    68 					break;
       
    69 				case L'>': result << L"&gt;";
       
    70 					break;
       
    71 				case L'\'': result << L"&apos;"; // TODO: escape ' and " only in attributes
       
    72 					break;
       
    73 				case L'"': result << L"&quot;"; // TODO: escape ' and " only in attributes
       
    74 					break;
       
    75 				default: result << ch;
       
    76 			}
       
    77 		}
       
    78 
       
    79 		return convertor.to_bytes(result.str());
       
    80 	}
       
    81 
       
    82 	const string escapeXmlAttribute(const wstring& value) {
       
    83 		// TODO: escaping of ' is not necessary.
       
    84 		return escapeXmlText(value);
       
    85 	}
       
    86 
       
    87 	const string escapeComment(const wstring& value) {
       
    88 		// TODO: really bad performance → rewrite
       
    89 		// 72 % of whole relpipe-out-xml according to valgrind/callgrind
       
    90 		std::wstringstream result;
       
    91 
       
    92 		for (auto & ch : value) {
       
    93 			switch (ch) {
       
    94 				case L'-': result << L"–";
       
    95 					break;
       
    96 				default: result << ch;
       
    97 			}
       
    98 		}
       
    99 
       
   100 		return convertor.to_bytes(result.str());
       
   101 	}
       
   102 
       
   103 	void checkName(const wstring& name) {
       
   104 		// FIXME: throw exception on invalid names
       
   105 	}
       
   106 
       
   107 	void writeAttributes(const vector<wstring>& attributes) {
       
   108 		if (attributes.size() % 2) throw RelpipeXMLWriterException(L"attributes vector must contain name/value pairs (even number of elements)");
       
   109 		for (int i = 0; i < attributes.size(); i = i + 2) {
       
   110 			wstring name = attributes[i];
       
   111 			wstring value = attributes[i + 1];
       
   112 			checkName(name);
       
   113 			output << " " << convertor.to_bytes(name) << "=\"" << escapeXmlAttribute(value) << "\"";
       
   114 		}
       
   115 	}
    35 
   116 
    36 public:
   117 public:
    37 
   118 
    38 	XMLWriter(std::ostream& output) : output(output) {
   119 	XMLWriter(std::ostream& output) : output(output) {
       
   120 		output << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << endl;
    39 	}
   121 	}
    40 
   122 
    41 	virtual ~XMLWriter() {
   123 	virtual ~XMLWriter() {
    42 		output.flush();
   124 		output.flush();
    43 	}
   125 	}
    44 
   126 
    45 	void writeStartElement(const wstring& name, const vector<wstring>& attributes) {
   127 	void writeStartElement(const wstring& name, const vector<wstring>& attributes = {}) {
    46 		throw RelpipeXMLWriterException(L"not yet implemented");
   128 		// FIXME: indentation
       
   129 		checkName(name);
       
   130 		treePosition.push_back(name);
       
   131 		output << "<" << convertor.to_bytes(name);
       
   132 		writeAttributes(attributes);
       
   133 		output << ">";
    47 	}
   134 	}
    48 
   135 
    49 	void writeEndElement() {
   136 	void writeEndElement() {
    50 		throw RelpipeXMLWriterException(L"not yet implemented");
   137 		// FIXME: indentation
       
   138 		if (treePosition.empty()) throw RelpipeXMLWriterException(L"unable to close element – all elements are already closed");
       
   139 		output << "</" << convertor.to_bytes(treePosition.back()) << ">";
       
   140 		treePosition.pop_back();
    51 	}
   141 	}
    52 
   142 
    53 	void writeEmptyElement(const wstring& name, const vector<wstring>& attributes) {
   143 	void writeEmptyElement(const wstring& name, const vector<wstring>& attributes = {}) {
    54 		throw RelpipeXMLWriterException(L"not yet implemented");
   144 		// FIXME: indentation
       
   145 		checkName(name);
       
   146 		output << "<" << convertor.to_bytes(name);
       
   147 		writeAttributes(attributes);
       
   148 		output << "/>";
       
   149 
    55 	}
   150 	}
    56 
   151 
    57 	void writeTextElement(const wstring& name, const vector<wstring>& attributes, const wstring& text) {
   152 	void writeTextElement(const wstring& name, const vector<wstring>& attributes, const wstring& text) {
    58 		throw RelpipeXMLWriterException(L"not yet implemented");
   153 		writeStartElement(name, attributes);
       
   154 		writeCharacters(text);
       
   155 		writeEndElement();
    59 	}
   156 	}
    60 
   157 
    61 	void writeCharacters(const wstring& text) {
   158 	void writeCharacters(const wstring& text) {
    62 		throw RelpipeXMLWriterException(L"not yet implemented");
   159 		output << escapeXmlText(text);
    63 	}
   160 	}
    64 
   161 
    65 	void writeComment(const wstring& text) {
   162 	void writeComment(const wstring& text, bool addSpaces = true) {
    66 		throw RelpipeXMLWriterException(L"not yet implemented");
   163 		output << addSpaces ? "<!-- " : "<!--";
       
   164 		output << escapeComment(text);
       
   165 		output << addSpaces ? " -->" : "-->";
    67 	}
   166 	}
    68 
   167 
    69 };
   168 };
    70 
   169 
    71 }
   170 }