move common XML code to relpipe-lib-xmlwriter (a header-only library) v_0
authorFrantišek Kučera <franta-hg@frantovo.cz>
Sun, 06 Jan 2019 22:15:37 +0100
branchv_0
changeset 2 f21d6ae71bf0
parent 1 7d3081e51970
child 3 edbebc6163e3
move common XML code to relpipe-lib-xmlwriter (a header-only library)
include/relpipe/xmlwriter/XMLWriter.h
--- a/include/relpipe/xmlwriter/XMLWriter.h	Sat Jan 05 20:05:32 2019 +0100
+++ b/include/relpipe/xmlwriter/XMLWriter.h	Sun Jan 06 22:15:37 2019 +0100
@@ -18,9 +18,11 @@
 #pragma once
 
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <vector>
 #include <codecvt>
+#include <locale>
 
 #include "RelpipeXMLWriterException.h"
 
@@ -29,41 +31,211 @@
 namespace relpipe {
 namespace xmlwriter {
 
+/**
+ * A simple library for writing XML streams.
+ *
+ * Does:
+ *  - convert wstring values to UTF-8
+ *  - escape values of text nodes and attributes
+ *  - preserve whitespace of text nodes
+ *  - in comments: replace - with –
+ * Does not yet (planned, see the FIXME notes in the code):
+ *  - indent elements
+ *  - check validity of element and attribute names
+ * Does not:
+ *  - understand XML namespaces:
+ *     - handles xmlns:x="y" as ordinary attributes
+ *     - thus can not check whether elements/attributes belong to a declared namespace
+ *     - thus can generate invalid XML – namespaces and their prefixes must be checked on the caller side
+ */
 class XMLWriter {
 private:
-	std::ostream &output;
+	/** Indentation unit; not used yet – element indentation is still a FIXME. */
+	const char* INDENT = "\t";
+
+	/** Target stream; held by reference, so it must outlive this writer. */
+	ostream& output;
+	wstring_convert<codecvt_utf8<wchar_t>> convertor; // XML output will be always in UTF-8
+	/** Names of currently open elements, the innermost one last. */
+	vector<wstring> treePosition;
+
+	/**
+	 * Escapes the characters & < > ' " and converts the result to UTF-8.
+	 * Moved here from XmlHandler.h (relpipe-out-xml).
+	 *
+	 * The former wstringstream-based implementation took 72 % of whole relpipe-out-xml
+	 * according to valgrind/callgrind; appending to a pre-reserved wstring avoids the stream overhead.
+	 *
+	 * @param value raw (unescaped) text
+	 * @return escaped text in UTF-8
+	 */
+	string escapeXmlText(const wstring& value) {
+		wstring result;
+		result.reserve(value.size()); // escaping never makes the text shorter
+
+		for (const wchar_t ch : value) {
+			switch (ch) {
+				case L'&': result += L"&amp;";
+					break;
+				case L'<': result += L"&lt;";
+					break;
+				case L'>': result += L"&gt;";
+					break;
+				case L'\'': result += L"&apos;"; // TODO: escape ' and " only in attributes
+					break;
+				case L'"': result += L"&quot;"; // TODO: escape ' and " only in attributes
+					break;
+				default: result += ch;
+			}
+		}
+
+		return convertor.to_bytes(result);
+	}
+
+	/**
+	 * Escapes an attribute value; currently delegates to escapeXmlText().
+	 *
+	 * @param value raw (unescaped) attribute value
+	 * @return escaped value in UTF-8
+	 */
+	string escapeXmlAttribute(const wstring& value) {
+		// TODO: escaping of ' is not necessary.
+		return escapeXmlText(value);
+	}
+
+	/**
+	 * Replaces each - with – and converts the result to UTF-8.
+	 *
+	 * @param value raw comment text
+	 * @return comment text without any - character, in UTF-8
+	 */
+	string escapeComment(const wstring& value) {
+		wstring result;
+		result.reserve(value.size());
+
+		for (const wchar_t ch : value) {
+			if (ch == L'-') result += L"–";
+			else result += ch;
+		}
+
+		return convertor.to_bytes(result);
+	}
+
+	/** Placeholder for name validation; currently accepts any name. */
+	void checkName(const wstring& name) {
+		// FIXME: throw exception on invalid names
+	}
+
+	/**
+	 * Writes each attribute as: space, name, ="escaped value"
+	 *
+	 * @param attributes flat list: name, value, name, value…
+	 * @throws RelpipeXMLWriterException if the list has an odd number of elements
+	 */
+	void writeAttributes(const vector<wstring>& attributes) {
+		if (attributes.size() % 2) throw RelpipeXMLWriterException(L"attributes vector must contain name/value pairs (even number of elements)");
+		// size_type instead of int: no signed/unsigned comparison with size()
+		for (vector<wstring>::size_type i = 0; i < attributes.size(); i += 2) {
+			const wstring& name = attributes[i];
+			const wstring& value = attributes[i + 1];
+			checkName(name);
+			output << " " << convertor.to_bytes(name) << "=\"" << escapeXmlAttribute(value) << "\"";
+		}
+	}
 
 public:
 
+	/**
+	 * Creates the writer and immediately writes the XML declaration to the output.
+	 *
+	 * @param output stream to write the XML to
+	 */
 	XMLWriter(std::ostream& output) : output(output) {
+		output << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << endl;
 	}
 
+	/** Flushes the output stream. */
 	virtual ~XMLWriter() {
 		output.flush();
 	}
 
-	void writeStartElement(const wstring& name, const vector<wstring>& attributes) {
-		throw RelpipeXMLWriterException(L"not yet implemented");
+	/**
+	 * Writes a start tag and remembers the element as open.
+	 *
+	 * @param name element name
+	 * @param attributes flat list: name, value, name, value…
+	 * @throws RelpipeXMLWriterException if attributes has an odd number of elements
+	 */
+	void writeStartElement(const wstring& name, const vector<wstring>& attributes = {}) {
+		// FIXME: indentation
+		checkName(name);
+		treePosition.push_back(name);
+		output << "<" << convertor.to_bytes(name);
+		writeAttributes(attributes);
+		output << ">";
 	}
 
+	/**
+	 * Writes the end tag of the most recently opened element that is still open.
+	 *
+	 * @throws RelpipeXMLWriterException if there is no open element
+	 */
 	void writeEndElement() {
-		throw RelpipeXMLWriterException(L"not yet implemented");
+		// FIXME: indentation
+		if (treePosition.empty()) throw RelpipeXMLWriterException(L"unable to close element – all elements are already closed");
+		output << "</" << convertor.to_bytes(treePosition.back()) << ">";
+		treePosition.pop_back();
 	}
 
-	void writeEmptyElement(const wstring& name, const vector<wstring>& attributes) {
-		throw RelpipeXMLWriterException(L"not yet implemented");
+	/**
+	 * Writes a self-closing element: <name …/>
+	 *
+	 * @param name element name
+	 * @param attributes flat list: name, value, name, value…
+	 * @throws RelpipeXMLWriterException if attributes has an odd number of elements
+	 */
+	void writeEmptyElement(const wstring& name, const vector<wstring>& attributes = {}) {
+		// FIXME: indentation
+		checkName(name);
+		output << "<" << convertor.to_bytes(name);
+		writeAttributes(attributes);
+		output << "/>";
 	}
 
+	/**
+	 * Writes a whole element containing just the given text: start tag, escaped text, end tag.
+	 *
+	 * @param name element name
+	 * @param attributes flat list: name, value, name, value…
+	 * @param text raw (unescaped) content of the element
+	 */
 	void writeTextElement(const wstring& name, const vector<wstring>& attributes, const wstring& text) {
-		throw RelpipeXMLWriterException(L"not yet implemented");
+		writeStartElement(name, attributes);
+		writeCharacters(text);
+		writeEndElement();
 	}
 
+	/**
+	 * Writes the text to the output, escaped.
+	 *
+	 * @param text raw (unescaped) text
+	 */
 	void writeCharacters(const wstring& text) {
-		throw RelpipeXMLWriterException(L"not yet implemented");
+		output << escapeXmlText(text);
 	}
 
-	void writeComment(const wstring& text) {
-		throw RelpipeXMLWriterException(L"not yet implemented");
+	/**
+	 * Writes an XML comment; each - in the text is replaced with –
+	 *
+	 * @param text comment text
+	 * @param addSpaces whether to put a space between the text and the <!-- / --> delimiters
+	 */
+	void writeComment(const wstring& text, bool addSpaces = true) {
+		// The parentheses are necessary: << binds tighter than ?: so without them
+		// the bool would be written to the output instead of the delimiter.
+		output << (addSpaces ? "<!-- " : "<!--");
+		output << escapeComment(text);
+		output << (addSpaces ? " -->" : "-->");
 	}
 
 };