include/relpipe/xmlwriter/XMLWriter.h
author František Kučera <franta-hg@frantovo.cz>
Mon, 08 Apr 2019 13:37:14 +0200
branchv_0
changeset 8 60183af4604e
parent 3 edbebc6163e3
child 11 56c84049698d
permissions -rw-r--r--
Added tag v0.11 for changeset 5201d93f8bdd

/**
 * Relational pipes
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <codecvt>
#include <locale>

#include "RelpipeXMLWriterException.h"

using namespace std;

namespace relpipe {
namespace xmlwriter {

/**
 * A simple library for writing XML streams.
 * 
 * Does:
 *  - convert wstring values to UTF-8
 *  - escape values of text nodes and attributes
 *  - indent elements
 *  - preserve whitespace of text nodes
 *  - in comments: replace - with –
 * Does not:
 *  - understand XML namespaces:
 *     - handles xmlns:x="y" as ordinary attributes
 *     - thus cannot check whether elements/attributes belong to a declared namespace
 *     - thus can generate invalid XML – namespaces and their prefixes must be checked on the caller side
 */
class XMLWriter {
private:
	/** Index type used when walking the attribute list and the element stack. */
	using Index = std::vector<std::wstring>::size_type;

	/** Text written once per nesting level when indenting. */
	const char* INDENT = "\t";

	/** Destination stream; held by reference, so it must outlive this writer. */
	std::ostream& output;
	/** Wide string → byte string conversion; XML output will be always in UTF-8. */
	std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor;
	/** Names of the currently open elements, outermost first. */
	std::vector<std::wstring> treePosition;
	/** Set by writeCharacters(); makes the next writeIndentation() emit nothing, so no whitespace is added to the text. */
	bool lastWasTextNode = false;

	/**
	 * Escapes the XML special characters & < > ' " and converts the result to UTF-8.
	 *
	 * The original author's profiling (valgrind/callgrind) attributed 72 % of relpipe-out-xml
	 * to the former stream-based version of this method, so the escaped text is now appended
	 * to a pre-sized wstring instead of being inserted character by character into a wstringstream.
	 * Moved here from XmlHandler.h (relpipe-out-xml).
	 *
	 * @param value raw text
	 * @return escaped text as UTF-8 bytes
	 */
	std::string escapeXmlText(const std::wstring& value) {
		std::wstring result;
		result.reserve(value.size()); // lower bound: every escape only makes the text longer

		for (const wchar_t ch : value) {
			switch (ch) {
				case L'&': result += L"&amp;";
					break;
				case L'<': result += L"&lt;";
					break;
				case L'>': result += L"&gt;";
					break;
				case L'\'': result += L"&apos;"; // TODO: escape ' and " only in attributes
					break;
				case L'"': result += L"&quot;"; // TODO: escape ' and " only in attributes
					break;
				default: result += ch;
			}
		}

		return convertor.to_bytes(result);
	}

	/**
	 * Escapes an attribute value; currently the same rules as for text nodes.
	 *
	 * @param value raw attribute value
	 * @return escaped value as UTF-8 bytes
	 */
	std::string escapeXmlAttribute(const std::wstring& value) {
		// TODO: escaping of ' is not necessary.
		return escapeXmlText(value);
	}

	/**
	 * Prepares comment text: every '-' is replaced with an en dash (U+2013),
	 * then the result is converted to UTF-8.
	 *
	 * @param value raw comment text
	 * @return comment text as UTF-8 bytes
	 */
	std::string escapeComment(const std::wstring& value) {
		std::wstring result;
		result.reserve(value.size());

		for (const wchar_t ch : value) {
			// \u2013 = EN DASH; spelled as an escape so it does not depend on the source file encoding
			result += (ch == L'-') ? L'\u2013' : ch;
		}

		return convertor.to_bytes(result);
	}

	/**
	 * Hook for validating element and attribute names; currently accepts everything.
	 */
	void checkName(const std::wstring& name) {
		// FIXME: throw exception on invalid names
	}

	/**
	 * Validates an attribute list without writing anything.
	 * Called before any output is produced, so a rejected call leaves
	 * neither a half-written tag in the stream nor a changed writer state.
	 *
	 * @param attributes flat list: name, value, name, value, …
	 * @throws RelpipeXMLWriterException if the list has an odd number of elements
	 */
	void checkAttributes(const std::vector<std::wstring>& attributes) {
		if (attributes.size() % 2) throw RelpipeXMLWriterException(L"attributes vector must contain name/value pairs (even number of elements)");
		for (Index i = 0; i < attributes.size(); i += 2) {
			checkName(attributes[i]);
		}
	}

	/**
	 * Writes ` name="value"` for each pair of an attribute list
	 * that has already passed checkAttributes().
	 */
	void writeAttributes(const std::vector<std::wstring>& attributes) {
		for (Index i = 0; i + 1 < attributes.size(); i += 2) {
			output << " " << convertor.to_bytes(attributes[i]) << "=\"" << escapeXmlAttribute(attributes[i + 1]) << "\"";
		}
	}

	/**
	 * Starts a new line indented by one INDENT per open element,
	 * unless the previous node was text – then nothing is written.
	 *
	 * @param resetState whether to clear the "previous node was text" flag when it is set
	 */
	void writeIndentation(bool resetState = true) {
		if (lastWasTextNode) {
			if (resetState) lastWasTextNode = false;
			return;
		}

		output << std::endl; // also flushes the stream; kept so that the output timing stays unchanged
		for (Index level = 0; level < treePosition.size(); ++level) {
			output << INDENT;
		}
	}

public:

	/**
	 * Creates the writer and immediately writes the XML declaration.
	 *
	 * @param output stream that will receive the UTF-8 encoded document
	 */
	XMLWriter(std::ostream& output) : output(output) {
		output << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
	}

	/**
	 * Terminates the output with a line break and flushes the stream.
	 * Elements that are still open are not closed.
	 */
	virtual ~XMLWriter() {
		try {
			output << '\n';
			output.flush();
		} catch (...) {
			// The stream may have exceptions enabled; letting one escape a destructor would terminate the program.
		}
	}

	/**
	 * Opens an element: <name attr="value" …>
	 * The element stays open until the matching writeEndElement() call.
	 *
	 * @param name element name
	 * @param attributes flat list: name, value, name, value, …
	 * @throws RelpipeXMLWriterException if attributes has an odd number of elements (nothing is written in that case)
	 */
	void writeStartElement(const std::wstring& name, const std::vector<std::wstring>& attributes = {}) {
		checkName(name);
		checkAttributes(attributes);
		writeIndentation();
		treePosition.push_back(name);
		output << "<" << convertor.to_bytes(name);
		writeAttributes(attributes);
		output << ">";
	}

	/**
	 * Closes the most recently opened element: </name>
	 *
	 * @throws RelpipeXMLWriterException if there is no open element
	 */
	void writeEndElement() {
		if (treePosition.empty()) throw RelpipeXMLWriterException(L"unable to close element – all elements are already closed");
		const std::wstring name = treePosition.back();
		treePosition.pop_back(); // pop first, so the end tag is indented at the level of its start tag
		writeIndentation();
		output << "</" << convertor.to_bytes(name) << ">";
	}

	/**
	 * Writes a self-closing element: <name attr="value" …/>
	 *
	 * @param name element name
	 * @param attributes flat list: name, value, name, value, …
	 * @throws RelpipeXMLWriterException if attributes has an odd number of elements (nothing is written in that case)
	 */
	void writeEmptyElement(const std::wstring& name, const std::vector<std::wstring>& attributes = {}) {
		checkName(name);
		checkAttributes(attributes);
		writeIndentation();
		output << "<" << convertor.to_bytes(name);
		writeAttributes(attributes);
		output << "/>";
	}

	/**
	 * Writes an element that contains just a text node: <name attr="value" …>text</name>
	 *
	 * @param name element name
	 * @param attributes flat list: name, value, name, value, …
	 * @param text element content; gets escaped
	 * @throws RelpipeXMLWriterException if attributes has an odd number of elements
	 */
	void writeTextElement(const std::wstring& name, const std::vector<std::wstring>& attributes, const std::wstring& text) {
		writeStartElement(name, attributes);
		writeCharacters(text);
		writeEndElement();
	}

	/**
	 * Writes an escaped text node at the current position, with no line break or indentation,
	 * and suppresses the indentation of the node that follows.
	 *
	 * @param text raw text; gets escaped
	 */
	void writeCharacters(const std::wstring& text) {
		output << escapeXmlText(text);
		lastWasTextNode = true;
	}

	/**
	 * Writes a comment. Every '-' in the text is replaced with an en dash.
	 * Directly after a text node the comment is written inline
	 * and the node after the comment is still not indented.
	 *
	 * @param text comment text
	 * @param addSpaces whether to put a space between the comment delimiters and the text
	 */
	void writeComment(const std::wstring& text, bool addSpaces = true) {
		writeIndentation(false);
		output << (addSpaces ? "<!-- " : "<!--");
		output << escapeComment(text);
		output << (addSpaces ? " -->" : "-->");
	}

};

}
}