relpipe/relpipe-out-ini.cpp: src/INIWriter.h@bee7acb57330


/**
 * Relational pipes
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <string>
#include <sstream>
#include <regex>

#include <relpipe/common/type/typedefs.h>
#include <relpipe/reader/RelpipeReaderException.h>

#include "uri.h"
#include "Dialect.h"
#include "EscapingProcessor.h"

namespace relpipe {
namespace out {
namespace ini {

class INIWriter {
private:
	/** Stream that receives the generated INI text. */
	std::ostream& output;
	/** Converts wide strings to UTF-8 encoded bytes when text is written to the output. */
	std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // TODO: local system encoding or generate INI always in UTF-8 like XML?

	/** Written between the key (including the optional sub key) and the value of an entry. */
	std::string keyValueSeparator = " = ";
	/** Written between a section header and its comment. */
	std::string commentSeparatorForSections = " ; ";
	/** Written between an entry and its comment. */
	std::string commentSeparatorForEntries = " ; ";
	/** Written at the beginning of a standalone comment line. */
	std::string commentSeparatorStandalone = "; ";

	/** Joins the names of nested sections; also joins the section name and the key when sections are not allowed. */
	relpipe::common::type::StringX hierarchySeparator = L"/";
	/** If false, no section headers are written and the full section name is prepended to the key of each entry. */
	bool allowSections = true;

	/** Set once a section was started or anything was written; a blank line is then put before the next section header. */
	bool hasContent = false;

	/** Names of the currently open sections, outermost first. */
	std::vector<relpipe::common::type::StringX> currentSection;

	/**
	 * Joins the names of all currently open sections (outermost first) into a single name.
	 * The names are separated by hierarchySeparator.
	 *
	 * TODO: escape separator characters that are part of the section names
	 *
	 * @return full name of the current section; empty string if no section is open
	 */
	relpipe::common::type::StringX getCurrentSectionFullName() {
		std::wstringstream result;

		// range-for instead of an int index: no signed/unsigned comparison with size()
		bool first = true;
		for (const relpipe::common::type::StringX& name : currentSection) {
			if (!first) result << hierarchySeparator;
			result << name;
			first = false;
		}

		return result.str();
	}

	/**
	 * Converts the textual form of a boolean option to bool.
	 * Only the exact values "true" and "false" are accepted.
	 *
	 * TODO: use a common method
	 *
	 * @param value text to be parsed
	 * @return true for "true", false for "false"
	 * @throws relpipe::reader::RelpipeReaderException for any other value
	 */
	bool parseBoolean(const relpipe::common::type::StringX& value) {
		const bool isTrue = (value == L"true");
		if (!isTrue && value != L"false") {
			throw relpipe::reader::RelpipeReaderException(L"Unable to parse boolean value: " + value + L" (expecting true or false)");
		}
		return isTrue;
	}

	/**
	 * Maps the textual quoting type (one of the quoting::* constants) to the QuotingType enumeration.
	 *
	 * @param value textual quoting type
	 * @return corresponding quoting type
	 * @throws relpipe::reader::RelpipeReaderException if the value matches none of the known constants
	 */
	EscapingProcessor::QuotingType parseQuotingType(const relpipe::common::type::StringX& value) {
		using QuotingType = EscapingProcessor::QuotingType;

		if (value == quoting::None) return QuotingType::None;
		if (value == quoting::Apostrophes) return QuotingType::Apostrophes;
		if (value == quoting::Quotes) return QuotingType::Quotes;

		throw relpipe::reader::RelpipeReaderException(L"Unsupported quoting type:" + value);
	}

	/**
	 * Decides whether and how a text should be quoted.
	 * The configured type is used either for every text (empty pattern)
	 * or only for texts that fully match the pattern (regular expression).
	 */
	class QuotingRule {
	public:
		/** Quoting used for the texts this rule applies to. */
		EscapingProcessor::QuotingType type = EscapingProcessor::QuotingType::None;
		/** Regular expression that the whole text must match in order to be quoted; empty = quote everything. */
		relpipe::common::type::StringX pattern;

		/**
		 * @param value text that is going to be written
		 * @return the configured type if the rule applies to the value, QuotingType::None otherwise
		 * @throws std::regex_error if the pattern is not a valid regular expression
		 */
		EscapingProcessor::QuotingType findEffectiveQuotingType(const relpipe::common::type::StringX& value) {
			if (type == EscapingProcessor::QuotingType::None) return type;
			if (pattern.empty()) return type;

			// Compiling a regular expression is expensive and this method is called for each written text,
			// so the compiled form is reused until the (publicly writable) pattern changes.
			if (!compiled || compiledFrom != pattern) {
				compiled = false; // stays invalid if the compilation below throws
				compiledPattern = std::wregex(pattern);
				compiledFrom = pattern;
				compiled = true;
			}

			return std::regex_match(value, compiledPattern) ? type : EscapingProcessor::QuotingType::None;
		}

	private:
		/** Pattern text from which compiledPattern was built; meaningful only if compiled is true. */
		relpipe::common::type::StringX compiledFrom;
		/** Cached compiled form of the pattern. */
		std::wregex compiledPattern;
		/** Whether compiledPattern holds a successfully compiled expression. */
		bool compiled = false;
	};

	/** Quoting of section names and section tags. */
	QuotingRule quotingForSections;
	/** Quoting of entry keys and sub keys. */
	QuotingRule quotingForKeys;
	/** Quoting of entry values. */
	QuotingRule quotingForValues;

	/**
	 * Prepares a text for being written to the output:
	 * finds the effective quoting, passes the text through all enabled escaping processors
	 * (in the order in which they were added), wraps it in quotes or apostrophes if needed
	 * and converts it to bytes.
	 *
	 * Section names and tags use quotingForSections, keys and sub keys use quotingForKeys,
	 * values use quotingForValues; any other text type (e.g. comments) is never quoted.
	 *
	 * @param value raw text
	 * @param type kind of the text; selects the quoting rule and is passed to the escaping processors
	 * @return escaped and optionally quoted text converted by the convertor
	 */
	std::string escape(const relpipe::common::type::StringX& value, EscapingProcessor::TextType type) {
		relpipe::common::type::StringX result = value;

		EscapingProcessor::QuotingType quotingType;
		if (type == EscapingProcessor::TextType::SectionName || type == EscapingProcessor::TextType::SectionTag) quotingType = quotingForSections.findEffectiveQuotingType(value);
		else if (type == EscapingProcessor::TextType::EntryKey || type == EscapingProcessor::TextType::EntrySubKey) quotingType = quotingForKeys.findEffectiveQuotingType(value);
		else if (type == EscapingProcessor::TextType::EntryValue) quotingType = quotingForValues.findEffectiveQuotingType(value);
		else quotingType = EscapingProcessor::QuotingType::None;

		// Iterate by const reference: a by-value loop variable would copy the shared_ptr and the URI string
		// of every processor on each call.
		for (const ConfiguredEscapingProcessor& p : escapingProcessors) {
			if (p.enbaled) result = p.processor->escape(result, type, quotingType);
		}

		// The quotes are added after the escaping, so they are not escaped themselves.
		if (quotingType == EscapingProcessor::QuotingType::Quotes) result = L"\"" + result + L"\"";
		else if (quotingType == EscapingProcessor::QuotingType::Apostrophes) result = L"'" + result + L"'";

		return convertor.to_bytes(result);
	}

	/**
	 * An escaping processor registered in this writer together with its identifier and on/off state.
	 */
	struct ConfiguredEscapingProcessor {
		/** The processor itself. */
		std::shared_ptr<EscapingProcessor> processor;
		/** Identifier under which the processor can be switched on or off through the writer options. */
		const relpipe::common::type::StringX uri;
		/** Whether the processor is currently used. (The name is misspelled; it is kept because other members of the writer refer to it.) */
		bool enbaled;

		ConfiguredEscapingProcessor(std::shared_ptr<EscapingProcessor> processor, const relpipe::common::type::StringX uri, bool enbaled)
			: processor(processor),
			  uri(uri),
			  enbaled(enbaled) {
		}
	};

	/** Registered escaping processors in the order of their registration. */
	std::vector<ConfiguredEscapingProcessor> escapingProcessors;

	/**
	 * Switches on or off the first registered escaping processor that has the given URI.
	 *
	 * @param uri identifier of the escaping processor
	 * @param value "true" or "false"; parsed only if a processor with this URI exists
	 * @return true if such a processor was found (and configured), false otherwise
	 * @throws relpipe::reader::RelpipeReaderException if the processor exists but the value is not a boolean
	 */
	bool setEscaping(const relpipe::common::type::StringX& uri, const relpipe::common::type::StringX& value) {
		for (auto it = escapingProcessors.begin(); it != escapingProcessors.end(); ++it) {
			if (it->uri != uri) continue;
			it->enbaled = parseBoolean(value);
			return true;
		}
		return false;
	}

	/**
	 * A dialect registered in this writer together with its identifier.
	 */
	struct ConfiguredDialect {
		/** The dialect itself. */
		std::shared_ptr<Dialect> dialect;
		/** Identifier under which the dialect can be selected. */
		const relpipe::common::type::StringX uri;

		ConfiguredDialect(std::shared_ptr<Dialect> dialect, const relpipe::common::type::StringX uri)
			: dialect(dialect),
			  uri(uri) {
		}
	};

	/** Registered dialects in the order of their registration. */
	std::vector<ConfiguredDialect> dialects;

	/**
	 * Applies the first registered dialect that has the given URI to this writer.
	 *
	 * @param uri identifier of the dialect
	 * @throws relpipe::reader::RelpipeReaderException if no dialect with this URI was registered
	 */
	void setDialect(const relpipe::common::type::StringX& uri) {
		for (auto it = dialects.begin(); it != dialects.end(); ++it) {
			if (it->uri != uri) continue;
			it->dialect->apply(*this);
			return;
		}
		throw relpipe::reader::RelpipeReaderException(L"Unsupported INI dialect: " + uri);
	}

public:

	/**
	 * Data of a section that is being opened.
	 */
	struct SectionStartEvent {
		/** Comment written after the section header; nothing is written if empty. */
		relpipe::common::type::StringX comment;
		/** Name of the section (without the names of its parent sections). */
		relpipe::common::type::StringX name;
		/** Tag written in a second pair of brackets after the section name; nothing is written if empty. */
		relpipe::common::type::StringX tag;
	};

	/**
	 * Data of a single key-value entry.
	 */
	struct EntryEvent {
		/** Comment written after the value; nothing is written if empty. */
		relpipe::common::type::StringX comment;
		/** Key of the entry. */
		relpipe::common::type::StringX key;
		/** Sub key written in brackets after the key; nothing is written if empty. */
		relpipe::common::type::StringX subKey;
		/** Value of the entry. */
		relpipe::common::type::StringX value;
	};

	/**
	 * Data of a standalone comment (a comment on its own line).
	 */
	struct CommentEvent {
		/** Text of the comment. */
		relpipe::common::type::StringX comment;
	};

	/**
	 * Data of a run of whitespace that should be reproduced in the output.
	 */
	struct WhitespaceEvent {
		/** The whitespace characters. */
		relpipe::common::type::StringX whitespace;
	};

	/**
	 * @param output stream to which the INI text will be written; only a reference is stored
	 */
	INIWriter(std::ostream& output) : output(output) {}

	/**
	 * Virtual, so the writer can be destroyed through a pointer to INIWriter. Does nothing on its own.
	 */
	virtual ~INIWriter() = default;

	/**
	 * Configures the writer.
	 * The URI is compared with the known option::* constants first;
	 * if none of them matches, it is tried as the URI of a registered escaping processor
	 * (the value then switches the processor on or off).
	 *
	 * @param uri identifier of the option
	 * @param value new value of the option
	 * @throws relpipe::reader::RelpipeReaderException if the option is not supported
	 *         or if the value of a boolean, quoting type or dialect option is not valid
	 */
	void setOption(relpipe::common::type::StringX uri, relpipe::common::type::StringX value) {
		if (uri == option::Dialect) {
			setDialect(value);
		} else if (uri == option::CommentSeparatorForSections) {
			commentSeparatorForSections = convertor.to_bytes(value);
		} else if (uri == option::CommentSeparatorForEntries) {
			commentSeparatorForEntries = convertor.to_bytes(value);
		} else if (uri == option::CommentSeparatorStandalone) {
			commentSeparatorStandalone = convertor.to_bytes(value);
		} else if (uri == option::KeyValueSeparator) {
			keyValueSeparator = convertor.to_bytes(value);
		} else if (uri == option::HierarchySeparator) {
			hierarchySeparator = value;
		} else if (uri == option::AllowSections) {
			allowSections = parseBoolean(value);
		} else if (uri == option::QuotesTypeForSections) {
			quotingForSections.type = parseQuotingType(value);
		} else if (uri == option::QuotesTypeForKeys) {
			quotingForKeys.type = parseQuotingType(value);
		} else if (uri == option::QuotesTypeForValues) {
			quotingForValues.type = parseQuotingType(value);
		} else if (uri == option::QuotesPatternForSections) {
			quotingForSections.pattern = value;
		} else if (uri == option::QuotesPatternForKeys) {
			quotingForKeys.pattern = value;
		} else if (uri == option::QuotesPatternForValues) {
			quotingForValues.pattern = value;
		} else if (!setEscaping(uri, value)) {
			// neither a built-in option nor a registered escaping processor
			throw relpipe::reader::RelpipeReaderException(L"Unsupported writer option: " + uri);
		}
	}

	/**
	 * Registers a dialect, so it can be later selected by its URI.
	 *
	 * @param dialect the dialect
	 * @param uri identifier of the dialect
	 * @param enabledByDefault if true, the dialect is also immediately applied to this writer
	 */
	void addDialect(std::shared_ptr<Dialect> dialect, const relpipe::common::type::StringX uri, bool enabledByDefault) {
		dialects.push_back(ConfiguredDialect(dialect, uri));
		if (!enabledByDefault) return;
		dialect->apply(*this);
	}

	/**
	 * Registers an escaping processor. Processors are used in the order of their registration.
	 *
	 * @param processor the escaping processor
	 * @param uri identifier under which the processor can be switched on or off through setOption()
	 * @param enabledByDefault initial state of the processor
	 */
	void addEscapingProcessor(std::shared_ptr<EscapingProcessor> processor, const relpipe::common::type::StringX uri, bool enabledByDefault) {
		escapingProcessors.push_back(ConfiguredEscapingProcessor(processor, uri, enabledByDefault));
	}

	/**
	 * Start of the INI document. Currently writes nothing.
	 */
	void startDocument() {
		// intentionally empty
	}

	/**
	 * End of the INI document: checks that all sections were closed and flushes the output.
	 *
	 * @throws relpipe::reader::RelpipeReaderException if some sections are still open
	 */
	void endDocument() {
		const auto openSections = currentSection.size();
		if (openSections != 0) {
			throw relpipe::reader::RelpipeReaderException(L"There are still " + std::to_wstring(openSections) + L" open sections during the endDocument() call. Need to call endSection() before.");
		}
		output.flush();
	}

	/**
	 * Opens a (possibly nested) section.
	 * If sections are allowed, writes the header: [full name], optional [tag] and optional comment,
	 * preceded by a blank line if there is already some content.
	 * If sections are not allowed, nothing is written here; the section is only remembered.
	 *
	 * @param event name, tag and comment of the section
	 */
	void startSection(const SectionStartEvent& event) {
		currentSection.push_back(event.name);

		if (allowSections) {
			if (hasContent) output << std::endl;

			output << "[";
			output << escape(getCurrentSectionFullName(), EscapingProcessor::TextType::SectionName);
			output << "]";

			if (!event.tag.empty()) {
				output << "[";
				output << escape(event.tag, EscapingProcessor::TextType::SectionTag);
				output << "]";
			}

			if (!event.comment.empty()) {
				output << commentSeparatorForSections;
				output << escape(event.comment, EscapingProcessor::TextType::SectionComment);
			}

			output << std::endl;
		}

		hasContent = true;
	}

	void endSection() {
		currentSection.pop_back();
		output.flush();
	}

	/**
	 * Writes one entry on its own line: key, optional [sub key], separator, value and optional comment.
	 * If sections are not allowed and some section is open, the full section name
	 * and the hierarchy separator are written before the key.
	 *
	 * @param event key, sub key, value and comment of the entry
	 */
	void entry(const EntryEvent& event) {
		// TODO: escape/quote parts separately + configurable quoting of parts or whole + the same for sub keys
		const bool prependSection = !allowSections && !currentSection.empty();
		if (prependSection) {
			output << escape(getCurrentSectionFullName(), EscapingProcessor::TextType::EntryKey);
			output << convertor.to_bytes(hierarchySeparator);
		}

		output << escape(event.key, EscapingProcessor::TextType::EntryKey);

		if (!event.subKey.empty()) {
			output << "[";
			output << escape(event.subKey, EscapingProcessor::TextType::EntrySubKey);
			output << "]";
		}

		output << keyValueSeparator;
		output << escape(event.value, EscapingProcessor::TextType::EntryValue);

		if (!event.comment.empty()) {
			output << commentSeparatorForEntries;
			output << escape(event.comment, EscapingProcessor::TextType::EntryComment);
		}

		output << std::endl;
		hasContent = true;
	}

	/**
	 * Writes a standalone comment on its own line, prefixed with commentSeparatorStandalone.
	 *
	 * @param event text of the comment
	 */
	void comment(const CommentEvent& event) {
		output << commentSeparatorStandalone;
		output << escape(event.comment, EscapingProcessor::TextType::StandaloneComment);
		output << std::endl;
		hasContent = true;
	}

	/**
	 * Reproduces whitespace in the output.
	 * Only spaces, tabs and line feeds are written; carriage returns and any other characters are silently skipped.
	 *
	 * @param event the whitespace characters
	 */
	void whitespace(const WhitespaceEvent& event) {
		for (wchar_t ch : event.whitespace) {
			switch (ch) {
				case L' ':
					output << " ";
					break;
				case L'\t':
					output << "\t";
					break;
				case L'\n':
					output << "\n";
					break;
				case L'\r':
					// TODO: keep CR?
					break;
				default:
					// TODO: throw exception if whitespace contains unexpected data? (should not happen)
					break;
			}
		}
		hasContent = true;
	}



};

}
}
}
author	František Kučera <franta-hg@frantovo.cz>
	Sat, 12 Dec 2020 19:52:38 +0100
branch	v_0
changeset 5	bee7acb57330
parent 4	372b161669e4
child 8	7c85dc9a310b
permissions	-rw-r--r--