// src/INICommand.cpp
// author František Kučera <franta-hg@frantovo.cz>
// Wed, 29 Sep 2021 00:54:55 +0200
// branch v_0
// changeset 35 930f17f16fd7
// parent 33 3b81fbeb5f3b
// permissions -rw-r--r--
// partially implement --parser-option allow-line-continuation-with-space and java-manifest-mf dialect (MANIFEST.MF)

/**
 * Relational pipes
 * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include <cstdlib>
#include <vector>
#include <sstream>
#include <memory>

#include <relpipe/writer/RelationalWriter.h>
#include <relpipe/writer/RelpipeWriterException.h>
#include <relpipe/writer/AttributeMetadata.h>
#include <relpipe/common/type/typedefs.h>

#include <relpipe/cli/CLI.h>

#include "INICommand.h"
#include "lib/uri.h"
#include "lib/INIReader.h"
#include "lib/BasicUnescapingProcessor.h"
#include "lib/BackspaceUnescapingProcessor.h"
#include "lib/JavaPropertiesUnescapingProcessor.h"
#include "lib/JavaPropertiesDialect.h"
#include "lib/JavaManifestMFDialect.h"

using namespace std;
using namespace relpipe::writer;
using namespace relpipe::in::ini::lib;

namespace relpipe {
namespace in {
namespace ini {

class FlatINIContentHandler : public INIContentHandler {
private:
	wstring_convert < codecvt_utf8<wchar_t>> convertor; // INI parser works with UTF-8
	std::shared_ptr<writer::RelationalWriter> writer;
	Configuration& configuration;
	std::vector<std::string> currentSection;
	bool inDocument = false;

	std::string getCurrentSectionFullName() {
		std::stringstream result;

		// TODO: configurable hierarchy delimiter
		// TODO: escape delimiter characters that are part of the section names
		// TODO: hierarchical sections should be returned as an array/sequence when such data type is supported in the Relational pipes format and API
		for (int i = 0; i < currentSection.size(); i++) {
			if (i > 0)result << "/";
			result << currentSection[i];
		}

		return result.str();
	}

	class Record {
	public:
		relpipe::common::type::Integer lineNumber = -1;
		relpipe::common::type::Integer eventNumber = -1;
		std::string key;
		std::string subKey;
		std::string value;
		std::string comment;
		std::string whitespace;

		Record(const Event * const event) : lineNumber(event->lineNumber), eventNumber(event->eventNumber) {
		}

	};

	void write(const Record& record) {
		if (configuration.enableLineNumbers) writer->writeAttribute(&record.lineNumber, typeid (record.lineNumber));
		if (configuration.enableEventNumbers) writer->writeAttribute(&record.eventNumber, typeid (record.eventNumber));

		std::string section = getCurrentSectionFullName();
		std::string key = record.key;
		if (configuration.enableSections) writer->writeAttribute(convertor.from_bytes(section));
		else if (section.size()) key = section + "/" + record.key;
		writer->writeAttribute(convertor.from_bytes(key));
		if (configuration.enableSubKeys) writer->writeAttribute(convertor.from_bytes(record.subKey));

		writer->writeAttribute(convertor.from_bytes(record.value));
		if (configuration.enableComments) writer->writeAttribute(convertor.from_bytes(record.comment));
		if (configuration.enableWhitespace) writer->writeAttribute(convertor.from_bytes(record.whitespace));
	}

public:

	FlatINIContentHandler(std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) : writer(writer), configuration(configuration) {
	}

	virtual ~FlatINIContentHandler() {
	}

	void startDocument() override {
		if (inDocument) throw std::out_of_range("Lunatic INI parser tried to start a document without ending the previous one.");
		inDocument = true;
		vector<AttributeMetadata> metadata;
		if (configuration.enableLineNumbers) metadata.push_back({L"line", TypeId::INTEGER});
		if (configuration.enableEventNumbers) metadata.push_back({L"event", TypeId::INTEGER});
		if (configuration.enableSections) metadata.push_back({L"section", TypeId::STRING});
		metadata.push_back({L"key", TypeId::STRING});
		if (configuration.enableSubKeys) metadata.push_back({L"sub_key", TypeId::STRING});
		metadata.push_back({L"value", TypeId::STRING});
		if (configuration.enableComments) metadata.push_back({L"comment", TypeId::STRING});
		if (configuration.enableWhitespace) metadata.push_back({L"whitespace", TypeId::STRING});
		writer->startRelation(configuration.relation, metadata, true);
	};

	void endDocument() override {
		if (!inDocument) throw std::out_of_range("Lunatic INI parser tried to end a document without starting it before.");
		inDocument = false;
		currentSection.clear();
	};

	void startSection(const SectionStartEvent& event) override {
		if (!inDocument) throw std::out_of_range("Lunatic INI parser tried to start a section without starting a document.");
		currentSection.push_back(event.name);

		if (configuration.enableComments && event.comment.size()) {
			Record record(&event);
			record.comment = event.comment;
			write(record);
		}
	};

	void endSection() override {
		if (currentSection.empty()) throw std::out_of_range("Lunatic INI parser tried to end a section without starting it before.");
		currentSection.pop_back();
	};

	void entry(const EntryEvent& event) override {
		if (!inDocument) throw std::out_of_range("Lunatic INI parser tried to emit an entry without starting a document.");
		Record record(&event);
		record.comment = event.comment;
		record.key = configuration.enableSubKeys ? event.key : event.fullKey;
		record.subKey = event.subKey;
		record.value = event.value;
		write(record);
	};

	void comment(const CommentEvent& event) override {
		if (!inDocument) throw std::out_of_range("Lunatic INI parser tried to emit a comment without starting a document.");
		if (configuration.enableComments) {
			Record record(&event);
			record.comment = event.comment;
			write(record);
		}
	}

	void whitespace(const WhitespaceEvent& event) override {
		if (!inDocument) throw std::out_of_range("Lunatic INI parser tried to emit a whitespace without starting a document.");
		if (configuration.enableWhitespace) {
			Record record(&event);
			record.whitespace = event.whitespace;
			write(record);
		}
	}

};

/**
 * Reads INI data from the input stream and writes them as a relation through the given writer.
 *
 * Registers the unescaping processors and dialects on the reader, applies the parser options
 * from the configuration and then runs the parser.
 *
 * @param input stream with the INI data
 * @param writer output for the relational data
 * @param configuration output options (passed to the content handler) and parser options
 */
void INICommand::process(std::istream& input, std::shared_ptr<writer::RelationalWriter> writer, Configuration& configuration) {
	// The handler is passed to the reader as a raw pointer; it is declared before the reader, so it is destroyed after it.
	FlatINIContentHandler handler(writer, configuration);
	std::shared_ptr<INIReader> reader(INIReader::create(input));
	// NOTE(review): the boolean argument is presumably the "enabled by default" flag – confirm against the INIReader API.
	reader->addUnescapingProcessor(std::make_shared<BasicUnescapingProcessor>(), unescaping::Basic, true);
	reader->addUnescapingProcessor(std::make_shared<JavaPropertiesUnescapingProcessor>(), unescaping::JavaProperties, false);
	reader->addUnescapingProcessor(std::make_shared<BackspaceUnescapingProcessor>(), unescaping::Backspace, true);
	reader->addDialect(std::make_shared<JavaPropertiesDialect>(), dialect::JavaProperties, false);
	reader->addDialect(std::make_shared<JavaManifestMFDialect>(), dialect::JavaManifestMF, false);
	reader->addHandler(&handler);
	// TODO: smart pointers vs. references: are we going to call addUnescapingProcessor() dynamically/conditionally or share instances? Then pointers will be better.
	// Options are set after the processors and dialects above have been registered.
	// The recipe is bound by const reference to avoid copying it (and its strings) in each iteration.
	for (const ParserOptionRecipe& option : configuration.parserOptions) {
		reader->setOption(convertor.to_bytes(option.uri), convertor.to_bytes(option.value));
	}
	reader->process();
}

}
}
}