# HG changeset patch # User František Kučera # Date 1607727717 -3600 # Node ID ae8775e0bc7a7323020702405840b146699e5eef # Parent e753a7f967c8a6afb6054160a70b8a0dc9fcdb03 configurable/modular dialects and escaping diff -r e753a7f967c8 -r ae8775e0bc7a bash-completion.sh --- a/bash-completion.sh Fri Dec 11 12:34:42 2020 +0100 +++ b/bash-completion.sh Sat Dec 12 00:01:57 2020 +0100 @@ -37,6 +37,7 @@ DIALECTS=( "java-properties" + "java-manifest-mf" ); WRITER_OPTIONS=( diff -r e753a7f967c8 -r ae8775e0bc7a src/BasicEscapingProcessor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/BasicEscapingProcessor.h Sat Dec 12 00:01:57 2020 +0100 @@ -0,0 +1,54 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +#pragma once + +#include + +#include + +#include "EscapingProcessor.h" + + +namespace relpipe { +namespace out { +namespace ini { + +class BasicEscapingProcessor : public EscapingProcessor { /* escapes backslash, LF, CR, TAB and quote characters with a leading ESC */ +private: +public: + + relpipe::common::type::StringX escape(const relpipe::common::type::StringX& s, const TextType textType, const QuotingType quotingType) override { /* textType is not consulted; a quote character stays unescaped only when the other kind of quote is the active quotingType */ + std::wstringstream result; + + for (auto ch : s) { + if (ch == L'\\') result.put(ESC).put(ESC); + else if (ch == L'\n') result.put(ESC).put(L'n'); + else if (ch == L'\r') result.put(ESC).put(L'r'); /* keep CR as an escape sequence (as the replaced INIWriter::escape() did) instead of silently dropping it */ + else if (ch == L'\t') result.put(ESC).put(L't'); + else if (ch == L'"' && quotingType != QuotingType::Apostrophes) result.put(ESC).put(ch); + else if (ch == L'\'' && quotingType != QuotingType::Quotes) result.put(ESC).put(ch); + else result.put(ch); + } + + return result.str(); + } + +}; + +} +} +} diff -r e753a7f967c8 -r ae8775e0bc7a src/Dialect.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Dialect.h Sat Dec 12 00:01:57 2020 +0100 @@ -0,0 +1,44 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +#include "INIWriter.h" + +namespace relpipe { +namespace out { +namespace ini { + +class INIWriter; + +class Dialect { +public: + /** + * Configure the writer by calling its setOption() method.
+ * + * A dialect may also be derived from another dialect (extend or modify it), + * but there must be no cyclic dependencies among them. + */ + virtual void apply(INIWriter& writer) = 0; + + virtual ~Dialect() { + } + +}; + +} +} +} diff -r e753a7f967c8 -r ae8775e0bc7a src/EscapingProcessor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/EscapingProcessor.h Sat Dec 12 00:01:57 2020 +0100 @@ -0,0 +1,60 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +#pragma once + +#include + +#include + + +namespace relpipe { +namespace out { +namespace ini { + +class EscapingProcessor { /* interface of one escaping step; implementations are registered in INIWriter under an option URI */ +private: +protected: + const char ESC = '\\'; + +public: + + enum class TextType { + SectionName, + SectionTag, + SectionComment, + EntryKey, + EntrySubKey, + EntryValue, + EntryComment, + StandaloneComment, + }; + + enum class QuotingType { + None, + Quotes, + Apostrophes, + }; + + virtual relpipe::common::type::StringX escape(const relpipe::common::type::StringX& s, const TextType textType, const QuotingType quotingType) = 0; + + virtual ~EscapingProcessor() { + } +}; + +} +} +} diff -r e753a7f967c8 -r ae8775e0bc7a src/INIDispatchHandler.h --- a/src/INIDispatchHandler.h Fri Dec 11 12:34:42 2020 +0100 +++ b/src/INIDispatchHandler.h Sat Dec 12 00:01:57 2020 +0100 @@ -36,6 +36,11 @@ #include "INIStandardHandler.h" #include "INILiteralHandler.h" #include "INISectionFirstHandler.h" +#include "JavaPropertiesDialect.h" +#include "JavaManifestMFDialect.h" +#include "JavaPropertiesEscapingProcessor.h" +#include "BasicEscapingProcessor.h" +#include "uri.h" namespace relpipe { namespace out { @@ -80,6 +85,10 @@ public: INIDispatchHandler(std::ostream& output, Configuration& configuration) : output(output), writer(output), configuration(configuration) { + writer.addDialect(std::make_shared<JavaPropertiesDialect>(), dialect::JavaProperties, false); /* dialects are only registered here; one is applied when the dialect option is set */ + writer.addDialect(std::make_shared<JavaManifestMFDialect>(), dialect::JavaManifestMF, false); + writer.addEscapingProcessor(std::make_shared<BasicEscapingProcessor>(), escaping::Basic, true); /* the only escaping processor enabled by default */ + writer.addEscapingProcessor(std::make_shared<JavaPropertiesEscapingProcessor>(), escaping::JavaProperties, false); for (auto o : configuration.writerOptions) writer.setOption(o.uri, o.value); } diff -r e753a7f967c8 -r ae8775e0bc7a src/INIWriter.h --- a/src/INIWriter.h Fri Dec 11 12:34:42 2020 +0100 +++ b/src/INIWriter.h Sat Dec 12 00:01:57 2020 +0100 @@ -22,6 +22,10 @@ #include #include +#include "uri.h" +#include "Dialect.h" +#include "EscapingProcessor.h" + namespace relpipe { namespace out { namespace ini {
@@ -35,34 +39,68 @@ std::string commentSeparatorForSections = " ; "; std::string commentSeparatorForEntries = " ; "; std::string commentSeparatorStandalone = "; "; - + bool hasContent = false; - enum class TokenType { - SectionName, - SectionTag, - SectionComment, - EntryKey, - EntrySubKey, - EntryValue, - EntryComment, - StandaloneComment, + /** + * TODO: use a common method + */ + bool parseBoolean(const relpipe::common::type::StringX& value) { + if (value == L"true") return true; + else if (value == L"false") return false; + else throw relpipe::reader::RelpipeReaderException(L"Unable to parse boolean value: " + value + L" (expecting true or false)"); + } + + std::string escape(const relpipe::common::type::StringX& value, EscapingProcessor::TextType type) { /* passes the value through every enabled escaping processor in registration order, then converts it to bytes */ + relpipe::common::type::StringX result = value; + EscapingProcessor::QuotingType quotingType = EscapingProcessor::QuotingType::None; + for (const ConfiguredEscapingProcessor& p : escapingProcessors) if (p.enabled) result = p.processor->escape(result, type, quotingType); + return convertor.to_bytes(result); + } + + class ConfiguredEscapingProcessor { /* a registered processor, the option URI that toggles it and its current on/off state */ + public: + std::shared_ptr<EscapingProcessor> processor; + const relpipe::common::type::StringX uri; + bool enabled; + + ConfiguredEscapingProcessor(std::shared_ptr<EscapingProcessor> processor, const relpipe::common::type::StringX uri, bool enabled) : processor(processor), uri(uri), enabled(enabled) { + } + }; - std::string escape(TokenType type, relpipe::common::type::StringX value) { - std::wstringstream result; + std::vector<ConfiguredEscapingProcessor> escapingProcessors; - for (wchar_t ch : value) { - if (ch == L'\\') result << "\\\\"; - else if (ch == L'\n') result << L"\\n"; - else if (ch == L'\r') result << L"\\r"; - else if (ch == L'\t') result << L"\\t"; - else if (ch == L'"') result << "\\\""; - else result.put(ch); + bool setEscaping(const relpipe::common::type::StringX& uri, const relpipe::common::type::StringX& value) { /* returns false when no registered processor has this URI, so setOption() can report an unsupported option */ + for (ConfiguredEscapingProcessor& p : escapingProcessors) { + if (p.uri == uri) { + p.enabled =
parseBoolean(value); + return true; + } + } + return false; + } + + class ConfiguredDialect { /* a registered dialect and the value of the dialect option that selects it */ + public: + std::shared_ptr<Dialect> dialect; + const relpipe::common::type::StringX uri; + + ConfiguredDialect(std::shared_ptr<Dialect> dialect, const relpipe::common::type::StringX uri) : dialect(dialect), uri(uri) { } - // TODO: modular escaping (like unescaping in relpipe-in-ini) - return convertor.to_bytes(result.str()); + }; + + std::vector<ConfiguredDialect> dialects; + + void setDialect(const relpipe::common::type::StringX& uri) { /* applies the first registered dialect with this URI; throws if none matches */ + for (ConfiguredDialect& d : dialects) { + if (d.uri == uri) { + d.dialect->apply(*this); + return; + } + } + throw relpipe::reader::RelpipeReaderException(L"Unsupported INI dialect: " + uri); } public: @@ -99,18 +137,27 @@ }; void setOption(relpipe::common::type::StringX uri, relpipe::common::type::StringX value) { - // TODO: setOption() - if (uri == L"dialect"); - else if (uri == L"comment-separator-for-sections") commentSeparatorForSections = convertor.to_bytes(value); - else if (uri == L"comment-separator-for-entries") commentSeparatorForEntries = convertor.to_bytes(value); - else if (uri == L"comment-separator-standalone") commentSeparatorStandalone = convertor.to_bytes(value); - else if (uri == L"key-value-separator") keyValueSeparator = convertor.to_bytes(value); - else if (uri == L"escape-backspace"); - else if (uri == L"escape-basic"); - else if (uri == L"escape-java-properties"); + // TODO: setOption(): escaping, quotes, allow-sections + if (uri == option::Dialect) setDialect(value); + else if (uri == option::CommentSeparatorForSections) commentSeparatorForSections = convertor.to_bytes(value); + else if (uri == option::CommentSeparatorForEntries) commentSeparatorForEntries = convertor.to_bytes(value); + else if (uri == option::CommentSeparatorStandalone) commentSeparatorStandalone = convertor.to_bytes(value); + else if (uri == option::KeyValueSeparator) keyValueSeparator = convertor.to_bytes(value); + else if (uri == option::AllowSections); + else if (uri ==
option::Quotes); + else if (setEscaping(uri, value)); else throw relpipe::reader::RelpipeReaderException(L"Unsupported writer option: " + uri); } + void addDialect(std::shared_ptr<Dialect> dialect, const relpipe::common::type::StringX uri, bool enabledByDefault) { /* registers the dialect under uri and applies it immediately when enabledByDefault is true */ + dialects.push_back({dialect, uri}); + if (enabledByDefault) dialect->apply(*this); + } + + void addEscapingProcessor(std::shared_ptr<EscapingProcessor> processor, const relpipe::common::type::StringX uri, bool enabledByDefault) { /* registers the processor; uri then works as a true/false option in setOption() */ + escapingProcessors.push_back({processor, uri, enabledByDefault}); + } + void startDocument() { } @@ -120,9 +167,9 @@ void startSection(const SectionStartEvent& event) { if (hasContent) output << std::endl; - output << "[" << escape(TokenType::SectionName, event.name) << "]"; - if (event.tag.size()) output << "[" << escape(TokenType::SectionTag, event.tag) << "]"; - if (event.comment.size()) output << commentSeparatorForSections << escape(TokenType::SectionComment, event.comment); + output << "[" << escape(event.name, EscapingProcessor::TextType::SectionName) << "]"; + if (event.tag.size()) output << "[" << escape(event.tag, EscapingProcessor::TextType::SectionTag) << "]"; + if (event.comment.size()) output << commentSeparatorForSections << escape(event.comment, EscapingProcessor::TextType::SectionComment); output << std::endl; hasContent = true; } @@ -132,16 +179,16 @@ } void entry(const EntryEvent& event) { - output << escape(TokenType::EntryKey, event.key); - if (event.subKey.size()) output << "[" << escape(TokenType::EntrySubKey, event.subKey) << "]"; - output << keyValueSeparator << escape(TokenType::EntryValue, event.value); - if (event.comment.size()) output << commentSeparatorForEntries << escape(TokenType::EntryComment, event.comment); + output << escape(event.key, EscapingProcessor::TextType::EntryKey); + if (event.subKey.size()) output << "[" << escape(event.subKey, EscapingProcessor::TextType::EntrySubKey) << "]"; + output << keyValueSeparator << escape(event.value,
EscapingProcessor::TextType::EntryValue); + if (event.comment.size()) output << commentSeparatorForEntries << escape(event.comment, EscapingProcessor::TextType::EntryComment); output << std::endl; hasContent = true; } void comment(const CommentEvent& event) { - output << commentSeparatorStandalone << escape(TokenType::StandaloneComment, event.comment); + output << commentSeparatorStandalone << escape(event.comment, EscapingProcessor::TextType::StandaloneComment); output << std::endl; hasContent = true; } diff -r e753a7f967c8 -r ae8775e0bc7a src/JavaManifestMFDialect.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/JavaManifestMFDialect.h Sat Dec 12 00:01:57 2020 +0100 @@ -0,0 +1,40 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +#pragma once + +#include "Dialect.h" +#include "uri.h" + +namespace relpipe { +namespace out { +namespace ini { + +class INIWriter; + +class JavaManifestMFDialect : public Dialect { +public: + + void apply(INIWriter& writer) override { /* selects the java-properties dialect first, then overrides only the key-value separator */ + writer.setOption(option::Dialect, dialect::JavaProperties); + writer.setOption(option::KeyValueSeparator, L": "); + } + +}; + +} +} +} diff -r e753a7f967c8 -r ae8775e0bc7a src/JavaPropertiesDialect.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/JavaPropertiesDialect.h Sat Dec 12 00:01:57 2020 +0100 @@ -0,0 +1,46 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +#include "Dialect.h" +#include "uri.h" + +namespace relpipe { +namespace out { +namespace ini { + +class INIWriter; + +class JavaPropertiesDialect : public Dialect { +public: + + void apply(INIWriter& writer) override { /* sets the separators and turns on the escape-java-properties processor; escape-basic is left as configured (see TODO below) */ + writer.setOption(option::AllowSections, L"false"); + writer.setOption(option::Quotes, L""); + writer.setOption(option::KeyValueSeparator, L"="); + writer.setOption(option::CommentSeparatorForSections, L""); + writer.setOption(option::CommentSeparatorForEntries, L""); + writer.setOption(option::CommentSeparatorStandalone, L"# "); + // writer.setOption(escaping::Basic, L"false"); // TODO: basic vs.
.properties + writer.setOption(escaping::JavaProperties, L"true"); + } + +}; + +} +} +} diff -r e753a7f967c8 -r ae8775e0bc7a src/JavaPropertiesEscapingProcessor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/JavaPropertiesEscapingProcessor.h Sat Dec 12 00:01:57 2020 +0100 @@ -0,0 +1,51 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +#include + +#include + +#include "EscapingProcessor.h" + + +namespace relpipe { +namespace out { +namespace ini { + +class JavaPropertiesEscapingProcessor : public EscapingProcessor { +private: +public: + + relpipe::common::type::StringX escape(const relpipe::common::type::StringX& s, const TextType textType, const QuotingType quotingType) override { /* escapes only backslash and LF; textType and quotingType are not consulted */ + std::wstringstream result; + + for (auto ch : s) { + if (ch == L'\\') result.put(ESC).put(ESC); + else if (ch == L'\n') result.put(ESC).put(L'n'); + // TODO: escape unicode + else result.put(ch); + } + + return result.str(); + } + +}; + +} +} +} diff -r e753a7f967c8 -r ae8775e0bc7a src/uri.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/uri.h Sat Dec 12 00:01:57 2020 +0100 @@ -0,0 +1,53 @@ +/** + * Relational pipes + * Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +namespace relpipe { +namespace out { +namespace ini { + +// TODO: these strings will become globally unique URIs (or IRIs) after moving to alt2xml and relative/unprefixed names should also work + +/** general options of the INI writer; the pointers are const so that no translation unit can re-point an option name */ +namespace option { +static const wchar_t* const AllowSections = L"allow-sections"; +// static const wchar_t* AllowSectionTags = L"allow-section-tags"; +// static const wchar_t* AllowSubKeys = L"allow-sub-keys"; +static const wchar_t* const CommentSeparatorForSections = L"comment-separator-for-sections"; +static const wchar_t* const CommentSeparatorForEntries = L"comment-separator-for-entries"; +static const wchar_t* const CommentSeparatorStandalone = L"comment-separator-standalone"; +static const wchar_t* const KeyValueSeparator = L"key-value-separator"; +static const wchar_t* const Quotes = L"quotes"; +static const wchar_t* const Dialect = L"dialect"; +} + +/** names of dynamically registered escaping processors; they are also options */ +namespace escaping { +static const wchar_t* const Basic = L"escape-basic"; +static const wchar_t* const JavaProperties = L"escape-java-properties"; +static const wchar_t* const Backspace = L"escape-backspace"; +} + +/** not options but values of the dialect option */ +namespace dialect { +static const wchar_t* const JavaProperties = L"java-properties"; +static const wchar_t* const JavaManifestMF = L"java-manifest-mf"; +} + +} +} +}