configuration: --parser-option encoding, parse-encapsulated, root-name, tree-style, tree-with-namespaces v_0
author František Kučera <franta-hg@frantovo.cz>
Thu, 22 Jul 2021 20:01:03 +0200
branch v_0
changeset 40 85b6f13f1088
parent 39 6ef41443211e
child 41 12acb6c02d32
configuration: --parser-option encoding, parse-encapsulated, root-name, tree-style, tree-with-namespaces
bash-completion.sh
src/XMLDocumentConstructor.h
src/lib/ASN1ContentHandler.h
src/lib/ASN1Reader.h
src/lib/BasicASN1Reader.h
src/lib/GenericASN1ContentHandler.h
src/lib/XMLContentHandler.h
src/lib/uri.h
--- a/bash-completion.sh	Thu Jul 22 01:06:14 2021 +0200
+++ b/bash-completion.sh	Thu Jul 22 20:01:03 2021 +0200
@@ -59,6 +59,30 @@
 		"http://docbook.org/ns/docbook"
 	)
 
+	# TODO: introspection: after moving to alt2xml the available options and their values should be provided by the parser
+
+	PARSER_OPTIONS=(
+		"encoding"
+		"parse-encapsulated"
+		"tree-style"
+		"tree-with-namespaces"
+		"root-name"
+	);
+
+	ENCODINGS=(
+		"BER"
+		"DER"
+		"CER"
+		"PER"
+		"XER"
+		"ASN.1"
+	);
+
+	TREE_STYLES=(
+		"standard"
+		"literal"
+	)
+
 
 	if   [[ "$w1" == "--relation"                      && "x$w0" == "x" ]];    then COMPREPLY=("''")
 	elif [[ "$w1" == "--records"                       && "x$w0" == "x" ]];    then COMPREPLY=("'/'")
@@ -77,9 +101,13 @@
 	elif [[ "$w2" == "--raw-xml-attribute-wrapper"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
 	elif [[ "$w3" == "--raw-xml-attribute-wrapper"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
 
-	elif [[ "$w1" == "--parser-option"                                  ]];    then COMPREPLY=("''")
-	elif [[ "$w2" == "--parser-option"                 && "x$w0" == "x" ]];    then COMPREPLY=("''")
-
+	elif [[ "$w1" == "--parser-option"                                                        ]];    then COMPREPLY=($(compgen -W "${PARSER_OPTIONS[*]}" -- "$w0"))
+	elif [[ "$w2" == "--parser-option" && "$w1" == "encoding"                                 ]];    then COMPREPLY=($(compgen -W "${ENCODINGS[*]}" -- "$w0"))
+	elif [[ "$w2" == "--parser-option" && "$w1" == "parse-encapsulated"                       ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+	elif [[ "$w2" == "--parser-option" && "$w1" == "tree-style"                               ]];    then COMPREPLY=($(compgen -W "${TREE_STYLES[*]}" -- "$w0"))
+	elif [[ "$w2" == "--parser-option" && "$w1" == "tree-with-namespaces"                     ]];    then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0"))
+	elif [[ "$w2" == "--parser-option" && "$w1" == "root-name"               && "x$w0" == "x" ]];    then COMPREPLY=("'asn1'")
+	
 	else
 		OPTIONS=(
 			"--namespace"
--- a/src/XMLDocumentConstructor.h	Thu Jul 22 01:06:14 2021 +0200
+++ b/src/XMLDocumentConstructor.h	Thu Jul 22 20:01:03 2021 +0200
@@ -34,24 +34,27 @@
 private:
 	std::istream* input = nullptr;
 	xmlpp::DomParser* parser = nullptr;
+	relpipe::in::asn1::lib::BasicASN1Reader reader;
+	std::shared_ptr<relpipe::in::asn1::lib::GenericASN1ContentHandler> asn1handler;
+	std::shared_ptr<relpipe::in::asn1::lib::DOMBuildingXMLContentHandler> saxHandler;
 public:
 
 	XMLDocumentConstructor(std::istream* input, xmlpp::DomParser* parser) : input(input), parser(parser) {
+		asn1handler = make_shared<relpipe::in::asn1::lib::GenericASN1ContentHandler>();
+		saxHandler = make_shared<relpipe::in::asn1::lib::DOMBuildingXMLContentHandler>(parser->get_document());
+		asn1handler->addHandler(saxHandler);
+		reader.addHandler(asn1handler);
 	}
 
 	void setOption(const std::string& uri, const std::string& value) {
+		int n = 0;
+		n += reader.setOption(uri, value);
+		n += asn1handler->setOption(uri, value);
+		n += saxHandler->setOption(uri, value);
+		if (n == 0) throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
 	}
 
 	void process() {
-
-		relpipe::in::asn1::lib::BasicASN1Reader reader;
-		std::shared_ptr<relpipe::in::asn1::lib::GenericASN1ContentHandler> asn1handler = make_shared<relpipe::in::asn1::lib::GenericASN1ContentHandler>();
-		std::shared_ptr<relpipe::in::asn1::lib::DOMBuildingXMLContentHandler> saxHandler = make_shared<relpipe::in::asn1::lib::DOMBuildingXMLContentHandler>(parser->get_document());
-
-		asn1handler->addHandler(saxHandler);
-		reader.addHandler(asn1handler);
-
-
 		try {
 			// TODO: buffering? (reader itself also buffers)
 			for (uint8_t b = input->get(); input->good(); b = input->get()) reader.write(&b, 1);
--- a/src/lib/ASN1ContentHandler.h	Thu Jul 22 01:06:14 2021 +0200
+++ b/src/lib/ASN1ContentHandler.h	Thu Jul 22 20:01:03 2021 +0200
@@ -260,6 +260,15 @@
 	// TODO: more metadata, support OID decoding and ASN.1 modules (schema), probably through a plug-in
 	// TODO: support also extension extractor plug-ins? (could decode some opaque structures like octet strings and replace them with nested elements) e.g. subjectAltName in https://datatracker.ietf.org/doc/html/rfc5280#section-4.2.1.6
 
+	/**
+	 * @param uri identifier of the option
+	 * @param value value of the option
+	 * @return whether this option is supported and was applied here
+	 */
+	virtual bool setOption(const std::string& uri, const std::string& value) {
+		return false;
+	}
+
 	virtual void writeStreamStart() = 0;
 	virtual void writeStreamEnd() = 0;
 
--- a/src/lib/ASN1Reader.h	Thu Jul 22 01:06:14 2021 +0200
+++ b/src/lib/ASN1Reader.h	Thu Jul 22 20:01:03 2021 +0200
@@ -34,6 +34,15 @@
 		handlers->addHandler(handler);
 	};
 
+	/**
+	 * @param uri identifier of the option
+	 * @param value value of the option
+	 * @return whether this option is supported and was applied here
+	 */
+	virtual bool setOption(const std::string& uri, const std::string& value) {
+		return false;
+	}
+
 protected:
 	std::shared_ptr<ASN1ContentHandlerProxy> handlers = std::make_shared<ASN1ContentHandlerProxy>();
 };
--- a/src/lib/BasicASN1Reader.h	Thu Jul 22 01:06:14 2021 +0200
+++ b/src/lib/BasicASN1Reader.h	Thu Jul 22 20:01:03 2021 +0200
@@ -24,6 +24,7 @@
 
 #include "ASN1Reader.h"
 #include "ValidatingASN1ContentHandler.h"
+#include "uri.h"
 
 namespace relpipe {
 namespace in {
@@ -38,6 +39,17 @@
 
 	bool started = false;
 
+	bool parseEncapsulated = true;
+
+	/**
+	 * TODO: use a common method
+	 */
+	bool parseBoolean(const std::string& value) {
+		if (value == "true") return true;
+		else if (value == "false") return false;
+		else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
+	}
+
 	class BasicHeader : public ASN1ContentHandler::Header {
 	public:
 		bool definiteLength;
@@ -305,7 +317,7 @@
 	bool processEncapsulatedContent(const BasicHeader& typeHeader, const std::string& input) {
 		// TODO: avoid double parsing + encapsulated content might be also processed at the XML/DOM level where we may even do conditional processing based on XPath (evaluate only certain octet- or bit- strings)
 		// We may also do the same as with SEQUENCE or SET (continue nested reading in this ASN1Rreader instance), but it would require valid encapsulated data and would avoid easy fallback to raw OCTET or BIT STRING. We would also have to check the boundaries of the nested part.
-		if (isValidBER(input)) {
+		if (parseEncapsulated && isValidBER(input)) {
 			handlers->writeCollectionStart(typeHeader);
 
 			BasicASN1Reader encapsulatedReader;
@@ -331,6 +343,20 @@
 
 public:
 
+	bool setOption(const std::string& uri, const std::string& value) override {
+		if (uri == option::Encoding && value == encoding::ber); // currently, we support only BER (and thus also CER and DER) encoding, but options have no actual effect – we just validate them
+		else if (uri == option::Encoding && value == encoding::cer); // in future versions, this might switch the parser into more strict mode
+		else if (uri == option::Encoding && value == encoding::der); // in future versions, this might switch the parser into more strict mode
+		else if (uri == option::Encoding && value == encoding::per) throw std::invalid_argument("PER encoding is not yet supported");
+		else if (uri == option::Encoding && value == encoding::xer) throw std::invalid_argument("XER encoding is not yet supported");
+		else if (uri == option::Encoding && value == encoding::asn1) throw std::invalid_argument("ASN.1 encoding is not yet supported");
+		else if (uri == option::Encoding) throw std::invalid_argument("Unsupported ASN.1 encoding: " + value);
+		else if (uri == option::ParseEncapsulated) parseEncapsulated = parseBoolean(value);
+		else return false;
+
+		return true;
+	}
+
 	void close() override {
 		if (hasAvailableForReading()) throw std::logic_error("Unexpected content at the end of the stream"); // TODO: better exception
 
--- a/src/lib/GenericASN1ContentHandler.h	Thu Jul 22 01:06:14 2021 +0200
+++ b/src/lib/GenericASN1ContentHandler.h	Thu Jul 22 20:01:03 2021 +0200
@@ -22,6 +22,7 @@
 
 #include "ASN1ContentHandler.h"
 #include "XMLContentHandler.h"
+#include "uri.h"
 
 namespace relpipe {
 namespace in {
@@ -37,6 +38,18 @@
 private:
 	XMLContentHandlerProxy handlers;
 
+	std::string rootName = "asn1";
+	bool treeWithNamespaces = false;
+
+	/**
+	 * TODO: use a common method
+	 */
+	bool parseBoolean(const std::string& value) {
+		if (value == "true") return true;
+		else if (value == "false") return false;
+		else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
+	}
+
 	std::vector<std::string> getCommonAttributes(const Header& header, std::vector<std::string> attributes = {}) {
 		std::string tag = std::to_string(header.tag);
 		std::string tagClass = std::to_string((uint64_t) header.tagClass);
@@ -80,8 +93,20 @@
 	virtual ~GenericASN1ContentHandler() {
 	}
 
+	bool setOption(const std::string& uri, const std::string& value) override {
+		if (uri == xml::RootName) rootName = value;
+		else if (uri == xml::TreeWithNamespaces) treeWithNamespaces = parseBoolean(value);
+		else if (uri == xml::TreeStyle && value == "standard"); // the only style currently supported
+		else if (uri == xml::TreeStyle && value == "literal") throw std::invalid_argument("Tree style 'literal' is not yet supported"); // will require ASN.1 schema, might be implemented in another class
+		else if (uri == xml::TreeStyle) throw std::invalid_argument("Unsupported tree-style: " + value);
+		else return false;
+
+		return true;
+	}
+
 	void writeStreamStart() override {
-		handlers.writeStartElement("asn1");
+		if (treeWithNamespaces) handlers.writeStartElement(rootName,{"xmlns", xml::XMLNS}); // TODO: actual namespace instead of a mere attribute
+		else handlers.writeStartElement(rootName);
 	}
 
 	void writeStreamEnd() override {
--- a/src/lib/XMLContentHandler.h	Thu Jul 22 01:06:14 2021 +0200
+++ b/src/lib/XMLContentHandler.h	Thu Jul 22 20:01:03 2021 +0200
@@ -28,6 +28,15 @@
 
 	virtual ~XMLContentHandler() = default;
 
+	/**
+	 * @param uri identifier of the option
+	 * @param value value of the option
+	 * @return whether this option is supported and was applied here
+	 */
+	virtual bool setOption(const std::string& uri, const std::string& value) {
+		return false;
+	}
+
 	// FIXME: namespaces, check names
 
 	virtual void writeStartElement(const std::string& name, const std::vector<std::string>& attributes = {}) = 0;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/uri.h	Thu Jul 22 20:01:03 2021 +0200
@@ -0,0 +1,53 @@
+/**
+ * Relational pipes
+ * Copyright © 2021 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+namespace relpipe {
+namespace in {
+namespace asn1 {
+namespace lib {
+
+// TODO: these strings will become globally unique URIs (or IRIs) after moving to alt2xml and relative/unprefixed names should also work
+
+/** general options of the ASN.1 parser */
+namespace option {
+static const char* Encoding = "encoding";
+static const char* ParseEncapsulated = "parse-encapsulated";
+}
+
+namespace encoding {
+static const char* ber = "BER";
+static const char* der = "DER";
+static const char* cer = "CER";
+static const char* xer = "XER";
+static const char* per = "PER";
+static const char* asn1 = "ASN.1"; // schema, model
+}
+
+/** options for configuring the stage where events from the ASN.1 parser are converted to SAX events or DOM building */
+namespace xml {
+static const char* TreeWithNamespaces = "tree-with-namespaces";
+static const char* TreeStyle = "tree-style";
+static const char* RootName = "root-name";
+
+static const char* XMLNS = "tag:globalcode.info,2018:alt2xml:TEMPORARY:asn1"; // not an option and might change, just preliminary namespace
+}
+
+}
+}
+}
+}