# HG changeset patch # User František Kučera # Date 1626976863 -7200 # Node ID 85b6f13f1088b8d40cd8b29b42723840412db449 # Parent 6ef41443211ea262c68916710a6b17ab3157bcee configuration: --parser-option encoding, parse-encapsulated, root-name, tree-style, tree-with-namespaces diff -r 6ef41443211e -r 85b6f13f1088 bash-completion.sh --- a/bash-completion.sh Thu Jul 22 01:06:14 2021 +0200 +++ b/bash-completion.sh Thu Jul 22 20:01:03 2021 +0200 @@ -59,6 +59,30 @@ "http://docbook.org/ns/docbook" ) + # TODO: introspection: after moving to alt2xml the available options and their values should be provided by the parser + + PARSER_OPTIONS=( + "encoding" + "parse-encapsulated" + "tree-style" + "tree-with-namespaces" + "root-name" + ); + + ENCODINGS=( + "BER" + "DER" + "CER" + "PER" + "XER" + "ASN.1" + ); + + TREE_STYLES=( + "standard" + "literal" + ) + if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''") elif [[ "$w1" == "--records" && "x$w0" == "x" ]]; then COMPREPLY=("'/'") @@ -77,9 +101,13 @@ elif [[ "$w2" == "--raw-xml-attribute-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''") elif [[ "$w3" == "--raw-xml-attribute-wrapper" && "x$w0" == "x" ]]; then COMPREPLY=("''") - elif [[ "$w1" == "--parser-option" ]]; then COMPREPLY=("''") - elif [[ "$w2" == "--parser-option" && "x$w0" == "x" ]]; then COMPREPLY=("''") - + elif [[ "$w1" == "--parser-option" ]]; then COMPREPLY=($(compgen -W "${PARSER_OPTIONS[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "encoding" ]]; then COMPREPLY=($(compgen -W "${ENCODINGS[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "parse-encapsulated" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "tree-style" ]]; then COMPREPLY=($(compgen -W "${TREE_STYLES[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "tree-with-namespaces" ]]; then COMPREPLY=($(compgen -W "${BOOLEAN_VALUES[*]}" -- "$w0")) + elif [[ "$w2" == "--parser-option" && "$w1" == "root-name" && "x$w0" == "x" ]]; then COMPREPLY=("'asn1'") + else OPTIONS=( "--namespace" diff -r 6ef41443211e -r 85b6f13f1088 src/XMLDocumentConstructor.h --- a/src/XMLDocumentConstructor.h Thu Jul 22 01:06:14 2021 +0200 +++ b/src/XMLDocumentConstructor.h Thu Jul 22 20:01:03 2021 +0200 @@ -34,24 +34,27 @@ private: std::istream* input = nullptr; xmlpp::DomParser* parser = nullptr; + relpipe::in::asn1::lib::BasicASN1Reader reader; + std::shared_ptr asn1handler; + std::shared_ptr saxHandler; public: XMLDocumentConstructor(std::istream* input, xmlpp::DomParser* parser) : input(input), parser(parser) { + asn1handler = make_shared(); + saxHandler = make_shared(parser->get_document()); + asn1handler->addHandler(saxHandler); + reader.addHandler(asn1handler); } void setOption(const std::string& uri, const std::string& value) { + int n = 0; + n += reader.setOption(uri, value); + n += asn1handler->setOption(uri, value); + n += saxHandler->setOption(uri, value); + if (n == 0) throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“"); } void process() { - - relpipe::in::asn1::lib::BasicASN1Reader reader; - std::shared_ptr asn1handler = make_shared(); - std::shared_ptr saxHandler = make_shared(parser->get_document()); - - asn1handler->addHandler(saxHandler); - reader.addHandler(asn1handler); - - try { // TODO: buffering? (reader itself also buffers) for (uint8_t b = input->get(); input->good(); b = input->get()) reader.write(&b, 1); diff -r 6ef41443211e -r 85b6f13f1088 src/lib/ASN1ContentHandler.h --- a/src/lib/ASN1ContentHandler.h Thu Jul 22 01:06:14 2021 +0200 +++ b/src/lib/ASN1ContentHandler.h Thu Jul 22 20:01:03 2021 +0200 @@ -260,6 +260,15 @@ // TODO: more metadata, support OID decoding and ASN.1 modules (schema), probably through a plug-in // TODO: support also extension extractor plug-ins? (could decode some opaque structures like octet strings and replace them with nested elements) e.g. subjectAltName in https://datatracker.ietf.org/doc/html/rfc5280#section-4.2.1.6 + /** + * @param uri identifier of the option + * @param value value of the option + * @return whether this option is supported and was applied here + */ + virtual bool setOption(const std::string& uri, const std::string& value) { + return false; + } + virtual void writeStreamStart() = 0; virtual void writeStreamEnd() = 0; diff -r 6ef41443211e -r 85b6f13f1088 src/lib/ASN1Reader.h --- a/src/lib/ASN1Reader.h Thu Jul 22 01:06:14 2021 +0200 +++ b/src/lib/ASN1Reader.h Thu Jul 22 20:01:03 2021 +0200 @@ -34,6 +34,15 @@ handlers->addHandler(handler); }; + /** + * @param uri identifier of the option + * @param value value of the option + * @return whether this option is supported and was applied here + */ + virtual bool setOption(const std::string& uri, const std::string& value) { + return false; + } + protected: std::shared_ptr handlers = std::make_shared(); }; diff -r 6ef41443211e -r 85b6f13f1088 src/lib/BasicASN1Reader.h --- a/src/lib/BasicASN1Reader.h Thu Jul 22 01:06:14 2021 +0200 +++ b/src/lib/BasicASN1Reader.h Thu Jul 22 20:01:03 2021 +0200 @@ -24,6 +24,7 @@ #include "ASN1Reader.h" #include "ValidatingASN1ContentHandler.h" +#include "uri.h" namespace relpipe { namespace in { @@ -38,6 +39,17 @@ bool started = false; + bool parseEncapsulated = true; + + /** + * TODO: use a common method + */ + bool parseBoolean(const std::string& value) { + if (value == "true") return true; + else if (value == "false") return false; + else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)"); + } + class BasicHeader : public ASN1ContentHandler::Header { public: bool definiteLength; @@ -305,7 +317,7 @@ bool processEncapsulatedContent(const BasicHeader& typeHeader, const std::string& input) { // TODO: avoid double parsing + encapsulated content might be also processed at the XML/DOM level where we may even do conditional processing based on XPath (evaluate only certain octet- or bit- strings) // We may also do the same as with SEQUENCE or SET (continue nested reading in this ASN1Rreader instance), but it would require valid encapsulated data and would avoid easy fallback to raw OCTET or BIT STRING. We would also have to check the boundaries of the nested part. - if (isValidBER(input)) { + if (parseEncapsulated && isValidBER(input)) { handlers->writeCollectionStart(typeHeader); BasicASN1Reader encapsulatedReader; @@ -331,6 +343,20 @@ public: + bool setOption(const std::string& uri, const std::string& value) override { + if (uri == option::Encoding && value == encoding::ber); // currently, we support only BER (and thus also CER and DER) encoding, but options have no actual effect – we just validate them + else if (uri == option::Encoding && value == encoding::cer); // in future versions, this might switch the parser into more strict mode + else if (uri == option::Encoding && value == encoding::der); // in future versions, this might switch the parser into more strict mode + else if (uri == option::Encoding && value == encoding::per) throw std::invalid_argument("PER encoding is not yet supported"); + else if (uri == option::Encoding && value == encoding::xer) throw std::invalid_argument("XER encoding is not yet supported"); + else if (uri == option::Encoding && value == encoding::asn1) throw std::invalid_argument("ASN.1 encoding is not yet supported"); + else if (uri == option::Encoding) throw std::invalid_argument("Unsupported ASN.1 encoding: " + value); + else if (uri == option::ParseEncapsulated) parseEncapsulated = parseBoolean(value); + else return false; + + return true; + } + void close() override { if (hasAvailableForReading()) throw std::logic_error("Unexpected content at the end of the stream"); // TODO: better exception diff -r 6ef41443211e -r 85b6f13f1088 src/lib/GenericASN1ContentHandler.h --- a/src/lib/GenericASN1ContentHandler.h Thu Jul 22 01:06:14 2021 +0200 +++ b/src/lib/GenericASN1ContentHandler.h Thu Jul 22 20:01:03 2021 +0200 @@ -22,6 +22,7 @@ #include "ASN1ContentHandler.h" #include "XMLContentHandler.h" +#include "uri.h" namespace relpipe { namespace in { @@ -37,6 +38,18 @@ private: XMLContentHandlerProxy handlers; + std::string rootName = "asn1"; + bool treeWithNamespaces = false; + + /** + * TODO: use a common method + */ + bool parseBoolean(const std::string& value) { + if (value == "true") return true; + else if (value == "false") return false; + else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)"); + } + std::vector getCommonAttributes(const Header& header, std::vector attributes = {}) { std::string tag = std::to_string(header.tag); std::string tagClass = std::to_string((uint64_t) header.tagClass); @@ -80,8 +93,20 @@ virtual ~GenericASN1ContentHandler() { } + bool setOption(const std::string& uri, const std::string& value) override { + if (uri == xml::RootName) rootName = value; + else if (uri == xml::TreeWithNamespaces) treeWithNamespaces = parseBoolean(value); + else if (uri == xml::TreeStyle && value == "standard"); // the only style currently supported + else if (uri == xml::TreeStyle && value == "literal") throw std::invalid_argument("Tree style 'literal' is not yet supported"); // will require ASN.1 schema, might be implemented in another class + else if (uri == xml::TreeStyle) throw std::invalid_argument("Unsupported tree-style: " + value); + else return false; + + return true; + } + void writeStreamStart() override { - handlers.writeStartElement("asn1"); + if (treeWithNamespaces) handlers.writeStartElement(rootName,{"xmlns", xml::XMLNS}); // TODO: actual namespace instead of a mere attribute + else handlers.writeStartElement(rootName); } void writeStreamEnd() override { diff -r 6ef41443211e -r 85b6f13f1088 src/lib/XMLContentHandler.h --- a/src/lib/XMLContentHandler.h Thu Jul 22 01:06:14 2021 +0200 +++ b/src/lib/XMLContentHandler.h Thu Jul 22 20:01:03 2021 +0200 @@ -28,6 +28,15 @@ virtual ~XMLContentHandler() = default; + /** + * @param uri identifier of the option + * @param value value of the option + * @return whether this option is supported and was applied here + */ + virtual bool setOption(const std::string& uri, const std::string& value) { + return false; + } + // FIXME: namespaces, check names virtual void writeStartElement(const std::string& name, const std::vector& attributes = {}) = 0; diff -r 6ef41443211e -r 85b6f13f1088 src/lib/uri.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/uri.h Thu Jul 22 20:01:03 2021 +0200 @@ -0,0 +1,53 @@ +/** + * Relational pipes + * Copyright © 2021 František Kučera (Frantovo.cz, GlobalCode.info; + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +namespace relpipe { +namespace in { +namespace asn1 { +namespace lib { + +// TODO: these strings will become globally unique URIs (or IRIs) after moving to alt2xml and relative/unprefixed names should also work + +/** general options of the ASN.1 parser */ +namespace option { +static const char* Encoding = "encoding"; +static const char* ParseEncapsulated = "parse-encapsulated"; +} + +namespace encoding { +static const char* ber = "BER"; +static const char* der = "DER"; +static const char* cer = "CER"; +static const char* xer = "XER"; +static const char* per = "PER"; +static const char* asn1 = "ASN.1"; // schema, model +} + +/** options for configuring the stage where events from the ASN.1 parser are converted to SAX events or DOM building */ +namespace xml { +static const char* TreeWithNamespaces = "tree-with-namespaces"; +static const char* TreeStyle = "tree-style"; +static const char* RootName = "root-name"; + +static const char* XMLNS = "tag:globalcode.info,2018:alt2xml:TEMPORARY:asn1"; // not an option and might change, just preliminary namespace +} + +} +} +} +}