src/XMLDocumentConstructor.h
author František Kučera <franta-hg@frantovo.cz>
Sat, 06 Feb 2021 20:24:55 +0100
branch v_0
changeset 4 dd8ff58fb29b
parent 3 e2d61907e75f
child 5 a3794fe5ea4b
permissions -rw-r--r--
add body/part content as CDATA

/**
 * Relational pipes
 * Copyright © 2021 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <cstdlib>
#include <codecvt>
#include <iomanip>
#include <istream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <libxml++-2.6/libxml++/libxml++.h>

#include <vmime/vmime.hpp>

#include "XMLNameCodec.h"

namespace relpipe {
namespace in {
namespace xmltable {

class XMLDocumentConstructor {
private:
	std::istream* input = nullptr;
	xmlpp::DomParser* parser = nullptr;
	XMLNameCodec nameCodec;

	std::string rootName = "mime-message";

	std::string format(std::shared_ptr<vmime::datetime> value) {
		std::stringstream timestamp;
		int tz = value->getZone();
		timestamp << value->getYear() << "-";
		timestamp << std::setw(2) << std::setfill('0') << value->getMonth() << "-";
		timestamp << std::setw(2) << std::setfill('0') << value->getDay() << "T";
		timestamp << std::setw(2) << std::setfill('0') << value->getHour() << ":";
		timestamp << std::setw(2) << std::setfill('0') << value->getMinute() << ":";
		timestamp << std::setw(2) << std::setfill('0') << value->getSecond() << (tz >= 0 ? "+" : "-");
		timestamp << std::setw(2) << std::setfill('0') << std::abs(tz / 60) << ":";
		timestamp << std::setw(2) << std::setfill('0') << std::abs(tz % 60);
		return timestamp.str();
	}

	std::string format(const vmime::mediaType& contentType) {
		return contentType.getType() + "/" + contentType.getSubType();
	}

	std::string fetchBody(std::shared_ptr<vmime::body> body) {
		std::stringstream result;
		vmime::utility::outputStreamAdapter resultAdapter(result);

		const vmime::charset targetEncoding = vmime::charset("utf-8");
		const vmime::charset sourceEncoding = body->getCharset();

		vmime::shared_ptr <vmime::charsetConverter> charsetConverter = vmime::charsetConverter::create(sourceEncoding, targetEncoding);
		vmime::shared_ptr <vmime::utility::charsetFilteredOutputStream> resultConverter = charsetConverter->getFilteredOutputStream(resultAdapter);

		body->getContents()->extract(*resultConverter);
		resultConverter->flush();

		return result.str();
	}

	void appendBody(xmlpp::Element* element, std::shared_ptr<vmime::body> body) {
		element->set_attribute("content-type", format(body->getContentType()));
		// element->set_attribute("content-type-charset", body->getCharset().getName());
		// element->set_attribute("content-transfer-encoding", body->getEncoding().getName());

		if (body->getPartCount() == 0) {
			element->add_child_cdata(fetchBody(body));
		} else {
			for (auto part : body->getPartList()) {
				xmlpp::Element* partElement = element->add_child("part");
				appendBody(partElement, part->getBody());
			}
		}
	}

public:

	XMLDocumentConstructor(std::istream* input, xmlpp::DomParser* parser) : input(input), parser(parser) {
	}

	virtual ~XMLDocumentConstructor() {
	}

	void setOption(const std::string& uri, const std::string& value) {
		if (uri == "root-name") rootName = value;
		else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
	}

	void process() {
		vmime::utility::inputStreamAdapter is(*input);
		vmime::string data;
		vmime::utility::outputStreamStringAdapter os(data);
		vmime::utility::bufferedStreamCopy(is, os);

		vmime::message m;
		m.parse(data);

		// vmime::shared_ptr<vmime::utility::inputStreamAdapter> is = vmime::make_shared<vmime::utility::inputStreamAdapter>(*input);
		// m.parse(is, 0);

		vmime::charset ch(vmime::charsets::UTF_8);

		//std::cerr << "Subject:" << m.getHeader()->Subject()->getValue<vmime::text>()->getConvertedText(ch) << std::endl;

		xmlpp::Element* root = parser->get_document()->create_root_node(rootName);

		xmlpp::Element* headers = root->add_child("headers");

		for (std::shared_ptr<vmime::headerField> mimeField : m.getHeader()->getFieldList()) {
			// TODO: Are names always ASCII and subset of UTF-8?
			// TODO: Convert header names to lower case? (they should be case insensitive)
			xmlpp::Element* field = headers->add_child(nameCodec.encode(mimeField->getName()));


			if (auto value = mimeField->getValue<vmime::text>()) {
				field->add_child_text(value->getConvertedText(ch));
			} else if (auto value = mimeField->getValue<vmime::mailbox>()) {
				std::string name = value->getName().getConvertedText(ch);
				std::string email = value->getEmail().toString();
				if (name.size()) field->set_attribute("name", name);
				if (email.size()) field->add_child_text(email);
			} else if (auto value = mimeField->getValue<vmime::addressList>()) {
				for (auto address : value->getAddressList()) {
					xmlpp::Element* addressField = field->add_child("address");
					if (std::shared_ptr<vmime::mailbox> mailbox = std::dynamic_pointer_cast<vmime::mailbox> (address)) {
						std::string name = mailbox->getName().getConvertedText(ch);
						std::string email = mailbox->getEmail().toString();
						if (name.size()) addressField->set_attribute("name", name);
						if (email.size()) addressField->add_child_text(email);
					} else if (std::shared_ptr<vmime::mailboxGroup> mailbox = std::dynamic_pointer_cast<vmime::mailboxGroup> (address)) {
						// TODO: mailboxGroup?
					}
				}
			} else if (auto value = mimeField->getValue<vmime::datetime>()) {
				// TODO: keep particular timestamp bits attributes or not?
				field->set_attribute("year", std::to_string(value->getYear()));
				field->set_attribute("month", std::to_string(value->getMonth()));
				field->set_attribute("day", std::to_string(value->getDay()));
				field->set_attribute("hour", std::to_string(value->getHour()));
				field->set_attribute("minute", std::to_string(value->getMinute()));
				field->set_attribute("second", std::to_string(value->getSecond()));
				field->set_attribute("zone", std::to_string(value->getZone())); // timezone is in minutes
				field->add_child_text(format(value));
			} else if (auto value = mimeField->getValue<vmime::mediaType>()) {
				if (value) field->add_child_text(format(*value));
				// TODO: encoding from the "Content-Type: text/plain; charset=us-ascii" type header?
			} else if (auto value = mimeField->getValue<vmime::messageId>()) {
				field->add_child_text(value->getId());
			} else if (auto value = mimeField->getValue<vmime::messageIdSequence>()) {
				for (auto messageId : value->getMessageIdList()) {
					xmlpp::Element* messageIdField = field->add_child("Mssage-ID"); // TODO: lower case?
					messageIdField->add_child_text(messageId->getId());
				}
			} else if (auto value = mimeField->getValue<vmime::contentDisposition>()) {
				field->add_child_text(value->getName());
			} else if (auto value = mimeField->getValue<vmime::relay>()) {
				field->set_attribute("from", value->getFrom());
				field->set_attribute("via", value->getVia());
				field->set_attribute("by", value->getBy());
				field->set_attribute("id", value->getId());
				field->set_attribute("for", value->getFor());
				// TODO: date of Received/relay
				// TODO: missing values or incomplete parsing of Received/relay in vmime
			} else if (auto value = mimeField->getValue<vmime::path>()) {
				std::string local = value->getLocalPart();
				std::string domain = value->getDomain();
				if (local.size() && domain.size()) field->add_child_text(local + "@" + domain);
				else field->add_child_text(local + domain);
			} else if (auto value = mimeField->getValue<vmime::encoding>()) {
				field->add_child_text(value->getName());
			} else {
				field->add_child_text("TODO: unknown header type"); // TODO: generic conversion as fallback?
			}
		}

		// TODO: check null pointers

		xmlpp::Element* body = root->add_child("body");
		appendBody(body, m.getBody());

	}
};

}
}
}