/**
* Relational pipes
* Copyright © 2021 František Kučera (Frantovo.cz, GlobalCode.info)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <algorithm>
#include <cctype>
#include <codecvt>
#include <cstdlib>
#include <iomanip>
#include <istream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <libxml++-2.6/libxml++/libxml++.h>
#include <vmime/vmime.hpp>

#include "XMLNameCodec.h"
namespace relpipe {
namespace in {
namespace xmltable {
class XMLDocumentConstructor {
private:
std::istream* input = nullptr;
xmlpp::DomParser* parser = nullptr;
XMLNameCodec nameCodec;
std::string rootName = "mime-message";
std::string format(std::shared_ptr<vmime::datetime> value) {
std::stringstream timestamp;
int tz = value->getZone();
timestamp << value->getYear() << "-";
timestamp << std::setw(2) << std::setfill('0') << value->getMonth() << "-";
timestamp << std::setw(2) << std::setfill('0') << value->getDay() << "T";
timestamp << std::setw(2) << std::setfill('0') << value->getHour() << ":";
timestamp << std::setw(2) << std::setfill('0') << value->getMinute() << ":";
timestamp << std::setw(2) << std::setfill('0') << value->getSecond() << (tz >= 0 ? "+" : "-");
timestamp << std::setw(2) << std::setfill('0') << std::abs(tz / 60) << ":";
timestamp << std::setw(2) << std::setfill('0') << std::abs(tz % 60);
return timestamp.str();
}
std::string format(const vmime::mediaType& contentType) {
return contentType.getType() + "/" + contentType.getSubType();
}
std::string toLowerCase(const std::string& value) {
std::string result = value;
std::transform(result.begin(), result.end(), result.begin(), ::tolower);
return result;
}
std::string toHex(const std::string& value) {
static const char* const hexSymbols = "0123456789abcdef";
size_t length = value.length();
std::string result;
result.reserve(3 * length - 1);
for (size_t i = 0; i < length; i++) {
const unsigned char ch = value[i];
result.push_back(hexSymbols[ch >> 4]);
result.push_back(hexSymbols[ch & 15]);
if (i < length - 1) result.push_back(' ');
}
return result;
}
std::string fetchBodyText(std::shared_ptr<vmime::body> body) {
std::stringstream result;
vmime::utility::outputStreamAdapter resultAdapter(result);
const vmime::charset targetEncoding = vmime::charset("utf-8");
const vmime::charset sourceEncoding = body->getCharset();
vmime::shared_ptr <vmime::charsetConverter> charsetConverter = vmime::charsetConverter::create(sourceEncoding, targetEncoding);
vmime::shared_ptr <vmime::utility::charsetFilteredOutputStream> resultConverter = charsetConverter->getFilteredOutputStream(resultAdapter);
body->getContents()->extract(*resultConverter);
resultConverter->flush();
return result.str();
}
std::string fetchBodyBinary(std::shared_ptr<vmime::body> body) {
std::stringstream result;
vmime::utility::outputStreamAdapter resultAdapter(result);
body->getContents()->extract(resultAdapter);
resultAdapter.flush();
return toHex(result.str());
}
void appendBody(xmlpp::Element* element, std::shared_ptr<vmime::body> body) {
element->set_attribute("content-type", format(body->getContentType()));
// element->set_attribute("content-type-charset", body->getCharset().getName());
// element->set_attribute("content-transfer-encoding", body->getEncoding().getName());
// TODO: size of raw data
if (body->getPartCount() == 0) {
if (body->getContentType().getType() == "text") element->add_child_cdata(fetchBodyText(body));
else element->add_child_text(fetchBodyBinary(body));
// TODO: if content is valid XML, import it in the DOM tree instead of pasting as a nested text/cdata
// TODO: optional trim of long data
} else {
for (auto part : body->getPartList()) {
xmlpp::Element* partElement = element->add_child("part");
appendBody(partElement, part->getBody());
}
}
}
public:
XMLDocumentConstructor(std::istream* input, xmlpp::DomParser* parser) : input(input), parser(parser) {
}
virtual ~XMLDocumentConstructor() {
}
void setOption(const std::string& uri, const std::string& value) {
if (uri == "root-name") rootName = value;
else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
}
void process() {
vmime::utility::inputStreamAdapter is(*input);
vmime::string data;
vmime::utility::outputStreamStringAdapter os(data);
vmime::utility::bufferedStreamCopy(is, os);
vmime::message m;
m.parse(data);
// vmime::shared_ptr<vmime::utility::inputStreamAdapter> is = vmime::make_shared<vmime::utility::inputStreamAdapter>(*input);
// m.parse(is, 0);
vmime::charset ch(vmime::charsets::UTF_8);
//std::cerr << "Subject:" << m.getHeader()->Subject()->getValue<vmime::text>()->getConvertedText(ch) << std::endl;
xmlpp::Element* root = parser->get_document()->create_root_node(rootName);
xmlpp::Element* headers = root->add_child("headers");
for (std::shared_ptr<vmime::headerField> mimeField : m.getHeader()->getFieldList()) {
// TODO: Are names always ASCII and subset of UTF-8?
xmlpp::Element* field = headers->add_child(toLowerCase(nameCodec.encode(mimeField->getName())));
if (auto value = mimeField->getValue<vmime::text>()) {
field->add_child_text(value->getConvertedText(ch));
} else if (auto value = mimeField->getValue<vmime::mailbox>()) {
std::string name = value->getName().getConvertedText(ch);
std::string email = value->getEmail().toString();
if (name.size()) field->set_attribute("name", name);
if (email.size()) field->add_child_text(email);
} else if (auto value = mimeField->getValue<vmime::addressList>()) {
for (auto address : value->getAddressList()) {
xmlpp::Element* addressField = field->add_child("address");
if (std::shared_ptr<vmime::mailbox> mailbox = std::dynamic_pointer_cast<vmime::mailbox> (address)) {
std::string name = mailbox->getName().getConvertedText(ch);
std::string email = mailbox->getEmail().toString();
if (name.size()) addressField->set_attribute("name", name);
if (email.size()) addressField->add_child_text(email);
} else if (std::shared_ptr<vmime::mailboxGroup> mailbox = std::dynamic_pointer_cast<vmime::mailboxGroup> (address)) {
// TODO: mailboxGroup?
}
}
} else if (auto value = mimeField->getValue<vmime::datetime>()) {
// TODO: keep particular timestamp bits attributes or not?
field->set_attribute("year", std::to_string(value->getYear()));
field->set_attribute("month", std::to_string(value->getMonth()));
field->set_attribute("day", std::to_string(value->getDay()));
field->set_attribute("hour", std::to_string(value->getHour()));
field->set_attribute("minute", std::to_string(value->getMinute()));
field->set_attribute("second", std::to_string(value->getSecond()));
field->set_attribute("zone", std::to_string(value->getZone())); // timezone is in minutes
field->add_child_text(format(value));
} else if (auto value = mimeField->getValue<vmime::mediaType>()) {
if (value) field->add_child_text(format(*value));
// TODO: encoding from the "Content-Type: text/plain; charset=us-ascii" type header?
} else if (auto value = mimeField->getValue<vmime::messageId>()) {
field->add_child_text(value->getId());
} else if (auto value = mimeField->getValue<vmime::messageIdSequence>()) {
for (auto messageId : value->getMessageIdList()) {
xmlpp::Element* messageIdField = field->add_child("message-id");
messageIdField->add_child_text(messageId->getId());
}
} else if (auto value = mimeField->getValue<vmime::contentDisposition>()) {
field->add_child_text(value->getName());
} else if (auto value = mimeField->getValue<vmime::relay>()) {
field->set_attribute("from", value->getFrom());
field->set_attribute("via", value->getVia());
field->set_attribute("by", value->getBy());
field->set_attribute("id", value->getId());
field->set_attribute("for", value->getFor());
// TODO: date of Received/relay
// TODO: missing values or incomplete parsing of Received/relay in vmime
} else if (auto value = mimeField->getValue<vmime::path>()) {
std::string local = value->getLocalPart();
std::string domain = value->getDomain();
if (local.size() && domain.size()) field->add_child_text(local + "@" + domain);
else field->add_child_text(local + domain);
} else if (auto value = mimeField->getValue<vmime::encoding>()) {
field->add_child_text(value->getName());
} else {
field->add_child_text("TODO: unknown header type"); // TODO: generic conversion as fallback?
}
}
// TODO: check null pointers
xmlpp::Element* body = root->add_child("body");
appendBody(body, m.getBody());
}
};
}
}
}