src/HashAttributeFinder.h
author František Kučera <franta-hg@frantovo.cz>
Sun, 10 Nov 2019 22:55:42 +0100
branch v_0
changeset 27 532953173cd5
parent 24 src/XattrAttributeFinder.h@4353cd19a6b5
permissions -rw-r--r--
file hash: md5, sha1, sha256, sha512

/**
 * Relational pipes
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once

#include <codecvt>
#include <filesystem>
#include <locale>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>

#include <relpipe/writer/typedefs.h>
#include <relpipe/writer/AttributeMetadata.h>
#include <relpipe/writer/RelationalWriter.h>

#include "RequestedField.h"
#include "SystemProcess.h"
#include "AttributeFinder.h"

namespace relpipe {
namespace in {
namespace filesystem {

namespace fs = std::filesystem;
using namespace relpipe::writer;

class HashAttributeFinder : public AttributeFinder {
private:
	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.

	std::wregex standardHashPattern = std::wregex(L"^([a-f0-9]+) .*");

	string_t getStandardHash(const fs::path& file, const std::string& hashCommand) {
		try {
			SystemProcess process({hashCommand, currentFileRaw});
			string_t output = convertor.from_bytes(process.execute());

			std::wsmatch match;
			if (regex_search(output, match, standardHashPattern)) return match[1];
			else throw RelpipeWriterException(L"Hash command returned wrong output: " + output);
		} catch (relpipe::cli::RelpipeCLIException& e) {
			// TODO: print warnings?
			// TODO: do not fork/exec if the file is not readable
			return L"";
		}
	}
protected:

	virtual void writeFieldOfExistingFile(RelationalWriter* writer, const RequestedField& field) override {
		// TODO: paralelization?
		// TODO: other formats, not only hex, but also base64 or binary
		if (field.group == RequestedField::GROUP_HASH) {
			for (string_t alias : field.getAliases()) {
				if (field.name == FIELD_MD5) writer->writeAttribute(getStandardHash(currentFile, "md5sum"));
				else if (field.name == FIELD_SHA1) writer->writeAttribute(getStandardHash(currentFile, "sha1sum"));
				else if (field.name == FIELD_SHA256) writer->writeAttribute(getStandardHash(currentFile, "sha256sum"));
				else if (field.name == FIELD_SHA512) writer->writeAttribute(getStandardHash(currentFile, "sha512sum"));
				else throw RelpipeWriterException(L"Unsupported field name in HashAttributeFinder: " + field.name);
			}
		}
	}

public:

	static const string_t FIELD_MD5;
	static const string_t FIELD_SHA1;
	static const string_t FIELD_SHA256;
	static const string_t FIELD_SHA512;

	virtual vector<AttributeMetadata> toMetadata(const RequestedField& field) override {
		if (field.group == RequestedField::GROUP_HASH) {
			vector<AttributeMetadata> metadata;
			for (string_t alias : field.getAliases()) metadata.push_back(AttributeMetadata{alias, TypeId::STRING});
			return metadata;
		} else {
			return {};
		}
	}

	virtual ~HashAttributeFinder() override {
	}
};

// Names of the supported hash fields.
// These definitions live in a header, so they are declared inline (C++17):
// without it, every translation unit including this header would emit its own definition
// and the program would fail to link (multiple definition).
inline const string_t HashAttributeFinder::FIELD_MD5 = L"md5";
inline const string_t HashAttributeFinder::FIELD_SHA1 = L"sha1";
inline const string_t HashAttributeFinder::FIELD_SHA256 = L"sha256";
inline const string_t HashAttributeFinder::FIELD_SHA512 = L"sha512";

}
}
}