src/HashAttributeFinder.h
branchv_0
changeset 27 532953173cd5
parent 24 4353cd19a6b5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HashAttributeFinder.h	Sun Nov 10 22:55:42 2019 +0100
@@ -0,0 +1,102 @@
+/**
+ * Relational pipes
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <vector>
+#include <filesystem>
+
+#include <relpipe/writer/typedefs.h>
+#include <relpipe/writer/AttributeMetadata.h>
+#include <relpipe/writer/RelationalWriter.h>
+#include <regex>
+
+#include "RequestedField.h"
+#include "SystemProcess.h"
+#include "AttributeFinder.h"
+
+namespace relpipe {
+namespace in {
+namespace filesystem {
+
+namespace fs = std::filesystem;
+using namespace relpipe::writer;
+
+/**
+ * Attribute finder for fields of the RequestedField::GROUP_HASH group.
+ * The hashes (md5, sha1, sha256, sha512) are computed by executing the external
+ * md5sum, sha1sum, sha256sum or sha512sum command on the current file.
+ */
+class HashAttributeFinder : public AttributeFinder {
+private:
+	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
+
+	/**
+	 * Matches the hex digest at the beginning of the command output.
+	 * GNU coreutils start the line with a backslash if the file name had to be escaped
+	 * (it contains a backslash or a newline), so one optional leading backslash is accepted.
+	 */
+	std::regex standardHashPattern = std::regex("^\\\\?([a-f0-9]+) .*");
+
+	/**
+	 * @param fieldName name of the requested hash field
+	 * @return name of the command that computes the given hash
+	 * @throws RelpipeWriterException if the field name is not one of the FIELD_* constants
+	 */
+	static std::string getHashCommand(const string_t& fieldName) {
+		if (fieldName == FIELD_MD5) return "md5sum";
+		if (fieldName == FIELD_SHA1) return "sha1sum";
+		if (fieldName == FIELD_SHA256) return "sha256sum";
+		if (fieldName == FIELD_SHA512) return "sha512sum";
+		throw RelpipeWriterException(L"Unsupported field name in HashAttributeFinder: " + fieldName);
+	}
+
+	/**
+	 * Executes the hash command with currentFileRaw as its only argument and parses the digest.
+	 *
+	 * @param file currently unused; the command is always executed on currentFileRaw
+	 * @param hashCommand name of the command to be executed, e.g. "md5sum"
+	 * @return hex digest, or an empty string if the execution failed with a RelpipeCLIException
+	 * @throws RelpipeWriterException if the output does not start with a hex digest followed by a space
+	 */
+	string_t getStandardHash(const fs::path& file, const std::string& hashCommand) {
+		try {
+			SystemProcess process({hashCommand, currentFileRaw});
+			const std::string output = process.execute();
+
+			// The output contains also the file name, which might not be valid in the expected encoding
+			// and its conversion would throw std::range_error. So we match the raw bytes
+			// and convert only the digest, which consists of ASCII characters.
+			std::smatch match;
+			if (std::regex_search(output, match, standardHashPattern)) return convertor.from_bytes(match[1].str());
+			else throw RelpipeWriterException(L"Hash command returned wrong output: " + convertor.from_bytes(output));
+		} catch (const relpipe::cli::RelpipeCLIException&) {
+			// TODO: print warnings?
+			// TODO: do not fork/exec if the file is not readable
+			return L"";
+		}
+	}
+protected:
+
+	/**
+	 * Writes the hash of the current file as one attribute per alias of the field.
+	 * Fields of other groups than RequestedField::GROUP_HASH are ignored.
+	 *
+	 * @throws RelpipeWriterException if the field has at least one alias and its name is not supported
+	 */
+	virtual void writeFieldOfExistingFile(RelationalWriter* writer, const RequestedField& field) override {
+		// TODO: parallelization?
+		// TODO: other formats, not only hex, but also base64 or binary
+		if (field.group != RequestedField::GROUP_HASH) return;
+
+		// The hash is computed lazily and only once: each computation means executing an external process
+		// and all aliases of the field share the same value.
+		bool computed = false;
+		string_t hash;
+		for ([[maybe_unused]] const string_t& alias : field.getAliases()) {
+			if (!computed) {
+				hash = getStandardHash(currentFile, getHashCommand(field.name));
+				computed = true;
+			}
+			writer->writeAttribute(hash);
+		}
+	}
+
+public:
+
+	static const string_t FIELD_MD5;
+	static const string_t FIELD_SHA1;
+	static const string_t FIELD_SHA256;
+	static const string_t FIELD_SHA512;
+
+	/**
+	 * @return one attribute of the TypeId::STRING type per alias if the field belongs
+	 * to the RequestedField::GROUP_HASH group, otherwise an empty vector
+	 */
+	virtual vector<AttributeMetadata> toMetadata(const RequestedField& field) override {
+		vector<AttributeMetadata> metadata;
+		if (field.group == RequestedField::GROUP_HASH) {
+			for (const string_t& alias : field.getAliases()) metadata.push_back(AttributeMetadata{alias, TypeId::STRING});
+		}
+		return metadata;
+	}
+
+	virtual ~HashAttributeFinder() override {
+	}
+};
+
+// Names of the supported hash fields.
+// Defined as inline variables (C++17), because this is a header file: without the inline specifier,
+// each translation unit including it would emit its own definition and violate the one definition rule.
+inline const string_t HashAttributeFinder::FIELD_MD5 = L"md5";
+inline const string_t HashAttributeFinder::FIELD_SHA1 = L"sha1";
+inline const string_t HashAttributeFinder::FIELD_SHA256 = L"sha256";
+inline const string_t HashAttributeFinder::FIELD_SHA512 = L"sha512";
+
+}
+}
+}