file hash: md5, sha1, sha256, sha512 v_0
author František Kučera <franta-hg@frantovo.cz>
Sun, 10 Nov 2019 22:55:42 +0100
branch v_0
changeset 27 532953173cd5
parent 26 1b14ef641c7b
child 28 9172bd97ae99
file hash: md5, sha1, sha256, sha512
bash-completion.sh
nbproject/configurations.xml
src/CLIParser.h
src/FilesystemCommand.h
src/HashAttributeFinder.h
src/RequestedField.h
src/SystemProcess.h
--- a/bash-completion.sh	Wed Oct 30 16:47:41 2019 +0100
+++ b/bash-completion.sh	Sun Nov 10 22:55:42 2019 +0100
@@ -50,6 +50,13 @@
 		"dublincore.rights"
 	)
 
+	HASH_FIELDS=(
+		"md5"
+		"sha1"
+		"sha256"
+		"sha512"
+	)
+
 
 	if   [[ "$w1" == "--relation"      && "x$w0" == "x" ]];    then COMPREPLY=("''")
 	elif [[ "$w1" == "--as"            && "x$w0" == "x" ]];    then COMPREPLY=("''")
@@ -57,11 +64,13 @@
 	elif [[ "$w2" == "--option"        && "x$w0" == "x" ]];    then COMPREPLY=("''")
 	elif [[ "$w1" == "--file"                           ]];    then COMPREPLY=($(compgen -W "${FILE_FIELDS[*]}"  -- "$w0"))
 	elif [[ "$w1" == "--xattr"                          ]];    then COMPREPLY=($(compgen -W "${XATTR_FIELDS[*]}" -- "$w0"))
+	elif [[ "$w1" == "--hash"                           ]];    then COMPREPLY=($(compgen -W "${HASH_FIELDS[*]}" -- "$w0"))
 	else
 		OPTIONS=(
 			"--relation"
 			"--file"
 			"--xattr"
+			"--hash"
 			"--as"
 			"--option"
 		)
--- a/nbproject/configurations.xml	Wed Oct 30 16:47:41 2019 +0100
+++ b/nbproject/configurations.xml	Sun Nov 10 22:55:42 2019 +0100
@@ -46,7 +46,9 @@
         <in>CLIParser.h</in>
         <in>Configuration.h</in>
         <in>FileAttributeFinder.h</in>
+        <in>HashAttributeFinder.h</in>
         <in>RequestedField.h</in>
+        <in>SystemProcess.h</in>
         <in>XattrAttributeFinder.h</in>
         <in>relpipe-in-filesystem.cpp</in>
       </df>
@@ -105,8 +107,12 @@
       </item>
       <item path="src/FileAttributeFinder.h" ex="false" tool="3" flavor2="0">
       </item>
+      <item path="src/HashAttributeFinder.h" ex="false" tool="3" flavor2="0">
+      </item>
       <item path="src/RequestedField.h" ex="false" tool="3" flavor2="0">
       </item>
+      <item path="src/SystemProcess.h" ex="false" tool="3" flavor2="0">
+      </item>
       <item path="src/XattrAttributeFinder.h" ex="false" tool="3" flavor2="0">
       </item>
       <item path="src/relpipe-in-filesystem.cpp" ex="false" tool="1" flavor2="0">
@@ -156,8 +162,12 @@
       </item>
       <item path="src/FileAttributeFinder.h" ex="false" tool="3" flavor2="0">
       </item>
+      <item path="src/HashAttributeFinder.h" ex="false" tool="3" flavor2="0">
+      </item>
       <item path="src/RequestedField.h" ex="false" tool="3" flavor2="0">
       </item>
+      <item path="src/SystemProcess.h" ex="false" tool="3" flavor2="0">
+      </item>
       <item path="src/XattrAttributeFinder.h" ex="false" tool="3" flavor2="0">
       </item>
       <item path="src/relpipe-in-filesystem.cpp" ex="false" tool="1" flavor2="0">
--- a/src/CLIParser.h	Wed Oct 30 16:47:41 2019 +0100
+++ b/src/CLIParser.h	Sun Nov 10 22:55:42 2019 +0100
@@ -51,6 +51,7 @@
 
 	static const string_t OPTION_FILE;
 	static const string_t OPTION_XATTR;
+	static const string_t OPTION_HASH;
 	static const string_t OPTION_AS;
 	static const string_t OPTION_OPTION;
 	static const string_t OPTION_RELATION;
@@ -67,7 +68,7 @@
 			for (int i = 0; i < arguments.size();) {
 				string_t option = readNext(arguments, i);
 
-				if (option == CLIParser::OPTION_FILE || option == CLIParser::OPTION_XATTR) {
+				if (option == CLIParser::OPTION_FILE || option == CLIParser::OPTION_XATTR || option == CLIParser::OPTION_HASH) {
 					addField(c, currentGroup, currentName, currentAliases, currentOptions); // previous field
 					currentGroup = option.substr(2); // cut off --
 					currentName = readNext(arguments, i);
@@ -112,6 +113,7 @@
 
 const string_t CLIParser::OPTION_FILE = L"--" + RequestedField::GROUP_FILE;
 const string_t CLIParser::OPTION_XATTR = L"--" + RequestedField::GROUP_XATTR;
+const string_t CLIParser::OPTION_HASH = L"--" + RequestedField::GROUP_HASH;
 const string_t CLIParser::OPTION_AS = L"--as";
 const string_t CLIParser::OPTION_OPTION = L"--option";
 const string_t CLIParser::OPTION_RELATION = L"--relation";
--- a/src/FilesystemCommand.h	Wed Oct 30 16:47:41 2019 +0100
+++ b/src/FilesystemCommand.h	Sun Nov 10 22:55:42 2019 +0100
@@ -37,6 +37,7 @@
 #include "AttributeFinder.h"
 #include "FileAttributeFinder.h"
 #include "XattrAttributeFinder.h"
+#include "HashAttributeFinder.h"
 
 namespace relpipe {
 namespace in {
@@ -50,10 +51,12 @@
 	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
 
 	FileAttributeFinder fileAttributeFinder;
+	HashAttributeFinder hashAttributeFinder;
 	XattrAttributeFinder xattrAttributeFinder;
 
 	std::map<string_t, AttributeFinder*> attributeFinders{
 		{RequestedField::GROUP_FILE, &fileAttributeFinder},
+		{RequestedField::GROUP_HASH, &hashAttributeFinder},
 		{RequestedField::GROUP_XATTR, &xattrAttributeFinder}};
 
 	void reset(std::stringstream& stream) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HashAttributeFinder.h	Sun Nov 10 22:55:42 2019 +0100
@@ -0,0 +1,165 @@
+/**
+ * Relational pipes
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <codecvt>
+#include <filesystem>
+#include <locale>
+#include <regex>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <relpipe/writer/typedefs.h>
+#include <relpipe/writer/AttributeMetadata.h>
+#include <relpipe/writer/RelationalWriter.h>
+
+#include "RequestedField.h"
+#include "SystemProcess.h"
+#include "AttributeFinder.h"
+
+namespace relpipe {
+namespace in {
+namespace filesystem {
+
+namespace fs = std::filesystem;
+using namespace relpipe::writer;
+
+/**
+ * Attribute finder for the "hash" field group: computes file checksums (md5, sha1, sha256, sha512)
+ * by executing the corresponding external command (md5sum, sha1sum, ...) and parsing its output.
+ */
+class HashAttributeFinder : public AttributeFinder {
+private:
+	std::wstring_convert<std::codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
+
+	/**
+	 * Matches the raw (byte) output of the hash command: hexadecimal digest followed by a space.
+	 * GNU coreutils start the line with a backslash if the file name had to be escaped
+	 * (it contains a backslash or a newline), so one optional leading backslash is accepted.
+	 */
+	std::regex standardHashPattern = std::regex("^\\\\?([a-f0-9]+) ");
+
+	/**
+	 * Decodes the command output for use in an error message.
+	 * The output contains the file name, which might be in any encoding, so a decoding failure must not escape.
+	 */
+	string_t decodeLeniently(const std::string& bytes) {
+		try {
+			return convertor.from_bytes(bytes);
+		} catch (const std::range_error&) {
+			return L"(undecodable output)";
+		}
+	}
+
+	/**
+	 * Maps a field name to the external program that computes the given hash.
+	 *
+	 * @param fieldName name of the requested field, expected to be one of the FIELD_* constants
+	 * @return name of the program to be executed
+	 * @throws RelpipeWriterException if the field name is not supported
+	 */
+	static std::string findHashCommand(const string_t& fieldName) {
+		if (fieldName == FIELD_MD5) return "md5sum";
+		else if (fieldName == FIELD_SHA1) return "sha1sum";
+		else if (fieldName == FIELD_SHA256) return "sha256sum";
+		else if (fieldName == FIELD_SHA512) return "sha512sum";
+		else throw RelpipeWriterException(L"Unsupported field name in HashAttributeFinder: " + fieldName);
+	}
+
+	/**
+	 * Executes the hash command on the current file and extracts the digest from its output.
+	 *
+	 * @param file currently unused - the command gets currentFileRaw (presumably the same file; TODO: confirm and unify)
+	 * @param hashCommand program to be executed, e.g. "md5sum"
+	 * @return hexadecimal digest, or an empty string if the process could not be executed or failed
+	 * @throws RelpipeWriterException if the process succeeded but its output does not match standardHashPattern
+	 */
+	string_t getStandardHash(const fs::path& file, const std::string& hashCommand) {
+		try {
+			// "--" ends the options, so a file name starting with "-" is not interpreted as an option.
+			SystemProcess process({hashCommand, "--", currentFileRaw});
+			const std::string output = process.execute();
+
+			// Only the digest (plain ASCII) is decoded; the file name in the rest of the output might not be valid UTF-8.
+			std::smatch match;
+			if (std::regex_search(output, match, standardHashPattern)) return convertor.from_bytes(match[1].str());
+			else throw RelpipeWriterException(L"Hash command returned wrong output: " + decodeLeniently(output));
+		} catch (const relpipe::cli::RelpipeCLIException& e) {
+			// TODO: print warnings?
+			// TODO: do not fork/exec if the file is not readable
+			return L"";
+		}
+	}
+protected:
+
+	/**
+	 * Writes the requested hash of the current file - one attribute for each alias of the field.
+	 * The hash is computed just once (single process execution) regardless of the number of aliases.
+	 * Fields from other groups are ignored.
+	 *
+	 * @throws RelpipeWriterException if the field name is not a supported hash
+	 */
+	virtual void writeFieldOfExistingFile(RelationalWriter* writer, const RequestedField& field) override {
+		// TODO: parallelization?
+		// TODO: other formats, not only hex, but also base64 or binary
+		if (field.group != RequestedField::GROUP_HASH) return;
+
+		bool computed = false;
+		string_t hash;
+		for (const auto& alias : field.getAliases()) {
+			(void) alias; // every alias gets the same value
+			if (!computed) {
+				hash = getStandardHash(currentFile, findHashCommand(field.name));
+				computed = true;
+			}
+			writer->writeAttribute(hash);
+		}
+	}
+
+public:
+
+	static const string_t FIELD_MD5;
+	static const string_t FIELD_SHA1;
+	static const string_t FIELD_SHA256;
+	static const string_t FIELD_SHA512;
+
+	/**
+	 * @return one STRING attribute for each alias of a field from the hash group, or nothing for other groups
+	 */
+	virtual std::vector<AttributeMetadata> toMetadata(const RequestedField& field) override {
+		if (field.group == RequestedField::GROUP_HASH) {
+			std::vector<AttributeMetadata> metadata;
+			for (const auto& alias : field.getAliases()) metadata.push_back(AttributeMetadata{alias, TypeId::STRING});
+			return metadata;
+		} else {
+			return {};
+		}
+	}
+
+	virtual ~HashAttributeFinder() override {
+	}
+};
+
+const string_t HashAttributeFinder::FIELD_MD5 = L"md5";
+const string_t HashAttributeFinder::FIELD_SHA1 = L"sha1";
+const string_t HashAttributeFinder::FIELD_SHA256 = L"sha256";
+const string_t HashAttributeFinder::FIELD_SHA512 = L"sha512";
+
+}
+}
+}
--- a/src/RequestedField.h	Wed Oct 30 16:47:41 2019 +0100
+++ b/src/RequestedField.h	Sun Nov 10 22:55:42 2019 +0100
@@ -30,6 +30,7 @@
 public:
 	static const string_t GROUP_FILE;
 	static const string_t GROUP_XATTR;
+	static const string_t GROUP_HASH;
 	string_t group;
 	string_t name;
 	std::vector<string_t> aliases;
@@ -56,6 +57,7 @@
 
 const string_t RequestedField::GROUP_FILE = L"file";
 const string_t RequestedField::GROUP_XATTR = L"xattr";
+const string_t RequestedField::GROUP_HASH = L"hash";
 
 }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SystemProcess.h	Sun Nov 10 22:55:42 2019 +0100
@@ -0,0 +1,176 @@
+/**
+ * Relational pipes
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <cerrno>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <ext/stdio_filebuf.h>
+
+#include <relpipe/writer/typedefs.h>
+#include <relpipe/cli/RelpipeCLIException.h>
+
+namespace relpipe {
+namespace in {
+namespace filesystem {
+
+/**
+ * Simple wrapper for a system process (fork+exec) that captures and returns just the STDOUT.
+ * STDIN and STDERR of the process are redirected to /dev/null.
+ */
+class SystemProcess {
+private:
+	/**
+	 * the command + its arguments
+	 */
+	std::vector<std::string> commandLine;
+	/**
+	 * file descriptor of /dev/null, opened in the constructor and closed in the destructor
+	 */
+	int nullFile = -1;
+
+	/**
+	 * Replaces the current process with the given command (execvp() looks it up in PATH).
+	 * Returns - by throwing - only if the command could not be executed.
+	 * TODO: move to a common library (copied from the AWK module)
+	 * @param args the command + its arguments
+	 * @throws relpipe::cli::RelpipeCLIException if args is empty or execvp() failed
+	 */
+	void execp(const std::vector<std::string>& args) {
+		std::vector<const char*> a;
+		a.reserve(args.size() + 1);
+		for (const std::string& arg : args) a.push_back(arg.c_str());
+		a.push_back(nullptr); // the argument list must be terminated by a null pointer
+
+		if (!args.empty()) execvp(a[0], const_cast<char* const*> (a.data())); // returns only on failure
+
+		throw relpipe::cli::RelpipeCLIException(L"Unable to do execvp().", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?
+	}
+
+	/**
+	 * Creates an anonymous pipe.
+	 * TODO: move to a common library (copied from the AWK module)
+	 * @param readerFD set to the reading end of the pipe (untouched on failure)
+	 * @param writerFD set to the writing end of the pipe (untouched on failure)
+	 * @throws relpipe::cli::RelpipeCLIException if the pipe could not be created
+	 */
+	void createPipe(int& readerFD, int& writerFD) {
+		int fds[2];
+		if (pipe(fds) < 0) throw relpipe::cli::RelpipeCLIException(L"Unable to create a pipe.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?
+		readerFD = fds[0];
+		writerFD = fds[1];
+	}
+
+	/**
+	 * Makes newfd refer to the same open file as oldfd (dup2).
+	 * TODO: move to a common library (copied from the AWK module)
+	 * @throws relpipe::cli::RelpipeCLIException if dup2() failed
+	 */
+	void redirectFD(int oldfd, int newfd) {
+		if (dup2(oldfd, newfd) < 0) throw relpipe::cli::RelpipeCLIException(L"Unable redirect FD.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?
+	}
+
+	/**
+	 * Closes the file descriptor.
+	 * TODO: move to a common library (copied from the AWK module)
+	 * @throws relpipe::cli::RelpipeCLIException if close() failed
+	 */
+	void closeOrThrow(int fd) {
+		if (close(fd)) throw relpipe::cli::RelpipeCLIException(L"Unable to close FD: " + std::to_wstring(fd) + L" from PID: " + std::to_wstring(getpid()), relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?
+	}
+
+public:
+
+	/**
+	 * @param commandLine the command + its arguments
+	 * @throws relpipe::cli::RelpipeCLIException if /dev/null could not be opened
+	 */
+	SystemProcess(std::vector<std::string> commandLine) : commandLine(std::move(commandLine)) {
+		nullFile = open("/dev/null", O_RDWR);
+		if (nullFile < 0) throw relpipe::cli::RelpipeCLIException(L"Unable to open /dev/null.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?
+	}
+
+	// The object owns the nullFile descriptor, so a copy would close it twice.
+	SystemProcess(const SystemProcess&) = delete;
+	SystemProcess& operator=(const SystemProcess&) = delete;
+
+	virtual ~SystemProcess() {
+		if (nullFile >= 0) close(nullFile);
+	}
+
+	/**
+	 * Executes the command, waits until it finishes and returns everything it wrote to its STDOUT.
+	 *
+	 * @return raw bytes written by the process to its STDOUT
+	 * @throws relpipe::cli::RelpipeCLIException if the process could not be started or did not exit with zero code
+	 */
+	std::string execute() {
+		int hashReaderFD;
+		int hashWriterFD;
+		createPipe(hashReaderFD, hashWriterFD);
+
+		const pid_t hashPid = fork();
+
+		if (hashPid < 0) {
+			// There is no child that would use the pipe, so do not leak it.
+			close(hashReaderFD);
+			close(hashWriterFD);
+			throw relpipe::cli::RelpipeCLIException(L"Unable to fork the hash process.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?
+		} else if (hashPid == 0) {
+			// Child process
+			try {
+				closeOrThrow(hashReaderFD);
+				redirectFD(nullFile, STDIN_FILENO);
+				redirectFD(nullFile, STDERR_FILENO);
+				redirectFD(hashWriterFD, STDOUT_FILENO);
+				execp(commandLine);
+			} catch (...) {
+				// The failure is reported to the parent through the exit code below.
+			}
+			// The child must never return to the caller: it would continue running as a second copy of the parent program.
+			// _exit() skips the atexit handlers and does not flush the stdio buffers inherited from the parent.
+			_exit(127);
+		}
+
+		// Parent process
+		__gnu_cxx::stdio_filebuf<char> hashReaderBuffer(hashReaderFD, std::ios::in); // takes over the descriptor and closes it when destroyed
+		closeOrThrow(hashWriterFD); // our copy of the writing end must be closed, otherwise we would never see EOF
+
+		std::ostringstream result;
+		result << &hashReaderBuffer;
+
+		// Wait for this particular child (not for any child) and retry if interrupted by a signal.
+		int status = 0;
+		pid_t waitPID;
+		do {
+			waitPID = waitpid(hashPid, &status, 0);
+		} while (waitPID < 0 && errno == EINTR);
+
+		if (waitPID != hashPid || !WIFEXITED(status) || WEXITSTATUS(status) != 0) throw relpipe::cli::RelpipeCLIException(L"The child process returned an error exit code.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?
+
+		return result.str();
+	}
+};
+
+}
+}
+}