# HG changeset patch # User František Kučera # Date 1573422942 -3600 # Node ID 532953173cd537238e46ba736f0c1ca63b43145c # Parent 1b14ef641c7b78577a34772ecc001d42c92ced81 file hash: md5, sha1, sha256, sha512 diff -r 1b14ef641c7b -r 532953173cd5 bash-completion.sh --- a/bash-completion.sh Wed Oct 30 16:47:41 2019 +0100 +++ b/bash-completion.sh Sun Nov 10 22:55:42 2019 +0100 @@ -50,6 +50,13 @@ "dublincore.rights" ) + HASH_FIELDS=( + "md5" + "sha1" + "sha256" + "sha512" + ) + if [[ "$w1" == "--relation" && "x$w0" == "x" ]]; then COMPREPLY=("''") elif [[ "$w1" == "--as" && "x$w0" == "x" ]]; then COMPREPLY=("''") @@ -57,11 +64,13 @@ elif [[ "$w2" == "--option" && "x$w0" == "x" ]]; then COMPREPLY=("''") elif [[ "$w1" == "--file" ]]; then COMPREPLY=($(compgen -W "${FILE_FIELDS[*]}" -- "$w0")) elif [[ "$w1" == "--xattr" ]]; then COMPREPLY=($(compgen -W "${XATTR_FIELDS[*]}" -- "$w0")) + elif [[ "$w1" == "--hash" ]]; then COMPREPLY=($(compgen -W "${HASH_FIELDS[*]}" -- "$w0")) else OPTIONS=( "--relation" "--file" "--xattr" + "--hash" "--as" "--option" ) diff -r 1b14ef641c7b -r 532953173cd5 nbproject/configurations.xml --- a/nbproject/configurations.xml Wed Oct 30 16:47:41 2019 +0100 +++ b/nbproject/configurations.xml Sun Nov 10 22:55:42 2019 +0100 @@ -46,7 +46,9 @@ CLIParser.h Configuration.h FileAttributeFinder.h + HashAttributeFinder.h RequestedField.h + SystemProcess.h XattrAttributeFinder.h relpipe-in-filesystem.cpp @@ -105,8 +107,12 @@ + + + + @@ -156,8 +162,12 @@ + + + + diff -r 1b14ef641c7b -r 532953173cd5 src/CLIParser.h --- a/src/CLIParser.h Wed Oct 30 16:47:41 2019 +0100 +++ b/src/CLIParser.h Sun Nov 10 22:55:42 2019 +0100 @@ -51,6 +51,7 @@ static const string_t OPTION_FILE; static const string_t OPTION_XATTR; + static const string_t OPTION_HASH; static const string_t OPTION_AS; static const string_t OPTION_OPTION; static const string_t OPTION_RELATION; @@ -67,7 +68,7 @@ for (int i = 0; i < arguments.size();) { string_t option = readNext(arguments, i); - if (option == CLIParser::OPTION_FILE || option == CLIParser::OPTION_XATTR) { + if (option == CLIParser::OPTION_FILE || option == CLIParser::OPTION_XATTR || option == CLIParser::OPTION_HASH) { addField(c, currentGroup, currentName, currentAliases, currentOptions); // previous field currentGroup = option.substr(2); // cut off -- currentName = readNext(arguments, i); @@ -112,6 +113,7 @@ const string_t CLIParser::OPTION_FILE = L"--" + RequestedField::GROUP_FILE; const string_t CLIParser::OPTION_XATTR = L"--" + RequestedField::GROUP_XATTR; +const string_t CLIParser::OPTION_HASH = L"--" + RequestedField::GROUP_HASH; const string_t CLIParser::OPTION_AS = L"--as"; const string_t CLIParser::OPTION_OPTION = L"--option"; const string_t CLIParser::OPTION_RELATION = L"--relation"; diff -r 1b14ef641c7b -r 532953173cd5 src/FilesystemCommand.h --- a/src/FilesystemCommand.h Wed Oct 30 16:47:41 2019 +0100 +++ b/src/FilesystemCommand.h Sun Nov 10 22:55:42 2019 +0100 @@ -37,6 +37,7 @@ #include "AttributeFinder.h" #include "FileAttributeFinder.h" #include "XattrAttributeFinder.h" +#include "HashAttributeFinder.h" namespace relpipe { namespace in { @@ -50,10 +51,12 @@ std::wstring_convert> convertor; // TODO: support also other encodings. FileAttributeFinder fileAttributeFinder; + HashAttributeFinder hashAttributeFinder; XattrAttributeFinder xattrAttributeFinder; std::map attributeFinders{ {RequestedField::GROUP_FILE, &fileAttributeFinder}, + {RequestedField::GROUP_HASH, &hashAttributeFinder}, {RequestedField::GROUP_XATTR, &xattrAttributeFinder}}; void reset(std::stringstream& stream) { diff -r 1b14ef641c7b -r 532953173cd5 src/HashAttributeFinder.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HashAttributeFinder.h Sun Nov 10 22:55:42 2019 +0100 @@ -0,0 +1,102 @@ +/** + * Relational pipes + * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "RequestedField.h" +#include "SystemProcess.h" +#include "AttributeFinder.h" + +namespace relpipe { +namespace in { +namespace filesystem { + +namespace fs = std::filesystem; +using namespace relpipe::writer; + +class HashAttributeFinder : public AttributeFinder { +private: + std::wstring_convert> convertor; // TODO: support also other encodings. + + std::wregex standardHashPattern = std::wregex(L"^([a-f0-9]+) .*"); + + string_t getStandardHash(const fs::path& file, const std::string& hashCommand) { + try { + SystemProcess process({hashCommand, currentFileRaw}); + string_t output = convertor.from_bytes(process.execute()); + + std::wsmatch match; + if (regex_search(output, match, standardHashPattern)) return match[1]; + else throw RelpipeWriterException(L"Hash command returned wrong output: " + output); + } catch (relpipe::cli::RelpipeCLIException& e) { + // TODO: print warnings? + // TODO: do not fork/exec if the file is not readable + return L""; + } + } +protected: + + virtual void writeFieldOfExistingFile(RelationalWriter* writer, const RequestedField& field) override { + // TODO: paralelization? + // TODO: other formats, not only hex, but also base64 or binary + if (field.group == RequestedField::GROUP_HASH) { + for (string_t alias : field.getAliases()) { + if (field.name == FIELD_MD5) writer->writeAttribute(getStandardHash(currentFile, "md5sum")); + else if (field.name == FIELD_SHA1) writer->writeAttribute(getStandardHash(currentFile, "sha1sum")); + else if (field.name == FIELD_SHA256) writer->writeAttribute(getStandardHash(currentFile, "sha256sum")); + else if (field.name == FIELD_SHA512) writer->writeAttribute(getStandardHash(currentFile, "sha512sum")); + else throw RelpipeWriterException(L"Unsupported field name in HashAttributeFinder: " + field.name); + } + } + } + +public: + + static const string_t FIELD_MD5; + static const string_t FIELD_SHA1; + static const string_t FIELD_SHA256; + static const string_t FIELD_SHA512; + + virtual vector toMetadata(const RequestedField& field) override { + if (field.group == RequestedField::GROUP_HASH) { + vector metadata; + for (string_t alias : field.getAliases()) metadata.push_back(AttributeMetadata{alias, TypeId::STRING}); + return metadata; + } else { + return {}; + } + } + + virtual ~HashAttributeFinder() override { + } +}; + +const string_t HashAttributeFinder::FIELD_MD5 = L"md5"; +const string_t HashAttributeFinder::FIELD_SHA1 = L"sha1"; +const string_t HashAttributeFinder::FIELD_SHA256 = L"sha256"; +const string_t HashAttributeFinder::FIELD_SHA512 = L"sha512"; + +} +} +} diff -r 1b14ef641c7b -r 532953173cd5 src/RequestedField.h --- a/src/RequestedField.h Wed Oct 30 16:47:41 2019 +0100 +++ b/src/RequestedField.h Sun Nov 10 22:55:42 2019 +0100 @@ -30,6 +30,7 @@ public: static const string_t GROUP_FILE; static const string_t GROUP_XATTR; + static const string_t GROUP_HASH; string_t group; string_t name; std::vector aliases; @@ -56,6 +57,7 @@ const string_t RequestedField::GROUP_FILE = L"file"; const string_t RequestedField::GROUP_XATTR = L"xattr"; +const string_t RequestedField::GROUP_HASH = L"hash"; } } diff -r 1b14ef641c7b -r 532953173cd5 src/SystemProcess.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/SystemProcess.h Sun Nov 10 22:55:42 2019 +0100 @@ -0,0 +1,137 @@ +/** + * Relational pipes + * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace relpipe { +namespace in { +namespace filesystem { + +/** + * Simple wrapper for a system process (fork+exec) that captures and returns just the STDOUT. + */ +class SystemProcess { +private: + /** + * the command + its arguments + */ + std::vector commandLine; + int nullFile = -1; + + /** + * TODO: move to a common library (copied from the AWK module) + * @param args + */ + void execp(const std::vector& args) { + const char** a = new const char*[args.size() + 1]; + for (size_t i = 0; i < args.size(); i++) a[i] = args[i].c_str(); + a[args.size()] = nullptr; + + execvp(a[0], (char*const*) a); + + delete[] a; + throw relpipe::cli::RelpipeCLIException(L"Unable to do execvp().", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception? + } + + /** + * TODO: move to a common library (copied from the AWK module) + * @param readerFD + * @param writerFD + */ + void createPipe(int& readerFD, int& writerFD) { + int fds[2]; + int result = pipe(fds); + readerFD = fds[0]; + writerFD = fds[1]; + if (result < 0) throw relpipe::cli::RelpipeCLIException(L"Unable to create a pipe.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception? + } + + /** + * TODO: move to a common library (copied from the AWK module) + */ + void redirectFD(int oldfd, int newfd) { + int result = dup2(oldfd, newfd); + if (result < 0) throw relpipe::cli::RelpipeCLIException(L"Unable redirect FD.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception? + } + + /** + * TODO: move to a common library (copied from the AWK module) + */ + void closeOrThrow(int fd) { + int error = close(fd); + if (error) throw relpipe::cli::RelpipeCLIException(L"Unable to close FD: " + to_wstring(fd) + L" from PID: " + to_wstring(getpid()), relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception? + } + +public: + + SystemProcess(std::vector commandLine) : commandLine(commandLine) { + nullFile = open("/dev/null", O_RDWR); + } + + virtual ~SystemProcess() { + close(nullFile); + } + + std::string execute() { + + std::stringstream result; + + int hashReaderFD; + int hashWriterFD; + createPipe(hashReaderFD, hashWriterFD); + + __pid_t hashPid = fork(); + + if (hashPid < 0) { + throw relpipe::cli::RelpipeCLIException(L"Unable to fork the hash process.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception? + } else if (hashPid == 0) { + // Child process + closeOrThrow(hashReaderFD); + redirectFD(nullFile, STDIN_FILENO); + redirectFD(nullFile, STDERR_FILENO); + redirectFD(hashWriterFD, STDOUT_FILENO); + execp(commandLine); + } else { + // Parent process + closeOrThrow(hashWriterFD); + + __gnu_cxx::stdio_filebuf hashReaderBuffer(hashReaderFD, std::ios::in); + std::istream hashReader(&hashReaderBuffer); + + for (char ch; hashReader.read(&ch, 1).good();) result.put(ch); + + int waitError; + __pid_t waitPID = wait(&waitError); + if (waitError) throw relpipe::cli::RelpipeCLIException(L"The child process returned an error exit code.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception? + } + + return result.str(); + } +}; + +} +} +}