# HG changeset patch # User František Kučera # Date 1573479733 -3600 # Node ID 9172bd97ae99edf5be8b8e7b30915b0e21d663e0 # Parent 532953173cd537238e46ba736f0c1ca63b43145c custom scripts for additional attributes diff -r 532953173cd5 -r 9172bd97ae99 bash-completion.sh --- a/bash-completion.sh Sun Nov 10 22:55:42 2019 +0100 +++ b/bash-completion.sh Mon Nov 11 14:42:13 2019 +0100 @@ -13,6 +13,10 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +_relpipe_in_filesystem_scripts() { + while read c; do echo ${c:31}; done < <(compgen -c "__relpipe_in_filesystem_script_"); +} + _relpipe_in_filesystem_completion() { local w0 w1 w2 @@ -65,12 +69,14 @@ elif [[ "$w1" == "--file" ]]; then COMPREPLY=($(compgen -W "${FILE_FIELDS[*]}" -- "$w0")) elif [[ "$w1" == "--xattr" ]]; then COMPREPLY=($(compgen -W "${XATTR_FIELDS[*]}" -- "$w0")) elif [[ "$w1" == "--hash" ]]; then COMPREPLY=($(compgen -W "${HASH_FIELDS[*]}" -- "$w0")) + elif [[ "$w1" == "--script" ]]; then COMPREPLY=($(compgen -W "$(_relpipe_in_filesystem_scripts)" -- "$w0")) else OPTIONS=( "--relation" "--file" "--xattr" "--hash" + "--script" "--as" "--option" ) diff -r 532953173cd5 -r 9172bd97ae99 nbproject/configurations.xml --- a/nbproject/configurations.xml Sun Nov 10 22:55:42 2019 +0100 +++ b/nbproject/configurations.xml Mon Nov 11 14:42:13 2019 +0100 @@ -48,6 +48,7 @@ FileAttributeFinder.h HashAttributeFinder.h RequestedField.h + ScriptAttributeFinder.h SystemProcess.h XattrAttributeFinder.h relpipe-in-filesystem.cpp @@ -111,6 +112,8 @@ + + @@ -166,6 +169,8 @@ + + diff -r 532953173cd5 -r 9172bd97ae99 script-examples/__relpipe_in_filesystem_script_inode --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/script-examples/__relpipe_in_filesystem_script_inode Mon Nov 11 14:42:13 2019 +0100 @@ -0,0 +1,29 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute 
it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# returns the inode number of given file +# not very useful – just a demo returning an integer attribute + +if [[ $# == 0 ]]; then + echo "1"; + echo "integer"; +elif [[ -f "$1" || -d "$1" ]]; then + ls -d -i "$1" | cut -d' ' -f1 | tr -d '\n'; +else + exit 40; +fi diff -r 532953173cd5 -r 9172bd97ae99 script-examples/__relpipe_in_filesystem_script_mime-type --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/script-examples/__relpipe_in_filesystem_script_mime-type Mon Nov 11 14:42:13 2019 +0100 @@ -0,0 +1,28 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ + +# returns the MIME type of given file + +if [[ $# == 0 ]]; then + echo "1"; + echo "string"; +elif [[ -f "$1" || -d "$1" ]]; then + file --preserve-date --brief --mime-type --dereference "$1" | tr -d '\n'; +else + exit 40; +fi diff -r 532953173cd5 -r 9172bd97ae99 script-examples/__relpipe_in_filesystem_script_pdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/script-examples/__relpipe_in_filesystem_script_pdf Mon Nov 11 14:42:13 2019 +0100 @@ -0,0 +1,46 @@ +#!/bin/bash + +# Relational pipes +# Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ + +# Quite dirty hack to get some information about given PDF file +# TODO: better field names, more stable API +# TODO: call a PDF library rather than parse output of a commandline tool with human readable output + +if [[ $# == 0 ]]; then + echo "1"; + if [[ "x$field" == "xPages" ]]; then echo "integer"; + elif [[ -z "${field+x}" ]]; then echo "boolean"; + else echo "string"; + fi +elif [[ -f "$1" || -d "$1" ]]; then + info="`pdfinfo -isodates "$1"`"; + valid=$?; + if [[ "x$field" == "xPages" ]]; then + if [[ $valid == 0 ]]; then + echo "$info" | grep "^$field:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n'; + else + printf 0; + # exit 40; # TODO: null + fi + elif [[ -z "${field+x}" ]]; then + if [[ $valid == 0 ]]; then printf "true"; else printf "false"; fi + else + echo "$info" | grep "^$field:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n'; + fi +else + exit 40; +fi diff -r 532953173cd5 -r 9172bd97ae99 script-examples/__relpipe_in_filesystem_script_xpath --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/script-examples/__relpipe_in_filesystem_script_xpath Mon Nov 11 14:42:13 2019 +0100 @@ -0,0 +1,51 @@ +#!/usr/bin/perl + +# Relational pipes +# Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +use strict; +use warnings; + +use XML::LibXML; # documentation: https://metacpan.org/pod/XML::LibXML + +if (@ARGV == 0) { + print "1\n"; + if ($ENV{type}) { print "$ENV{type}\n"; } else { print "string\n"; } +} else { + my $dom = XML::LibXML->new->parse_file($ARGV[0]); + my $xpath = XML::LibXML::XPathContext->new($dom); + + # You can add your favorite XML namespaces here: + # $xpath->registerNs('relpipe', 'tag:globalcode.info,2018:relpipe'); + # $xpath->registerNs('xhtml', 'http://www.w3.org/1999/xhtml'); + # $xpath->registerNs('svg', 'http://www.w3.org/2000/svg'); + # $xpath->registerNs('atom', 'http://www.w3.org/2005/Atom'); + # $xpath->registerNs('maven', 'http://maven.apache.org/POM/4.0.0'); + # + # Or set environmental variables: + # export xmlns_r='tag:globalcode.info,2018:relpipe' + + # Load XML namespaces from options: + # usage: --option 'env:xmlns_r' 'tag:globalcode.info,2018:relpipe' → r="tag:globalcode.info,2018:relpipe" + for my $name (keys %ENV) { + if ($name =~ /xmlns_(.*)/) { $xpath->registerNs($1, $ENV{$name}); } + } + + # Execute XPath and concatenate results (usually should be only one): + # usage: --option env:xpath '//r:name' + for my $value ($xpath->find($ENV{xpath})) { + print $value; + } +} diff -r 532953173cd5 -r 9172bd97ae99 src/CLIParser.h --- a/src/CLIParser.h Sun Nov 10 22:55:42 2019 +0100 +++ b/src/CLIParser.h Mon Nov 11 14:42:13 2019 +0100 @@ -52,6 +52,7 @@ static const string_t OPTION_FILE; static const string_t OPTION_XATTR; static const string_t OPTION_HASH; + static const string_t OPTION_SCRIPT; static const string_t OPTION_AS; static const string_t OPTION_OPTION; static const string_t OPTION_RELATION; @@ -68,7 +69,7 @@ for (int i = 0; i < arguments.size();) { string_t option = readNext(arguments, i); - if (option == CLIParser::OPTION_FILE || option == CLIParser::OPTION_XATTR || option == CLIParser::OPTION_HASH) { + if (option == CLIParser::OPTION_FILE || option == CLIParser::OPTION_XATTR || option == CLIParser::OPTION_HASH || 
option == CLIParser::OPTION_SCRIPT) { addField(c, currentGroup, currentName, currentAliases, currentOptions); // previous field currentGroup = option.substr(2); // cut off -- currentName = readNext(arguments, i); @@ -114,6 +115,7 @@ const string_t CLIParser::OPTION_FILE = L"--" + RequestedField::GROUP_FILE; const string_t CLIParser::OPTION_XATTR = L"--" + RequestedField::GROUP_XATTR; const string_t CLIParser::OPTION_HASH = L"--" + RequestedField::GROUP_HASH; +const string_t CLIParser::OPTION_SCRIPT = L"--" + RequestedField::GROUP_SCRIPT; const string_t CLIParser::OPTION_AS = L"--as"; const string_t CLIParser::OPTION_OPTION = L"--option"; const string_t CLIParser::OPTION_RELATION = L"--relation"; diff -r 532953173cd5 -r 9172bd97ae99 src/FilesystemCommand.h --- a/src/FilesystemCommand.h Sun Nov 10 22:55:42 2019 +0100 +++ b/src/FilesystemCommand.h Mon Nov 11 14:42:13 2019 +0100 @@ -38,6 +38,7 @@ #include "FileAttributeFinder.h" #include "XattrAttributeFinder.h" #include "HashAttributeFinder.h" +#include "ScriptAttributeFinder.h" namespace relpipe { namespace in { @@ -52,11 +53,13 @@ FileAttributeFinder fileAttributeFinder; HashAttributeFinder hashAttributeFinder; + ScriptAttributeFinder scriptAttributeFinder; XattrAttributeFinder xattrAttributeFinder; std::map attributeFinders{ {RequestedField::GROUP_FILE, &fileAttributeFinder}, {RequestedField::GROUP_HASH, &hashAttributeFinder}, + {RequestedField::GROUP_SCRIPT, &scriptAttributeFinder}, {RequestedField::GROUP_XATTR, &xattrAttributeFinder}}; void reset(std::stringstream& stream) { diff -r 532953173cd5 -r 9172bd97ae99 src/RequestedField.h --- a/src/RequestedField.h Sun Nov 10 22:55:42 2019 +0100 +++ b/src/RequestedField.h Mon Nov 11 14:42:13 2019 +0100 @@ -31,6 +31,7 @@ static const string_t GROUP_FILE; static const string_t GROUP_XATTR; static const string_t GROUP_HASH; + static const string_t GROUP_SCRIPT; string_t group; string_t name; std::vector aliases; @@ -58,6 +59,7 @@ const string_t RequestedField::GROUP_FILE = 
L"file"; const string_t RequestedField::GROUP_XATTR = L"xattr"; const string_t RequestedField::GROUP_HASH = L"hash"; +const string_t RequestedField::GROUP_SCRIPT = L"script"; } } diff -r 532953173cd5 -r 9172bd97ae99 src/ScriptAttributeFinder.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ScriptAttributeFinder.h Mon Nov 11 14:42:13 2019 +0100 @@ -0,0 +1,123 @@ +/** + * Relational pipes + * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "RequestedField.h" +#include "SystemProcess.h" +#include "AttributeFinder.h" + +namespace relpipe { +namespace in { +namespace filesystem { + +namespace fs = std::filesystem; +using namespace relpipe::writer; + +class ScriptAttributeFinder : public AttributeFinder { +private: + std::wstring_convert> convertor; // TODO: support also other encodings. 
+	/** Returns the name of the executable for the given field: SCRIPT_PREFIX followed by the field name converted to bytes. */
+	std::string getScriptCommand(const RequestedField& field) {
+		return SCRIPT_PREFIX + convertor.to_bytes(field.name);
+	}
+	/** Picks the "env:"-prefixed options from a flat name,value,name,value… list and returns them (prefix removed) as a flat name,value list of byte strings; an unpaired trailing name is ignored. */
+	std::vector<std::string> toEnvironmentalVariables(const std::vector<string_t>& vector) {
+		std::vector<std::string> result;
+		for (size_t i = 0; i + 1 < vector.size(); i += 2) { // step by whole pairs, so an odd-sized list can not be read out of bounds
+			const string_t& name = vector[i];
+			const string_t& value = vector[i + 1];
+			if (name.rfind(L"env:", 0) == 0) { // prefix test: search only at position 0 (was: rfind(L"env:" == 0), which was true for almost any name)
+				result.push_back(convertor.to_bytes(name.substr(4)));
+				result.push_back(convertor.to_bytes(value));
+			}
+		}
+		return result;
+	}
+	/** Runs the script without arguments and maps its two-line output (version, data type) to a TypeId; throws RelpipeWriterException on an unsupported version or type. */
+	TypeId getAttributeType(const RequestedField& field, const string_t& alias) {
+		// TODO: put latest supported version in the environmental variable
+		// TODO: put alias in the environmental variable
+		SystemProcess process({getScriptCommand(field)}, toEnvironmentalVariables(field.options));
+		std::string output = process.execute();
+		std::regex pattern("(.*)\\n(.*)\\n");
+		std::smatch match;
+		std::regex_match(output, match, pattern);
+		if (match.ready() && match[1] == "1") {
+			// TODO: move to a common library
+			if (match[2] == "boolean") return TypeId::BOOLEAN;
+			if (match[2] == "integer") return TypeId::INTEGER;
+			if (match[2] == "string") return TypeId::STRING;
+			throw RelpipeWriterException(L"Unsupported script data type – field: „" + field.name + L"“ type: „" + convertor.from_bytes(match[2]) + L"“");
+		} else {
+			throw RelpipeWriterException(L"Unsupported script version – field: „" + field.name + L"“ output: „" + convertor.from_bytes(output) + L"“");
+		}
+
+	}
+	/** Runs the script with currentFileRaw as its only argument and returns its output; if the process fails with RelpipeCLIException, an empty string is returned. */
+	string_t getScriptOutput(const fs::path& file, const RequestedField& field, const string_t& alias) {
+		try {
+			// TODO: put alias in the environmental variable
+			SystemProcess process({getScriptCommand(field), currentFileRaw}, toEnvironmentalVariables(field.options));
+			return convertor.from_bytes(process.execute());
+		} catch (relpipe::cli::RelpipeCLIException& e) {
+			// TODO: print warnings?
+ // TODO: do not fork/exec if the file is not readable + return L""; + } + } +protected: + + virtual void writeFieldOfExistingFile(RelationalWriter* writer, const RequestedField& field) override { + // TODO: paralelization? + if (field.group == RequestedField::GROUP_SCRIPT) { + for (string_t alias : field.getAliases()) { + writer->writeAttribute(getScriptOutput(currentFile, field, alias)); + } + } + } + +public: + + static const std::string SCRIPT_PREFIX; + + virtual vector toMetadata(const RequestedField& field) override { + if (field.group == RequestedField::GROUP_SCRIPT) { + vector metadata; + for (string_t alias : field.getAliases()) metadata.push_back(AttributeMetadata{alias, getAttributeType(field, alias)}); + return metadata; + } else { + return {}; + } + } + + virtual ~ScriptAttributeFinder() override { + } +}; + +const std::string ScriptAttributeFinder::SCRIPT_PREFIX = "__relpipe_in_filesystem_script_"; + +} +} +} diff -r 532953173cd5 -r 9172bd97ae99 src/SystemProcess.h --- a/src/SystemProcess.h Sun Nov 10 22:55:42 2019 +0100 +++ b/src/SystemProcess.h Mon Nov 11 14:42:13 2019 +0100 @@ -39,6 +39,7 @@ * the command + its arguments */ std::vector commandLine; + std::vector environment; int nullFile = -1; /** @@ -87,7 +88,7 @@ public: - SystemProcess(std::vector commandLine) : commandLine(commandLine) { + SystemProcess(const std::vector& commandLine, const std::vector& environment = {}) : commandLine(commandLine), environment(environment) { nullFile = open("/dev/null", O_RDWR); } @@ -99,10 +100,12 @@ std::stringstream result; + // FIXME: different kinds of exception or return the exit code (now it enters infinite loop if the execp() fails) + // TODO: rename (not specific to hash) int hashReaderFD; int hashWriterFD; createPipe(hashReaderFD, hashWriterFD); - + __pid_t hashPid = fork(); if (hashPid < 0) { @@ -113,6 +116,11 @@ redirectFD(nullFile, STDIN_FILENO); redirectFD(nullFile, STDERR_FILENO); redirectFD(hashWriterFD, STDOUT_FILENO); + for (int i = 0; i < 
environment.size();) { + std::string name = environment[i++]; + std::string value = environment[i++]; + setenv(name.c_str(), value.c_str(), true); + } execp(commandLine); } else { // Parent process @@ -120,9 +128,9 @@ __gnu_cxx::stdio_filebuf hashReaderBuffer(hashReaderFD, std::ios::in); std::istream hashReader(&hashReaderBuffer); - + for (char ch; hashReader.read(&ch, 1).good();) result.put(ch); - + int waitError; __pid_t waitPID = wait(&waitError); if (waitError) throw relpipe::cli::RelpipeCLIException(L"The child process returned an error exit code.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?