custom scripts for additional attributes v_0
author František Kučera <franta-hg@frantovo.cz>
Mon, 11 Nov 2019 14:42:13 +0100
branch v_0
changeset 28 9172bd97ae99
parent 27 532953173cd5
child 29 6f15f18d2abf
custom scripts for additional attributes
bash-completion.sh
nbproject/configurations.xml
script-examples/__relpipe_in_filesystem_script_inode
script-examples/__relpipe_in_filesystem_script_mime-type
script-examples/__relpipe_in_filesystem_script_pdf
script-examples/__relpipe_in_filesystem_script_xpath
src/CLIParser.h
src/FilesystemCommand.h
src/RequestedField.h
src/ScriptAttributeFinder.h
src/SystemProcess.h
--- a/bash-completion.sh	Sun Nov 10 22:55:42 2019 +0100
+++ b/bash-completion.sh	Mon Nov 11 14:42:13 2019 +0100
@@ -13,6 +13,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
+_relpipe_in_filesystem_scripts() { # prints (one per line) the commands starting with the script prefix, with that prefix cut off
+	local c; while IFS= read -r c; do printf '%s\n' "${c#__relpipe_in_filesystem_script_}"; done < <(compgen -c "__relpipe_in_filesystem_script_");
+}
+
 _relpipe_in_filesystem_completion() {
 	local w0 w1 w2
 
@@ -65,12 +69,14 @@
 	elif [[ "$w1" == "--file"                           ]];    then COMPREPLY=($(compgen -W "${FILE_FIELDS[*]}"  -- "$w0"))
 	elif [[ "$w1" == "--xattr"                          ]];    then COMPREPLY=($(compgen -W "${XATTR_FIELDS[*]}" -- "$w0"))
 	elif [[ "$w1" == "--hash"                           ]];    then COMPREPLY=($(compgen -W "${HASH_FIELDS[*]}" -- "$w0"))
+	elif [[ "$w1" == "--script"                         ]];    then COMPREPLY=($(compgen -W "$(_relpipe_in_filesystem_scripts)" -- "$w0"))
 	else
 		OPTIONS=(
 			"--relation"
 			"--file"
 			"--xattr"
 			"--hash"
+			"--script"
 			"--as"
 			"--option"
 		)
--- a/nbproject/configurations.xml	Sun Nov 10 22:55:42 2019 +0100
+++ b/nbproject/configurations.xml	Mon Nov 11 14:42:13 2019 +0100
@@ -48,6 +48,7 @@
         <in>FileAttributeFinder.h</in>
         <in>HashAttributeFinder.h</in>
         <in>RequestedField.h</in>
+        <in>ScriptAttributeFinder.h</in>
         <in>SystemProcess.h</in>
         <in>XattrAttributeFinder.h</in>
         <in>relpipe-in-filesystem.cpp</in>
@@ -111,6 +112,8 @@
       </item>
       <item path="src/RequestedField.h" ex="false" tool="3" flavor2="0">
       </item>
+      <item path="src/ScriptAttributeFinder.h" ex="false" tool="3" flavor2="0">
+      </item>
       <item path="src/SystemProcess.h" ex="false" tool="3" flavor2="0">
       </item>
       <item path="src/XattrAttributeFinder.h" ex="false" tool="3" flavor2="0">
@@ -166,6 +169,8 @@
       </item>
       <item path="src/RequestedField.h" ex="false" tool="3" flavor2="0">
       </item>
+      <item path="src/ScriptAttributeFinder.h" ex="false" tool="3" flavor2="0">
+      </item>
       <item path="src/SystemProcess.h" ex="false" tool="3" flavor2="0">
       </item>
       <item path="src/XattrAttributeFinder.h" ex="false" tool="3" flavor2="0">
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script-examples/__relpipe_in_filesystem_script_inode	Mon Nov 11 14:42:13 2019 +0100
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# Prints the inode number of the given file (without a trailing newline).
+# Not very useful – just a demo returning an integer attribute.
+# Without arguments: prints the version (1) and the attribute type; exit code 40 = the argument is neither a regular file nor a directory.
+if [[ $# == 0 ]]; then
+	echo "1";
+	echo "integer";
+elif [[ -f "$1" || -d "$1" ]]; then
+	ls -d -i -- "$1" | cut -d' ' -f1 | tr -d '\n'; # „--“ = end of options, so a file name starting with „-“ is not parsed as an option
+else
+	exit 40;
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script-examples/__relpipe_in_filesystem_script_mime-type	Mon Nov 11 14:42:13 2019 +0100
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# Prints the MIME type of the given file (without a trailing newline); without arguments prints the version (1) and the attribute type.
+# Exit code 40 = the argument is neither a regular file nor a directory.
+if [[ $# == 0 ]]; then
+	echo "1";
+	echo "string";
+elif [[ -f "$1" || -d "$1" ]]; then
+	file --preserve-date --brief --mime-type --dereference -- "$1" | tr -d '\n'; # „--“ = end of options, so a file name starting with „-“ is not parsed as an option
+else
+	exit 40;
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script-examples/__relpipe_in_filesystem_script_pdf	Mon Nov 11 14:42:13 2019 +0100
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Relational pipes
+# Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# Quite dirty hack: extracts a piece of information about the given PDF file from the output of pdfinfo.
+# The „field“ environment variable selects the pdfinfo line to print; if it is unset, the result says whether pdfinfo succeeded.
+# TODO: better field names, more stable API
+# TODO: call a PDF library rather than parse output of a commandline tool with human readable output
+if (( $# == 0 )); then
+	echo "1";
+	if [[ "$field" == "Pages" ]]; then echo "integer";
+	elif [[ -z "${field+x}" ]]; then echo "boolean";
+	else echo "string";
+	fi
+elif [[ -f "$1" || -d "$1" ]]; then
+	info="$(pdfinfo -isodates "$1")";
+	valid=$?;
+	if [[ "$field" == "Pages" ]]; then
+		if (( valid == 0 )); then
+			echo "$info" | grep "^$field:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';
+		else
+			printf 0;
+			# exit 40; # TODO: null
+		fi
+	elif [[ -z "${field+x}" ]]; then
+		if (( valid == 0 )); then printf "true"; else printf "false"; fi
+	else
+		echo "$info" | grep "^$field:" | sed -E 's/[^:]+:\s+(.*)/\1/g' | tr -d '\n';
+	fi
+else
+	exit 40;
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script-examples/__relpipe_in_filesystem_script_xpath	Mon Nov 11 14:42:13 2019 +0100
@@ -0,0 +1,51 @@
+#!/usr/bin/perl
+
+# Relational pipes
+# Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use strict;
+use warnings;
+
+use XML::LibXML; # documentation: https://metacpan.org/pod/XML::LibXML
+
+if (@ARGV == 0) { # no arguments: print the version and the data type (taken from the „type“ variable, „string“ by default)
+	print "1\n";
+	if ($ENV{type}) { print "$ENV{type}\n"; } else { print "string\n"; }
+} else {
+	my $dom = XML::LibXML->new->parse_file($ARGV[0]);
+	my $xpath = XML::LibXML::XPathContext->new($dom);
+	
+	# You can add your favorite XML namespaces here:
+	# $xpath->registerNs('relpipe',     'tag:globalcode.info,2018:relpipe');
+	# $xpath->registerNs('xhtml',       'http://www.w3.org/1999/xhtml');
+	# $xpath->registerNs('svg',         'http://www.w3.org/2000/svg');
+	# $xpath->registerNs('atom',        'http://www.w3.org/2005/Atom');
+	# $xpath->registerNs('maven',       'http://maven.apache.org/POM/4.0.0');
+	#
+	# Or set environmental variables:
+	# export xmlns_r='tag:globalcode.info,2018:relpipe'
+
+	# Load XML namespaces from options (only variables whose name starts with „xmlns_“ followed by a non-empty prefix):
+	# usage: --option 'env:xmlns_r' 'tag:globalcode.info,2018:relpipe' → r="tag:globalcode.info,2018:relpipe"
+	for my $name (keys %ENV) {
+		if ($name =~ /^xmlns_(.+)$/) { $xpath->registerNs($1, $ENV{$name}); } # anchored: a name that merely contains „xmlns_“ is not a namespace declaration
+	}
+
+	# Execute XPath and concatenate results (usually should be only one):
+	# usage: --option env:xpath '//r:name'
+	for my $value ($xpath->find($ENV{xpath})) {
+		print $value;
+	}
+}
--- a/src/CLIParser.h	Sun Nov 10 22:55:42 2019 +0100
+++ b/src/CLIParser.h	Mon Nov 11 14:42:13 2019 +0100
@@ -52,6 +52,7 @@
 	static const string_t OPTION_FILE;
 	static const string_t OPTION_XATTR;
 	static const string_t OPTION_HASH;
+	static const string_t OPTION_SCRIPT;
 	static const string_t OPTION_AS;
 	static const string_t OPTION_OPTION;
 	static const string_t OPTION_RELATION;
@@ -68,7 +69,7 @@
 			for (int i = 0; i < arguments.size();) {
 				string_t option = readNext(arguments, i);
 
-				if (option == CLIParser::OPTION_FILE || option == CLIParser::OPTION_XATTR || option == CLIParser::OPTION_HASH) {
+				if (option == CLIParser::OPTION_FILE || option == CLIParser::OPTION_XATTR || option == CLIParser::OPTION_HASH || option == CLIParser::OPTION_SCRIPT) {
 					addField(c, currentGroup, currentName, currentAliases, currentOptions); // previous field
 					currentGroup = option.substr(2); // cut off --
 					currentName = readNext(arguments, i);
@@ -114,6 +115,7 @@
 const string_t CLIParser::OPTION_FILE = L"--" + RequestedField::GROUP_FILE;
 const string_t CLIParser::OPTION_XATTR = L"--" + RequestedField::GROUP_XATTR;
 const string_t CLIParser::OPTION_HASH = L"--" + RequestedField::GROUP_HASH;
+const string_t CLIParser::OPTION_SCRIPT = L"--" + RequestedField::GROUP_SCRIPT;
 const string_t CLIParser::OPTION_AS = L"--as";
 const string_t CLIParser::OPTION_OPTION = L"--option";
 const string_t CLIParser::OPTION_RELATION = L"--relation";
--- a/src/FilesystemCommand.h	Sun Nov 10 22:55:42 2019 +0100
+++ b/src/FilesystemCommand.h	Mon Nov 11 14:42:13 2019 +0100
@@ -38,6 +38,7 @@
 #include "FileAttributeFinder.h"
 #include "XattrAttributeFinder.h"
 #include "HashAttributeFinder.h"
+#include "ScriptAttributeFinder.h"
 
 namespace relpipe {
 namespace in {
@@ -52,11 +53,13 @@
 
 	FileAttributeFinder fileAttributeFinder;
 	HashAttributeFinder hashAttributeFinder;
+	ScriptAttributeFinder scriptAttributeFinder;
 	XattrAttributeFinder xattrAttributeFinder;
 
 	std::map<string_t, AttributeFinder*> attributeFinders{
 		{RequestedField::GROUP_FILE, &fileAttributeFinder},
 		{RequestedField::GROUP_HASH, &hashAttributeFinder},
+		{RequestedField::GROUP_SCRIPT, &scriptAttributeFinder},
 		{RequestedField::GROUP_XATTR, &xattrAttributeFinder}};
 
 	void reset(std::stringstream& stream) {
--- a/src/RequestedField.h	Sun Nov 10 22:55:42 2019 +0100
+++ b/src/RequestedField.h	Mon Nov 11 14:42:13 2019 +0100
@@ -31,6 +31,7 @@
 	static const string_t GROUP_FILE;
 	static const string_t GROUP_XATTR;
 	static const string_t GROUP_HASH;
+	static const string_t GROUP_SCRIPT;
 	string_t group;
 	string_t name;
 	std::vector<string_t> aliases;
@@ -58,6 +59,7 @@
 const string_t RequestedField::GROUP_FILE = L"file";
 const string_t RequestedField::GROUP_XATTR = L"xattr";
 const string_t RequestedField::GROUP_HASH = L"hash";
+const string_t RequestedField::GROUP_SCRIPT = L"script";
 
 }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ScriptAttributeFinder.h	Mon Nov 11 14:42:13 2019 +0100
@@ -0,0 +1,123 @@
+/**
+ * Relational pipes
+ * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <vector>
+#include <filesystem>
+
+#include <relpipe/writer/typedefs.h>
+#include <relpipe/writer/AttributeMetadata.h>
+#include <relpipe/writer/RelationalWriter.h>
+#include <regex>
+
+#include "RequestedField.h"
+#include "SystemProcess.h"
+#include "AttributeFinder.h"
+
+namespace relpipe {
+namespace in {
+namespace filesystem {
+
+namespace fs = std::filesystem;
+using namespace relpipe::writer;
+
+/** Finds attributes of the „script“ group by executing external commands named SCRIPT_PREFIX + field name. */
+class ScriptAttributeFinder : public AttributeFinder {
+private:
+	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings.
+	/** @return the command to be executed for given field: SCRIPT_PREFIX followed by the field name converted to bytes */
+	std::string getScriptCommand(const RequestedField& field) {
+		return SCRIPT_PREFIX + convertor.to_bytes(field.name);
+	}
+	/** @return flat list (name, value, name, value…) built from the options whose name starts with „env:“ (prefix removed); a trailing name without a value is ignored */
+	std::vector<std::string> toEnvironmentalVariables(const std::vector<string_t>& vector) {
+		std::vector<std::string> result;
+		for (std::size_t i = 0; i + 1 < vector.size(); i += 2) {
+			const string_t& name = vector[i];
+			const string_t& value = vector[i + 1];
+			if (name.rfind(L"env:", 0) == 0) { // rfind(prefix, 0) == 0 means: the name starts with the prefix
+				result.push_back(convertor.to_bytes(name.substr(4)));
+				result.push_back(convertor.to_bytes(value));
+			}
+		}
+		return result;
+	}
+	/** Executes the script without arguments and parses its two output lines: version (must be „1“) and data type. @throws RelpipeWriterException if the version or the type is not supported */
+	TypeId getAttributeType(const RequestedField& field, const string_t& alias) {
+		// TODO: put latest supported version in the environmental variable
+		// TODO: put alias in the environmental variable
+		SystemProcess process({getScriptCommand(field)}, toEnvironmentalVariables(field.options));
+		const std::string output = process.execute();
+		const std::regex pattern("(.*)\\n(.*)\\n");
+		std::smatch match;
+		const bool matched = std::regex_match(output, match, pattern); // ready() is true even after a failed match, so the result is checked instead
+		if (matched && match[1] == "1") {
+			// TODO: move to a common library
+			if (match[2] == "boolean") return TypeId::BOOLEAN;
+			if (match[2] == "integer") return TypeId::INTEGER;
+			if (match[2] == "string") return TypeId::STRING;
+			throw RelpipeWriterException(L"Unsupported script data type – field: „" + field.name + L"“ type: „" + convertor.from_bytes(match[2]) + L"“");
+		} else {
+			throw RelpipeWriterException(L"Unsupported script version – field: „" + field.name + L"“ output: „" + convertor.from_bytes(output) + L"“");
+		}
+	}
+	/** @return output of the script executed with currentFileRaw as its argument, or an empty string if a RelpipeCLIException was thrown. NOTE(review): the „file“ parameter is not used here */
+	string_t getScriptOutput(const fs::path& file, const RequestedField& field, const string_t& alias) {
+		try {
+			// TODO: put alias in the environmental variable
+			SystemProcess process({getScriptCommand(field), currentFileRaw}, toEnvironmentalVariables(field.options));
+			return convertor.from_bytes(process.execute());
+		} catch (const relpipe::cli::RelpipeCLIException& e) {
+			// TODO: print warnings?
+			// TODO: do not fork/exec if the file is not readable
+			return L"";
+		}
+	}
+protected:
+	/** Writes one attribute (the script output) per alias of the field; fields of other groups are ignored. */
+	virtual void writeFieldOfExistingFile(RelationalWriter* writer, const RequestedField& field) override {
+		// TODO: paralelization?
+		if (field.group == RequestedField::GROUP_SCRIPT) {
+			for (const string_t& alias : field.getAliases()) {
+				writer->writeAttribute(getScriptOutput(currentFile, field, alias));
+			}
+		}
+	}
+
+public:
+	/** Common prefix of the script command names; the field name is appended to it. */
+	static const std::string SCRIPT_PREFIX;
+	/** @return metadata (alias + type reported by the script) for each alias of the field; empty if the field does not belong to the script group */
+	virtual vector<AttributeMetadata> toMetadata(const RequestedField& field) override {
+		if (field.group == RequestedField::GROUP_SCRIPT) {
+			vector<AttributeMetadata> metadata;
+			for (const string_t& alias : field.getAliases()) metadata.push_back(AttributeMetadata{alias, getAttributeType(field, alias)});
+			return metadata;
+		} else {
+			return {};
+		}
+	}
+
+	virtual ~ScriptAttributeFinder() override {
+	}
+};
+
+const std::string ScriptAttributeFinder::SCRIPT_PREFIX = "__relpipe_in_filesystem_script_";
+
+}
+}
+}
--- a/src/SystemProcess.h	Sun Nov 10 22:55:42 2019 +0100
+++ b/src/SystemProcess.h	Mon Nov 11 14:42:13 2019 +0100
@@ -39,6 +39,7 @@
 	 * the command + its arguments
 	 */
 	std::vector<std::string> commandLine;
+	std::vector<std::string> environment;
 	int nullFile = -1;
 
 	/**
@@ -87,7 +88,7 @@
 
 public:
 
-	SystemProcess(std::vector<std::string> commandLine) : commandLine(commandLine) {
+	SystemProcess(const std::vector<std::string>& commandLine, const std::vector<std::string>& environment = {}) : commandLine(commandLine), environment(environment) {
 		nullFile = open("/dev/null", O_RDWR);
 	}
 
@@ -99,10 +100,12 @@
 
 		std::stringstream result;
 
+		// FIXME: different kinds of exception or return the exit code (now it enters infinite loop if the execp() fails)
+		// TODO: rename (not specific to hash)
 		int hashReaderFD;
 		int hashWriterFD;
 		createPipe(hashReaderFD, hashWriterFD);
-		
+
 		__pid_t hashPid = fork();
 
 		if (hashPid < 0) {
@@ -113,6 +116,11 @@
 			redirectFD(nullFile, STDIN_FILENO);
 			redirectFD(nullFile, STDERR_FILENO);
 			redirectFD(hashWriterFD, STDOUT_FILENO);
+			for (int i = 0; i < environment.size();) {
+				std::string name = environment[i++];
+				std::string value = environment[i++];
+				setenv(name.c_str(), value.c_str(), true);
+			}
 			execp(commandLine);
 		} else {
 			// Parent process
@@ -120,9 +128,9 @@
 
 			__gnu_cxx::stdio_filebuf<char> hashReaderBuffer(hashReaderFD, std::ios::in);
 			std::istream hashReader(&hashReaderBuffer);
-			
+
 			for (char ch; hashReader.read(&ch, 1).good();) result.put(ch);
-			
+
 			int waitError;
 			__pid_t waitPID = wait(&waitError);
 			if (waitError) throw relpipe::cli::RelpipeCLIException(L"The child process returned an error exit code.", relpipe::cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exception?