src/AwkHandler.h
author František Kučera <franta-hg@frantovo.cz>
Sat, 11 May 2019 22:55:21 +0200
branchv_0
changeset 24 c805c968b7ed
parent 23 bc20d499a458
child 25 13a1e1134797
permissions -rw-r--r--
escape reserved AWK and our names: prefix conflicting variables with _
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
 * the Free Software Foundation, either version 3 of the License, or
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 * (at your option) any later version.
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 *
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * This program is distributed in the hope that it will be useful,
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 * GNU General Public License for more details.
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 *
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * You should have received a copy of the GNU General Public License
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
 */
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
#pragma once
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
    20
#include<functional>
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <memory>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <string>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <vector>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
#include <iostream>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <sstream>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <locale>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
#include <codecvt>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <regex>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    30
#include <unistd.h>
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    31
#include <wait.h>
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    32
#include <ext/stdio_filebuf.h>
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    33
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
#include <relpipe/reader/typedefs.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
#include <relpipe/reader/TypeId.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
#include <relpipe/reader/handlers/AttributeMetadata.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    39
#include <relpipe/writer/Factory.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    40
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    41
#include <relpipe/cli/RelpipeCLIException.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    42
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    43
#include "Configuration.h"
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    44
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    45
namespace relpipe {
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    46
namespace tr {
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    47
namespace awk {
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    48
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    49
using namespace std;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    50
using namespace relpipe;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    51
using namespace relpipe::reader;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    52
using namespace relpipe::reader::handlers;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    53
5
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    54
/**
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    55
 * This transformation consists of three processes connected together using pipes.
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    56
 * 
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    57
 * Processes:
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    58
 *  - Parent: processes RelationalReaderStringHandler events (reads relational data, usually from STDIN) and passes them in the special text format to the pipe1
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    59
 *  - AWK: external program (/usr/bin/awk), reads from the pipe1, writes to the pipe2
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    60
 *  - Writer: reads from the pipe2 and writes to relationalWriter (relational data, usually to STDOUT)
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    61
 */
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    62
class AwkHandler : public RelationalReaderStringHandler {
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    63
private:
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    64
	Configuration configuration;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    65
	writer::RelationalWriter* relationalWriter;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
    66
	std::function<void() > relationalWriterFlush;
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
    67
	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    68
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    69
	int awkInputWriterFD = -1;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
    70
	RelationConfiguration* currentRelationConfiguration = nullptr;
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
    71
	std::vector<AttributeMetadata> currentReaderMetadata;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
    72
	vector<writer::AttributeMetadata> currentWriterMetadata;
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
    73
	integer_t currentAttributeIndex = 0;
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    74
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    75
	void createPipe(int& readerFD, int& writerFD) {
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    76
		int fds[2];
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    77
		int result = pipe(fds);
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    78
		readerFD = fds[0];
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    79
		writerFD = fds[1];
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    80
		if (result < 0) throw cli::RelpipeCLIException(L"Unable to create a pipe.", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    81
	}
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    82
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    83
	void redirectFD(int oldfd, int newfd) {
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    84
		int result = dup2(oldfd, newfd);
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    85
		if (result < 0) throw cli::RelpipeCLIException(L"Unable redirect FD.", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    86
	}
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    87
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    88
	void closeOrThrow(int fd) {
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    89
		int error = close(fd);
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    90
		if (error) throw cli::RelpipeCLIException(L"Unable to close FD: " + to_wstring(fd) + L" from PID: " + to_wstring(getpid()), cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    91
	}
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    92
11
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    93
	void execp(const std::vector<std::string>& args) {
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    94
		const char** a = new const char*[args.size() + 1];
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    95
		for (size_t i = 0; i < args.size(); i++) a[i] = args[i].c_str();
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    96
		a[args.size()] = nullptr;
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    97
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    98
		execvp(a[0], (char*const*) a);
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    99
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   100
		delete[] a;
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   101
		throw cli::RelpipeCLIException(L"Unable to do execvp().", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   102
	}
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   103
18
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   104
	/* TODO: move to lib-cli when stable and used in other modules */
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   105
	void setEnv(const char * name, const string_t& value) {
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   106
		setenv(name, convertor.to_bytes(value).c_str(), true);
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   107
	}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   108
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   109
	/* TODO: move to lib-cli when stable and used in other modules */
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   110
	/**
	 * Sets an environment variable of the current process from a byte string.
	 * The overwrite flag is set, so an already existing variable is replaced.
	 * The value is only read, hence it is accepted by const reference (const and temporary strings are allowed too).
	 *
	 * @param name name of the environment variable
	 * @param value new value of the variable
	 */
	void setEnv(const char * name, const std::string& value) {
		setenv(name, value.c_str(), true);
	}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   113
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   114
	/* TODO: move to lib-cli when stable and used in other modules */
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   115
	void setEnv(const char * name, const integer_t& value) {
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   116
		setenv(name, to_string(value).c_str(), true);
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   117
	}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   118
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   119
	void addDefinition(std::vector<std::string>& awkCliArgs, DefinitionRecipe& d) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   120
		awkCliArgs.push_back("-v");
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   121
		awkCliArgs.push_back(convertor.to_bytes(a2v(d.name) + L"=" + d.value));
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   122
	}
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   123
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   124
	/**
	 * Appends writer-side metadata for each of the given reader-side attributes.
	 * The attribute name is taken as is; the type name is translated using relationalWriter->toTypeId().
	 *
	 * @param readerAttributes attributes to be converted
	 * @param writerAttributes the list to which the converted attributes are appended (existing elements are kept)
	 */
	void add(vector<AttributeMetadata>& readerAttributes, vector<writer::AttributeMetadata>& writerAttributes) {
		// Iterate by reference under a distinct name: no per-element copy and no shadowing of the parameter.
		for (AttributeMetadata& readerAttribute : readerAttributes) {
			writerAttributes.push_back({
				readerAttribute.getAttributeName(),
				relationalWriter->toTypeId(readerAttribute.getTypeName())
			});
		}
	}
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   131
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   132
	void cleanUp() {
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   133
		if (awkInputWriterFD >= 0) {
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   134
			closeOrThrow(awkInputWriterFD);
22
98acfdc4c20b some TODO → FIXME
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   135
			// FIXME: check exit codes
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   136
			__pid_t waitResult1 = wait(NULL);
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   137
			__pid_t waitResult2 = wait(NULL);
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   138
			awkInputWriterFD = -1;
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   139
		}
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   140
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   141
		currentAttributeIndex = 0;
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   142
		currentReaderMetadata.clear();
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   143
		currentWriterMetadata.clear();
19
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   144
		currentRelationConfiguration = nullptr;
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   145
	}
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   146
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   147
	/**
	 * Converts an attribute name to a variable name that conflicts neither with the listed AWK reserved names
	 * nor with the names used by this transformation.
	 * A conflicting name is prefixed with _ repeatedly until it matches none of the lists.
	 *
	 * @param attributeName name to be checked
	 * @return the original name if there is no conflict, otherwise the name with one or more leading underscores
	 */
	string_t a2v(const string_t& attributeName) {
		// cat awkgram.y | awk -v FS='\\{"|",' -v ORS='|' '/static const struct token tokentab/, /\};/ { if (/^\{/) { print $2} }'
		// The patterns are constant, so they are compiled only once instead of on every call.
		static const std::wregex awkReservedNames(L"BEGIN|BEGINFILE|END|ENDFILE|adump|and|asort|asorti|atan2|bindtextdomain|break|case|close|compl|continue|cos|dcgettext|dcngettext|default|delete|do|else|eval|exit|exp|fflush|for|func|function|gensub|getline|gsub|if|in|include|index|int|intdiv0|isarray|length|load|log|lshift|match|mktime|namespace|next|nextfile|or|patsplit|print|printf|rand|return|rshift|sin|split|sprintf|sqrt|srand|stopme|strftime|strtonum|sub|substr|switch|system|systime|tolower|toupper|typeof|while|xor");
		static const std::wregex trReservedNames(L"_escape|_unescape|_readVariables|_writeVariables|record");

		string_t variableName = attributeName;
		// The prefixed name might conflict again (e.g. with our own _escape), hence the loop.
		while (regex_match(variableName, awkReservedNames) || regex_match(variableName, trReservedNames)) {
			variableName = L"_" + variableName;
		}
		return variableName;
	}
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   154
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   155
	/**
	 * Escapes a value so that it contains no raw tabulator or line end.
	 * Tabulator becomes \t, line end becomes \n and backslash becomes \\ (two characters each);
	 * all other characters are copied unchanged.
	 *
	 * @param value raw value
	 * @return escaped value
	 */
	string_t escapeAwkValue(const string_t& value) {
		std::wstring result;
		for (wchar_t ch : value) {
			switch (ch) {
				case L'\t':
					result += L"\\t";
					break;
				case L'\n':
					result += L"\\n";
					break;
				case L'\\':
					result += L"\\\\";
					break;
				default:
					result += ch;
					break;
			}
		}
		return result;
	}
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   165
20
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   166
	void processAwkOutput(int awkOutputReaderFD) {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   167
		locale::global(locale("")); // needed for processing unicode texts, otherwise getline() stopped working on first line with non-ascii characters; TODO: move somewhere else?
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   168
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   169
		__gnu_cxx::stdio_filebuf<wchar_t> awkOutputReaderBuffer(awkOutputReaderFD, std::ios::in);
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   170
		std::wistream awkOutputReader(&awkOutputReaderBuffer);
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   171
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   172
		if (currentRelationConfiguration->drop) {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   173
			for (wchar_t ch; awkOutputReader.read(&ch, 1).good();); // just eat the lines from the AWK
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   174
		} else {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   175
			std::wstringstream currentValue;
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   176
			for (wchar_t ch; awkOutputReader.read(&ch, 1).good();) {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   177
				if (ch == '\t' || ch == '\n') {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   178
					relationalWriter->writeAttribute(currentValue.str());
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   179
					currentValue.str(L"");
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   180
					currentValue.clear();
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   181
				} else if (ch == '\\') {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   182
					ch = awkOutputReader.get();
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   183
					if (ch == 't') currentValue << L'\t';
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   184
					else if (ch == 'n') currentValue << L'\n';
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   185
					else if (ch == '\\') currentValue << L'\\';
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   186
					else throw cli::RelpipeCLIException(L"Unknown escape sequence. Only \\t, \\n and \\\\ are supported.", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   187
				} else {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   188
					currentValue << ch;
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   189
				}
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   190
			}
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   191
		}
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   192
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   193
		closeOrThrow(awkOutputReaderFD);
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   194
	}
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   195
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   196
public:
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   197
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   198
	/**
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   199
	 * @param relationalWriter
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   200
	 * @param relationalWriterFlush the writer must be flushed before fork() in order to 
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   201
	 * avoid duplicate output (otherwise single relation might be written from two processes); 
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   202
	 * This is a little hack – if it stops working, we should reconnect the pipes 
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   203
	 * and use the writer only from a single process and avoid its effective duplication,
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   204
	 * or use different writers for each relation (or process).
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   205
	 * @param configuration
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   206
	 */
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   207
	AwkHandler(writer::RelationalWriter* relationalWriter, std::function<void() > relationalWriterFlush, Configuration& configuration) : relationalWriter(relationalWriter), relationalWriterFlush(relationalWriterFlush), configuration(configuration) {
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   208
	}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   209
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   210
	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   211
		cleanUp();
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   212
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   213
		for (int i = 0; i < configuration.relationConfigurations.size(); i++) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   214
			if (regex_match(name, wregex(configuration.relationConfigurations[i].relation))) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   215
				currentRelationConfiguration = &configuration.relationConfigurations[i];
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   216
				break; // it there are multiple matches, only the first configuration is used
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   217
			}
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   218
		}
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   219
21
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   220
		currentReaderMetadata = attributes;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   221
		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)		
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   222
		if (currentRelationConfiguration && currentRelationConfiguration->writerMetadata.size()) {
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   223
			if (currentRelationConfiguration->inputAttributesPrepend) add(currentReaderMetadata, currentWriterMetadata);
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   224
			currentWriterMetadata.insert(currentWriterMetadata.end(), currentRelationConfiguration->writerMetadata.begin(), currentRelationConfiguration->writerMetadata.end());
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   225
			if (currentRelationConfiguration->inputAttributesAppend) add(currentReaderMetadata, currentWriterMetadata);
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   226
		} else {
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   227
			add(currentReaderMetadata, currentWriterMetadata);
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   228
		}
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   229
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   230
		if (currentRelationConfiguration) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   231
			int awkInputReaderFD;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   232
			int awkOutputReaderFD;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   233
			int awkOutputWriterFD;
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   234
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   235
			createPipe(awkInputReaderFD, awkInputWriterFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   236
			createPipe(awkOutputReaderFD, awkOutputWriterFD);
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   237
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   238
			relationalWriterFlush();
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   239
			__pid_t awkPid = fork();
11
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   240
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   241
			if (awkPid < 0) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   242
				throw cli::RelpipeCLIException(L"Unable to fork AWK process.", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   243
			} else if (awkPid == 0) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   244
				// AWK child process
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   245
				closeOrThrow(awkInputWriterFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   246
				closeOrThrow(awkOutputReaderFD);
12
8844ebce8fb4 pass defined global variables to AWK as CLI arguments
František Kučera <franta-hg@frantovo.cz>
parents: 11
diff changeset
   247
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   248
				redirectFD(awkInputReaderFD, STDIN_FILENO);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   249
				redirectFD(awkOutputWriterFD, STDOUT_FILENO);
11
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   250
18
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   251
				// Environment variables:
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   252
				setEnv("relationName", name);
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   253
				setEnv("inputAttributeCount", currentReaderMetadata.size());
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   254
				setEnv("outputAttributeCount", currentWriterMetadata.size());
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   255
				// TODO: better names? ENV vs. AWK variables?
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   256
				for (int i = 0; i < currentReaderMetadata.size(); i++) {
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   257
					setEnv((std::string("inputAttributeName") + std::to_string(i)).c_str(), currentReaderMetadata[i].getAttributeName());
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   258
					setEnv("inputAttributeType" + i, currentReaderMetadata[i].getTypeName());
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   259
				}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   260
				for (int i = 0; i < currentWriterMetadata.size(); i++) {
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   261
					setEnv("outputAttributeName" + i, currentWriterMetadata[i].attributeName);
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   262
					// setEnv("outputAttributeType" + i, currentWriterMetadata[i].typeId); // TODO: type?
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   263
				}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   264
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   265
				// AWK script:
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   266
				std::wstringstream awkScript;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   267
				awkScript << L"BEGIN {" << std::endl;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   268
				awkScript << L"FS=\"\\t\";" << std::endl;
18
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   269
				awkScript << L"OFS=\"\\t\";" << std::endl;
15
ba91a464d2b3 implement --for-each, --before-records, --after-records
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   270
				awkScript << currentRelationConfiguration->awkBeforeRecords << std::endl;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   271
				awkScript << L"};" << std::endl;
15
ba91a464d2b3 implement --for-each, --before-records, --after-records
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   272
				awkScript << std::endl;
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   273
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   274
				awkScript << L"END {" << std::endl;
15
ba91a464d2b3 implement --for-each, --before-records, --after-records
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   275
				awkScript << currentRelationConfiguration->awkAfterRecords << std::endl;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   276
				awkScript << L"};" << std::endl;
15
ba91a464d2b3 implement --for-each, --before-records, --after-records
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   277
				awkScript << std::endl;
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   278
21
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   279
				awkScript << L"function _escape(value) {" << std::endl;
22
98acfdc4c20b some TODO → FIXME
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   280
				// FIXME: escape function
21
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   281
				awkScript << L"return value;" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   282
				awkScript << L"};" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   283
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   284
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   285
				awkScript << L"function _unescape(value) {" << std::endl;
22
98acfdc4c20b some TODO → FIXME
František Kučera <franta-hg@frantovo.cz>
parents: 21
diff changeset
   286
				// FIXME: unescape function
21
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   287
				awkScript << L"return value;" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   288
				awkScript << L"};" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   289
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   290
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   291
				awkScript << L"function _readVariables() {" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   292
				for (int i = 0; i < currentReaderMetadata.size(); i++) awkScript << a2v(currentReaderMetadata[i].getAttributeName()) << L"=_unescape($" << (i + 1) << L");" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   293
				awkScript << L"};" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   294
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   295
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   296
				awkScript << L"function _writeVariables() {" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   297
				awkScript << L"NF=" << currentWriterMetadata.size() << ";" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   298
				for (int i = 0; i < currentWriterMetadata.size(); i++) awkScript << L"$" << (i + 1) << L"=_escape(" << a2v(currentWriterMetadata[i].attributeName) << L");" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   299
				awkScript << L"};" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   300
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   301
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   302
				awkScript << L"function record() {" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   303
				awkScript << L"_writeVariables();" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   304
				awkScript << L"print;" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   305
				awkScript << L"};" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   306
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   307
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   308
				awkScript << L"{ _readVariables();  }" << std::endl; // read line (input attributes) into AWK variables
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   309
				awkScript << L"{ _writeVariables(); }" << std::endl; // write AWK variables to the line (so it matches the output attributes and can be implicitly printed without explicit record() call)
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   310
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   311
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   312
				awkScript << currentRelationConfiguration->awkForEach << std::endl; // user's code – can modify variables, filter results or explicitly call record() (can generate additional records or duplicate them)
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   313
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   314
				// CLI arguments:
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   315
				std::vector<std::string> args;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   316
				args.push_back("awk");
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   317
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   318
				for (auto d : configuration.definitions) addDefinition(args, d);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   319
				for (auto d : currentRelationConfiguration->definitions) addDefinition(args, d);
3
e086ae6a19c3 generate (some) relational output
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   320
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   321
				args.push_back(convertor.to_bytes(awkScript.str()));
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   322
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   323
				// Runs AWK program found on $PATH → user can plug-in a custom implementation or a wrapper, but this can be also bit dangerous (however AWK itself is dangerous).
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   324
				execp(args);
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   325
			} else {
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   326
				// Parent process
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   327
				closeOrThrow(awkInputReaderFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   328
				closeOrThrow(awkOutputWriterFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   329
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   330
				__pid_t writerPid = fork();
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   331
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   332
				if (writerPid < 0) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   333
					throw cli::RelpipeCLIException(L"Unable to fork Writer process.", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   334
				} else if (writerPid == 0) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   335
					// Writer child process
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   336
					closeOrThrow(awkInputWriterFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   337
19
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   338
					if (currentRelationConfiguration->drop) {
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   339
						// TODO: omit whole this process and pipe AWK output to /dev/null?
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   340
					} else {
21
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   341
						relationalWriter->startRelation(name, currentWriterMetadata, true);
19
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   342
					}
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   343
20
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   344
					processAwkOutput(awkOutputReaderFD);
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   345
					exit(0);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   346
				} else {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   347
					// Parent process
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   348
					closeOrThrow(awkOutputReaderFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   349
				}
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   350
			}
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   351
		} else {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   352
			relationalWriter->startRelation(name, currentWriterMetadata, true);
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   353
		}
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   354
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   355
	}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   356
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   357
	void attribute(const string_t& value) override {
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   358
		if (currentRelationConfiguration) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   359
			currentAttributeIndex++;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   360
			currentAttributeIndex = currentAttributeIndex % currentReaderMetadata.size();
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   361
17
54716ea44d52 pass only values to the awk process
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   362
			std::string awkValue = convertor.to_bytes(escapeAwkValue(value));
54716ea44d52 pass only values to the awk process
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   363
			if (currentAttributeIndex == 0) awkValue += "\n";
54716ea44d52 pass only values to the awk process
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   364
			else awkValue += "\t";
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   365
17
54716ea44d52 pass only values to the awk process
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   366
			write(awkInputWriterFD, awkValue.c_str(), awkValue.length());
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   367
		} else {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   368
			relationalWriter->writeAttribute(value);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   369
		}
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   370
	}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   371
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   372
	void endOfPipe() {
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   373
		cleanUp();
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   374
	}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   375
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   376
};
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   377
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   378
}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   379
}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   380
}