src/AwkHandler.h
author František Kučera <franta-hg@frantovo.cz>
Sat, 25 May 2019 21:52:56 +0200
branchv_0
changeset 29 b3d1a671315b
parent 28 4fdbe30d8c58
child 30 5261dfd3b952
permissions -rw-r--r--
AWK escaping and unescaping functions
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     7
 * the Free Software Foundation, either version 3 of the License, or
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 * (at your option) any later version.
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 *
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * This program is distributed in the hope that it will be useful,
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 * GNU General Public License for more details.
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 *
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * You should have received a copy of the GNU General Public License
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
 */
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
#pragma once
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
    20
#include<functional>
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <memory>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <string>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <vector>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
#include <iostream>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <sstream>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <locale>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
#include <codecvt>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <regex>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    30
#include <unistd.h>
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    31
#include <wait.h>
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    32
#include <ext/stdio_filebuf.h>
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    33
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
#include <relpipe/reader/typedefs.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
#include <relpipe/reader/TypeId.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
#include <relpipe/reader/handlers/AttributeMetadata.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    39
#include <relpipe/writer/Factory.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    40
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    41
#include <relpipe/cli/RelpipeCLIException.h>
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    42
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    43
#include "Configuration.h"
28
4fdbe30d8c58 introduce AwkException
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
    44
#include "AwkException.h"
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    45
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    46
namespace relpipe {
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    47
namespace tr {
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    48
namespace awk {
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    49
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    50
using namespace std;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    51
using namespace relpipe;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    52
using namespace relpipe::reader;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    53
using namespace relpipe::reader::handlers;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    54
5
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    55
/**
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    56
 * This transformation consists of three processes connected together using pipes.
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    57
 * 
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    58
 * Processes:
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    59
 *  - Parent: processes RelationalReaderStringHandler events (reads relational data, usually from STDIN) and passes them in a special text format to pipe1
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    60
 *  - AWK: external program (/usr/bin/awk), reads from pipe1 and writes to pipe2
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    61
 *  - Writer: reads from the pipe2 and writes to relationalWriter (relational data, usually to STDOUT)
86de8e6ab231 documentation
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
    62
 */
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    63
class AwkHandler : public RelationalReaderStringHandler {
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    64
private:
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    65
	/** Handler configuration; its `definitions` are registered as AWK variable names in generateVariableMappings(). */
	Configuration configuration;
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    66
	/** Writer used by add() to translate type names to type IDs (toTypeId()). NOTE(review): ownership of this pointer is not visible in this part of the file. */
	writer::RelationalWriter* relationalWriter;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
    67
	/** Callable taking no arguments. NOTE(review): presumably flushes the output of relationalWriter — confirm against the code that assigns and invokes it. */
	std::function<void() > relationalWriterFlush;
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
    68
	/** Converts between wide strings and UTF-8 bytes; to_bytes() is used for environment values and AWK command line arguments. */
	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    69
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    70
	/** File descriptor closed by cleanUp(); -1 means there is nothing to close. NOTE(review): by its name, the write end of the pipe feeding AWK — confirm. */
	int awkInputWriterFD = -1;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
    71
	/** Per-relation configuration currently in effect (its `drop` flag and `definitions` are read); reset to nullptr by cleanUp(). */
	RelationConfiguration* currentRelationConfiguration = nullptr;
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
    72
	/** Reader-side attribute metadata; the attribute names are registered in generateVariableMappings(); cleared by cleanUp(). */
	std::vector<AttributeMetadata> currentReaderMetadata;
25
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
    73
	/** Writer-side attribute metadata; the attribute names are registered in generateVariableMappings(); cleared by cleanUp(). */
	std::vector<writer::AttributeMetadata> currentWriterMetadata;
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
    74
	/** Maps original names (attributes and definitions) to escaped AWK variable names; filled by generateVariableMappings(), queried by a2v(), cleared by cleanUp(). */
	std::map<string_t, string_t> currenVariablesMapping;
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
    75
	/** Reset to 0 by cleanUp(). NOTE(review): presumably the index of the attribute currently being processed — confirm. */
	integer_t currentAttributeIndex = 0;
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    76
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    77
	void createPipe(int& readerFD, int& writerFD) {
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    78
		int fds[2];
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    79
		int result = pipe(fds);
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    80
		readerFD = fds[0];
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    81
		writerFD = fds[1];
28
4fdbe30d8c58 introduce AwkException
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
    82
		if (result < 0) throw AwkException(L"Unable to create a pipe.");
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    83
	}
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    84
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    85
	void redirectFD(int oldfd, int newfd) {
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    86
		int result = dup2(oldfd, newfd);
28
4fdbe30d8c58 introduce AwkException
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
    87
		if (result < 0) throw AwkException(L"Unable redirect FD.");
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    88
	}
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    89
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    90
	void closeOrThrow(int fd) {
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
    91
		int error = close(fd);
28
4fdbe30d8c58 introduce AwkException
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
    92
		if (error) throw AwkException(L"Unable to close FD: " + to_wstring(fd) + L" from PID: " + to_wstring(getpid()));
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    93
	}
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    94
11
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    95
	void execp(const std::vector<std::string>& args) {
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    96
		const char** a = new const char*[args.size() + 1];
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    97
		for (size_t i = 0; i < args.size(); i++) a[i] = args[i].c_str();
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    98
		a[args.size()] = nullptr;
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    99
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   100
		execvp(a[0], (char*const*) a);
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   101
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   102
		delete[] a;
28
4fdbe30d8c58 introduce AwkException
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
   103
		throw AwkException(L"Unable to do execvp().");
11
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   104
	}
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   105
18
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   106
	/* TODO: move to lib-cli when stable and used in other modules */
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   107
	void setEnv(const char * name, const string_t& value) {
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   108
		setenv(name, convertor.to_bytes(value).c_str(), true);
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   109
	}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   110
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   111
	/* TODO: move to lib-cli when stable and used in other modules */
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   112
	/**
	 * Sets an environment variable of this process to a byte-string value,
	 * overwriting any existing value.
	 *
	 * @param name name of the environment variable
	 * @param value new value; taken by const reference (it is only read), so const strings and temporaries are accepted too
	 */
	void setEnv(const char * name, const std::string& value) {
		setenv(name, value.c_str(), true);
	}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   115
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   116
	/* TODO: move to lib-cli when stable and used in other modules */
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   117
	void setEnv(const char * name, const integer_t& value) {
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   118
		setenv(name, to_string(value).c_str(), true);
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   119
	}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   120
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   121
	void addDefinition(std::vector<std::string>& awkCliArgs, DefinitionRecipe& d) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   122
		awkCliArgs.push_back("-v");
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   123
		awkCliArgs.push_back(convertor.to_bytes(a2v(d.name) + L"=" + d.value));
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   124
	}
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   125
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   126
	/**
	 * Appends the writer-side equivalent of each reader attribute to writerAttributes.
	 * The attribute name is taken over as is; the type name is translated to a type ID by relationalWriter.
	 *
	 * @param readerAttributes attributes to be converted
	 * @param writerAttributes list to which the converted attributes are appended
	 */
	void add(vector<AttributeMetadata>& readerAttributes, vector<writer::AttributeMetadata>& writerAttributes) {
		// The loop variable has its own name (it used to shadow the parameter) and is a reference (no copy per element).
		for (AttributeMetadata& readerAttribute : readerAttributes) {
			writerAttributes.push_back({
				readerAttribute.getAttributeName(),
				relationalWriter->toTypeId(readerAttribute.getTypeName())
			});
		}
	}
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   133
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   134
	void cleanUp() {
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   135
		if (awkInputWriterFD >= 0) {
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   136
			closeOrThrow(awkInputWriterFD);
27
86d8bbc99e7b check exit codes of child processes
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   137
			int error1;
86d8bbc99e7b check exit codes of child processes
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   138
			int error2;
86d8bbc99e7b check exit codes of child processes
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   139
			__pid_t waitPID1 = wait(&error1);
86d8bbc99e7b check exit codes of child processes
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   140
			__pid_t waitPID2 = wait(&error2);
86d8bbc99e7b check exit codes of child processes
František Kučera <franta-hg@frantovo.cz>
parents: 26
diff changeset
   141
			if (error1 || error2) throw cli::RelpipeCLIException(L"The child process returned an error exit code.", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   142
			awkInputWriterFD = -1;
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   143
		}
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   144
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   145
		currentAttributeIndex = 0;
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   146
		currentReaderMetadata.clear();
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   147
		currentWriterMetadata.clear();
25
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   148
		currenVariablesMapping.clear();
19
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   149
		currentRelationConfiguration = nullptr;
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   150
	}
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   151
25
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   152
	void generateVariableMappings() {
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   153
		for (AttributeMetadata m : currentReaderMetadata) currenVariablesMapping[m.getAttributeName()] = L"";
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   154
		for (writer::AttributeMetadata m : currentWriterMetadata) currenVariablesMapping[m.attributeName] = L"";
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   155
		for (DefinitionRecipe d : configuration.definitions) currenVariablesMapping[d.name] = L"";
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   156
		for (DefinitionRecipe d : currentRelationConfiguration->definitions) currenVariablesMapping[d.name] = L"";
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   157
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   158
		for (std::pair<string_t, string_t> m : currenVariablesMapping) {
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   159
			currenVariablesMapping[m.first] = escapeAwkVariableName(m.first);
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   160
		}
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   161
	}
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   162
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   163
	/**
	 * Translates an original (attribute or definition) name to its AWK variable name.
	 *
	 * @param attributeName original name, as registered in currenVariablesMapping
	 * @return the mapped AWK variable name
	 * @throws AwkException if the name was not registered
	 */
	string_t a2v(const string_t& attributeName) {
		auto found = currenVariablesMapping.find(attributeName);
		if (found == currenVariablesMapping.end()) {
			throw AwkException(L"Unable to find value in currenVariablesMapping");
		}
		return found->second;
	}
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   167
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   168
	/**
	 * Tells whether the map contains the given value (under any key).
	 * Both arguments are taken by const reference, so neither the map nor the value is copied.
	 *
	 * @param map map to be searched (linear scan over all entries)
	 * @param value value compared with operator== against each mapped value
	 * @return true if at least one entry has the given value, false otherwise (also for an empty map)
	 */
	template <typename K, typename V> bool containsValue(const std::map<K, V>& map, const V& value) {
		for (const auto& entry : map) {
			if (entry.second == value) return true;
		}
		return false;
	}
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   172
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   173
	/**
	 * Derives a usable AWK variable name from an attribute/definition name.
	 *
	 * Characters other than a-z, A-Z, 0-9 and _ are replaced with _ and a leading digit is prefixed with _.
	 * While the result collides with a reserved AWK name, with one of our own names,
	 * or with a variable name already present in currenVariablesMapping, another _ is prepended.
	 *
	 * @param attributeName original name
	 * @return name that passed all the checks above
	 */
	string_t escapeAwkVariableName(const string_t& attributeName) {
		// How the lists of reserved names were obtained:
		// cat awkgram.y | awk -v FS='\\{"|",' -v ORS='|' '/static const struct token tokentab/, /\};/ { if (/^\{/) { print $2} }'
		// cat AwkHandler.h | awk -v FS=' |\\(' -v ORS='|' '/awkScript.*"function/ { print $4; }'
		std::wregex awkReservedNames(L"BEGIN|BEGINFILE|END|ENDFILE|adump|and|asort|asorti|atan2|bindtextdomain|break|case|close|compl|continue|cos|dcgettext|dcngettext|default|delete|do|else|eval|exit|exp|fflush|for|func|function|gensub|getline|gsub|if|in|include|index|int|intdiv0|isarray|length|load|log|lshift|match|mktime|namespace|next|nextfile|or|patsplit|print|printf|rand|return|rshift|sin|split|sprintf|sqrt|srand|stopme|strftime|strtonum|sub|substr|switch|system|systime|tolower|toupper|typeof|while|xor");
		std::wregex trReservedNames(L"_escape|_unescape|_readVariables|_writeVariables|record");
		std::wregex badCharacters(L"[^a-zA-Z0-9_]|^([0-9])");

		string_t candidate = std::regex_replace(attributeName, badCharacters, L"_$1");

		// A sanitized name stays sanitized after prepending _, so only the collision checks are repeated.
		for (;;) {
			const bool collides = regex_match(candidate, awkReservedNames)
					|| regex_match(candidate, trReservedNames)
					|| containsValue(currenVariablesMapping, candidate);
			if (!collides) return candidate;
			candidate = L"_" + candidate;
		}
	}
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   190
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   191
	/**
	 * Escapes a value: tab becomes \t, line feed becomes \n and backslash becomes \\
	 * (each written as two characters); all other characters are copied unchanged.
	 *
	 * @param value raw value
	 * @return escaped value
	 */
	string_t escapeAwkValue(const string_t& value) {
		std::wstring escaped;
		for (wchar_t ch : value) {
			switch (ch) {
				case L'\t':
					escaped += L"\\t";
					break;
				case L'\n':
					escaped += L"\\n";
					break;
				case L'\\':
					escaped += L"\\\\";
					break;
				default:
					escaped += ch;
					break;
			}
		}
		return escaped;
	}
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   201
20
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   202
	void processAwkOutput(int awkOutputReaderFD) {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   203
		locale::global(locale("")); // needed for processing unicode texts, otherwise getline() stopped working on first line with non-ascii characters; TODO: move somewhere else?
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   204
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   205
		__gnu_cxx::stdio_filebuf<wchar_t> awkOutputReaderBuffer(awkOutputReaderFD, std::ios::in);
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   206
		std::wistream awkOutputReader(&awkOutputReaderBuffer);
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   207
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   208
		if (currentRelationConfiguration->drop) {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   209
			for (wchar_t ch; awkOutputReader.read(&ch, 1).good();); // just eat the lines from the AWK
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   210
		} else {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   211
			std::wstringstream currentValue;
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   212
			for (wchar_t ch; awkOutputReader.read(&ch, 1).good();) {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   213
				if (ch == '\t' || ch == '\n') {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   214
					relationalWriter->writeAttribute(currentValue.str());
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   215
					currentValue.str(L"");
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   216
					currentValue.clear();
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   217
				} else if (ch == '\\') {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   218
					ch = awkOutputReader.get();
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   219
					if (ch == 't') currentValue << L'\t';
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   220
					else if (ch == 'n') currentValue << L'\n';
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   221
					else if (ch == '\\') currentValue << L'\\';
28
4fdbe30d8c58 introduce AwkException
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
   222
					else throw AwkException(L"Unknown escape sequence. Only \\t, \\n and \\\\ are supported.");
20
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   223
				} else {
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   224
					currentValue << ch;
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   225
				}
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   226
			}
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   227
		}
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   228
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   229
		closeOrThrow(awkOutputReaderFD);
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   230
	}
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   231
26
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   232
	void debugVariableMapping(const string_t& relationName) {
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   233
		relationalWriter->startRelation(relationName + L".variableMapping",{
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   234
			{L"attribute", writer::TypeId::STRING},
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   235
			{L"variable", writer::TypeId::STRING},
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   236
		}, true);
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   237
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   238
		for (std::pair<string_t, string_t> m : currenVariablesMapping) {
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   239
			relationalWriter->writeAttribute(m.first);
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   240
			relationalWriter->writeAttribute(m.second);
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   241
		}
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   242
	}
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   243
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   244
public:
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   245
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   246
	/**
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   247
	 * @param relationalWriter
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   248
	 * @param relationalWriterFlush the writer must be flushed before fork() in order to 
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   249
	 * avoid duplicate output (otherwise single relation might be written from two processes); 
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   250
	 * This is a little hack – if it stops working, we should reconnect the pipes 
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   251
	 * and use the writer only from a single process and avoid its effective duplication,
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   252
	 * or use different writers for each relation (or process).
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   253
	 * @param configuration
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   254
	 */
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   255
	AwkHandler(writer::RelationalWriter* relationalWriter, std::function<void() > relationalWriterFlush, Configuration& configuration) : relationalWriter(relationalWriter), relationalWriterFlush(relationalWriterFlush), configuration(configuration) {
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   256
	}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   257
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   258
	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   259
		cleanUp();
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   260
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   261
		for (int i = 0; i < configuration.relationConfigurations.size(); i++) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   262
			if (regex_match(name, wregex(configuration.relationConfigurations[i].relation))) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   263
				currentRelationConfiguration = &configuration.relationConfigurations[i];
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   264
				break; // it there are multiple matches, only the first configuration is used
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   265
			}
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   266
		}
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   267
21
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   268
		currentReaderMetadata = attributes;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   269
		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)		
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   270
		if (currentRelationConfiguration && currentRelationConfiguration->writerMetadata.size()) {
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   271
			if (currentRelationConfiguration->inputAttributesPrepend) add(currentReaderMetadata, currentWriterMetadata);
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   272
			currentWriterMetadata.insert(currentWriterMetadata.end(), currentRelationConfiguration->writerMetadata.begin(), currentRelationConfiguration->writerMetadata.end());
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   273
			if (currentRelationConfiguration->inputAttributesAppend) add(currentReaderMetadata, currentWriterMetadata);
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   274
		} else {
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   275
			add(currentReaderMetadata, currentWriterMetadata);
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   276
		}
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   277
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   278
		if (currentRelationConfiguration) {
25
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   279
			generateVariableMappings();
13a1e1134797 avoid collisions in AWK variable names
František Kučera <franta-hg@frantovo.cz>
parents: 24
diff changeset
   280
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   281
			int awkInputReaderFD;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   282
			int awkOutputReaderFD;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   283
			int awkOutputWriterFD;
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   284
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   285
			createPipe(awkInputReaderFD, awkInputWriterFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   286
			createPipe(awkOutputReaderFD, awkOutputWriterFD);
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   287
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   288
			relationalWriterFlush();
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   289
			__pid_t awkPid = fork();
11
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   290
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   291
			if (awkPid < 0) {
28
4fdbe30d8c58 introduce AwkException
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
   292
				throw AwkException(L"Unable to fork AWK process.");
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   293
			} else if (awkPid == 0) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   294
				// AWK child process
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   295
				closeOrThrow(awkInputWriterFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   296
				closeOrThrow(awkOutputReaderFD);
12
8844ebce8fb4 pass defined global variables to AWK as CLI arguments
František Kučera <franta-hg@frantovo.cz>
parents: 11
diff changeset
   297
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   298
				redirectFD(awkInputReaderFD, STDIN_FILENO);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   299
				redirectFD(awkOutputWriterFD, STDOUT_FILENO);
11
f515d14794e0 variable execvp() arguments
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
   300
18
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   301
				// Environment variables:
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   302
				setEnv("relationName", name);
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   303
				setEnv("inputAttributeCount", currentReaderMetadata.size());
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   304
				setEnv("outputAttributeCount", currentWriterMetadata.size());
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   305
				// TODO: better names? ENV vs. AWK variables?
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   306
				for (int i = 0; i < currentReaderMetadata.size(); i++) {
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   307
					setEnv((std::string("inputAttributeName") + std::to_string(i)).c_str(), currentReaderMetadata[i].getAttributeName());
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   308
					setEnv("inputAttributeType" + i, currentReaderMetadata[i].getTypeName());
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   309
				}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   310
				for (int i = 0; i < currentWriterMetadata.size(); i++) {
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   311
					setEnv("outputAttributeName" + i, currentWriterMetadata[i].attributeName);
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   312
					// setEnv("outputAttributeType" + i, currentWriterMetadata[i].typeId); // TODO: type?
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   313
				}
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   314
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   315
				// AWK script:
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   316
				std::wstringstream awkScript;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   317
				awkScript << L"BEGIN {" << std::endl;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   318
				awkScript << L"FS=\"\\t\";" << std::endl;
18
2ac59242d0cb environment variables
František Kučera <franta-hg@frantovo.cz>
parents: 17
diff changeset
   319
				awkScript << L"OFS=\"\\t\";" << std::endl;
15
ba91a464d2b3 implement --for-each, --before-records, --after-records
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   320
				awkScript << currentRelationConfiguration->awkBeforeRecords << std::endl;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   321
				awkScript << L"};" << std::endl;
15
ba91a464d2b3 implement --for-each, --before-records, --after-records
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   322
				awkScript << std::endl;
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   323
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   324
				awkScript << L"END {" << std::endl;
15
ba91a464d2b3 implement --for-each, --before-records, --after-records
František Kučera <franta-hg@frantovo.cz>
parents: 13
diff changeset
   325
				awkScript << currentRelationConfiguration->awkAfterRecords << std::endl;
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   326
				awkScript << L"};" << std::endl;
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   327
29
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   328
				awkScript << LR"AWK(
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   329
function _escape(value,    i) {
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   330
	result = "";
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   331
	split(value, chars, "");
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   332
	for (i = 1; i <= length(chars); i++) {
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   333
		ch = chars[i];
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   334
		if (ch == "\\")      { ch = "\\\\"; }
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   335
		else if (ch == "\t") { ch = "\\t"; }
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   336
		else if (ch == "\n") { ch = "\\n"; }
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   337
		result = result ch;
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   338
	}
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   339
	return result;
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   340
};
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   341
						
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   342
# Reverses _escape(): the two-character sequences \\, \t and \n are turned back
# into backslash, tab and newline; all other characters are copied unchanged.
#
# Callers pass only "value". The remaining parameters (i, n, ch, result) are local variables:
# in AWK the parameter list is the only way to declare locals, and without it
# this function would overwrite global variables of the same names.
# The string is walked with length() and substr() rather than split(value, chars, "")
# and length(chars), which are extensions not available in every AWK implementation.
#
# A backslash followed by anything else (including a backslash at the very end of the value,
# where the following character is empty) is reported on /dev/stderr and terminates
# the AWK program with exit code 70.
#
# Returns the unescaped string (empty string for empty input).
function _unescape(value,    i, n, ch, result) {
	result = "";
	n = length(value);
	for (i = 1; i <= n; i++) {
		ch = substr(value, i, 1);
		if (ch == "\\") {
			# consume the character after the backslash; substr() past the end yields ""
			ch = substr(value, ++i, 1);
			if (ch == "\\")     { ch = "\\"; }
			else if (ch == "t") { ch = "\t"; }
			else if (ch == "n") { ch = "\n"; }
			else {
				printf("Unsupported escape sequence: %s\n", ch) > "/dev/stderr";
				exit 70;
			}
		}
		result = result ch;
	}
	return result;
};
b3d1a671315b AWK escaping and unescaping functions
František Kučera <franta-hg@frantovo.cz>
parents: 28
diff changeset
   361
)AWK";
21
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   362
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   363
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   364
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   365
				awkScript << L"function _readVariables() {" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   366
				for (int i = 0; i < currentReaderMetadata.size(); i++) awkScript << a2v(currentReaderMetadata[i].getAttributeName()) << L"=_unescape($" << (i + 1) << L");" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   367
				awkScript << L"};" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   368
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   369
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   370
				awkScript << L"function _writeVariables() {" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   371
				awkScript << L"NF=" << currentWriterMetadata.size() << ";" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   372
				for (int i = 0; i < currentWriterMetadata.size(); i++) awkScript << L"$" << (i + 1) << L"=_escape(" << a2v(currentWriterMetadata[i].attributeName) << L");" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   373
				awkScript << L"};" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   374
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   375
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   376
				awkScript << L"function record() {" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   377
				awkScript << L"_writeVariables();" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   378
				awkScript << L"print;" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   379
				awkScript << L"};" << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   380
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   381
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   382
				awkScript << L"{ _readVariables();  }" << std::endl; // read line (input attributes) into AWK variables
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   383
				awkScript << L"{ _writeVariables(); }" << std::endl; // write AWK variables to the line (so it matches the output attributes and can be implicitly printed without explicit record() call)
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   384
				awkScript << std::endl;
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   385
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   386
				awkScript << currentRelationConfiguration->awkForEach << std::endl; // user's code – can modify variables, filter results or explicitly call record() (can generate additional records or duplicate them)
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   387
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   388
				// CLI arguments:
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   389
				std::vector<std::string> args;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   390
				args.push_back("awk");
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   391
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   392
				for (auto d : configuration.definitions) addDefinition(args, d);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   393
				for (auto d : currentRelationConfiguration->definitions) addDefinition(args, d);
3
e086ae6a19c3 generate (some) relational output
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   394
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   395
				args.push_back(convertor.to_bytes(awkScript.str()));
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   396
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   397
				// Runs the AWK program found on $PATH → the user can plug in a custom implementation or a wrapper, but this can also be a bit dangerous (however, AWK itself is dangerous).
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   398
				execp(args);
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   399
			} else {
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   400
				// Parent process
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   401
				closeOrThrow(awkInputReaderFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   402
				closeOrThrow(awkOutputWriterFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   403
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   404
				__pid_t writerPid = fork();
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   405
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   406
				if (writerPid < 0) {
28
4fdbe30d8c58 introduce AwkException
František Kučera <franta-hg@frantovo.cz>
parents: 27
diff changeset
   407
					throw AwkException(L"Unable to fork Writer process.");
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   408
				} else if (writerPid == 0) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   409
					// Writer child process
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   410
					closeOrThrow(awkInputWriterFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   411
26
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   412
					if (currentRelationConfiguration->debugVariableMapping) debugVariableMapping(name);
cf57e8c78492 add option: --debug-variable-mapping
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   413
19
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   414
					if (currentRelationConfiguration->drop) {
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   415
						// TODO: omit this whole process and pipe the AWK output to /dev/null?
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   416
					} else {
21
d46a727b7965 read and write variables, generate real relation
František Kučera <franta-hg@frantovo.cz>
parents: 20
diff changeset
   417
						relationalWriter->startRelation(name, currentWriterMetadata, true);
19
e4558df9ba2d drop – run AWK code but generate no output for given relation
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
   418
					}
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   419
20
f937ad57351f read and un-escape the AWK output, convert to relational attributes
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   420
					processAwkOutput(awkOutputReaderFD);
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   421
					exit(0);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   422
				} else {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   423
					// Parent process
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   424
					closeOrThrow(awkOutputReaderFD);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   425
				}
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   426
			}
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   427
		} else {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   428
			relationalWriter->startRelation(name, currentWriterMetadata, true);
1
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   429
		}
05d969cd90d4 fork() processes
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   430
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   431
	}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   432
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   433
	void attribute(const string_t& value) override {
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   434
		if (currentRelationConfiguration) {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   435
			currentAttributeIndex++;
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   436
			currentAttributeIndex = currentAttributeIndex % currentReaderMetadata.size();
7
46db0e6e548b pass name-value pair to AWK
František Kučera <franta-hg@frantovo.cz>
parents: 6
diff changeset
   437
17
54716ea44d52 pass only values to the awk process
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   438
			std::string awkValue = convertor.to_bytes(escapeAwkValue(value));
54716ea44d52 pass only values to the awk process
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   439
			if (currentAttributeIndex == 0) awkValue += "\n";
54716ea44d52 pass only values to the awk process
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   440
			else awkValue += "\t";
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   441
17
54716ea44d52 pass only values to the awk process
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
   442
			write(awkInputWriterFD, awkValue.c_str(), awkValue.length());
13
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   443
		} else {
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   444
			relationalWriter->writeAttribute(value);
b74001992ec3 implement --relation option (thus some relations might pass unmodified by AWK), support per-relation variables
František Kučera <franta-hg@frantovo.cz>
parents: 12
diff changeset
   445
		}
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   446
	}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   447
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   448
	/**
	 * End-of-input callback: does nothing but call cleanUp().
	 * NOTE(review): cleanUp() is defined outside this part of the file – presumably it releases the resources of the last relation; confirm.
	 */
	void endOfPipe() {
2
580ccb511301 for each attribute
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   449
		cleanUp();
0
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   450
	}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   451
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   452
};
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   453
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   454
}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   455
}
644fd2ce2580 project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   456
}