src/CutHandler.h
author František Kučera <franta-hg@frantovo.cz>
Tue, 22 Oct 2019 22:01:59 +0200
branchv_0
changeset 22 d07ac873cc89
parent 18 90d4e4f9fde6
child 28 bc15f5471b6a
permissions -rw-r--r--
fix license version: GNU GPLv3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
22
d07ac873cc89 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 18
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#pragma once
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <memory>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    20
#include <string>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <vector>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <iostream>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <sstream>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
#include <locale>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <codecvt>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <regex>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <relpipe/reader/typedefs.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
#include <relpipe/reader/TypeId.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
#include <relpipe/reader/handlers/AttributeMetadata.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
#include <relpipe/writer/Factory.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    35
#include <relpipe/cli/RelpipeCLIException.h>
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    36
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
namespace relpipe {
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
namespace tr {
7
aebaf590a838 use smart pointer
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    39
namespace grep {
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    40
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    41
using namespace std;
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    42
using namespace relpipe;
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    43
using namespace relpipe::reader;
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    44
using namespace relpipe::reader::handlers;
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    45
18
90d4e4f9fde6 fix typo: Hadler → Handler
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
    46
class CutHandler : public RelationalReaderStringHandler {
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    47
private:
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    48
	shared_ptr<writer::RelationalWriter> relationalWriter;
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    49
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    50
	wregex relationNameRegEx;
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    51
	vector<wregex> attributeNameRegExes;
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    52
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    53
	vector<integer_t> currentAttributeMapping;
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    54
	vector<string_t> currentRecord;
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    55
	integer_t currentAttributeIndex = 0;
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    56
	boolean_t filterCurrentRelation = false;
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    57
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    58
public:
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    59
15
63e804b63321 fix the class name
František Kučera <franta-hg@frantovo.cz>
parents: 11
diff changeset
    60
	CutHandler(ostream& output, const vector<string_t>& arguments) {
7
aebaf590a838 use smart pointer
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    61
		relationalWriter.reset(writer::Factory::create(output));
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    62
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    63
		if (arguments.size() >= 2) {
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    64
			relationNameRegEx = wregex(arguments[0]);
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    65
			for (int i = 1; i < arguments.size(); i++) attributeNameRegExes.push_back(wregex(arguments[i]));
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    66
		} else {
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    67
			throw cli::RelpipeCLIException(L"Usage: relpipe-tr-cut <relationNameRegExp> <attributeNameRegExp> [<otherAttributeNameRegExp> ...]", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND);
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    68
		}
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    69
	}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    70
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    71
	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
11
9d528c98912d cut can also do DROP of whole relation: just use regex that matches no attributes like $^
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    72
		currentRecord.resize(attributes.size());
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    73
		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    74
		vector<writer::AttributeMetadata> allWriterMetadata;
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    75
		for (AttributeMetadata readerMetadata : attributes) {
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    76
			allWriterMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())});
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    77
		}
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    78
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    79
		vector<writer::AttributeMetadata> writerMetadata;
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    80
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    81
		filterCurrentRelation = regex_match(name, relationNameRegEx);
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    82
		if (filterCurrentRelation) {
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    83
			currentAttributeMapping.clear();
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    84
			for (wregex attributeNameRegEx : attributeNameRegExes) {
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    85
				for (int i = 0; i < allWriterMetadata.size(); i++) {
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    86
					if (regex_match(allWriterMetadata[i].attributeName, attributeNameRegEx)) currentAttributeMapping.push_back(i);
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    87
				}
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    88
			}
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    89
11
9d528c98912d cut can also do DROP of whole relation: just use regex that matches no attributes like $^
František Kučera <franta-hg@frantovo.cz>
parents: 10
diff changeset
    90
			if (currentAttributeMapping.empty()) return; // No attribute matches → DROP whole relation
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    91
			for (integer_t i : currentAttributeMapping) writerMetadata.push_back(allWriterMetadata[i]);
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    92
		} else {
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    93
			writerMetadata = allWriterMetadata;
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    94
		}
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
    95
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    96
		relationalWriter->startRelation(name, writerMetadata, true);
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    97
	}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    98
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    99
	void attribute(const string_t& value) override {
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   100
		if (filterCurrentRelation) {
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   101
			currentRecord[currentAttributeIndex] = value;
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   102
			currentAttributeIndex++;
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   103
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
   104
			if (currentAttributeIndex > 0 && currentAttributeIndex % currentRecord.size() == 0) {
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
   105
				for (integer_t i : currentAttributeMapping) relationalWriter->writeAttribute(currentRecord[i]);
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   106
			}
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   107
10
9ec1290b4a9d first working cut version
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
   108
			currentAttributeIndex = currentAttributeIndex % currentRecord.size();
8
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   109
		} else {
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   110
			relationalWriter->writeAttribute(value);
f66c759d1111 first working grep version
František Kučera <franta-hg@frantovo.cz>
parents: 7
diff changeset
   111
		}
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   112
	}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   113
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   114
	void endOfPipe() {
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   115
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   116
	}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   117
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   118
};
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   119
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   120
}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   121
}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   122
}