src/SedHandler.h
author František Kučera <franta-hg@frantovo.cz>
Sun, 16 May 2021 17:33:35 +0200
branchv_0
changeset 26 576d4965434f
parent 25 0cfbaf5c57a6
permissions -rw-r--r--
new CLI interface: --modify relation-name, --modify attribute-name
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info)
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
19
9bac174d11b6 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 15
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#pragma once
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <memory>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    20
#include <string>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <vector>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <iostream>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    23
#include <sstream>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
#include <locale>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <codecvt>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <regex>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    27
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
#include <relpipe/reader/typedefs.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
#include <relpipe/reader/TypeId.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
#include <relpipe/reader/handlers/RelationalReaderStringHandler.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
#include <relpipe/reader/handlers/AttributeMetadata.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
#include <relpipe/writer/Factory.h>
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    35
#include <relpipe/cli/RelpipeCLIException.h>
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    36
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    37
#include "Configuration.h"
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    38
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    39
namespace relpipe {
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    40
namespace tr {
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    41
namespace sed {
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    42
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    43
using namespace std;
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    44
using namespace relpipe;
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    45
using namespace relpipe::reader;
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    46
using namespace relpipe::reader::handlers;
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    47
15
09981db6ad87 fix typo: Hadler → Handler
František Kučera <franta-hg@frantovo.cz>
parents: 9
diff changeset
    48
class SedHandler : public RelationalReaderStringHandler {
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    49
private:
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    50
	shared_ptr<writer::RelationalWriter> relationalWriter;
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    51
	Configuration configuration;
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    52
	RelationConfiguration* currentFilter = nullptr;
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    53
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    54
	std::vector<std::vector<RewriteRule*>> currentRules;
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    55
	integer_t currentAttributeIndex = 0;
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    56
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    57
public:
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    58
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    59
	SedHandler(shared_ptr<writer::RelationalWriter> relationalWriter, Configuration configuration) : relationalWriter(relationalWriter), configuration(configuration) {
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    60
	}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    61
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    62
	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
26
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    63
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    64
		std::vector<RewriteRule*> attributeRenamingRules;
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    65
		for (RelationConfiguration& rc : configuration.relationConfigurations) {
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    66
			if (std::regex_match(name, rc.relationPattern) ^ rc.invertMatch[ENTITY::RELATION]) {
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    67
				for (RewriteRule& rule : rc.rules) if (rule.modify == ENTITY::ATTRIBUTE) attributeRenamingRules.push_back(&rule);
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    68
			}
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    69
		}
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    70
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    71
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    72
		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    73
		vector<writer::AttributeMetadata> writerMetadata;
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    74
		for (AttributeMetadata readerMetadata : attributes) {
26
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    75
			string_t newName = readerMetadata.getAttributeName();
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    76
			for (RewriteRule* rule : attributeRenamingRules)
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    77
				if (std::regex_match(readerMetadata.getAttributeName(), rule->attributePattern) ^ rule->invertMatch[ENTITY::ATTRIBUTE]) // TODO: optionally: regex_match(newName, rule->attributePattern) ?
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    78
					newName = std::regex_replace(newName, rule->valuePattern, rule->replacement);
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    79
			writerMetadata.push_back({newName, relationalWriter->toTypeId(readerMetadata.getTypeName())});
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    80
		}
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    81
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    82
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    83
		currentRules.resize(attributes.size());
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    84
		for (RelationConfiguration& rc : configuration.relationConfigurations) {
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    85
			if (std::regex_match(name, rc.relationPattern) ^ rc.invertMatch[ENTITY::RELATION]) {
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    86
				for (int i = 0; i < currentRules.size(); i++) {
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    87
					for (RewriteRule& rule : rc.rules) {
26
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    88
						if (rule.modify == ENTITY::VALUE && std::regex_match(attributes[i].getAttributeName(), rule.attributePattern) ^ rule.invertMatch[ENTITY::ATTRIBUTE]) currentRules[i].push_back(&rule);
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    89
					}
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
    90
				}
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    91
			}
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    92
		}
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    93
26
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    94
		for (RelationConfiguration& rc : configuration.relationConfigurations) {
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    95
			if (std::regex_match(name, rc.relationPattern) ^ rc.invertMatch[ENTITY::RELATION]) {
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    96
				for (RewriteRule rule : rc.rules) if (rule.modify == ENTITY::RELATION) name = std::regex_replace(name, rule.valuePattern, rule.replacement);
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    97
			}
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    98
		}
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
    99
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   100
		relationalWriter->startRelation(name, writerMetadata, true);
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   101
	}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   102
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   103
	void attribute(const string_t& value) override {
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   104
		string_t newValue = value;
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   105
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   106
		for (RewriteRule* rule : currentRules[currentAttributeIndex]) {
26
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   107
			if (rule && !rule->invertMatch[ENTITY::VALUE]) newValue = std::regex_replace(newValue, rule->valuePattern, rule->replacement);
576d4965434f new CLI interface: --modify relation-name, --modify attribute-name
František Kučera <franta-hg@frantovo.cz>
parents: 25
diff changeset
   108
			else if (rule && !std::regex_match(value, rule->valuePattern)) newValue = rule->replacement;
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
   109
		}
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
   110
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   111
		relationalWriter->writeAttribute(newValue);
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   112
7
92d85e02b276 regex match relation and attribute + replace with a fixed value
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
   113
		currentAttributeIndex++;
25
0cfbaf5c57a6 new CLI interface: --relation --attribute --value --replacement --case-sensitive --invert-match – first version
František Kučera <franta-hg@frantovo.cz>
parents: 19
diff changeset
   114
		currentAttributeIndex = currentAttributeIndex % currentRules.size();
3
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   115
	}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   116
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   117
	void endOfPipe() {
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   118
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   119
	}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   120
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   121
};
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   122
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   123
}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   124
}
8731263d44f1 PassthroughHandler (string-based version)
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   125
}