src/CutHandler.h
branchv_0
changeset 28 bc15f5471b6a
parent 22 d07ac873cc89
equal deleted inserted replaced
27:f9b72d263838 28:bc15f5471b6a
    32 
    32 
    33 #include <relpipe/writer/Factory.h>
    33 #include <relpipe/writer/Factory.h>
    34 
    34 
    35 #include <relpipe/cli/RelpipeCLIException.h>
    35 #include <relpipe/cli/RelpipeCLIException.h>
    36 
    36 
       
    37 #include "Configuration.h"
       
    38 
    37 namespace relpipe {
    39 namespace relpipe {
    38 namespace tr {
    40 namespace tr {
    39 namespace grep {
    41 namespace cut {
    40 
    42 
    41 using namespace std;
    43 using namespace std;
    42 using namespace relpipe;
    44 using namespace relpipe;
    43 using namespace relpipe::reader;
    45 using namespace relpipe::reader;
    44 using namespace relpipe::reader::handlers;
    46 using namespace relpipe::reader::handlers;
    45 
    47 
    46 class CutHandler : public RelationalReaderStringHandler {
    48 class CutHandler : public RelationalReaderStringHandler {
    47 private:
    49 private:
    48 	shared_ptr<writer::RelationalWriter> relationalWriter;
    50 	shared_ptr<writer::RelationalWriter> relationalWriter;
    49 
    51 	Configuration configuration;
    50 	wregex relationNameRegEx;
    52 	RelationConfiguration* currentFilter = nullptr;
    51 	vector<wregex> attributeNameRegExes;
       
    52 
    53 
    53 	vector<integer_t> currentAttributeMapping;
    54 	vector<integer_t> currentAttributeMapping;
    54 	vector<string_t> currentRecord;
    55 	vector<string_t> currentRecord;
    55 	integer_t currentAttributeIndex = 0;
    56 	integer_t currentAttributeIndex = 0;
    56 	boolean_t filterCurrentRelation = false;
       
    57 
    57 
    58 public:
    58 public:
    59 
    59 
    60 	CutHandler(ostream& output, const vector<string_t>& arguments) {
    60 	CutHandler(shared_ptr<writer::RelationalWriter> relationalWriter, Configuration configuration) : relationalWriter(relationalWriter), configuration(configuration) {
    61 		relationalWriter.reset(writer::Factory::create(output));
       
    62 
       
    63 		if (arguments.size() >= 2) {
       
    64 			relationNameRegEx = wregex(arguments[0]);
       
    65 			for (int i = 1; i < arguments.size(); i++) attributeNameRegExes.push_back(wregex(arguments[i]));
       
    66 		} else {
       
    67 			throw cli::RelpipeCLIException(L"Usage: relpipe-tr-cut <relationNameRegExp> <attributeNameRegExp> [<otherAttributeNameRegExp> ...]", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND);
       
    68 		}
       
    69 	}
    61 	}
    70 
    62 
    71 	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
    63 	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
    72 		currentRecord.resize(attributes.size());
    64 		currentRecord.resize(attributes.size());
    73 		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)
    65 		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)
    76 			allWriterMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())});
    68 			allWriterMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())});
    77 		}
    69 		}
    78 
    70 
    79 		vector<writer::AttributeMetadata> writerMetadata;
    71 		vector<writer::AttributeMetadata> writerMetadata;
    80 
    72 
    81 		filterCurrentRelation = regex_match(name, relationNameRegEx);
    73 		using E = RelationConfiguration::ENTITY;
    82 		if (filterCurrentRelation) {
    74 
       
    75 		currentFilter = nullptr;
       
    76 		for (int i = 0; i < configuration.relationConfigurations.size(); i++) {
       
    77 			if (regex_match(name, configuration.relationConfigurations[i].relationPattern) ^ configuration.relationConfigurations[i].invertMatch[E::RELATION]) {
       
    78 				currentFilter = &configuration.relationConfigurations[i];
       
    79 				break;
       
    80 			}
       
    81 		}
       
    82 
       
    83 		if (currentFilter) {
    83 			currentAttributeMapping.clear();
    84 			currentAttributeMapping.clear();
    84 			for (wregex attributeNameRegEx : attributeNameRegExes) {
    85 			for (std::wregex attributePattern : currentFilter->attributePatterns) {
    85 				for (int i = 0; i < allWriterMetadata.size(); i++) {
    86 				for (int i = 0; i < allWriterMetadata.size(); i++) {
    86 					if (regex_match(allWriterMetadata[i].attributeName, attributeNameRegEx)) currentAttributeMapping.push_back(i);
    87 					if (regex_match(allWriterMetadata[i].attributeName, attributePattern) ^ currentFilter->invertMatch[E::ATTRIBUTE]) currentAttributeMapping.push_back(i);
    87 				}
    88 				}
    88 			}
    89 			}
    89 
    90 
    90 			if (currentAttributeMapping.empty()) return; // No attribute matches → DROP whole relation
    91 			if (currentAttributeMapping.empty()) return; // No attribute matches → DROP whole relation
    91 			for (integer_t i : currentAttributeMapping) writerMetadata.push_back(allWriterMetadata[i]);
    92 			for (integer_t i : currentAttributeMapping) writerMetadata.push_back(allWriterMetadata[i]);
    95 
    96 
    96 		relationalWriter->startRelation(name, writerMetadata, true);
    97 		relationalWriter->startRelation(name, writerMetadata, true);
    97 	}
    98 	}
    98 
    99 
    99 	void attribute(const string_t& value) override {
   100 	void attribute(const string_t& value) override {
   100 		if (filterCurrentRelation) {
   101 		if (currentFilter) {
   101 			currentRecord[currentAttributeIndex] = value;
   102 			currentRecord[currentAttributeIndex] = value;
   102 			currentAttributeIndex++;
   103 			currentAttributeIndex++;
   103 
   104 
   104 			if (currentAttributeIndex > 0 && currentAttributeIndex % currentRecord.size() == 0) {
   105 			if (currentAttributeIndex > 0 && currentAttributeIndex % currentRecord.size() == 0) {
   105 				for (integer_t i : currentAttributeMapping) relationalWriter->writeAttribute(currentRecord[i]);
   106 				for (integer_t i : currentAttributeMapping) relationalWriter->writeAttribute(currentRecord[i]);