src/CutHandler.h
branch v_0
changeset 10 9ec1290b4a9d
parent 9 f06781a5071b
child 11 9d528c98912d
equal deleted inserted replaced
9:f06781a5071b 10:9ec1290b4a9d
    47 class GrepHandler : public RelationalReaderStringHadler {
    47 class GrepHandler : public RelationalReaderStringHadler {
    48 private:
    48 private:
    49 	shared_ptr<writer::RelationalWriter> relationalWriter;
    49 	shared_ptr<writer::RelationalWriter> relationalWriter;
    50 
    50 
    51 	wregex relationNameRegEx;
    51 	wregex relationNameRegEx;
    52 	wregex attributeNameRegEx;
    52 	vector<wregex> attributeNameRegExes;
    53 	wregex searchRegEx;
       
    54 
    53 
    55 	vector<boolean_t> currentSearchableAttributes;
    54 	vector<integer_t> currentAttributeMapping;
    56 	vector<string_t> currentRecord;
    55 	vector<string_t> currentRecord;
    57 	integer_t currentAttributeIndex = 0;
    56 	integer_t currentAttributeIndex = 0;
    58 	boolean_t includeCurrentRecord = false;
       
    59 	boolean_t filterCurrentRelation = false;
    57 	boolean_t filterCurrentRelation = false;
    60 
    58 
    61 public:
    59 public:
    62 
    60 
    63 	GrepHandler(ostream& output, const vector<string_t>& arguments) {
    61 	GrepHandler(ostream& output, const vector<string_t>& arguments) {
    64 		relationalWriter.reset(writer::Factory::create(output));
    62 		relationalWriter.reset(writer::Factory::create(output));
    65 
    63 
    66 		if (arguments.size() == 3) {
    64 		if (arguments.size() >= 2) {
    67 			relationNameRegEx = wregex(arguments[0]);
    65 			relationNameRegEx = wregex(arguments[0]);
    68 			attributeNameRegEx = wregex(arguments[1]);
    66 			for (int i = 1; i < arguments.size(); i++) attributeNameRegExes.push_back(wregex(arguments[i]));
    69 			searchRegEx = wregex(arguments[2]);
       
    70 		} else {
    67 		} else {
    71 			throw cli::RelpipeCLIException(L"Usage: relpipe-tr-cut <relationNameRegExp> <attributeNameRegExp> <searchRegExp>", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND);
    68 			throw cli::RelpipeCLIException(L"Usage: relpipe-tr-cut <relationNameRegExp> <attributeNameRegExp> [<otherAttributeNameRegExp> ...]", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND);
    72 		}
    69 		}
    73 	}
    70 	}
    74 
    71 
    75 	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
    72 	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
    76 		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)
    73 		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)
    77 		vector<writer::AttributeMetadata> writerMetadata;
    74 		vector<writer::AttributeMetadata> allWriterMetadata;
    78 		for (AttributeMetadata readerMetadata : attributes) {
    75 		for (AttributeMetadata readerMetadata : attributes) {
    79 			writerMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())});
    76 			allWriterMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())});
    80 		}
    77 		}
    81 
    78 
       
    79 		vector<writer::AttributeMetadata> writerMetadata;
    82 
    80 
    83 		currentRecord.resize(attributes.size());
       
    84 		currentSearchableAttributes.resize(attributes.size(), false);
       
    85 		filterCurrentRelation = regex_match(name, relationNameRegEx);
    81 		filterCurrentRelation = regex_match(name, relationNameRegEx);
    86 		if (filterCurrentRelation) {
    82 		if (filterCurrentRelation) {
    87 			for (int i = 0; i < currentSearchableAttributes.size(); i++) {
    83 			currentAttributeMapping.clear();
    88 				currentSearchableAttributes[i] = regex_match(attributes[i].getAttributeName(), attributeNameRegEx);
    84 			for (wregex attributeNameRegEx : attributeNameRegExes) {
       
    85 				for (int i = 0; i < allWriterMetadata.size(); i++) {
       
    86 					if (regex_match(allWriterMetadata[i].attributeName, attributeNameRegEx)) currentAttributeMapping.push_back(i);
       
    87 				}
    89 			}
    88 			}
       
    89 
       
    90 			if (currentAttributeMapping.empty()) throw cli::RelpipeCLIException(L"No attribute matches. Relation must have at least one attribute.", cli::CLI::EXIT_CODE_BAD_SYNTAX); // TODO: review exit code
       
    91 			for (integer_t i : currentAttributeMapping) writerMetadata.push_back(allWriterMetadata[i]);
       
    92 		} else {
       
    93 			writerMetadata = allWriterMetadata;
    90 		}
    94 		}
    91 
    95 
       
    96 		currentRecord.resize(allWriterMetadata.size());
    92 		relationalWriter->startRelation(name, writerMetadata, true);
    97 		relationalWriter->startRelation(name, writerMetadata, true);
    93 	}
    98 	}
    94 
    99 
    95 	void attribute(const string_t& value) override {
   100 	void attribute(const string_t& value) override {
    96 		if (filterCurrentRelation) {
   101 		if (filterCurrentRelation) {
    97 			currentRecord[currentAttributeIndex] = value;
   102 			currentRecord[currentAttributeIndex] = value;
       
   103 			currentAttributeIndex++;
    98 
   104 
    99 			if (currentSearchableAttributes[currentAttributeIndex]) {
   105 			if (currentAttributeIndex > 0 && currentAttributeIndex % currentRecord.size() == 0) {
   100 				includeCurrentRecord |= regex_search(value, searchRegEx);
   106 				for (integer_t i : currentAttributeMapping) relationalWriter->writeAttribute(currentRecord[i]);
   101 			}
   107 			}
   102 
   108 
   103 			currentAttributeIndex++;
   109 			currentAttributeIndex = currentAttributeIndex % currentRecord.size();
   104 
       
   105 			if (currentAttributeIndex > 0 && currentAttributeIndex % currentSearchableAttributes.size() == 0) {
       
   106 				if (includeCurrentRecord) for (string_t v : currentRecord) relationalWriter->writeAttribute(v);
       
   107 				includeCurrentRecord = false;
       
   108 			}
       
   109 
       
   110 			currentAttributeIndex = currentAttributeIndex % currentSearchableAttributes.size();
       
   111 		} else {
   110 		} else {
   112 			relationalWriter->writeAttribute(value);
   111 			relationalWriter->writeAttribute(value);
   113 		}
   112 		}
   114 	}
   113 	}
   115 
   114