47 class GrepHandler : public RelationalReaderStringHadler { |
47 class GrepHandler : public RelationalReaderStringHadler { |
48 private: |
48 private: |
49 shared_ptr<writer::RelationalWriter> relationalWriter; |
49 shared_ptr<writer::RelationalWriter> relationalWriter; |
50 |
50 |
51 wregex relationNameRegEx; |
51 wregex relationNameRegEx; |
52 wregex attributeNameRegEx; |
52 vector<wregex> attributeNameRegExes; |
53 wregex searchRegEx; |
|
54 |
53 |
55 vector<boolean_t> currentSearchableAttributes; |
54 vector<integer_t> currentAttributeMapping; |
56 vector<string_t> currentRecord; |
55 vector<string_t> currentRecord; |
57 integer_t currentAttributeIndex = 0; |
56 integer_t currentAttributeIndex = 0; |
58 boolean_t includeCurrentRecord = false; |
|
59 boolean_t filterCurrentRelation = false; |
57 boolean_t filterCurrentRelation = false; |
60 |
58 |
61 public: |
59 public: |
62 |
60 |
63 GrepHandler(ostream& output, const vector<string_t>& arguments) { |
61 GrepHandler(ostream& output, const vector<string_t>& arguments) { |
64 relationalWriter.reset(writer::Factory::create(output)); |
62 relationalWriter.reset(writer::Factory::create(output)); |
65 |
63 |
66 if (arguments.size() == 3) { |
64 if (arguments.size() >= 2) { |
67 relationNameRegEx = wregex(arguments[0]); |
65 relationNameRegEx = wregex(arguments[0]); |
68 attributeNameRegEx = wregex(arguments[1]); |
66 for (int i = 1; i < arguments.size(); i++) attributeNameRegExes.push_back(wregex(arguments[i])); |
69 searchRegEx = wregex(arguments[2]); |
|
70 } else { |
67 } else { |
71 throw cli::RelpipeCLIException(L"Usage: relpipe-tr-cut <relationNameRegExp> <attributeNameRegExp> <searchRegExp>", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND); |
68 throw cli::RelpipeCLIException(L"Usage: relpipe-tr-cut <relationNameRegExp> <attributeNameRegExp> [<otherAttributeNameRegExp> ...]", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND); |
72 } |
69 } |
73 } |
70 } |
74 |
71 |
75 void startRelation(string_t name, vector<AttributeMetadata> attributes) override { |
72 void startRelation(string_t name, vector<AttributeMetadata> attributes) override { |
76 // TODO: move to a reusable method (or use same metadata on both reader and writer side?) |
73 // TODO: move to a reusable method (or use same metadata on both reader and writer side?) |
77 vector<writer::AttributeMetadata> writerMetadata; |
74 vector<writer::AttributeMetadata> allWriterMetadata; |
78 for (AttributeMetadata readerMetadata : attributes) { |
75 for (AttributeMetadata readerMetadata : attributes) { |
79 writerMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())}); |
76 allWriterMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())}); |
80 } |
77 } |
81 |
78 |
|
79 vector<writer::AttributeMetadata> writerMetadata; |
82 |
80 |
83 currentRecord.resize(attributes.size()); |
|
84 currentSearchableAttributes.resize(attributes.size(), false); |
|
85 filterCurrentRelation = regex_match(name, relationNameRegEx); |
81 filterCurrentRelation = regex_match(name, relationNameRegEx); |
86 if (filterCurrentRelation) { |
82 if (filterCurrentRelation) { |
87 for (int i = 0; i < currentSearchableAttributes.size(); i++) { |
83 currentAttributeMapping.clear(); |
88 currentSearchableAttributes[i] = regex_match(attributes[i].getAttributeName(), attributeNameRegEx); |
84 for (wregex attributeNameRegEx : attributeNameRegExes) { |
|
85 for (int i = 0; i < allWriterMetadata.size(); i++) { |
|
86 if (regex_match(allWriterMetadata[i].attributeName, attributeNameRegEx)) currentAttributeMapping.push_back(i); |
|
87 } |
89 } |
88 } |
|
89 |
|
90 if (currentAttributeMapping.empty()) throw cli::RelpipeCLIException(L"No attribute matches. Relation must have at least one attribute.", cli::CLI::EXIT_CODE_BAD_SYNTAX); // TODO: review exit code |
|
91 for (integer_t i : currentAttributeMapping) writerMetadata.push_back(allWriterMetadata[i]); |
|
92 } else { |
|
93 writerMetadata = allWriterMetadata; |
90 } |
94 } |
91 |
95 |
|
96 currentRecord.resize(allWriterMetadata.size()); |
92 relationalWriter->startRelation(name, writerMetadata, true); |
97 relationalWriter->startRelation(name, writerMetadata, true); |
93 } |
98 } |
94 |
99 |
95 void attribute(const string_t& value) override { |
100 void attribute(const string_t& value) override { |
96 if (filterCurrentRelation) { |
101 if (filterCurrentRelation) { |
97 currentRecord[currentAttributeIndex] = value; |
102 currentRecord[currentAttributeIndex] = value; |
|
103 currentAttributeIndex++; |
98 |
104 |
99 if (currentSearchableAttributes[currentAttributeIndex]) { |
105 if (currentAttributeIndex > 0 && currentAttributeIndex % currentRecord.size() == 0) { |
100 includeCurrentRecord |= regex_search(value, searchRegEx); |
106 for (integer_t i : currentAttributeMapping) relationalWriter->writeAttribute(currentRecord[i]); |
101 } |
107 } |
102 |
108 |
103 currentAttributeIndex++; |
109 currentAttributeIndex = currentAttributeIndex % currentRecord.size(); |
104 |
|
105 if (currentAttributeIndex > 0 && currentAttributeIndex % currentSearchableAttributes.size() == 0) { |
|
106 if (includeCurrentRecord) for (string_t v : currentRecord) relationalWriter->writeAttribute(v); |
|
107 includeCurrentRecord = false; |
|
108 } |
|
109 |
|
110 currentAttributeIndex = currentAttributeIndex % currentSearchableAttributes.size(); |
|
111 } else { |
110 } else { |
112 relationalWriter->writeAttribute(value); |
111 relationalWriter->writeAttribute(value); |
113 } |
112 } |
114 } |
113 } |
115 |
114 |