32 |
32 |
33 #include <relpipe/writer/Factory.h> |
33 #include <relpipe/writer/Factory.h> |
34 |
34 |
35 #include <relpipe/cli/RelpipeCLIException.h> |
35 #include <relpipe/cli/RelpipeCLIException.h> |
36 |
36 |
|
37 #include "Configuration.h" |
|
38 |
37 namespace relpipe { |
39 namespace relpipe { |
38 namespace tr { |
40 namespace tr { |
39 namespace grep { |
41 namespace cut { |
40 |
42 |
41 using namespace std; |
43 using namespace std; |
42 using namespace relpipe; |
44 using namespace relpipe; |
43 using namespace relpipe::reader; |
45 using namespace relpipe::reader; |
44 using namespace relpipe::reader::handlers; |
46 using namespace relpipe::reader::handlers; |
45 |
47 |
46 class CutHandler : public RelationalReaderStringHandler { |
48 class CutHandler : public RelationalReaderStringHandler { |
47 private: |
49 private: |
// Member state of CutHandler. These rows interleave two revisions of the file:
// the older one keeps raw regular expressions (relationNameRegEx, attributeNameRegExes,
// filterCurrentRelation), the newer one keeps a Configuration plus a pointer to the
// relation configuration that matched the relation currently being read (currentFilter).
// Writer that receives the relation header and the selected attribute values.
48 shared_ptr<writer::RelationalWriter> relationalWriter; |
50 shared_ptr<writer::RelationalWriter> relationalWriter; |
49 |
// Newer revision: holds the relationConfigurations that startRelation() iterates over.
51 Configuration configuration; |
// Older revision: pattern the relation name is matched against in startRelation().
50 wregex relationNameRegEx; |
// Newer revision: points into configuration.relationConfigurations; nullptr means that
// no relation configuration matched the current relation.
52 RelationConfiguration* currentFilter = nullptr; |
// Older revision: patterns used to select attributes by name.
51 vector<wregex> attributeNameRegExes; |
|
52 |
53 |
// Indices (into the input attribute list) of the attributes that are written to the output.
53 vector<integer_t> currentAttributeMapping; |
54 vector<integer_t> currentAttributeMapping; |
// Buffer for the values of one input record; resized to the attribute count in startRelation().
54 vector<string_t> currentRecord; |
55 vector<string_t> currentRecord; |
// Running count of attribute values received by attribute().
55 integer_t currentAttributeIndex = 0; |
56 integer_t currentAttributeIndex = 0; |
// Older revision: result of matching the relation name against relationNameRegEx.
56 boolean_t filterCurrentRelation = false; |
|
57 |
57 |
58 public: |
58 public: |
59 |
59 |
// Constructor; two revisions are interleaved in the rows below.
//
// Older revision (rows 60-69): creates the writer from the given output stream via
// writer::Factory::create() and parses the CLI arguments: arguments[0] becomes the
// relation name pattern, every following argument becomes an attribute name pattern.
// With fewer than two arguments it throws cli::RelpipeCLIException carrying the usage
// text and cli::CLI::EXIT_CODE_UNKNOWN_COMMAND.
//
// Newer revision (rows 60-61): receives an already constructed writer and a Configuration
// and only stores both through the member initializer list; the body is empty.
60 CutHandler(ostream& output, const vector<string_t>& arguments) { |
60 CutHandler(shared_ptr<writer::RelationalWriter> relationalWriter, Configuration configuration) : relationalWriter(relationalWriter), configuration(configuration) { |
61 relationalWriter.reset(writer::Factory::create(output)); |
|
62 |
|
63 if (arguments.size() >= 2) { |
|
64 relationNameRegEx = wregex(arguments[0]); |
|
// NOTE(review): `int i` is compared with the unsigned result of size().
65 for (int i = 1; i < arguments.size(); i++) attributeNameRegExes.push_back(wregex(arguments[i])); |
|
66 } else { |
|
67 throw cli::RelpipeCLIException(L"Usage: relpipe-tr-cut <relationNameRegExp> <attributeNameRegExp> [<otherAttributeNameRegExp> ...]", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND); |
|
68 } |
|
69 } |
61 } |
70 |
62 |
// Handles the start of an input relation: decides whether the relation is filtered,
// works out which attributes are kept, and announces the relation to the writer.
// Two revisions are interleaved below, and some unchanged rows are not shown in this view:
// the declaration of allWriterMetadata, the header of the loop that declares readerMetadata,
// and the rows between the writerMetadata loop and the final startRelation() call.
71 void startRelation(string_t name, vector<AttributeMetadata> attributes) override { |
63 void startRelation(string_t name, vector<AttributeMetadata> attributes) override { |
// One buffer slot per input attribute.
72 currentRecord.resize(attributes.size()); |
64 currentRecord.resize(attributes.size()); |
73 // TODO: move to a reusable method (or use same metadata on both reader and writer side?) |
65 // TODO: move to a reusable method (or use same metadata on both reader and writer side?) |
// Converts the reader-side metadata (name + type name) into writer-side metadata (name + type id).
76 allWriterMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())}); |
68 allWriterMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())}); |
77 } |
69 } |
78 |
70 |
79 vector<writer::AttributeMetadata> writerMetadata; |
71 vector<writer::AttributeMetadata> writerMetadata; |
80 |
72 |
// Older revision: the relation is filtered when its whole name matches relationNameRegEx.
81 filterCurrentRelation = regex_match(name, relationNameRegEx); |
73 using E = RelationConfiguration::ENTITY; |
82 if (filterCurrentRelation) { |
74 |
|
// Newer revision: pick the first relation configuration whose pattern matches the name;
// invertMatch[E::RELATION] flips the match result (XOR). currentFilter stays nullptr
// when no configuration applies.
75 currentFilter = nullptr; |
|
// NOTE(review): `int i` is compared with the unsigned result of size().
76 for (int i = 0; i < configuration.relationConfigurations.size(); i++) { |
|
77 if (regex_match(name, configuration.relationConfigurations[i].relationPattern) ^ configuration.relationConfigurations[i].invertMatch[E::RELATION]) { |
|
78 currentFilter = &configuration.relationConfigurations[i]; |
|
79 break; |
|
80 } |
|
81 } |
|
82 |
|
83 if (currentFilter) { |
83 currentAttributeMapping.clear(); |
84 currentAttributeMapping.clear(); |
// The outer loop walks the patterns and the inner loop walks the attributes, so output
// attributes follow pattern order, and an attribute accepted by several patterns is added
// once per pattern. In the newer revision invertMatch[E::ATTRIBUTE] flips each individual
// pattern test.
// NOTE(review): both range-for loops copy the regex object on every iteration — a const
// reference would avoid that; confirm before changing.
84 for (wregex attributeNameRegEx : attributeNameRegExes) { |
85 for (std::wregex attributePattern : currentFilter->attributePatterns) { |
85 for (int i = 0; i < allWriterMetadata.size(); i++) { |
86 for (int i = 0; i < allWriterMetadata.size(); i++) { |
86 if (regex_match(allWriterMetadata[i].attributeName, attributeNameRegEx)) currentAttributeMapping.push_back(i); |
87 if (regex_match(allWriterMetadata[i].attributeName, attributePattern) ^ currentFilter->invertMatch[E::ATTRIBUTE]) currentAttributeMapping.push_back(i); |
87 } |
88 } |
88 } |
89 } |
89 |
90 |
// Returning here skips the writer's startRelation() call below.
90 if (currentAttributeMapping.empty()) return; // No attribute matches → DROP whole relation |
91 if (currentAttributeMapping.empty()) return; // No attribute matches → DROP whole relation |
// Output metadata = the selected attributes, in mapping order.
91 for (integer_t i : currentAttributeMapping) writerMetadata.push_back(allWriterMetadata[i]); |
92 for (integer_t i : currentAttributeMapping) writerMetadata.push_back(allWriterMetadata[i]); |
95 |
96 |
96 relationalWriter->startRelation(name, writerMetadata, true); |
97 relationalWriter->startRelation(name, writerMetadata, true); |
97 } |
98 } |
98 |
99 |
99 void attribute(const string_t& value) override { |
100 void attribute(const string_t& value) override { |
100 if (filterCurrentRelation) { |
101 if (currentFilter) { |
101 currentRecord[currentAttributeIndex] = value; |
102 currentRecord[currentAttributeIndex] = value; |
102 currentAttributeIndex++; |
103 currentAttributeIndex++; |
103 |
104 |
104 if (currentAttributeIndex > 0 && currentAttributeIndex % currentRecord.size() == 0) { |
105 if (currentAttributeIndex > 0 && currentAttributeIndex % currentRecord.size() == 0) { |
105 for (integer_t i : currentAttributeMapping) relationalWriter->writeAttribute(currentRecord[i]); |
106 for (integer_t i : currentAttributeMapping) relationalWriter->writeAttribute(currentRecord[i]); |