# HG changeset patch # User František Kučera # Date 1544473046 -3600 # Node ID f66c759d1111e7f26af6c0c7191396920cf5ffb3 # Parent aebaf590a83836de15fda69b514226a1a392015a first working grep version diff -r aebaf590a838 -r f66c759d1111 src/GrepHandler.h --- a/src/GrepHandler.h Mon Dec 10 15:19:14 2018 +0100 +++ b/src/GrepHandler.h Mon Dec 10 21:17:26 2018 +0100 @@ -33,35 +33,84 @@ #include +#include + namespace relpipe { namespace tr { namespace grep { +using namespace std; using namespace relpipe; using namespace relpipe::reader; using namespace relpipe::reader::handlers; class GrepHandler : public RelationalReaderStringHadler { private: - std::shared_ptr relationalWriter; + shared_ptr relationalWriter; + + wregex relationNameRegEx; + wregex attributeNameRegEx; + wregex searchRegEx; + + vector currentSearchableAttributes; + vector currentRecord; + integer_t currentAttributeIndex = 0; + boolean_t includeCurrentRecord = false; + boolean_t filterCurrentRelation = false; + public: - GrepHandler(std::ostream& output) { + GrepHandler(ostream& output, const vector& arguments) { relationalWriter.reset(writer::Factory::create(output)); + + if (arguments.size() == 3) { + relationNameRegEx = wregex(arguments[0]); + attributeNameRegEx = wregex(arguments[1]); + searchRegEx = wregex(arguments[2]); + } else { + throw cli::RelpipeCLIException(L"Usage: relpipe-tr-grep ", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND); + } } - void startRelation(string_t name, std::vector attributes) override { + void startRelation(string_t name, vector attributes) override { // TODO: move to a reusable method (or use same metadata on both reader and writer side?) - std::vector writerMetadata; + vector writerMetadata; for (AttributeMetadata readerMetadata : attributes) { writerMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())}); } - + + + currentRecord.resize(attributes.size()); + currentSearchableAttributes.resize(attributes.size(), false); + filterCurrentRelation = regex_match(name, relationNameRegEx); + if (filterCurrentRelation) { + for (int i = 0; i < currentSearchableAttributes.size(); i++) { + currentSearchableAttributes[i] = regex_match(attributes[i].getAttributeName(), attributeNameRegEx); + } + } + relationalWriter->startRelation(name, writerMetadata, true); } void attribute(const string_t& value) override { - relationalWriter->writeAttribute(value); + if (filterCurrentRelation) { + currentRecord[currentAttributeIndex] = value; + + if (currentSearchableAttributes[currentAttributeIndex]) { + includeCurrentRecord |= regex_search(value, searchRegEx); + } + + currentAttributeIndex++; + + if (currentAttributeIndex > 0 && currentAttributeIndex % currentSearchableAttributes.size() == 0) { + if (includeCurrentRecord) for (string_t v : currentRecord) relationalWriter->writeAttribute(v); + includeCurrentRecord = false; + } + + currentAttributeIndex = currentAttributeIndex % currentSearchableAttributes.size(); + } else { + relationalWriter->writeAttribute(value); + } } void endOfPipe() { diff -r aebaf590a838 -r f66c759d1111 src/relpipe-tr-grep.cpp --- a/src/relpipe-tr-grep.cpp Mon Dec 10 15:19:14 2018 +0100 +++ b/src/relpipe-tr-grep.cpp Mon Dec 10 21:17:26 2018 +0100 @@ -42,22 +42,22 @@ setlocale(LC_ALL, ""); CLI::untieStdIO(); CLI cli(argc, argv); - + int resultCode = CLI::EXIT_CODE_UNEXPECTED_ERROR; try { std::shared_ptr reader(Factory::create(std::cin)); - GrepHandler handler(std::cout); + GrepHandler handler(std::cout, cli.arguments()); reader->addHandler(&handler); reader->process(); resultCode = CLI::EXIT_CODE_SUCCESS; - } catch (RelpipeCLIException e) { + } catch (RelpipeCLIException& e) { fwprintf(stderr, L"Caught CLI exception: %ls\n", e.getMessge().c_str()); fwprintf(stderr, L"Debug: Input stream: eof=%ls, lastRead=%d\n", (cin.eof() ? L"true" : L"false"), cin.gcount()); resultCode = e.getExitCode(); - } catch (RelpipeReaderException e) { + } catch (RelpipeReaderException& e) { fwprintf(stderr, L"Caught Reader exception: %ls\n", e.getMessge().c_str()); fwprintf(stderr, L"Debug: Input stream: eof=%ls, lastRead=%d\n", (cin.eof() ? L"true" : L"false"), cin.gcount()); resultCode = CLI::EXIT_CODE_DATA_ERROR;