src/PythonHandler.h
branch v_0
changeset 11 5e0b317f4100
parent 10 2c8f5e05c7b7
child 12 ee69be2212fa
equal deleted inserted replaced
10:2c8f5e05c7b7 11:5e0b317f4100
    48 
    48 
    49 class GrepHandler : public RelationalReaderStringHadler {
    49 class GrepHandler : public RelationalReaderStringHadler {
    50 private:
    50 private:
    51 	shared_ptr<writer::RelationalWriter> relationalWriter;
    51 	shared_ptr<writer::RelationalWriter> relationalWriter;
    52 
    52 
       
    53 	wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: also support other encodings.
    53 	wchar_t* pythonProgramName;
    54 	wchar_t* pythonProgramName;
    54 
    55 
    55 	wregex relationNameRegEx;
    56 	wregex relationNameRegEx;
    56 	wregex attributeNameRegEx;
    57 	string_t pythonCode;
    57 	wregex searchRegEx;
       
    58 
    58 
    59 	vector<boolean_t> currentSearchableAttributes;
       
    60 	vector<string_t> currentRecord;
    59 	vector<string_t> currentRecord;
    61 	integer_t currentAttributeIndex = 0;
    60 	integer_t currentAttributeIndex = 0;
    62 	boolean_t includeCurrentRecord = false;
    61 	boolean_t includeCurrentRecord = true;
    63 	boolean_t filterCurrentRelation = false;
    62 	boolean_t filterCurrentRelation = false;
    64 
    63 
    65 public:
    64 public:
    66 
    65 
    67 	GrepHandler(ostream& output, const vector<string_t>& arguments) {
    66 	GrepHandler(ostream& output, const vector<string_t>& arguments) {
    69 
    68 
    70 		pythonProgramName = Py_DecodeLocale("relpipe-tr-python", NULL);
    69 		pythonProgramName = Py_DecodeLocale("relpipe-tr-python", NULL);
    71 		Py_SetProgramName(pythonProgramName);
    70 		Py_SetProgramName(pythonProgramName);
    72 		Py_Initialize();
    71 		Py_Initialize();
    73 		
    72 		
    74 		//PyRun_SimpleString("print('Hello from Python!')");
    73 		if (arguments.size() == 2) {
    75 
       
    76 		if (arguments.size() == 3) {
       
    77 			relationNameRegEx = wregex(arguments[0]);
    74 			relationNameRegEx = wregex(arguments[0]);
    78 			attributeNameRegEx = wregex(arguments[1]);
    75 			pythonCode = arguments[1];
    79 			searchRegEx = wregex(arguments[2]);
       
    80 		} else {
    76 		} else {
    81 			PyMem_RawFree(pythonProgramName);
    77 			PyMem_RawFree(pythonProgramName);
    82 			throw cli::RelpipeCLIException(L"Usage: relpipe-tr-python <relationNameRegExp> <attributeNameRegExp> <searchRegExp>", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND);
    78 			throw cli::RelpipeCLIException(L"Usage: relpipe-tr-python <relationNameRegExp> <pythonCode>", cli::CLI::EXIT_CODE_UNKNOWN_COMMAND);
    83 		}
    79 		}
    84 	}
    80 	}
    85 
    81 
    86 	virtual ~GrepHandler() {
    82 	virtual ~GrepHandler() {
    87 		Py_FinalizeEx();
    83 		Py_FinalizeEx();
    95 			writerMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())});
    91 			writerMetadata.push_back({readerMetadata.getAttributeName(), relationalWriter->toTypeId(readerMetadata.getTypeName())});
    96 		}
    92 		}
    97 
    93 
    98 
    94 
    99 		currentRecord.resize(attributes.size());
    95 		currentRecord.resize(attributes.size());
   100 		currentSearchableAttributes.resize(attributes.size(), false);
       
   101 		filterCurrentRelation = regex_match(name, relationNameRegEx);
    96 		filterCurrentRelation = regex_match(name, relationNameRegEx);
   102 		if (filterCurrentRelation) {
       
   103 			for (int i = 0; i < currentSearchableAttributes.size(); i++) {
       
   104 				currentSearchableAttributes[i] = regex_match(attributes[i].getAttributeName(), attributeNameRegEx);
       
   105 			}
       
   106 		}
       
   107 
       
   108 		relationalWriter->startRelation(name, writerMetadata, true);
    97 		relationalWriter->startRelation(name, writerMetadata, true);
   109 	}
    98 	}
   110 
    99 
   111 	void attribute(const string_t& value) override {
   100 	void attribute(const string_t& value) override {
   112 		if (filterCurrentRelation) {
   101 		if (filterCurrentRelation) {
   113 			currentRecord[currentAttributeIndex] = value;
   102 			currentRecord[currentAttributeIndex] = value;
       
   103 			currentAttributeIndex++;
   114 
   104 
   115 			if (currentSearchableAttributes[currentAttributeIndex]) {
   105 			if (currentAttributeIndex > 0 && currentAttributeIndex % currentRecord.size() == 0) {
   116 				includeCurrentRecord |= regex_search(value, searchRegEx);
   106 				PyRun_SimpleString(convertor.to_bytes(pythonCode).c_str());
       
   107 				if (includeCurrentRecord) for (string_t v : currentRecord) relationalWriter->writeAttribute(v);
       
   108 				includeCurrentRecord = true;
   117 			}
   109 			}
   118 
   110 
   119 			currentAttributeIndex++;
   111 			currentAttributeIndex = currentAttributeIndex % currentRecord.size();
   120 
       
   121 			if (currentAttributeIndex > 0 && currentAttributeIndex % currentSearchableAttributes.size() == 0) {
       
   122 				if (includeCurrentRecord) for (string_t v : currentRecord) relationalWriter->writeAttribute(v);
       
   123 				includeCurrentRecord = false;
       
   124 			}
       
   125 
       
   126 			currentAttributeIndex = currentAttributeIndex % currentSearchableAttributes.size();
       
   127 		} else {
   112 		} else {
   128 			relationalWriter->writeAttribute(value);
   113 			relationalWriter->writeAttribute(value);
   129 		}
   114 		}
   130 	}
   115 	}
   131 
   116