read and write variables, generate real relation v_0
author František Kučera <franta-hg@frantovo.cz>
Wed, 08 May 2019 00:21:55 +0200
branch v_0
changeset 21 d46a727b7965
parent 20 f937ad57351f
child 22 98acfdc4c20b
read and write variables, generate real relation
src/AwkHandler.h
--- a/src/AwkHandler.h	Tue May 07 22:21:56 2019 +0200
+++ b/src/AwkHandler.h	Wed May 08 00:21:55 2019 +0200
@@ -203,8 +203,6 @@
 	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
 		cleanUp();
 
-		currentReaderMetadata = attributes;
-
 		for (int i = 0; i < configuration.relationConfigurations.size(); i++) {
 			if (regex_match(name, wregex(configuration.relationConfigurations[i].relation))) {
 				currentRelationConfiguration = &configuration.relationConfigurations[i];
@@ -212,6 +210,16 @@
 			}
 		}
 
+		currentReaderMetadata = attributes;
+		// TODO: move to a reusable method (or use same metadata on both reader and writer side?)		
+		if (currentRelationConfiguration && currentRelationConfiguration->writerMetadata.size()) {
+			if (currentRelationConfiguration->inputAttributesPrepend) add(currentReaderMetadata, currentWriterMetadata);
+			currentWriterMetadata.insert(currentWriterMetadata.end(), currentRelationConfiguration->writerMetadata.begin(), currentRelationConfiguration->writerMetadata.end());
+			if (currentRelationConfiguration->inputAttributesAppend) add(currentReaderMetadata, currentWriterMetadata);
+		} else {
+			add(currentReaderMetadata, currentWriterMetadata);
+		}
+
 		if (currentRelationConfiguration) {
 			int awkInputReaderFD;
 			int awkOutputReaderFD;
@@ -261,7 +269,40 @@
 				awkScript << L"};" << std::endl;
 				awkScript << std::endl;
 
-				awkScript << currentRelationConfiguration->awkForEach << std::endl;
+				awkScript << L"function _escape(value) {" << std::endl;
+				// TODO: escape function
+				awkScript << L"return value;" << std::endl;
+				awkScript << L"};" << std::endl;
+				awkScript << std::endl;
+
+				awkScript << L"function _unescape(value) {" << std::endl;
+				// TODO: unescape function
+				awkScript << L"return value;" << std::endl;
+				awkScript << L"};" << std::endl;
+				awkScript << std::endl;
+
+				awkScript << L"function _readVariables() {" << std::endl;
+				for (int i = 0; i < currentReaderMetadata.size(); i++) awkScript << a2v(currentReaderMetadata[i].getAttributeName()) << L"=_unescape($" << (i + 1) << L");" << std::endl;
+				awkScript << L"};" << std::endl;
+				awkScript << std::endl;
+
+				awkScript << L"function _writeVariables() {" << std::endl;
+				awkScript << L"NF=" << currentWriterMetadata.size() << ";" << std::endl;
+				for (int i = 0; i < currentWriterMetadata.size(); i++) awkScript << L"$" << (i + 1) << L"=_escape(" << a2v(currentWriterMetadata[i].attributeName) << L");" << std::endl;
+				awkScript << L"};" << std::endl;
+				awkScript << std::endl;
+
+				awkScript << L"function record() {" << std::endl;
+				awkScript << L"_writeVariables();" << std::endl;
+				awkScript << L"print;" << std::endl;
+				awkScript << L"};" << std::endl;
+				awkScript << std::endl;
+
+				awkScript << L"{ _readVariables();  }" << std::endl; // read line (input attributes) into AWK variables
+				awkScript << L"{ _writeVariables(); }" << std::endl; // write AWK variables to the line (so it matches the output attributes and can be implicitly printed without explicit record() call)
+				awkScript << std::endl;
+
+				awkScript << currentRelationConfiguration->awkForEach << std::endl; // user's code – can modify variables, filter results or explicitly call record() (can generate additional records or duplicate them)
 
 				// CLI arguments:
 				std::vector<std::string> args;
@@ -290,10 +331,7 @@
 					if (currentRelationConfiguration->drop) {
 						// TODO: omit whole this process and pipe AWK output to /dev/null?
 					} else {
-						// FIXME: currentWriterMetadata
-						relationalWriter->startRelation(name,{
-							{L"message", writer::TypeId::STRING},
-						}, true);
+						relationalWriter->startRelation(name, currentWriterMetadata, true);
 					}
 
 					processAwkOutput(awkOutputReaderFD);
@@ -304,7 +342,6 @@
 				}
 			}
 		} else {
-			add(currentReaderMetadata, currentWriterMetadata);
 			relationalWriter->startRelation(name, currentWriterMetadata, true);
 		}