src/AwkHandler.h
branchv_0
changeset 7 46db0e6e548b
parent 6 efa96f51b308
child 10 f911910fd68f
equal deleted inserted replaced
6:efa96f51b308 7:46db0e6e548b
    60  */
    60  */
    61 class AwkHandler : public RelationalReaderStringHandler {
    61 class AwkHandler : public RelationalReaderStringHandler {
    62 private:
    62 private:
    63 	Configuration configuration;
    63 	Configuration configuration;
    64 	writer::RelationalWriter* relationalWriter;
    64 	writer::RelationalWriter* relationalWriter;
       
    65 	std::wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings
    65 
    66 
    66 	int awkInputWriterFD = -1;
    67 	int awkInputWriterFD = -1;
       
    68 	std::vector<AttributeMetadata> currentReaderMetadata;
       
    69 	integer_t currentAttributeIndex = 0;
    67 
    70 
    68 	void createPipe(int& readerFD, int& writerFD) {
    71 	void createPipe(int& readerFD, int& writerFD) {
    69 		int fds[2];
    72 		int fds[2];
    70 		int result = pipe(fds);
    73 		int result = pipe(fds);
    71 		readerFD = fds[0];
    74 		readerFD = fds[0];
    89 			// TODO: check exit codes
    92 			// TODO: check exit codes
    90 			__pid_t waitResult1 = wait(NULL);
    93 			__pid_t waitResult1 = wait(NULL);
    91 			__pid_t waitResult2 = wait(NULL);
    94 			__pid_t waitResult2 = wait(NULL);
    92 			awkInputWriterFD = -1;
    95 			awkInputWriterFD = -1;
    93 		}
    96 		}
       
    97 
       
    98 		currentAttributeIndex = 0;
       
    99 		currentReaderMetadata.clear();
       
   100 	}
       
   101 
       
   102 	string_t a2v(const string_t& attributeName) {
       
   103 		// FIXME: escape reserved names; prefix with _ ?
       
   104 		// cat awkgram.y | awk -v FS='\\{"|",' -v ORS='|' '/static const struct token tokentab/, /\};/ { if (/^\{/) { print $2} }'
       
   105 		// BEGIN|BEGINFILE|END|ENDFILE|adump|and|asort|asorti|atan2|bindtextdomain|break|case|close|compl|continue|cos|dcgettext|dcngettext|default|delete|do|else|eval|exit|exp|fflush|for|func|function|gensub|getline|gsub|if|in|include|index|int|intdiv0|isarray|length|load|log|lshift|match|mktime|namespace|next|nextfile|or|patsplit|print|printf|rand|return|rshift|sin|split|sprintf|sqrt|srand|stopme|strftime|strtonum|sub|substr|switch|system|systime|tolower|toupper|typeof|while|xor
       
   106 		return attributeName;
       
   107 	}
       
   108 
       
   109 	string_t escapeAwkValue(const string_t& value) {
       
   110 		// FIXME: escape field and record separators
       
   111 		return value;
    94 	}
   112 	}
    95 
   113 
    96 public:
   114 public:
    97 
   115 
    98 	AwkHandler(writer::RelationalWriter* relationalWriter, Configuration& configuration) : relationalWriter(relationalWriter), configuration(configuration) {
   116 	AwkHandler(writer::RelationalWriter* relationalWriter, Configuration& configuration) : relationalWriter(relationalWriter), configuration(configuration) {
    99 	}
   117 	}
   100 
   118 
   101 	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
   119 	void startRelation(string_t name, vector<AttributeMetadata> attributes) override {
   102 		cleanUp();
   120 		cleanUp();
   103 
   121 
       
   122 		currentReaderMetadata = attributes;
   104 
   123 
   105 		int awkInputReaderFD;
   124 		int awkInputReaderFD;
   106 		int awkOutputReaderFD;
   125 		int awkOutputReaderFD;
   107 		int awkOutputWriterFD;
   126 		int awkOutputWriterFD;
   108 
   127 
   120 
   139 
   121 			redirectFD(awkInputReaderFD, STDIN_FILENO);
   140 			redirectFD(awkInputReaderFD, STDIN_FILENO);
   122 			redirectFD(awkOutputWriterFD, STDOUT_FILENO);
   141 			redirectFD(awkOutputWriterFD, STDOUT_FILENO);
   123 
   142 
   124 			// Runs AWK program found on $PATH → user can plug-in a custom implementation or a wrapper, but this can be also bit dangerous (however AWK itself is dangerous).
   143 			// Runs AWK program found on $PATH → user can plug-in a custom implementation or a wrapper, but this can be also bit dangerous (however AWK itself is dangerous).
   125 			execlp("awk", "awk", "{print \"AWK says: line \" NR \" = \" $0;}", nullptr);
   144 			execlp("awk", "awk", "BEGIN { FS=\"\\t\" }; {print \"AWK says: line \" NR \" '\" $0 \"' has \" NF \" fields; first field is '\" $1 \"'\";}", nullptr);
   126 		} else {
   145 		} else {
   127 			// Parent process
   146 			// Parent process
   128 			closeOrThrow(awkInputReaderFD);
   147 			closeOrThrow(awkInputReaderFD);
   129 			closeOrThrow(awkOutputWriterFD);
   148 			closeOrThrow(awkOutputWriterFD);
   130 
   149 
   156 		}
   175 		}
   157 
   176 
   158 	}
   177 	}
   159 
   178 
   160 	void attribute(const string_t& value) override {
   179 	void attribute(const string_t& value) override {
   161 		dprintf(awkInputWriterFD, "attribute!\n");
   180 		string_t variableName = a2v(currentReaderMetadata[currentAttributeIndex].getAttributeName());
       
   181 		string_t variableValue = escapeAwkValue(value);
       
   182 
       
   183 		currentAttributeIndex++;
       
   184 		currentAttributeIndex = currentAttributeIndex % currentReaderMetadata.size();
       
   185 
       
   186 		// TODO: just the value – move name to the AWK function
       
   187 		std::string variablePair = convertor.to_bytes(variableName + L"=" + variableValue);
       
   188 
       
   189 		if (currentAttributeIndex == 0) variablePair += "\n";
       
   190 		else variablePair += "\t";
       
   191 
       
   192 		write(awkInputWriterFD, variablePair.c_str(), variablePair.length());
       
   193 
   162 	}
   194 	}
   163 
   195 
   164 	void endOfPipe() {
   196 	void endOfPipe() {
   165 		cleanUp();
   197 		cleanUp();
   166 	}
   198 	}