avoid collisions in AWK variable names v_0
author František Kučera <franta-hg@frantovo.cz>
Wed, 22 May 2019 19:26:10 +0200
branch v_0
changeset 25 13a1e1134797
parent 24 c805c968b7ed
child 26 cf57e8c78492
avoid collisions in AWK variable names
src/AwkHandler.h
--- a/src/AwkHandler.h	Sat May 11 22:55:21 2019 +0200
+++ b/src/AwkHandler.h	Wed May 22 19:26:10 2019 +0200
@@ -69,7 +69,8 @@
 	int awkInputWriterFD = -1;
 	RelationConfiguration* currentRelationConfiguration = nullptr;
 	std::vector<AttributeMetadata> currentReaderMetadata;
-	vector<writer::AttributeMetadata> currentWriterMetadata;
+	std::vector<writer::AttributeMetadata> currentWriterMetadata;
+	std::map<string_t, string_t> currenVariablesMapping;
 	integer_t currentAttributeIndex = 0;
 
 	void createPipe(int& readerFD, int& writerFD) {
@@ -141,15 +142,47 @@
 		currentAttributeIndex = 0;
 		currentReaderMetadata.clear();
 		currentWriterMetadata.clear();
+		currenVariablesMapping.clear();
 		currentRelationConfiguration = nullptr;
 	}
 
+	void generateVariableMappings() { // registers every attribute and definition name, then gives each one an escaped AWK variable name
+		for (auto& m : currentReaderMetadata) currenVariablesMapping[m.getAttributeName()] = L""; // NOTE(review): non-const reference because the constness of getAttributeName() is not visible here
+		for (const auto& m : currentWriterMetadata) currenVariablesMapping[m.attributeName] = L"";
+		for (const auto& d : configuration.definitions) currenVariablesMapping[d.name] = L"";
+		for (const auto& d : currentRelationConfiguration->definitions) currenVariablesMapping[d.name] = L"";
+		// All keys exist now with empty values; iterate by reference so nothing is copied and no key is looked up again.
+		for (auto& m : currenVariablesMapping) {
+			m.second = escapeAwkVariableName(m.first); // escapeAwkVariableName() reads the values assigned so far to avoid duplicates
+		}
+	}
+
 	string_t a2v(const string_t& attributeName) {
+		if (currenVariablesMapping.count(attributeName) == 0) throw cli::RelpipeCLIException(L"Unable to find value in currenVariablesMapping", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
+		return currenVariablesMapping.at(attributeName); // key presence was checked above, so at() does not throw here
+	}
+
+	template <typename K, typename V> bool containsValue(const std::map<K, V>& map, const V& value) { // true if any entry of map has the given value; const references avoid copying the map on every call
+		for (const auto& entry : map) if (entry.second == value) return true;
+		return false;
+	}
+
+	string_t escapeAwkVariableName(const string_t& attributeName) { // derives a variable name from attributeName that is not reserved and not yet present among the mapping values
 		// cat awkgram.y | awk -v FS='\\{"|",' -v ORS='|' '/static const struct token tokentab/, /\};/ { if (/^\{/) { print $2} }'
+		// cat AwkHandler.h | awk -v FS=' |\\(' -v ORS='|' '/awkScript.*"function/ { print $4; }'
 		std::wregex awkReservedNames(L"BEGIN|BEGINFILE|END|ENDFILE|adump|and|asort|asorti|atan2|bindtextdomain|break|case|close|compl|continue|cos|dcgettext|dcngettext|default|delete|do|else|eval|exit|exp|fflush|for|func|function|gensub|getline|gsub|if|in|include|index|int|intdiv0|isarray|length|load|log|lshift|match|mktime|namespace|next|nextfile|or|patsplit|print|printf|rand|return|rshift|sin|split|sprintf|sqrt|srand|stopme|strftime|strtonum|sub|substr|switch|system|systime|tolower|toupper|typeof|while|xor");
 		std::wregex trReservedNames(L"_escape|_unescape|_readVariables|_writeVariables|record");
-		if (regex_match(attributeName, awkReservedNames) || regex_match(attributeName, trReservedNames)) return a2v(L"_" + attributeName);
-		else return attributeName;
+		std::wregex badCharacters(L"[^a-zA-Z0-9_]|^([0-9])");
+
+		// Every character outside [a-zA-Z0-9_] becomes '_'; a leading digit is kept but gets '_' in front of it (the $1 group).
+		const string_t name = std::regex_replace(attributeName, badCharacters, L"_$1");
+
+		const bool badName = regex_match(name, awkReservedNames) // matches a name from the awkReservedNames list
+				|| regex_match(name, trReservedNames) // matches a name from the trReservedNames list
+				|| containsValue(currenVariablesMapping, name); // already stored as a value in the mapping
+
+		if (badName) return escapeAwkVariableName(L"_" + name); // retry with one more leading underscore
+		return name;
 	}
 
 	string_t escapeAwkValue(const string_t& value) {
@@ -228,6 +261,8 @@
 		}
 
 		if (currentRelationConfiguration) {
+			generateVariableMappings();
+
 			int awkInputReaderFD;
 			int awkOutputReaderFD;
 			int awkOutputWriterFD;