# HG changeset patch
# User František Kučera
# Date 1558545970 -7200
# Node ID 13a1e11347975b97ff31a72a2b8295192146047c
# Parent c805c968b7ed4d35b827ab698f576dd6dc669d58
avoid collisions in AWK variable names

diff -r c805c968b7ed -r 13a1e1134797 src/AwkHandler.h
--- a/src/AwkHandler.h	Sat May 11 22:55:21 2019 +0200
+++ b/src/AwkHandler.h	Wed May 22 19:26:10 2019 +0200
@@ -69,7 +69,8 @@
 	int awkInputWriterFD = -1;
 	RelationConfiguration* currentRelationConfiguration = nullptr;
 	std::vector<AttributeMetadata> currentReaderMetadata;
-	vector<writer::AttributeMetadata> currentWriterMetadata;
+	std::vector<writer::AttributeMetadata> currentWriterMetadata;
+	std::map<string_t, string_t> currenVariablesMapping;
 	integer_t currentAttributeIndex = 0;
 
 	void createPipe(int& readerFD, int& writerFD) {
@@ -141,15 +142,47 @@
 		currentAttributeIndex = 0;
 		currentReaderMetadata.clear();
 		currentWriterMetadata.clear();
+		currenVariablesMapping.clear();
 		currentRelationConfiguration = nullptr;
 	}
 
+	void generateVariableMappings() {
+		for (AttributeMetadata m : currentReaderMetadata) currenVariablesMapping[m.getAttributeName()] = L"";
+		for (writer::AttributeMetadata m : currentWriterMetadata) currenVariablesMapping[m.attributeName] = L"";
+		for (DefinitionRecipe d : configuration.definitions) currenVariablesMapping[d.name] = L"";
+		for (DefinitionRecipe d : currentRelationConfiguration->definitions) currenVariablesMapping[d.name] = L"";
+
+		for (std::pair<string_t, string_t> m : currenVariablesMapping) {
+			currenVariablesMapping[m.first] = escapeAwkVariableName(m.first);
+		}
+	}
+
 	string_t a2v(const string_t& attributeName) {
+		if (currenVariablesMapping.find(attributeName) != currenVariablesMapping.end()) return currenVariablesMapping[attributeName];
+		else throw cli::RelpipeCLIException(L"Unable to find value in currenVariablesMapping", cli::CLI::EXIT_CODE_UNEXPECTED_ERROR); // TODO: better exceptions?
+	}
+
+	template <typename K, typename V> bool containsValue(std::map<K, V> map, V value) {
+		for (std::pair<K, V> p : map) if (p.second == value) return true;
+		return false;
+	}
+
+	string_t escapeAwkVariableName(const string_t& attributeName) {
 		// cat awkgram.y | awk -v FS='\\{"|",' -v ORS='|' '/static const struct token tokentab/, /\};/ { if (/^\{/) { print $2} }'
+		// cat AwkHandler.h | awk -v FS=' |\\(' -v ORS='|' '/awkScript.*"function/ { print $4; }'
 		std::wregex awkReservedNames(L"BEGIN|BEGINFILE|END|ENDFILE|adump|and|asort|asorti|atan2|bindtextdomain|break|case|close|compl|continue|cos|dcgettext|dcngettext|default|delete|do|else|eval|exit|exp|fflush|for|func|function|gensub|getline|gsub|if|in|include|index|int|intdiv0|isarray|length|load|log|lshift|match|mktime|namespace|next|nextfile|or|patsplit|print|printf|rand|return|rshift|sin|split|sprintf|sqrt|srand|stopme|strftime|strtonum|sub|substr|switch|system|systime|tolower|toupper|typeof|while|xor");
 		std::wregex trReservedNames(L"_escape|_unescape|_readVariables|_writeVariables|record");
-		if (regex_match(attributeName, awkReservedNames) || regex_match(attributeName, trReservedNames)) return a2v(L"_" + attributeName);
-		else return attributeName;
+		std::wregex badCharacters(L"[^a-zA-Z0-9_]|^([0-9])");
+
+		const string_t& name = std::regex_replace(attributeName, badCharacters, L"_$1");
+
+		bool badName = false;
+		badName |= regex_match(name, awkReservedNames);
+		badName |= regex_match(name, trReservedNames);
+		badName |= containsValue(currenVariablesMapping, name);
+
+		if (badName) return escapeAwkVariableName(L"_" + name);
+		else return name;
 	}
 
 	string_t escapeAwkValue(const string_t& value) {
@@ -228,6 +261,8 @@
 		}
 
 		if (currentRelationConfiguration) {
+			generateVariableMappings();
+
 			int awkInputReaderFD;
 			int awkOutputReaderFD;
 			int awkOutputWriterFD;