src/RecfileCommand.h
branchv_0
changeset 10 c59363fd805b
parent 8 9c8c20c3bd64
child 17 06c8926debe0
equal deleted inserted replaced
9:c28c6eda540f 10:c59363fd805b
    19 #include <iostream>
    19 #include <iostream>
    20 #include <string>
    20 #include <string>
    21 #include <sstream>
    21 #include <sstream>
    22 #include <vector>
    22 #include <vector>
    23 #include <set>
    23 #include <set>
       
    24 #include <regex>
    24 
    25 
    25 #include <relpipe/writer/typedefs.h>
    26 #include <relpipe/writer/typedefs.h>
    26 #include <relpipe/writer/RelationalWriter.h>
    27 #include <relpipe/writer/RelationalWriter.h>
    27 #include <relpipe/writer/AttributeMetadata.h>
    28 #include <relpipe/writer/AttributeMetadata.h>
    28 
    29 
    46 	class RecfileHandler {
    47 	class RecfileHandler {
    47 	private:
    48 	private:
    48 		RelationalWriter* writer;
    49 		RelationalWriter* writer;
    49 		string_t currentRelationName;
    50 		string_t currentRelationName;
    50 		std::vector<AttributeMetadata> currentAttributeMetadata;
    51 		std::vector<AttributeMetadata> currentAttributeMetadata;
       
    52 		std::vector<AttributeMetadata> currentTypeHints;
    51 		std::vector<string_t> currentRecord;
    53 		std::vector<string_t> currentRecord;
    52 		std::vector<std::vector<string_t>> currentRecords;
    54 		std::vector<std::vector<string_t>> currentRecords;
    53 		size_t prefetchCount = 1;
    55 		size_t prefetchCount = 1;
    54 		bool headerWritten = false;
    56 		bool headerWritten = false;
       
    57 
       
    58 		TypeId findType(string_t attributeName, TypeId defaultType = TypeId::STRING) {
       
    59 			for (AttributeMetadata m : currentTypeHints) if (m.attributeName == attributeName) return m.typeId;
       
    60 			return defaultType;
       
    61 		}
       
    62 
       
    63 		TypeId recType2typeId(string_t recType) {
       
    64 			// TODO: support more types
       
    65 			// boolean is currently unsupported, because NULLs are not implemented yet and recfile booleans might be null
       
    66 			if (recType == L"int") return TypeId::INTEGER;
       
    67 			else return TypeId::STRING;
       
    68 		}
    55 
    69 
    56 		void writeHeader() {
    70 		void writeHeader() {
    57 			if (headerWritten) return;
    71 			if (headerWritten) return;
    58 
    72 
    59 			if (currentRelationName.size() == 0) currentRelationName = L"recfile";
    73 			if (currentRelationName.size() == 0) currentRelationName = L"recfile";
    63 			// TODO: add also attribute names from type hints from recfile metadata
    77 			// TODO: add also attribute names from type hints from recfile metadata
    64 			for (int i = 0; i < currentRecords.size(); i++) {
    78 			for (int i = 0; i < currentRecords.size(); i++) {
    65 				std::vector<string_t> record = currentRecords[i];
    79 				std::vector<string_t> record = currentRecords[i];
    66 				for (int j = 0; j < record.size(); j += 2) {
    80 				for (int j = 0; j < record.size(); j += 2) {
    67 					if (uniqueAttributeNames.insert(record[j]).second) {
    81 					if (uniqueAttributeNames.insert(record[j]).second) {
    68 						currentAttributeMetadata.push_back({record[j], TypeId::STRING}); // TODO: type from type hints
    82 						currentAttributeMetadata.push_back({record[j], findType(record[j])});
    69 					}
    83 					}
    70 				}
    84 				}
    71 			}
    85 			}
    72 
    86 
    73 			writer->startRelation(currentRelationName, currentAttributeMetadata, true);
    87 			writer->startRelation(currentRelationName, currentAttributeMetadata, true);
    90 
   104 
    91 		void metadata(const string_t& name, const string_t& value) {
   105 		void metadata(const string_t& name, const string_t& value) {
    92 			if (name == L"rec") {
   106 			if (name == L"rec") {
    93 				currentRelationName = value;
   107 				currentRelationName = value;
    94 				currentAttributeMetadata.clear();
   108 				currentAttributeMetadata.clear();
       
   109 				currentTypeHints.clear();
    95 				currentRecord.clear();
   110 				currentRecord.clear();
    96 				currentRecords.clear();
   111 				currentRecords.clear();
    97 				headerWritten = false;
   112 				headerWritten = false;
    98 			} else if (name == L"type") {
   113 			} else if (name == L"type") {
    99 				// TODO: save type hint
   114 				std::wsmatch match;
       
   115 				if (regex_search(value, match, std::wregex(L"\\s?(.*)\\s+(.*)\\s?"))) currentTypeHints.push_back({match[1], recType2typeId(match[2])});
   100 			} else {
   116 			} else {
   101 				// ignore – other recfile metadata like keys or auto-increments
   117 				// ignore – other recfile metadata like keys or auto-increments
   102 			}
   118 			}
   103 		}
   119 		}
   104 
   120