src/RecfileCommand.h
branchv_0
changeset 1 8dfb42e5c088
parent 0 515a697cc9cd
child 2 2390e2949a36
equal deleted inserted replaced
0:515a697cc9cd 1:8dfb42e5c088
    33 class RecfileCommand {
    33 class RecfileCommand {
    34 private:
    34 private:
    35 
    35 
    36 	enum class RecfileLineType {
    36 	enum class RecfileLineType {
    37 		METADATA,
    37 		METADATA,
    38 		DATA
    38 		DATA,
       
    39 		SEPARATOR,
       
    40 		COMMENT,
    39 	};
    41 	};
    40 
    42 
    41 	class RecfileHandler {
    43 	class RecfileHandler {
    42 	private:
    44 	private:
    43 		RelationalWriter* writer;
    45 		RelationalWriter* writer;
    47 		}
    49 		}
    48 
    50 
    49 		virtual ~RecfileHandler() {
    51 		virtual ~RecfileHandler() {
    50 		}
    52 		}
    51 
    53 
    52 		void logicalLine(const string_t& name, const string_t& value, RecfileLineType type) {
    54 		void logicalLine(RecfileLineType type, const string_t& name = L"", const string_t& value = L"") {
    53 			std::wcerr << L"logicalLine(" << name << L", " << value << L", " << (int) type << L");" << std::endl; // TODO: remove debug
    55 			std::wcerr << L"logicalLine(" << (int) type << L", " << name << L", " << value << L");" << std::endl; // TODO: remove debug
    54 			// TODO: writer->startRelation()
    56 			// TODO: writer->startRelation()
    55 			// TODO: writer->writeAttribute()
    57 			// TODO: writer->writeAttribute()
    56 		}
    58 		}
    57 
    59 
    58 	};
    60 	};
    59 
    61 
       
    62 	enum class ParserState {
       
    63 		START,
       
    64 		NAME,
       
    65 		VALUE,
       
    66 		VALUE_CONTINUATION,
       
    67 		COMMENT,
       
    68 		END,
       
    69 	};
       
    70 
    60 	class RecfileParser {
    71 	class RecfileParser {
    61 	private:
    72 	private:
       
    73 		wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or are recfiles always in UTF-8?
    62 		RecfileHandler& handler;
    74 		RecfileHandler& handler;
       
    75 
       
    76 		void emitLogicalLine(RecfileLineType& type, std::stringstream& name, std::stringstream& value) {
       
    77 			handler.logicalLine(type, convertor.from_bytes(name.str()), convertor.from_bytes(value.str()));
       
    78 
       
    79 			name.str("");
       
    80 			name.clear();
       
    81 			value.str("");
       
    82 			value.clear();
       
    83 			type = RecfileLineType::DATA;
       
    84 		}
       
    85 
    63 	public:
    86 	public:
    64 
    87 
    65 		RecfileParser(RecfileHandler& handler) : handler(handler) {
    88 		RecfileParser(RecfileHandler& handler) : handler(handler) {
    66 		}
    89 		}
    67 
    90 
    68 		virtual ~RecfileParser() {
    91 		virtual ~RecfileParser() {
    69 		}
    92 		}
    70 
    93 
    71 		void parse(std::istream& input) {
    94 		void parse(std::istream& input) {
    72 			// TODO: parse
    95 
    73 			handler.logicalLine(L"nnn", L"vvv", RecfileLineType::METADATA); // TODO: remove debug
    96 			ParserState state = ParserState::START;
    74 			handler.logicalLine(L"nnn", L"vvv", RecfileLineType::DATA); // TODO: remove debug
    97 			RecfileLineType type = RecfileLineType::DATA;
       
    98 			std::stringstream name;
       
    99 			std::stringstream value;
       
   100 			char ch;
       
   101 
       
   102 			while (state != ParserState::END && input.good()) {
       
   103 				ch = input.get();
       
   104 				if (input.eof()) continue;
       
   105 
       
   106 				switch (state) {
       
   107 					case ParserState::START:
       
   108 						if (ch == '%') {
       
   109 							type = RecfileLineType::METADATA;
       
   110 							break;
       
   111 						} else if (ch == ' ') {
       
   112 							break;
       
   113 						} else if (ch == '\n') {
       
   114 							handler.logicalLine(RecfileLineType::SEPARATOR);
       
   115 							break;
       
   116 						} else if (ch == '#') {
       
   117 							type = RecfileLineType::COMMENT;
       
   118 							state = ParserState::COMMENT;
       
   119 							if (input.get() != ' ') input.unget();
       
   120 							break;
       
   121 						} // else → name
       
   122 					case ParserState::NAME:
       
   123 						if (ch == ':') {
       
   124 							state = ParserState::VALUE;
       
   125 							if (input.get() != ' ') input.unget();
       
   126 						} else {
       
   127 							name << ch;
       
   128 						}
       
   129 						break;
       
   130 					case ParserState::VALUE:
       
   131 						if (ch == '\n') state = ParserState::VALUE_CONTINUATION;
       
   132 						else value << ch;
       
   133 						break;
       
   134 					case ParserState::VALUE_CONTINUATION:
       
   135 						if (ch == '+') {
       
   136 							state = ParserState::VALUE;
       
   137 							if (value.tellp()) value << '\n';
       
   138 							if (input.get() != ' ') input.unget();
       
   139 						} else {
       
   140 							input.unget();
       
   141 							state = ParserState::START;
       
   142 							emitLogicalLine(type, name, value);
       
   143 						}
       
   144 						break;
       
   145 					case ParserState::COMMENT:
       
   146 						if (ch == '\n') {
       
   147 							state = ParserState::START;
       
   148 							emitLogicalLine(type, name, value);
       
   149 						} else {
       
   150 							value << ch;
       
   151 						}
       
   152 						break;
       
   153 					default:
       
   154 						throw RelpipeWriterException(L"Unknown ParserState: " + std::to_wstring((int) state) + L" in RecfileParser."); // TODO: better exception
       
   155 				}
       
   156 			}
       
   157 			emitLogicalLine(type, name, value);
    75 		}
   158 		}
    76 
       
    77 	};
   159 	};
    78 
       
    79 
       
    80 
   160 
    81 public:
   161 public:
    82 
   162 
    83 	void process(std::istream& input, std::ostream& output) {
   163 	void process(std::istream& input, std::ostream& output) {
    84 		unique_ptr<RelationalWriter> writer(Factory::create(output));
   164 		unique_ptr<RelationalWriter> writer(Factory::create(output));