parse recfile (logical lines) v_0
author František Kučera <franta-hg@frantovo.cz>
Sat, 06 Apr 2019 17:33:42 +0200
branch v_0
changeset 1 8dfb42e5c088
parent 0 515a697cc9cd
child 2 2390e2949a36
parse recfile (logical lines)
src/RecfileCommand.h
--- a/src/RecfileCommand.h	Fri Apr 05 18:02:19 2019 +0200
+++ b/src/RecfileCommand.h	Sat Apr 06 17:33:42 2019 +0200
@@ -35,7 +35,9 @@
 
 	enum class RecfileLineType {
 		METADATA,
-		DATA
+		DATA, // type the parser starts with and resets to after each emitted logical line
+		SEPARATOR, // reported when the parser reads '\n' in its START state
+		COMMENT, // set when the parser reads '#' in its START state
 	};
 
 	class RecfileHandler {
@@ -49,17 +51,38 @@
 		virtual ~RecfileHandler() {
 		}
 
-		void logicalLine(const string_t& name, const string_t& value, RecfileLineType type) {
-			std::wcerr << L"logicalLine(" << name << L", " << value << L", " << (int) type << L");" << std::endl; // TODO: remove debug
+		void logicalLine(RecfileLineType type, const string_t& name = L"", const string_t& value = L"") { // type comes first so that name and value can default to empty; the parser passes only the type for SEPARATOR
+			std::wcerr << L"logicalLine(" << (int) type << L", " << name << L", " << value << L");" << std::endl; // TODO: remove debug
 			// TODO: writer->startRelation()
 			// TODO: writer->writeAttribute()
 		}
 
 	};
 
+	enum class ParserState { // states of the character-by-character loop in RecfileParser::parse()
+		START, // beginning of a logical line: '%' marks metadata, ' ' is skipped, '\n' is a separator, '#' starts a comment, anything else goes to the name
+		NAME, // collects name characters until ':'; NOTE(review): no visible code assigns this state, its case is reached only by fall-through from START
+		VALUE, // collects value characters until '\n'
+		VALUE_CONTINUATION, // first character after '\n' in a value: '+' continues the value, anything else ends the logical line
+		COMMENT, // collects comment text into the value buffer until '\n'
+		END, // terminates the parse loop; nothing visible in this diff sets it
+	};
+
 	class RecfileParser {
 	private:
+		wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or are recfiles always in UTF-8?
 		RecfileHandler& handler;
+
+		void emitLogicalLine(RecfileLineType& type, std::stringstream& name, std::stringstream& value) { // hands the buffered line to the handler, then resets the buffers and the type (all three arguments are modified)
+			handler.logicalLine(type, convertor.from_bytes(name.str()), convertor.from_bytes(value.str())); // buffered bytes are converted to wide strings by the convertor member
+
+			name.str(""); // drop the buffered name text
+			name.clear(); // reset the stream's state flags
+			value.str(""); // drop the buffered value text
+			value.clear(); // reset the stream's state flags
+			type = RecfileLineType::DATA; // the next logical line is DATA unless the parser sets another type
+		}
+
 	public:
 
 		RecfileParser(RecfileHandler& handler) : handler(handler) {
@@ -69,15 +92,72 @@
 		}
 
 		void parse(std::istream& input) {
-			// TODO: parse
-			handler.logicalLine(L"nnn", L"vvv", RecfileLineType::METADATA); // TODO: remove debug
-			handler.logicalLine(L"nnn", L"vvv", RecfileLineType::DATA); // TODO: remove debug
+
+			ParserState state = ParserState::START;
+			RecfileLineType type = RecfileLineType::DATA;
+			std::stringstream name; // bytes of the current field name
+			std::stringstream value; // bytes of the current value (or comment text)
+			char ch;
+
+			while (state != ParserState::END && input.good()) { // one character per iteration until the stream is no longer good() or the state becomes END
+				ch = input.get();
+				if (input.eof()) continue; // nothing was read; the loop condition then fails on good()
+
+				switch (state) {
+					case ParserState::START:
+						if (ch == '%') { // '%' marks a metadata line; the state stays START and the following characters go to the name
+							type = RecfileLineType::METADATA;
+							break;
+						} else if (ch == ' ') { // spaces are skipped
+							break;
+						} else if (ch == '\n') { // newline in START is reported as a separator with empty name and value
+							handler.logicalLine(RecfileLineType::SEPARATOR);
+							break;
+						} else if (ch == '#') {
+							type = RecfileLineType::COMMENT;
+							state = ParserState::COMMENT;
+							if (input.get() != ' ') input.unget(); // skip one optional space after '#'
+							break;
+						} // else → name (deliberate fall-through); NOTE(review): the state stays START while the name is read, so ' ', '%', '#' and '\n' inside a name are still handled by the branches above
+					case ParserState::NAME:
+						if (ch == ':') { // ':' ends the name
+							state = ParserState::VALUE;
+							if (input.get() != ' ') input.unget(); // skip one optional space after ':'
+						} else {
+							name << ch;
+						}
+						break;
+					case ParserState::VALUE:
+						if (ch == '\n') state = ParserState::VALUE_CONTINUATION; // end of the physical line; the next character decides whether the value continues
+						else value << ch;
+						break;
+					case ParserState::VALUE_CONTINUATION:
+						if (ch == '+') { // '+' at the start of the next line continues the value
+							state = ParserState::VALUE;
+							if (value.tellp()) value << '\n'; // join the lines with '\n' unless nothing has been written to the value yet
+							if (input.get() != ' ') input.unget(); // skip one optional space after '+'
+						} else {
+							input.unget(); // this character belongs to the next logical line; push it back
+							state = ParserState::START;
+							emitLogicalLine(type, name, value);
+						}
+						break;
+					case ParserState::COMMENT:
+						if (ch == '\n') { // a comment ends at the end of the line
+							state = ParserState::START;
+							emitLogicalLine(type, name, value);
+						} else {
+							value << ch; // comment text is collected in the value buffer
+						}
+						break;
+					default:
+						throw RelpipeWriterException(L"Unknown ParserState: " + std::to_wstring((int) state) + L" in RecfileParser."); // TODO: better exception
+				}
+			}
+			emitLogicalLine(type, name, value); // emits whatever is buffered at the end of input; NOTE(review): the call is unconditional, so an empty DATA line is emitted when nothing is pending
 		}
-
 	};
 
-
-
 public:
 
 	void process(std::istream& input, std::ostream& output) {