47 } |
49 } |
48 |
50 |
49 virtual ~RecfileHandler() { |
51 virtual ~RecfileHandler() { |
50 } |
52 } |
51 |
53 |
52 void logicalLine(const string_t& name, const string_t& value, RecfileLineType type) { |
54 void logicalLine(RecfileLineType type, const string_t& name = L"", const string_t& value = L"") { |
53 std::wcerr << L"logicalLine(" << name << L", " << value << L", " << (int) type << L");" << std::endl; // TODO: remove debug |
55 std::wcerr << L"logicalLine(" << (int) type << L", " << name << L", " << value << L");" << std::endl; // TODO: remove debug |
54 // TODO: writer->startRelation() |
56 // TODO: writer->startRelation() |
55 // TODO: writer->writeAttribute() |
57 // TODO: writer->writeAttribute() |
56 } |
58 } |
57 |
59 |
58 }; |
60 }; |
59 |
61 |
|
/**
 * States of the character-level state machine used by RecfileParser::parse().
 */
enum class ParserState {
	START, // at the beginning of a logical line
	NAME, // reading the name (the part before ':')
	VALUE, // reading the value (the part after ':')
	VALUE_CONTINUATION, // just after a line break inside a value; a '+' continues the value
	COMMENT, // reading a comment (a line started with '#')
	END, // terminates the parsing loop
};
|
70 |
60 class RecfileParser { |
71 class RecfileParser { |
61 private: |
72 private: |
|
73 wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or are recfiles always in UTF-8? |
62 RecfileHandler& handler; |
74 RecfileHandler& handler; |
|
75 |
|
76 void emitLogicalLine(RecfileLineType& type, std::stringstream& name, std::stringstream& value) { |
|
77 handler.logicalLine(type, convertor.from_bytes(name.str()), convertor.from_bytes(value.str())); |
|
78 |
|
79 name.str(""); |
|
80 name.clear(); |
|
81 value.str(""); |
|
82 value.clear(); |
|
83 type = RecfileLineType::DATA; |
|
84 } |
|
85 |
63 public: |
86 public: |
64 |
87 |
65 RecfileParser(RecfileHandler& handler) : handler(handler) { |
88 RecfileParser(RecfileHandler& handler) : handler(handler) { |
66 } |
89 } |
67 |
90 |
68 virtual ~RecfileParser() { |
91 virtual ~RecfileParser() { |
69 } |
92 } |
70 |
93 |
71 void parse(std::istream& input) { |
94 void parse(std::istream& input) { |
72 // TODO: parse |
95 |
73 handler.logicalLine(L"nnn", L"vvv", RecfileLineType::METADATA); // TODO: remove debug |
96 ParserState state = ParserState::START; |
74 handler.logicalLine(L"nnn", L"vvv", RecfileLineType::DATA); // TODO: remove debug |
97 RecfileLineType type = RecfileLineType::DATA; |
|
98 std::stringstream name; |
|
99 std::stringstream value; |
|
100 char ch; |
|
101 |
|
102 while (state != ParserState::END && input.good()) { |
|
103 ch = input.get(); |
|
104 if (input.eof()) continue; |
|
105 |
|
106 switch (state) { |
|
107 case ParserState::START: |
|
108 if (ch == '%') { |
|
109 type = RecfileLineType::METADATA; |
|
110 break; |
|
111 } else if (ch == ' ') { |
|
112 break; |
|
113 } else if (ch == '\n') { |
|
114 handler.logicalLine(RecfileLineType::SEPARATOR); |
|
115 break; |
|
116 } else if (ch == '#') { |
|
117 type = RecfileLineType::COMMENT; |
|
118 state = ParserState::COMMENT; |
|
119 if (input.get() != ' ') input.unget(); |
|
120 break; |
|
121 } // else → name |
|
122 case ParserState::NAME: |
|
123 if (ch == ':') { |
|
124 state = ParserState::VALUE; |
|
125 if (input.get() != ' ') input.unget(); |
|
126 } else { |
|
127 name << ch; |
|
128 } |
|
129 break; |
|
130 case ParserState::VALUE: |
|
131 if (ch == '\n') state = ParserState::VALUE_CONTINUATION; |
|
132 else value << ch; |
|
133 break; |
|
134 case ParserState::VALUE_CONTINUATION: |
|
135 if (ch == '+') { |
|
136 state = ParserState::VALUE; |
|
137 if (value.tellp()) value << '\n'; |
|
138 if (input.get() != ' ') input.unget(); |
|
139 } else { |
|
140 input.unget(); |
|
141 state = ParserState::START; |
|
142 emitLogicalLine(type, name, value); |
|
143 } |
|
144 break; |
|
145 case ParserState::COMMENT: |
|
146 if (ch == '\n') { |
|
147 state = ParserState::START; |
|
148 emitLogicalLine(type, name, value); |
|
149 } else { |
|
150 value << ch; |
|
151 } |
|
152 break; |
|
153 default: |
|
154 throw RelpipeWriterException(L"Unknown ParserState: " + std::to_wstring((int) state) + L" in RecfileParser."); // TODO: better exception |
|
155 } |
|
156 } |
|
157 emitLogicalLine(type, name, value); |
75 } |
158 } |
76 |
|
77 }; |
159 }; |
78 |
|
79 |
|
80 |
160 |
81 public: |
161 public: |
82 |
162 |
83 void process(std::istream& input, std::ostream& output) { |
163 void process(std::istream& input, std::ostream& output) { |
84 unique_ptr<RelationalWriter> writer(Factory::create(output)); |
164 unique_ptr<RelationalWriter> writer(Factory::create(output)); |