--- a/src/RecfileCommand.h Fri Apr 05 18:02:19 2019 +0200
+++ b/src/RecfileCommand.h Sat Apr 06 17:33:42 2019 +0200
@@ -35,7 +35,9 @@
enum class RecfileLineType {
METADATA,
- DATA
+ DATA, // regular line; the type the parser starts with and goes back to after each emitted line
+ SEPARATOR, // empty line; the parser reports it without name and value
+ COMMENT, // line starting with '#'; the parser passes the comment text as the value
};
class RecfileHandler {
@@ -49,17 +51,38 @@
virtual ~RecfileHandler() {
}
- void logicalLine(const string_t& name, const string_t& value, RecfileLineType type) {
- std::wcerr << L"logicalLine(" << name << L", " << value << L", " << (int) type << L");" << std::endl; // TODO: remove debug
+ void logicalLine(RecfileLineType type, const string_t& name = L"", const string_t& value = L"") { // name and value default to empty, so a line without content can be reported by its type alone
+ std::wcerr << L"logicalLine(" << (int) type << L", " << name << L", " << value << L");" << std::endl; // TODO: remove debug (for now each logical line is only traced to the error output)
// TODO: writer->startRelation()
// TODO: writer->writeAttribute()
}
};
+ enum class ParserState { // states of the character-by-character loop in RecfileParser::parse()
+ START, // at the beginning of a line: the first character decides how the line is handled
+ NAME, // inside a field name, up to the ':'
+ VALUE, // inside a field value, up to the end of the line
+ VALUE_CONTINUATION, // right after a line break in a value: '+' continues the value, anything else ends it
+ COMMENT, // inside a '#' comment, up to the end of the line
+ END, // stops the parsing loop
+ };
+
class RecfileParser {
private:
+ wstring_convert<codecvt_utf8<wchar_t>> convertor; // converts the buffered bytes (read as UTF-8) to wide strings for the handler. TODO: also support other encodings – or are recfiles always in UTF-8? NOTE(review): std::wstring_convert and std::codecvt_utf8 are deprecated since C++17.
RecfileHandler& handler;
+
+ // Reports the buffered line to the handler (bytes are converted to wide strings by convertor), then resets the buffers and the type for the next line.
+ void emitLogicalLine(RecfileLineType& type, std::stringstream& name, std::stringstream& value) {
+ const auto wideName = convertor.from_bytes(name.str());
+ const auto wideValue = convertor.from_bytes(value.str());
+ handler.logicalLine(type, wideName, wideValue);
+ type = RecfileLineType::DATA; // DATA is the default until the parser decides otherwise
+ value.str(""); value.clear(); // drop the content and the stream state flags
+ name.str(""); name.clear();
+ }
+
public:
RecfileParser(RecfileHandler& handler) : handler(handler) {
@@ -69,15 +92,72 @@
}
void parse(std::istream& input) {
- // TODO: parse
- handler.logicalLine(L"nnn", L"vvv", RecfileLineType::METADATA); // TODO: remove debug
- handler.logicalLine(L"nnn", L"vvv", RecfileLineType::DATA); // TODO: remove debug
+ // Reads the input character by character and reports every logical line (field, comment, separator) to the handler; throws RelpipeWriterException if the state machine reaches a state it does not handle.
+ ParserState state = ParserState::START;
+ RecfileLineType type = RecfileLineType::DATA;
+ std::stringstream name;
+ std::stringstream value;
+ char ch;
+
+ while (state != ParserState::END && input.good()) {
+ ch = input.get();
+ if (input.fail()) break; // end of input or read error: no character was read
+
+ switch (state) {
+ case ParserState::START:
+ if (ch == '%') { // '%' at the line start marks a metadata line
+ type = RecfileLineType::METADATA;
+ break;
+ } else if (ch == ' ') { // spaces before the name are skipped
+ break;
+ } else if (ch == '\n') { // empty line
+ handler.logicalLine(RecfileLineType::SEPARATOR);
+ break;
+ } else if (ch == '#') { // comment line
+ type = RecfileLineType::COMMENT;
+ state = ParserState::COMMENT;
+ if (input.get() != ' ') input.unget(); // skip one optional space after '#'
+ break;
+ } else state = ParserState::NAME; // first character of a name: leave START so its rules no longer apply, then fall through
+ case ParserState::NAME:
+ if (ch == ':') {
+ state = ParserState::VALUE;
+ if (input.get() != ' ') input.unget(); // skip one optional space after ':'
+ } else {
+ name << ch;
+ }
+ break;
+ case ParserState::VALUE:
+ if (ch == '\n') state = ParserState::VALUE_CONTINUATION; // the next character decides whether the value continues
+ else value << ch;
+ break;
+ case ParserState::VALUE_CONTINUATION:
+ if (ch == '+') {
+ state = ParserState::VALUE;
+ if (value.tellp()) value << '\n'; // line break between value lines; none while the value is still empty
+ if (input.get() != ' ') input.unget(); // skip one optional space after '+'
+ } else {
+ input.unget(); // not a continuation: this character starts the next line
+ state = ParserState::START;
+ emitLogicalLine(type, name, value);
+ }
+ break;
+ case ParserState::COMMENT:
+ if (ch == '\n') {
+ state = ParserState::START;
+ emitLogicalLine(type, name, value);
+ } else {
+ value << ch;
+ }
+ break;
+ default:
+ throw RelpipeWriterException(L"Unknown ParserState: " + std::to_wstring(static_cast<int>(state)) + L" in RecfileParser."); // TODO: better exception
+ }
+ }
+ if (state != ParserState::START) emitLogicalLine(type, name, value); // flush a line still open at the end of input; in START nothing is buffered, so no empty line is reported
+ }
-
};
-
-
public:
void process(std::istream& input, std::ostream& output) {