src/RecfileCommand.h
author František Kučera <franta-hg@frantovo.cz>
Tue, 22 Oct 2019 19:52:02 +0200
branchv_0
changeset 8 9c8c20c3bd64
parent 4 b5239b4b345b
child 10 c59363fd805b
permissions -rw-r--r--
fix license version: GNU GPLv3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
8
9c8c20c3bd64 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#pragma once
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <iostream>
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    20
#include <string>
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <sstream>
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <vector>
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    23
#include <set>
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    24
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
#include <relpipe/writer/typedefs.h>
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    26
#include <relpipe/writer/RelationalWriter.h>
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    27
#include <relpipe/writer/AttributeMetadata.h>
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    28
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
namespace relpipe {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
namespace in {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
namespace recfile {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
using namespace relpipe::writer;
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
class RecfileCommand {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
private:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
	/**
	 * Kind of one logical line of a recfile, as reported by the parser to the handler.
	 */
	enum class RecfileLineType {
		/** Line introduced by '%'. */
		METADATA,
		/** Ordinary "name: value" field; the parser's default line type. */
		DATA,
		/** Empty line. */
		SEPARATOR,
		/** Line introduced by '#'. */
		COMMENT,
		/** End marker; the handler flushes its buffered records when it receives it. */
		END,
	};
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    45
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    46
	class RecfileHandler {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    47
	private:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    48
		RelationalWriter* writer;
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    49
		string_t currentRelationName;
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    50
		std::vector<AttributeMetadata> currentAttributeMetadata;
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    51
		std::vector<string_t> currentRecord;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    52
		std::vector<std::vector<string_t>> currentRecords;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    53
		size_t prefetchCount = 1;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    54
		bool headerWritten = false;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    55
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    56
		void writeHeader() {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    57
			if (headerWritten) return;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    58
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    59
			if (currentRelationName.size() == 0) currentRelationName = L"recfile";
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    60
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    61
			std::set<string_t> uniqueAttributeNames;
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    62
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    63
			// TODO: add also attribute names from type hints from recfile metadata
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    64
			for (int i = 0; i < currentRecords.size(); i++) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    65
				std::vector<string_t> record = currentRecords[i];
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    66
				for (int j = 0; j < record.size(); j += 2) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    67
					if (uniqueAttributeNames.insert(record[j]).second) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    68
						currentAttributeMetadata.push_back({record[j], TypeId::STRING}); // TODO: type from type hints
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    69
					}
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    70
				}
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    71
			}
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    72
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    73
			writer->startRelation(currentRelationName, currentAttributeMetadata, true);
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    74
			headerWritten = true;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    75
		}
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    76
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    77
		string_t findValue(std::vector<string_t>& record, TypeId type, string_t& name) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    78
			for (int j = 0; j < record.size(); j += 2) if (record[j] == name) return record[j + 1];
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    79
			return L""; // TODO: proper empty/null value for given type
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    80
		}
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    81
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    82
		void writeRecords() {
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    83
			for (std::vector<string_t> record : currentRecords) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    84
				for (AttributeMetadata a : currentAttributeMetadata) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    85
					writer->writeAttribute(findValue(record, a.typeId, a.attributeName));
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    86
				}
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    87
			}
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    88
			currentRecords.clear();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    89
		}
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    90
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
    91
		void metadata(const string_t& name, const string_t& value) {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    92
			if (name == L"rec") {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    93
				currentRelationName = value;
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    94
				currentAttributeMetadata.clear();
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    95
				currentRecord.clear();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    96
				currentRecords.clear();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    97
				headerWritten = false;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    98
			} else if (name == L"type") {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    99
				// TODO: save type hint
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   100
			} else {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   101
				// ignore – other recfile metadata like keys or auto-increments
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   102
			}
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   103
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   104
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   105
		void data(const string_t& name, const string_t& value) {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   106
			currentRecord.push_back(name);
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   107
			currentRecord.push_back(value);
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   108
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   109
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   110
		void comment(const string_t& value) {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   111
			// ignore comments
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   112
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   113
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   114
		void separator() {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   115
			if (currentRecord.size()) {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   116
				currentRecords.push_back(currentRecord);
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   117
				currentRecord.clear();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   118
			}
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   119
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   120
			if (prefetchCount > 0 && currentRecords.size() >= prefetchCount) {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   121
				writeHeader();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   122
				writeRecords();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   123
			}
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   124
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   125
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   126
		void end() {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   127
			if (currentRecord.size()) currentRecords.push_back(currentRecord);
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   128
			writeHeader();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   129
			writeRecords();
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   130
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   131
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   132
	public:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   133
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   134
		RecfileHandler(RelationalWriter* writer) : writer(writer) {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   135
		}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   136
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   137
		virtual ~RecfileHandler() {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   138
		}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   139
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   140
		void logicalLine(RecfileLineType type, const string_t& name = L"", const string_t& value = L"") {
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   141
			switch (type) {
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   142
				case RecfileLineType::METADATA: return metadata(name, value);
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   143
				case RecfileLineType::DATA: return data(name, value);
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   144
				case RecfileLineType::COMMENT: return comment(value);
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   145
				case RecfileLineType::SEPARATOR: return separator();
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   146
				case RecfileLineType::END: return end();
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   147
			}
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   148
		}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   149
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   150
	};
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   151
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   152
	/**
	 * States of the character-by-character recfile parser.
	 */
	enum class ParserState {
		/** At the beginning of a line; the first character decides what kind of line follows. */
		START,
		/** Reading a field name, up to the ':' delimiter. */
		NAME,
		/** Reading a field value, up to the end of the line. */
		VALUE,
		/** Just after the end of a value line; a '+' continues the value on the next line. */
		VALUE_CONTINUATION,
		/** Reading a comment, up to the end of the line. */
		COMMENT,
		/** Parsing is finished; the main loop stops in this state. */
		END,
	};
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   160
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   161
	class RecfileParser {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   162
	private:
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   163
		wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or are recfiles always in UTF-8?
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   164
		RecfileHandler& handler;
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   165
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   166
		/**
		 * Passes the accumulated logical line to the handler and prepares the buffers for the next one.
		 *
		 * The name and value bytes are converted to wide strings by the convertor before the handler is called.
		 * Afterwards both buffers are emptied, their stream state flags are reset
		 * and the line type is set back to DATA.
		 *
		 * @param type type of the emitted line; reset to DATA on return
		 * @param name buffer with the field name; emptied on return
		 * @param value buffer with the field value; emptied on return
		 */
		void emitLogicalLine(RecfileLineType& type, std::stringstream& name, std::stringstream& value) {
			const auto wideName = convertor.from_bytes(name.str());
			const auto wideValue = convertor.from_bytes(value.str());
			handler.logicalLine(type, wideName, wideValue);

			// drop the content and clear the state flags of a buffer
			auto reset = [](std::stringstream& buffer) {
				buffer.str("");
				buffer.clear();
			};
			reset(name);
			reset(value);

			type = RecfileLineType::DATA;
		}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   175
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   176
	public:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   177
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   178
		/**
		 * @param handler receiver of the parsed logical lines; kept by reference, so it must outlive the parser
		 */
		RecfileParser(RecfileHandler& handler) : handler(handler) {}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   180
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   181
		/** Nothing to release; the handler is only referenced. */
		virtual ~RecfileParser() = default;
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   183
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   184
		void parse(std::istream& input) {
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   185
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   186
			ParserState state = ParserState::START;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   187
			RecfileLineType type = RecfileLineType::DATA;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   188
			std::stringstream name;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   189
			std::stringstream value;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   190
			char ch;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   191
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   192
			while (state != ParserState::END && input.good()) {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   193
				ch = input.get();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   194
				if (input.eof()) continue;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   195
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   196
				switch (state) {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   197
					case ParserState::START:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   198
						if (ch == '%') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   199
							type = RecfileLineType::METADATA;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   200
							break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   201
						} else if (ch == ' ') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   202
							break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   203
						} else if (ch == '\n') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   204
							handler.logicalLine(RecfileLineType::SEPARATOR);
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   205
							break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   206
						} else if (ch == '#') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   207
							type = RecfileLineType::COMMENT;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   208
							state = ParserState::COMMENT;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   209
							if (input.get() != ' ') input.unget();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   210
							break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   211
						} // else → name
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   212
					case ParserState::NAME:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   213
						if (ch == ':') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   214
							state = ParserState::VALUE;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   215
							if (input.get() != ' ') input.unget();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   216
						} else {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   217
							name << ch;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   218
						}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   219
						break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   220
					case ParserState::VALUE:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   221
						if (ch == '\n') state = ParserState::VALUE_CONTINUATION;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   222
						else value << ch;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   223
						break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   224
					case ParserState::VALUE_CONTINUATION:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   225
						if (ch == '+') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   226
							state = ParserState::VALUE;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   227
							if (value.tellp()) value << '\n';
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   228
							if (input.get() != ' ') input.unget();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   229
						} else {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   230
							input.unget();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   231
							state = ParserState::START;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   232
							emitLogicalLine(type, name, value);
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   233
						}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   234
						break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   235
					case ParserState::COMMENT:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   236
						if (ch == '\n') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   237
							state = ParserState::START;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   238
							emitLogicalLine(type, name, value);
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   239
						} else {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   240
							value << ch;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   241
						}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   242
						break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   243
					default:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   244
						throw RelpipeWriterException(L"Unknown ParserState: " + std::to_wstring((int) state) + L" in RecfileParser."); // TODO: better exception
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   245
				}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   246
			}
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
   247
			if (name.tellp()) emitLogicalLine(type, name, value);
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   248
			handler.logicalLine(RecfileLineType::END);
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   249
		}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   250
	};
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   251
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   252
public:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   253
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   254
	void process(std::istream& input, std::ostream& output) {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   255
		unique_ptr<RelationalWriter> writer(Factory::create(output));
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   256
		RecfileHandler handler(writer.get());
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   257
		RecfileParser parser(handler);
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   258
		parser.parse(input);
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   259
	}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   260
};
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   261
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   262
}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   263
}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   264
}