src/RecfileCommand.h
author František Kučera <franta-hg@frantovo.cz>
Fri, 13 Dec 2019 22:19:39 +0100
branch v_0
changeset 10 c59363fd805b
parent 8 9c8c20c3bd64
child 17 06c8926debe0
permissions -rw-r--r--
support signed integers, negative numbers; binary format change: encode numbers as SLEB128
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     1
/**
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     2
 * Relational pipes
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     3
 * Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     4
 *
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     5
 * This program is free software: you can redistribute it and/or modify
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     6
 * it under the terms of the GNU General Public License as published by
8
9c8c20c3bd64 fix license version: GNU GPLv3
František Kučera <franta-hg@frantovo.cz>
parents: 4
diff changeset
     7
 * the Free Software Foundation, version 3 of the License.
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     8
 *
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
     9
 * This program is distributed in the hope that it will be useful,
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    12
 * GNU General Public License for more details.
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    13
 *
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    14
 * You should have received a copy of the GNU General Public License
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    16
 */
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    17
#pragma once
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    18
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    19
#include <iostream>
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    20
#include <string>
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    21
#include <sstream>
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    22
#include <vector>
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    23
#include <set>
10
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    24
#include <regex>
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    25
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    26
#include <relpipe/writer/typedefs.h>
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    27
#include <relpipe/writer/RelationalWriter.h>
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    28
#include <relpipe/writer/AttributeMetadata.h>
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    29
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    30
namespace relpipe {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    31
namespace in {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    32
namespace recfile {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    33
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    34
using namespace relpipe::writer;
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    35
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    36
class RecfileCommand {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    37
private:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    38
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    39
	/**
	 * Kind of a logical recfile line passed from the parser to the handler.
	 */
	enum class RecfileLineType {
		/** line starting with '%' (record descriptor fields like %rec or %type) */
		METADATA,
		/** ordinary "name: value" field of a record; the default type of a logical line */
		DATA,
		/** empty line; terminates the current record */
		SEPARATOR,
		/** line starting with '#' */
		COMMENT,
		/** dispatched to the handler's end(), which writes the header and remaining records */
		END,
	};
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    46
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    47
	class RecfileHandler {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    48
	private:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
    49
		RelationalWriter* writer;
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    50
		string_t currentRelationName;
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    51
		std::vector<AttributeMetadata> currentAttributeMetadata;
10
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    52
		std::vector<AttributeMetadata> currentTypeHints;
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    53
		std::vector<string_t> currentRecord;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    54
		std::vector<std::vector<string_t>> currentRecords;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    55
		size_t prefetchCount = 1;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    56
		bool headerWritten = false;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    57
10
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    58
		TypeId findType(string_t attributeName, TypeId defaultType = TypeId::STRING) {
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    59
			for (AttributeMetadata m : currentTypeHints) if (m.attributeName == attributeName) return m.typeId;
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    60
			return defaultType;
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    61
		}
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    62
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    63
		TypeId recType2typeId(string_t recType) {
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    64
			// TODO: support more types
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    65
			// boolean is currently unsupported, because NULLs are not implemented yet and recfile booleans might be null
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    66
			if (recType == L"int") return TypeId::INTEGER;
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    67
			else return TypeId::STRING;
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    68
		}
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    69
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    70
		void writeHeader() {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    71
			if (headerWritten) return;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    72
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    73
			if (currentRelationName.size() == 0) currentRelationName = L"recfile";
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    74
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    75
			std::set<string_t> uniqueAttributeNames;
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    76
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    77
			// TODO: add also attribute names from type hints from recfile metadata
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    78
			for (int i = 0; i < currentRecords.size(); i++) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    79
				std::vector<string_t> record = currentRecords[i];
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    80
				for (int j = 0; j < record.size(); j += 2) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    81
					if (uniqueAttributeNames.insert(record[j]).second) {
10
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
    82
						currentAttributeMetadata.push_back({record[j], findType(record[j])});
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    83
					}
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    84
				}
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    85
			}
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    86
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    87
			writer->startRelation(currentRelationName, currentAttributeMetadata, true);
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    88
			headerWritten = true;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    89
		}
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    90
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    91
		string_t findValue(std::vector<string_t>& record, TypeId type, string_t& name) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    92
			for (int j = 0; j < record.size(); j += 2) if (record[j] == name) return record[j + 1];
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    93
			return L""; // TODO: proper empty/null value for given type
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    94
		}
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    95
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
    96
		void writeRecords() {
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    97
			for (std::vector<string_t> record : currentRecords) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    98
				for (AttributeMetadata a : currentAttributeMetadata) {
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
    99
					writer->writeAttribute(findValue(record, a.typeId, a.attributeName));
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   100
				}
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   101
			}
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   102
			currentRecords.clear();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   103
		}
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   104
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   105
		void metadata(const string_t& name, const string_t& value) {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   106
			if (name == L"rec") {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   107
				currentRelationName = value;
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
   108
				currentAttributeMetadata.clear();
10
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
   109
				currentTypeHints.clear();
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   110
				currentRecord.clear();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   111
				currentRecords.clear();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   112
				headerWritten = false;
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   113
			} else if (name == L"type") {
10
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
   114
				std::wsmatch match;
c59363fd805b support signed integers, negative numbers; binary format change: encode numbers as SLEB128
František Kučera <franta-hg@frantovo.cz>
parents: 8
diff changeset
   115
				if (regex_search(value, match, std::wregex(L"\\s?(.*)\\s+(.*)\\s?"))) currentTypeHints.push_back({match[1], recType2typeId(match[2])});
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   116
			} else {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   117
				// ignore – other recfile metadata like keys or auto-increments
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   118
			}
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   119
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   120
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   121
		void data(const string_t& name, const string_t& value) {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   122
			currentRecord.push_back(name);
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   123
			currentRecord.push_back(value);
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   124
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   125
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   126
		void comment(const string_t& value) {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   127
			// ignore comments
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   128
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   129
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   130
		void separator() {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   131
			if (currentRecord.size()) {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   132
				currentRecords.push_back(currentRecord);
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   133
				currentRecord.clear();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   134
			}
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   135
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   136
			if (prefetchCount > 0 && currentRecords.size() >= prefetchCount) {
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   137
				writeHeader();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   138
				writeRecords();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   139
			}
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   140
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   141
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   142
		void end() {
3
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   143
			if (currentRecord.size()) currentRecords.push_back(currentRecord);
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   144
			writeHeader();
891fe13d7397 handler logic structure
František Kučera <franta-hg@frantovo.cz>
parents: 2
diff changeset
   145
			writeRecords();
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   146
		}
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   147
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   148
	public:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   149
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   150
		RecfileHandler(RelationalWriter* writer) : writer(writer) {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   151
		}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   152
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   153
		virtual ~RecfileHandler() {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   154
		}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   155
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   156
		void logicalLine(RecfileLineType type, const string_t& name = L"", const string_t& value = L"") {
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   157
			switch (type) {
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   158
				case RecfileLineType::METADATA: return metadata(name, value);
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   159
				case RecfileLineType::DATA: return data(name, value);
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   160
				case RecfileLineType::COMMENT: return comment(value);
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   161
				case RecfileLineType::SEPARATOR: return separator();
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   162
				case RecfileLineType::END: return end();
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   163
			}
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   164
		}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   165
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   166
	};
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   167
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   168
	/**
	 * States of the character-level recfile parser.
	 */
	enum class ParserState {
		/** at the beginning of a logical line; initial state */
		START,
		/** reading a field name (until ':') */
		NAME,
		/** reading a field value (entered after ':') */
		VALUE,
		/** presumably a value continued on the following line – handled outside of the visible part, TODO confirm */
		VALUE_CONTINUATION,
		/** reading a comment (entered after '#' at the beginning of a line) */
		COMMENT,
		/** parsing loop stops in this state */
		END,
	};
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   176
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   177
	class RecfileParser {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   178
	private:
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   179
		wstring_convert<codecvt_utf8<wchar_t>> convertor; // TODO: support also other encodings or are recfiles always in UTF-8?
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   180
		RecfileHandler& handler;
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   181
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   182
		/**
		 * Passes one finished logical line to the handler and prepares the buffers for the next one.
		 *
		 * @param type type of the finished line; reset to DATA afterwards
		 * @param name raw bytes of the field name; emptied afterwards
		 * @param value raw bytes of the field value; emptied afterwards
		 */
		void emitLogicalLine(RecfileLineType& type, std::stringstream& name, std::stringstream& value) {
			// bytes → wide strings
			const auto decodedName = convertor.from_bytes(name.str());
			const auto decodedValue = convertor.from_bytes(value.str());
			handler.logicalLine(type, decodedName, decodedValue);

			// drop the content and also the state flags of both buffers
			value.str("");
			value.clear();
			name.str("");
			name.clear();

			type = RecfileLineType::DATA;
		}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   191
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   192
	public:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   193
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   194
		/**
		 * @param handler receiver of the parsed logical lines; stored as a reference
		 */
		RecfileParser(RecfileHandler& handler) : handler(handler) {}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   196
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   197
		/** Nothing to release – the handler is only referenced. */
		virtual ~RecfileParser() = default;
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   199
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   200
		void parse(std::istream& input) {
1
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   201
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   202
			ParserState state = ParserState::START;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   203
			RecfileLineType type = RecfileLineType::DATA;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   204
			std::stringstream name;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   205
			std::stringstream value;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   206
			char ch;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   207
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   208
			while (state != ParserState::END && input.good()) {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   209
				ch = input.get();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   210
				if (input.eof()) continue;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   211
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   212
				switch (state) {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   213
					case ParserState::START:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   214
						if (ch == '%') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   215
							type = RecfileLineType::METADATA;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   216
							break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   217
						} else if (ch == ' ') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   218
							break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   219
						} else if (ch == '\n') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   220
							handler.logicalLine(RecfileLineType::SEPARATOR);
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   221
							break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   222
						} else if (ch == '#') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   223
							type = RecfileLineType::COMMENT;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   224
							state = ParserState::COMMENT;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   225
							if (input.get() != ' ') input.unget();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   226
							break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   227
						} // else → name
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   228
					case ParserState::NAME:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   229
						if (ch == ':') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   230
							state = ParserState::VALUE;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   231
							if (input.get() != ' ') input.unget();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   232
						} else {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   233
							name << ch;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   234
						}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   235
						break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   236
					case ParserState::VALUE:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   237
						if (ch == '\n') state = ParserState::VALUE_CONTINUATION;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   238
						else value << ch;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   239
						break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   240
					case ParserState::VALUE_CONTINUATION:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   241
						if (ch == '+') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   242
							state = ParserState::VALUE;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   243
							if (value.tellp()) value << '\n';
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   244
							if (input.get() != ' ') input.unget();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   245
						} else {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   246
							input.unget();
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   247
							state = ParserState::START;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   248
							emitLogicalLine(type, name, value);
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   249
						}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   250
						break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   251
					case ParserState::COMMENT:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   252
						if (ch == '\n') {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   253
							state = ParserState::START;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   254
							emitLogicalLine(type, name, value);
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   255
						} else {
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   256
							value << ch;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   257
						}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   258
						break;
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   259
					default:
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   260
						throw RelpipeWriterException(L"Unknown ParserState: " + std::to_wstring((int) state) + L" in RecfileParser."); // TODO: better exception
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   261
				}
8dfb42e5c088 parse recfile (logical lines)
František Kučera <franta-hg@frantovo.cz>
parents: 0
diff changeset
   262
			}
4
b5239b4b345b first working version
František Kučera <franta-hg@frantovo.cz>
parents: 3
diff changeset
   263
			if (name.tellp()) emitLogicalLine(type, name, value);
2
2390e2949a36 parse recfile: separate methods
František Kučera <franta-hg@frantovo.cz>
parents: 1
diff changeset
   264
			handler.logicalLine(RecfileLineType::END);
0
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   265
		}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   266
	};
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   267
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   268
public:
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   269
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   270
	void process(std::istream& input, std::ostream& output) {
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   271
		unique_ptr<RelationalWriter> writer(Factory::create(output));
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   272
		RecfileHandler handler(writer.get());
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   273
		RecfileParser parser(handler);
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   274
		parser.parse(input);
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   275
	}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   276
};
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   277
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   278
}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   279
}
515a697cc9cd project skeleton
František Kučera <franta-hg@frantovo.cz>
parents:
diff changeset
   280
}