|
1 /** |
|
2 * Relational pipes |
|
3 * Copyright © 2018 František Kučera (Frantovo.cz, GlobalCode.info) |
|
4 * |
|
5 * This program is free software: you can redistribute it and/or modify |
|
6 * it under the terms of the GNU General Public License as published by |
|
7 * the Free Software Foundation, version 3 of the License. |
|
8 * |
|
9 * This program is distributed in the hope that it will be useful, |
|
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 * GNU General Public License for more details. |
|
13 * |
|
14 * You should have received a copy of the GNU General Public License |
|
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 */ |
|
17 #include <cstdlib> |
|
18 #include <vector> |
|
19 #include <memory> |
|
20 #include <regex> |
|
21 #include <algorithm> |
|
22 #include <unistd.h> |
|
23 |
|
24 #include <relpipe/writer/RelationalWriter.h> |
|
25 #include <relpipe/writer/RelpipeWriterException.h> |
|
26 #include <relpipe/writer/AttributeMetadata.h> |
|
27 #include <relpipe/writer/Factory.h> |
|
28 #include <relpipe/writer/TypeId.h> |
|
29 |
|
30 #include <relpipe/cli/CLI.h> |
|
31 |
|
32 #include "CSVCommand.h" |
|
33 |
|
34 using namespace std; |
|
35 using namespace relpipe::cli; |
|
36 using namespace relpipe::writer; |
|
37 |
|
38 namespace relpipe { |
|
39 namespace in { |
|
40 namespace csv { |
|
41 |
|
42 bool CSVCommand::readValue(std::istream& input, std::stringstream& currentValue, bool& lastInRecord) { |
|
43 lastInRecord = false; |
|
44 char ch; |
|
45 input.get(ch); |
|
46 if (ch == '"') { |
|
47 while (input.get(ch)) { |
|
48 if (ch == '"') { |
|
49 input.get(ch); |
|
50 if (ch == '"') { |
|
51 currentValue << ch; |
|
52 } else { |
|
53 if (ch == '\r') input.get(ch); |
|
54 if (ch == '\n') lastInRecord = true; |
|
55 else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)"); |
|
56 return true; |
|
57 } |
|
58 } else { |
|
59 currentValue << ch; |
|
60 } |
|
61 } |
|
62 } else if (ch == ',') { |
|
63 return true; |
|
64 } else if (ch == '\n') { |
|
65 lastInRecord = true; |
|
66 return true; |
|
67 } else if (ch == '\r') { |
|
68 input.get(ch); |
|
69 if (ch == '\n') { |
|
70 lastInRecord = true; |
|
71 return true; |
|
72 } else { |
|
73 throw RelpipeWriterException(L"Crazy carriage stuck during journey"); |
|
74 } |
|
75 } else { |
|
76 for (currentValue << ch; input.get(ch);) { |
|
77 switch (ch) { |
|
78 case ',': return true; |
|
79 case '\r': break; |
|
80 case '\n': |
|
81 lastInRecord = true; |
|
82 return true; |
|
83 default: currentValue << ch; |
|
84 } |
|
85 } |
|
86 } |
|
87 return false; |
|
88 } |
|
89 |
|
90 void CSVCommand::processDataStream(std::ostream& output, std::istream& input, const vector<relpipe::writer::string_t>& args) { |
|
91 wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV |
|
92 std::shared_ptr<RelationalWriter> writer(Factory::create(output)); |
|
93 vector<AttributeMetadata> metadata; |
|
94 bool headerDone = false; |
|
95 bool lastInRecord = false; |
|
96 stringstream currentValue; |
|
97 |
|
98 |
|
99 while (readValue(input, currentValue, lastInRecord) && input.good()) { |
|
100 if (headerDone) { |
|
101 writer->writeAttribute(convertor.from_bytes(currentValue.str())); |
|
102 } else { |
|
103 AttributeMetadata am; |
|
104 am.attributeName = convertor.from_bytes(currentValue.str()); |
|
105 am.typeId = TypeId::STRING; |
|
106 metadata.push_back(am); |
|
107 if (lastInRecord) { |
|
108 |
|
109 /* |
|
110 * Usage (simple syntax): |
|
111 * relpipe-in-csv → default relation name, attribute names on the first line, all types are string |
|
112 * relpipe-in-csv my_relation → custom relation name |
|
113 * relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data |
|
114 * relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c), custom types (integer,string,boolean), first line contains data |
|
115 */ |
|
116 |
|
117 vector<string_t> firstLine; |
|
118 if (args.size() == (1 + metadata.size())) { |
|
119 for (int i = 0; i < metadata.size(); i++) { |
|
120 firstLine.push_back(metadata[i].attributeName); |
|
121 metadata[i].attributeName = args[1 + i]; |
|
122 } |
|
123 } else if (args.size() == (1 + 2 * metadata.size())) { |
|
124 for (int i = 0; i < metadata.size(); i++) { |
|
125 firstLine.push_back(metadata[i].attributeName); |
|
126 metadata[i].attributeName = args[1 + i * 2]; |
|
127 metadata[i].typeId = writer->toTypeId(args[1 + i * 2 + 1]); |
|
128 } |
|
129 } |
|
130 |
|
131 headerDone = true; |
|
132 writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true); |
|
133 if (firstLine.size()) { |
|
134 for (string_t value : firstLine) writer->writeAttribute(value); |
|
135 } |
|
136 } |
|
137 } |
|
138 |
|
139 currentValue.str(""); |
|
140 currentValue.clear(); |
|
141 } |
|
142 } |
|
143 |
|
144 } |
|
145 } |
|
146 } |