27 #include <relpipe/writer/Factory.h> |
27 #include <relpipe/writer/Factory.h> |
28 #include <relpipe/writer/TypeId.h> |
28 #include <relpipe/writer/TypeId.h> |
29 |
29 |
30 #include <relpipe/cli/CLI.h> |
30 #include <relpipe/cli/CLI.h> |
31 |
31 |
|
32 #include "CSVCommand.h" |
|
33 |
32 using namespace std; |
34 using namespace std; |
33 using namespace relpipe::cli; |
35 using namespace relpipe::cli; |
34 using namespace relpipe::writer; |
36 using namespace relpipe::writer; |
35 |
37 using namespace relpipe::in::csv; |
36 bool readValue(istream& input, stringstream& currentValue, bool& lastInRecord) { |
|
37 lastInRecord = false; |
|
38 char ch; |
|
39 input.get(ch); |
|
40 if (ch == '"') { |
|
41 while (input.get(ch)) { |
|
42 if (ch == '"') { |
|
43 input.get(ch); |
|
44 if (ch == '"') { |
|
45 currentValue << ch; |
|
46 } else { |
|
47 if (ch == '\r') input.get(ch); |
|
48 if (ch == '\n') lastInRecord = true; |
|
49 else if (ch != ',') throw RelpipeWriterException(L"Unexpected character (should be „\\n“ or „,“)"); |
|
50 return true; |
|
51 } |
|
52 } else { |
|
53 currentValue << ch; |
|
54 } |
|
55 } |
|
56 } else if (ch == ',') { |
|
57 return true; |
|
58 } else if (ch == '\n') { |
|
59 lastInRecord = true; |
|
60 return true; |
|
61 } else if (ch == '\r') { |
|
62 input.get(ch); |
|
63 if (ch == '\n') { |
|
64 lastInRecord = true; |
|
65 return true; |
|
66 } else { |
|
67 throw RelpipeWriterException(L"Crazy carriage stuck during journey"); |
|
68 } |
|
69 } else { |
|
70 for (currentValue << ch; input.get(ch);) { |
|
71 switch (ch) { |
|
72 case ',': return true; |
|
73 case '\r': break; |
|
74 case '\n': |
|
75 lastInRecord = true; |
|
76 return true; |
|
77 default: currentValue << ch; |
|
78 } |
|
79 } |
|
80 } |
|
81 return false; |
|
82 } |
|
83 |
|
84 void processDataStream(ostream &output, istream& input, const vector<string_t>& args) { |
|
85 wstring_convert < codecvt_utf8<wchar_t>> convertor; // UTF-8 is required for CSV |
|
86 std::shared_ptr<RelationalWriter> writer(Factory::create(output)); |
|
87 vector<AttributeMetadata> metadata; |
|
88 bool headerDone = false; |
|
89 bool lastInRecord = false; |
|
90 stringstream currentValue; |
|
91 |
|
92 |
|
93 while (readValue(input, currentValue, lastInRecord) && input.good()) { |
|
94 if (headerDone) { |
|
95 writer->writeAttribute(convertor.from_bytes(currentValue.str())); |
|
96 } else { |
|
97 AttributeMetadata am; |
|
98 am.attributeName = convertor.from_bytes(currentValue.str()); |
|
99 am.typeId = TypeId::STRING; |
|
100 metadata.push_back(am); |
|
101 if (lastInRecord) { |
|
102 |
|
103 /* |
|
104 * Usage (simple syntax): |
|
105 * relpipe-in-csv → default relation name, attribute names on the first line, all types are string |
|
106 * relpipe-in-csv my_relation → custom relation name |
|
107 * relpipe-in-csv my_relation a b c → custom relation name, custom attribute names (a,b,c), first line contains data |
|
108 * relpipe-in-csv my_relation a integer b string c boolean → custom relation name, custom attribute names (a,b,c), custom types (integer,string,boolean), first line contains data |
|
109 */ |
|
110 |
|
111 vector<string_t> firstLine; |
|
112 if (args.size() == (1 + metadata.size())) { |
|
113 for (int i = 0; i < metadata.size(); i++) { |
|
114 firstLine.push_back(metadata[i].attributeName); |
|
115 metadata[i].attributeName = args[1 + i]; |
|
116 } |
|
117 } else if (args.size() == (1 + 2 * metadata.size())) { |
|
118 for (int i = 0; i < metadata.size(); i++) { |
|
119 firstLine.push_back(metadata[i].attributeName); |
|
120 metadata[i].attributeName = args[1 + i * 2]; |
|
121 metadata[i].typeId = writer->toTypeId(args[1 + i * 2 + 1]); |
|
122 } |
|
123 } |
|
124 |
|
125 headerDone = true; |
|
126 writer->startRelation(args.size() > 0 ? args[0] : L"csv", metadata, true); |
|
127 if (firstLine.size()) { |
|
128 for (string_t value : firstLine) writer->writeAttribute(value); |
|
129 } |
|
130 } |
|
131 } |
|
132 |
|
133 currentValue.str(""); |
|
134 currentValue.clear(); |
|
135 } |
|
136 } |
|
137 |
38 |
138 int main(int argc, char** argv) { |
39 int main(int argc, char** argv) { |
139 setlocale(LC_ALL, ""); |
40 setlocale(LC_ALL, ""); |
140 CLI::untieStdIO(); |
41 CLI::untieStdIO(); |
141 CLI cli(argc, argv); |
42 CLI cli(argc, argv); |
142 |
43 |
143 int resultCode = CLI::EXIT_CODE_UNEXPECTED_ERROR; |
44 int resultCode = CLI::EXIT_CODE_UNEXPECTED_ERROR; |
144 |
45 |
145 try { |
46 try { |
146 processDataStream(cout, cin, cli.arguments()); |
47 CSVCommand command; |
|
48 command.processDataStream(cout, cin, cli.arguments()); |
147 resultCode = CLI::EXIT_CODE_SUCCESS; |
49 resultCode = CLI::EXIT_CODE_SUCCESS; |
148 } catch (RelpipeWriterException e) { |
50 } catch (RelpipeWriterException e) { |
149 fwprintf(stderr, L"Caught Writer exception: %ls\n", e.getMessge().c_str()); |
51 fwprintf(stderr, L"Caught Writer exception: %ls\n", e.getMessge().c_str()); |
150 fwprintf(stderr, L"Debug: Input stream: eof=%ls, lastRead=%d\n", (cin.eof() ? L"true" : L"false"), cin.gcount()); |
52 fwprintf(stderr, L"Debug: Input stream: eof=%ls, lastRead=%d\n", (cin.eof() ? L"true" : L"false"), cin.gcount()); |
151 resultCode = CLI::EXIT_CODE_DATA_ERROR; |
53 resultCode = CLI::EXIT_CODE_DATA_ERROR; |