# HG changeset patch # User František Kučera # Date 1684104916 -7200 # Node ID d4e0472e8e5d52c58d3e175910abc4804b930ea8 # Parent e82aaf24b0fe753f409ea36b70cf4aff958a09dd support @table:number-columns-repeated and @table:number-columns-spanned diff -r e82aaf24b0fe -r d4e0472e8e5d src/ODSCommand.h --- a/src/ODSCommand.h Sun May 14 02:13:46 2023 +0200 +++ b/src/ODSCommand.h Mon May 15 00:55:16 2023 +0200 @@ -35,6 +35,15 @@ using namespace relpipe::writer; +/** + * Reads OpenDocument / LibreOffice Spreadsheet. + * + * Known limitations: + * - currently reads only „flat“ uncompressed files (.fods, not .ods) + * - while streaming is possible, this implementation reads whole XML document + * in memory and then processes it (DOM) + * - only string type is supported now → use relpipe-tr-infertypes + */ class ODSCommand { private: std::wstring_convert> convertor; @@ -46,31 +55,28 @@ return convertor.from_bytes(node->eval_to_string(xpath, ns)); } - void processCell(xmlpp::Node* c, AttributeMetadata& am) { - string_t value = xpath(c, "@o:value"); - if (value.size()) writer->writeAttribute(value); - else writer->writeAttribute(xpath(c, "tx:p")); - // TODO: be aware of the current data types + int xpathInt(xmlpp::Node* node, std::string xpath, int defaultValue = 0) { + double result = node->eval_to_number(xpath, ns); + return result != result ? defaultValue : result; } void processRow(xmlpp::Node* r, std::vector& am) { - for (int i = 0; i < am.size(); i++) { - auto xpe = std::string("t:table-cell[") - + std::to_string(i + 1) - + "]"; - auto cells = r->find(xpe, ns); - if (cells.size() == 1) { - processCell(cells[0], am[i]); - } else { - writer->writeAttribute(L""); - // TODO: support also other data types - } + auto cells = r->find("t:table-cell", ns); + for (int i = 0; i < cells.size(); i++) { + xmlpp::Node* c = cells[i]; + string_t value = xpath(c, "@o:value"); + if (value.size() == 0) value = xpath(c, "tx:p"); + + // value = am[i].attributeName + L"=" + value; + + double repeated = xpathInt(c, "@t:number-columns-repeated", 1); + double spanned = xpathInt(c, "@t:number-columns-spanned", 1); + + for (int i = 0; i < repeated; i++) writer->writeAttribute(value); + for (int i = 1; i < spanned; i++) writer->writeAttribute(L""); + + // TODO: support also other data types } - - // FIXME: support sparse data / missing values: - // - // - } void processTable(xmlpp::Node* t) { @@ -79,19 +85,26 @@ for (xmlpp::Node* c : t->find("t:table-row[1]/t:table-cell", ns)) { auto name = xpath(c, "tx:p"); - if (name.size()) { + double repeated = xpathInt(c, "@t:number-columns-repeated", 1); + double spanned = xpathInt(c, "@t:number-columns-spanned", 1); + for (int i = 0, limit = repeated * spanned; i < limit; i++) { metadata.push_back({name, TypeId::STRING}); - // TODO: detect and support other data types } + // TODO: detect and support other data types } - writer->startRelation(relation, metadata, true); + if (metadata.size()) { + writer->startRelation(relation, metadata, true); - int i = 0; - for (xmlpp::Node* r : t->find("t:table-row", ns)) { - i++; - if (i == 1) continue; // skip header row - processRow(r, metadata); + int i = 0; + for (xmlpp::Node* r : t->find("t:table-row", ns)) { + i++; + if (i == 1) continue; // skip header row + processRow(r, metadata); + } + } else { + // no values on the first row + // probably empty table } } @@ -118,8 +131,6 @@ throw RelpipeWriterException(L"Invalid XML structure. " "Expecting OpenDocument spreadsheet."); } - - } };