support @table:number-columns-repeated and @table:number-columns-spanned v_0 tip
author: František Kučera <franta-hg@frantovo.cz>
Mon, 15 May 2023 00:55:16 +0200
branch: v_0
changeset 2 d4e0472e8e5d
parent 1 e82aaf24b0fe
support @table:number-columns-repeated and @table:number-columns-spanned
src/ODSCommand.h
--- a/src/ODSCommand.h	Sun May 14 02:13:46 2023 +0200
+++ b/src/ODSCommand.h	Mon May 15 00:55:16 2023 +0200
@@ -35,6 +35,15 @@
 
 using namespace relpipe::writer;
 
+/**
+ * Reads OpenDocument / LibreOffice Spreadsheet.
+ * 
+ * Known limitations:
+ *  - currently reads only „flat“ uncompressed files (.fods, not .ods)
+ *  - while streaming is possible, this implementation reads whole XML document
+ *    in memory and then processes it (DOM)
+ *  - only string type is supported now → use relpipe-tr-infertypes
+ */
 class ODSCommand {
 private:
 	std::wstring_convert<codecvt_utf8<wchar_t>> convertor;
@@ -46,31 +55,28 @@
 		return convertor.from_bytes(node->eval_to_string(xpath, ns));
 	}
 
-	void processCell(xmlpp::Node* c, AttributeMetadata& am) {
-		string_t value = xpath(c, "@o:value");
-		if (value.size()) writer->writeAttribute(value);
-		else writer->writeAttribute(xpath(c, "tx:p"));
-		// TODO: be aware of the current data types
+	int xpathInt(xmlpp::Node* node, std::string xpath, int defaultValue = 0) {
+		double result = node->eval_to_number(xpath, ns);
+		return result != result ? defaultValue : result;
 	}
 
 	void processRow(xmlpp::Node* r, std::vector<AttributeMetadata>& am) {
-		for (int i = 0; i < am.size(); i++) {
-			auto xpe = std::string("t:table-cell[")
-					+ std::to_string(i + 1)
-					+ "]";
-			auto cells = r->find(xpe, ns);
-			if (cells.size() == 1) {
-				processCell(cells[0], am[i]);
-			} else {
-				writer->writeAttribute(L"");
-				// TODO: support also other data types
-			}
+		auto cells = r->find("t:table-cell", ns);
+		for (int i = 0; i < cells.size(); i++) {
+			xmlpp::Node* c = cells[i];
+			string_t value = xpath(c, "@o:value");
+			if (value.size() == 0) value = xpath(c, "tx:p");
+
+			// value = am[i].attributeName + L"=" + value;
+
+			double repeated = xpathInt(c, "@t:number-columns-repeated", 1);
+			double spanned = xpathInt(c, "@t:number-columns-spanned", 1);
+
+			for (int i = 0; i < repeated; i++) writer->writeAttribute(value);
+			for (int i = 1; i < spanned; i++) writer->writeAttribute(L"");
+
+			// TODO: support also other data types
 		}
-
-		// FIXME: support sparse data / missing values:
-		// <table:table-row  table:number-rows-repeated="2">
-		// <table:table-cell table:number-columns-repeated="3"/>
-
 	}
 
 	void processTable(xmlpp::Node* t) {
@@ -79,19 +85,26 @@
 
 		for (xmlpp::Node* c : t->find("t:table-row[1]/t:table-cell", ns)) {
 			auto name = xpath(c, "tx:p");
-			if (name.size()) {
+			double repeated = xpathInt(c, "@t:number-columns-repeated", 1);
+			double spanned = xpathInt(c, "@t:number-columns-spanned", 1);
+			for (int i = 0, limit = repeated * spanned; i < limit; i++) {
 				metadata.push_back({name, TypeId::STRING});
-				// TODO: detect and support other data types
 			}
+			// TODO: detect and support other data types
 		}
 
-		writer->startRelation(relation, metadata, true);
+		if (metadata.size()) {
+			writer->startRelation(relation, metadata, true);
 
-		int i = 0;
-		for (xmlpp::Node* r : t->find("t:table-row", ns)) {
-			i++;
-			if (i == 1) continue; // skip header row
-			processRow(r, metadata);
+			int i = 0;
+			for (xmlpp::Node* r : t->find("t:table-row", ns)) {
+				i++;
+				if (i == 1) continue; // skip header row
+				processRow(r, metadata);
+			}
+		} else {
+			// no values on the first row
+			// probably empty table
 		}
 	}
 
@@ -118,8 +131,6 @@
 			throw RelpipeWriterException(L"Invalid XML structure. "
 					"Expecting OpenDocument spreadsheet.");
 		}
-
-
 	}
 };