src/ODSCommand.h
branchv_0
changeset 2 d4e0472e8e5d
parent 1 e82aaf24b0fe
equal deleted inserted replaced
1:e82aaf24b0fe 2:d4e0472e8e5d
    33 namespace in {
    33 namespace in {
    34 namespace ods {
    34 namespace ods {
    35 
    35 
    36 using namespace relpipe::writer;
    36 using namespace relpipe::writer;
    37 
    37 
       
    38 /**
       
    39  * Reads OpenDocument / LibreOffice Spreadsheet.
       
    40  * 
       
    41  * Known limitations:
       
    42  *  - currently reads only „flat“ uncompressed files (.fods, not .ods)
       
    43  *  - while streaming is possible, this implementation reads whole XML document
       
    44  *    in memory and then processes it (DOM)
       
    45  *  - only string type is supported now → use relpipe-tr-infertypes
       
    46  */
    38 class ODSCommand {
    47 class ODSCommand {
    39 private:
    48 private:
    40 	std::wstring_convert<codecvt_utf8<wchar_t>> convertor;
    49 	std::wstring_convert<codecvt_utf8<wchar_t>> convertor;
    41 
    50 
    42 	xmlpp::Node::PrefixNsMap ns;
    51 	xmlpp::Node::PrefixNsMap ns;
    44 
    53 
    45 	string_t xpath(xmlpp::Node* node, std::string xpath) {
    54 	string_t xpath(xmlpp::Node* node, std::string xpath) {
    46 		return convertor.from_bytes(node->eval_to_string(xpath, ns));
    55 		return convertor.from_bytes(node->eval_to_string(xpath, ns));
    47 	}
    56 	}
    48 
    57 
    49 	void processCell(xmlpp::Node* c, AttributeMetadata& am) {
    58 	int xpathInt(xmlpp::Node* node, std::string xpath, int defaultValue = 0) {
    50 		string_t value = xpath(c, "@o:value");
    59 		double result = node->eval_to_number(xpath, ns);
    51 		if (value.size()) writer->writeAttribute(value);
    60 		return result != result ? defaultValue : result;
    52 		else writer->writeAttribute(xpath(c, "tx:p"));
       
    53 		// TODO: be aware of the current data types
       
    54 	}
    61 	}
    55 
    62 
    56 	void processRow(xmlpp::Node* r, std::vector<AttributeMetadata>& am) {
    63 	void processRow(xmlpp::Node* r, std::vector<AttributeMetadata>& am) {
    57 		for (int i = 0; i < am.size(); i++) {
    64 		auto cells = r->find("t:table-cell", ns);
    58 			auto xpe = std::string("t:table-cell[")
    65 		for (int i = 0; i < cells.size(); i++) {
    59 					+ std::to_string(i + 1)
    66 			xmlpp::Node* c = cells[i];
    60 					+ "]";
    67 			string_t value = xpath(c, "@o:value");
    61 			auto cells = r->find(xpe, ns);
    68 			if (value.size() == 0) value = xpath(c, "tx:p");
    62 			if (cells.size() == 1) {
    69 
    63 				processCell(cells[0], am[i]);
    70 			// value = am[i].attributeName + L"=" + value;
    64 			} else {
    71 
    65 				writer->writeAttribute(L"");
    72 			double repeated = xpathInt(c, "@t:number-columns-repeated", 1);
    66 				// TODO: support also other data types
    73 			double spanned = xpathInt(c, "@t:number-columns-spanned", 1);
    67 			}
    74 
       
    75 			for (int i = 0; i < repeated; i++) writer->writeAttribute(value);
       
    76 			for (int i = 1; i < spanned; i++) writer->writeAttribute(L"");
       
    77 
       
    78 			// TODO: support also other data types
    68 		}
    79 		}
    69 
       
    70 		// FIXME: support sparse data / missing values:
       
    71 		// <table:table-row  table:number-rows-repeated="2">
       
    72 		// <table:table-cell table:number-columns-repeated="3"/>
       
    73 
       
    74 	}
    80 	}
    75 
    81 
    76 	void processTable(xmlpp::Node* t) {
    82 	void processTable(xmlpp::Node* t) {
    77 		auto relation = xpath(t, "@t:name");
    83 		auto relation = xpath(t, "@t:name");
    78 		std::vector<AttributeMetadata> metadata;
    84 		std::vector<AttributeMetadata> metadata;
    79 
    85 
    80 		for (xmlpp::Node* c : t->find("t:table-row[1]/t:table-cell", ns)) {
    86 		for (xmlpp::Node* c : t->find("t:table-row[1]/t:table-cell", ns)) {
    81 			auto name = xpath(c, "tx:p");
    87 			auto name = xpath(c, "tx:p");
    82 			if (name.size()) {
    88 			double repeated = xpathInt(c, "@t:number-columns-repeated", 1);
       
    89 			double spanned = xpathInt(c, "@t:number-columns-spanned", 1);
       
    90 			for (int i = 0, limit = repeated * spanned; i < limit; i++) {
    83 				metadata.push_back({name, TypeId::STRING});
    91 				metadata.push_back({name, TypeId::STRING});
    84 				// TODO: detect and support other data types
       
    85 			}
    92 			}
       
    93 			// TODO: detect and support other data types
    86 		}
    94 		}
    87 
    95 
    88 		writer->startRelation(relation, metadata, true);
    96 		if (metadata.size()) {
       
    97 			writer->startRelation(relation, metadata, true);
    89 
    98 
    90 		int i = 0;
    99 			int i = 0;
    91 		for (xmlpp::Node* r : t->find("t:table-row", ns)) {
   100 			for (xmlpp::Node* r : t->find("t:table-row", ns)) {
    92 			i++;
   101 				i++;
    93 			if (i == 1) continue; // skip header row
   102 				if (i == 1) continue; // skip header row
    94 			processRow(r, metadata);
   103 				processRow(r, metadata);
       
   104 			}
       
   105 		} else {
       
   106 			// no values on the first row
       
   107 			// probably empty table
    95 		}
   108 		}
    96 	}
   109 	}
    97 
   110 
    98 public:
   111 public:
    99 
   112 
   116 			}
   129 			}
   117 		} else {
   130 		} else {
   118 			throw RelpipeWriterException(L"Invalid XML structure. "
   131 			throw RelpipeWriterException(L"Invalid XML structure. "
   119 					"Expecting OpenDocument spreadsheet.");
   132 					"Expecting OpenDocument spreadsheet.");
   120 		}
   133 		}
   121 
       
   122 
       
   123 	}
   134 	}
   124 };
   135 };
   125 
   136 
   126 }
   137 }
   127 }
   138 }