binary format change: encode numbers as ULEB128 instead of original uint8/16/32/64 format [branch: v_0]
author František Kučera <franta-hg@frantovo.cz>
Tue, 10 Dec 2019 19:37:43 +0100
branch v_0
changeset 44 3f7b5f3f3f84
parent 43 5dd0bfb430a1
child 45 27d5335cd924
binary format change: encode numbers as ULEB128 instead of original uint8/16/32/64 format
src/types/IntegerDataTypeWriter.h
--- a/src/types/IntegerDataTypeWriter.h	Tue Dec 03 16:30:02 2019 +0100
+++ b/src/types/IntegerDataTypeWriter.h	Tue Dec 10 19:37:43 2019 +0100
@@ -36,85 +36,23 @@
 using namespace relpipe::writer;
 
 /**
- * The prototype does not have various integer and other numeric data types,
- * it just works with one type of integer.
- * But this integer has variable length -- smaller values occupy only one byte, bigger ones, more bytes 1,2,4,8 + first byte (contains length signalization).
- * In the real implementation of relational pipes, there will be DataTypes for particular numeric types.
- * 
- * TODO: support also big endian architectures.
- * TODO: throw exception if a value was stored in bigger type than needed (while reading – there should be only one supported way how to encode a single value)
- * 
- * Example of encoded values:
- * -------------------------------------------------------------------------------------------------
- * $ for n in 0 1 10 250 251 252 65535 65536 4294967295 4294967296 18446744073709551615; do printf '%20s = ' $n; dist/Debug/GNU-Linux/rp-prototype write integer $n | hd | head -n 1; done
- *                    0 = 00000000  00                                                |.|
- *                    1 = 00000000  01                                                |.|
- *                   10 = 00000000  0a                                                |.|
- *                  250 = 00000000  fa                                                |.|
- *                  251 = 00000000  fb fb                                             |..|
- *                  252 = 00000000  fb fc                                             |..|
- *                65535 = 00000000  fc ff ff                                          |...|
- *                65536 = 00000000  fd 00 00 01 00                                    |.....|
- *           4294967295 = 00000000  fd ff ff ff ff                                    |.....|
- *           4294967296 = 00000000  fe 00 00 00 00 01 00 00  00                       |.........|
- * 18446744073709551615 = 00000000  fe ff ff ff ff ff ff ff  ff                       |.........|
- * -------------------------------------------------------------------------------------------------
- * 
- * Example of decoded values:
- * -------------------------------------------------------------------------------------------------
- * $ for n in 0 1 10 250 251 252 65535 65536 4294967295 4294967296 18446744073709551615; do dist/Debug/GNU-Linux/rp-prototype write integer $n | dist/Debug/GNU-Linux/rp-prototype read integer; done;
- * 0
- * 1
- * 10
- * 250
- * 251
- * 252
- * 65535
- * 65536
- * 4294967295
- * 4294967296
- * 18446744073709551615
- * -------------------------------------------------------------------------------------------------
- * 
- * Note: similar format as original idea: https://en.wikipedia.org/wiki/X.690#Length_octets
- * 
+ * Unsigned variable-length integer.
+ * ULEB128
  */
 class IntegerDataTypeWriter : public DataTypeWriter<integer_t> {
-private:
-	static const uint8_t INTEGER_TYPE_UINT8 = 251;
-	static const uint8_t INTEGER_TYPE_UINT16 = 252;
-	static const uint8_t INTEGER_TYPE_UINT32 = 253;
-	static const uint8_t INTEGER_TYPE_UINT64 = 254;
-	static const uint8_t INTEGER_TYPE_RESERVED = 255;
-
-	template<typename T> void write(std::ostream &output, const integer_t &value) {
-		assert(sizeof (T) <= sizeof (value));
-		output.write(reinterpret_cast<const char *> (&value), sizeof (T));
-	}
-
-	template<typename T> void write(std::ostream &output, const uint8_t type, const integer_t &value) {
-		write<uint8_t>(output, type);
-		write<T>(output, value);
-	}
-
-	template<typename T> bool fits(const integer_t &value) {
-		return value <= numeric_limits<T>::max();
-	}
-
 public:
 
 	IntegerDataTypeWriter() : DataTypeWriter<integer_t>(TypeId::INTEGER, DATA_TYPE_CODE_INTEGER) {
 	}
 
 	void writeValue(std::ostream &output, const integer_t &value) override {
-		// output << value; // by zapsalo číslo jako ASII text
-
-		if (value < INTEGER_TYPE_UINT8) write<uint8_t>(output, value);
-		else if (fits<uint8_t>(value)) write<uint8_t>(output, INTEGER_TYPE_UINT8, value);
-		else if (fits<uint16_t>(value)) write<uint16_t>(output, INTEGER_TYPE_UINT16, value);
-		else if (fits<uint32_t>(value)) write<uint32_t>(output, INTEGER_TYPE_UINT32, value);
-		else if (fits<uint64_t>(value)) write<uint64_t>(output, INTEGER_TYPE_UINT64, value);
-		else throw RelpipeWriterException(L"Error while writing integer type: value too long");
+		integer_t v = value;
+		do {
+			octet_t octet = v & 0x7F;
+			v >>= 7;
+			if (v) octet |= 0x80; // more bytes follow
+			output << char(octet);
+		} while (v);
 	}
 
 	integer_t toValue(const string_t &stringValue) override {
@@ -126,4 +64,4 @@
 
 }
 }
-}
\ No newline at end of file
+}