339 * thrice the length of <code>str</code>. |
342 * thrice the length of <code>str</code>. |
340 * |
343 * |
341 * @param str a string to be written. |
344 * @param str a string to be written. |
342 * @param out destination to write to |
345 * @param out destination to write to |
343 * @return The number of bytes written out. |
346 * @return The number of bytes written out. |
344 * @exception IOException if an I/O error occurs. |
347 * @throws UTFDataFormatException if the modified UTF-8 encoding of |
|
348 * {@code str} would exceed 65535 bytes in length |
|
349 * @throws IOException if some other I/O error occurs. |
345 */ |
350 */ |
346 static int writeUTF(String str, DataOutput out) throws IOException { |
351 static int writeUTF(String str, DataOutput out) throws IOException { |
347 int strlen = str.length(); |
352 final int strlen = str.length(); |
348 int utflen = 0; |
353 int utflen = strlen; // optimized for ASCII |
349 int c, count = 0; |
354 |
350 |
|
351 /* use charAt instead of copying String to char array */ |
|
352 for (int i = 0; i < strlen; i++) { |
355 for (int i = 0; i < strlen; i++) { |
353 c = str.charAt(i); |
356 int c = str.charAt(i); |
354 if ((c >= 0x0001) && (c <= 0x007F)) { |
357 if (c >= 0x80 || c == 0) |
355 utflen++; |
358 utflen += (c >= 0x800) ? 2 : 1; |
356 } else if (c > 0x07FF) { |
359 } |
357 utflen += 3; |
360 |
358 } else { |
361 if (utflen > 65535 || /* overflow */ utflen < strlen) |
359 utflen += 2; |
362 throw new UTFDataFormatException(tooLongMsg(str, utflen)); |
360 } |
363 |
361 } |
364 final byte[] bytearr; |
362 |
|
363 if (utflen > 65535) |
|
364 throw new UTFDataFormatException( |
|
365 "encoded string too long: " + utflen + " bytes"); |
|
366 |
|
367 byte[] bytearr = null; |
|
368 if (out instanceof DataOutputStream) { |
365 if (out instanceof DataOutputStream) { |
369 DataOutputStream dos = (DataOutputStream)out; |
366 DataOutputStream dos = (DataOutputStream)out; |
370 if(dos.bytearr == null || (dos.bytearr.length < (utflen+2))) |
367 if (dos.bytearr == null || (dos.bytearr.length < (utflen + 2))) |
371 dos.bytearr = new byte[(utflen*2) + 2]; |
368 dos.bytearr = new byte[(utflen*2) + 2]; |
372 bytearr = dos.bytearr; |
369 bytearr = dos.bytearr; |
373 } else { |
370 } else { |
374 bytearr = new byte[utflen+2]; |
371 bytearr = new byte[utflen + 2]; |
375 } |
372 } |
376 |
373 |
|
374 int count = 0; |
377 bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF); |
375 bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF); |
378 bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF); |
376 bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF); |
379 |
377 |
380 int i=0; |
378 int i = 0; |
381 for (i=0; i<strlen; i++) { |
379 for (i = 0; i < strlen; i++) { // optimized for initial run of ASCII |
382 c = str.charAt(i); |
380 int c = str.charAt(i); |
383 if (!((c >= 0x0001) && (c <= 0x007F))) break; |
381 if (c >= 0x80 || c == 0) break; |
384 bytearr[count++] = (byte) c; |
382 bytearr[count++] = (byte) c; |
385 } |
383 } |
386 |
384 |
387 for (;i < strlen; i++){ |
385 for (; i < strlen; i++) { |
388 c = str.charAt(i); |
386 int c = str.charAt(i); |
389 if ((c >= 0x0001) && (c <= 0x007F)) { |
387 if (c < 0x80 && c != 0) { |
390 bytearr[count++] = (byte) c; |
388 bytearr[count++] = (byte) c; |
391 |
389 } else if (c >= 0x800) { |
392 } else if (c > 0x07FF) { |
|
393 bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); |
390 bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); |
394 bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F)); |
391 bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F)); |
395 bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); |
392 bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); |
396 } else { |
393 } else { |
397 bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); |
394 bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); |
398 bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); |
395 bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); |
399 } |
396 } |
400 } |
397 } |
401 out.write(bytearr, 0, utflen+2); |
398 out.write(bytearr, 0, utflen + 2); |
402 return utflen + 2; |
399 return utflen + 2; |
|
400 } |
|
401 |
|
402 private static String tooLongMsg(String s, int bits32) { |
|
403 int slen = s.length(); |
|
404 String head = s.substring(0, 8); |
|
405 String tail = s.substring(slen - 8, slen); |
|
406 // handle int overflow with max 3x expansion |
|
407 long actualLength = (long)slen + Integer.toUnsignedLong(bits32 - slen); |
|
408 return "encoded string (" + head + "..." + tail + ") too long: " |
|
409 + actualLength + " bytes"; |
403 } |
410 } |
404 |
411 |
405 /** |
412 /** |
406 * Returns the current value of the counter <code>written</code>, |
413 * Returns the current value of the counter <code>written</code>, |
407 * the number of bytes written to this data output stream so far. |
414 * the number of bytes written to this data output stream so far. |