278 // possible that the SM==null for now but then SM is NOT null later |
269 // possible that the SM==null for now but then SM is NOT null later |
279 // when safeTrim() is invoked...the "safe" way to do is to redundant |
270 // when safeTrim() is invoked...the "safe" way to do is to redundant |
280 // check (... && (isTrusted || SM == null || getClassLoader0())) in trim |
271 // check (... && (isTrusted || SM == null || getClassLoader0())) in trim |
281 // but it then can be argued that the SM is null when the operation |
272 // but it then can be argued that the SM is null when the operation |
282 // is started... |
273 // is started... |
283 if (cs == UTF_8) { |
|
284 return StringDecoderUTF8.decode(ba, off, len, new Result()); |
|
285 } |
|
286 CharsetDecoder cd = cs.newDecoder(); |
274 CharsetDecoder cd = cs.newDecoder(); |
287 // ascii fastpath |
275 // ascii fastpath |
288 if (cs == ISO_8859_1 || ((cd instanceof ArrayDecoder) && |
276 if ((cd instanceof ArrayDecoder) && |
289 ((ArrayDecoder)cd).isASCIICompatible() && |
277 ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) { |
290 !hasNegatives(ba, off, len))) { |
278 return decodeLatin1(ba, off, len); |
291 if (COMPACT_STRINGS) { |
|
292 return new Result().with(Arrays.copyOfRange(ba, off, off + len), |
|
293 LATIN1); |
|
294 } else { |
|
295 return new Result().with(StringLatin1.inflate(ba, off, len), UTF16); |
|
296 } |
|
297 } |
279 } |
298 int en = scale(len, cd.maxCharsPerByte()); |
280 int en = scale(len, cd.maxCharsPerByte()); |
299 if (len == 0) { |
281 if (len == 0) { |
300 return new Result().with(); |
282 return new Result().with(); |
301 } |
283 } |
302 if (cs.getClass().getClassLoader0() != null && |
|
303 System.getSecurityManager() != null) { |
|
304 ba = Arrays.copyOfRange(ba, off, off + len); |
|
305 off = 0; |
|
306 } |
|
307 cd.onMalformedInput(CodingErrorAction.REPLACE) |
284 cd.onMalformedInput(CodingErrorAction.REPLACE) |
308 .onUnmappableCharacter(CodingErrorAction.REPLACE) |
285 .onUnmappableCharacter(CodingErrorAction.REPLACE) |
309 .reset(); |
286 .reset(); |
310 |
|
311 char[] ca = new char[en]; |
287 char[] ca = new char[en]; |
312 if (cd instanceof ArrayDecoder) { |
288 if (cd instanceof ArrayDecoder) { |
313 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); |
289 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); |
314 return new Result().with(ca, 0, clen); |
290 return new Result().with(ca, 0, clen); |
|
291 } |
|
292 if (cs.getClass().getClassLoader0() != null && |
|
293 System.getSecurityManager() != null) { |
|
294 ba = Arrays.copyOfRange(ba, off, off + len); |
|
295 off = 0; |
315 } |
296 } |
316 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); |
297 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); |
317 CharBuffer cb = CharBuffer.wrap(ca); |
298 CharBuffer cb = CharBuffer.wrap(ca); |
318 try { |
299 try { |
319 CoderResult cr = cd.decode(bb, cb, true); |
300 CoderResult cr = cd.decode(bb, cb, true); |
421 } |
397 } |
422 return safeTrim(ba, bb.position(), isTrusted); |
398 return safeTrim(ba, bb.position(), isTrusted); |
423 } |
399 } |
424 } |
400 } |
425 |
401 |
|
402 static byte[] encode(String charsetName, byte coder, byte[] val) |
|
403 throws UnsupportedEncodingException |
|
404 { |
|
405 StringEncoder se = deref(encoder); |
|
406 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; |
|
407 if ((se == null) || !(csn.equals(se.requestedCharsetName()) |
|
408 || csn.equals(se.charsetName()))) { |
|
409 se = null; |
|
410 try { |
|
411 Charset cs = lookupCharset(csn); |
|
412 if (cs != null) { |
|
413 if (cs == UTF_8) { |
|
414 return encodeUTF8(coder, val, true); |
|
415 } |
|
416 if (cs == ISO_8859_1) { |
|
417 return encode8859_1(coder, val); |
|
418 } |
|
419 if (cs == US_ASCII) { |
|
420 return encodeASCII(coder, val); |
|
421 } |
|
422 se = new StringEncoder(cs, csn); |
|
423 } |
|
424 } catch (IllegalCharsetNameException x) {} |
|
425 if (se == null) { |
|
426 throw new UnsupportedEncodingException (csn); |
|
427 } |
|
428 set(encoder, se); |
|
429 } |
|
430 return se.encode(coder, val); |
|
431 } |
|
432 |
|
433 static byte[] encode(Charset cs, byte coder, byte[] val) { |
|
434 if (cs == UTF_8) { |
|
435 return encodeUTF8(coder, val, true); |
|
436 } |
|
437 if (cs == ISO_8859_1) { |
|
438 return encode8859_1(coder, val); |
|
439 } |
|
440 if (cs == US_ASCII) { |
|
441 return encodeASCII(coder, val); |
|
442 } |
|
443 CharsetEncoder ce = cs.newEncoder(); |
|
444 // fastpath for ascii compatible |
|
445 if (coder == LATIN1 && (((ce instanceof ArrayEncoder) && |
|
446 ((ArrayEncoder)ce).isASCIICompatible() && |
|
447 !hasNegatives(val, 0, val.length)))) { |
|
448 return Arrays.copyOf(val, val.length); |
|
449 } |
|
450 int len = val.length >> coder; // assume LATIN1=0/UTF16=1; |
|
451 int en = scale(len, ce.maxBytesPerChar()); |
|
452 byte[] ba = new byte[en]; |
|
453 if (len == 0) { |
|
454 return ba; |
|
455 } |
|
456 ce.onMalformedInput(CodingErrorAction.REPLACE) |
|
457 .onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
458 .reset(); |
|
459 if (ce instanceof ArrayEncoder) { |
|
460 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba) |
|
461 : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba); |
|
462 if (blen != -1) { |
|
463 return safeTrim(ba, blen, true); |
|
464 } |
|
465 } |
|
466 boolean isTrusted = cs.getClass().getClassLoader0() == null || |
|
467 System.getSecurityManager() == null; |
|
468 char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) |
|
469 : StringUTF16.toChars(val); |
|
470 ByteBuffer bb = ByteBuffer.wrap(ba); |
|
471 CharBuffer cb = CharBuffer.wrap(ca, 0, len); |
|
472 try { |
|
473 CoderResult cr = ce.encode(cb, bb, true); |
|
474 if (!cr.isUnderflow()) |
|
475 cr.throwException(); |
|
476 cr = ce.flush(bb); |
|
477 if (!cr.isUnderflow()) |
|
478 cr.throwException(); |
|
479 } catch (CharacterCodingException x) { |
|
480 throw new Error(x); |
|
481 } |
|
482 return safeTrim(ba, bb.position(), isTrusted); |
|
483 } |
|
484 |
|
485 static byte[] encode(byte coder, byte[] val) { |
|
486 Charset cs = Charset.defaultCharset(); |
|
487 if (cs == UTF_8) { |
|
488 return encodeUTF8(coder, val, true); |
|
489 } |
|
490 if (cs == ISO_8859_1) { |
|
491 return encode8859_1(coder, val); |
|
492 } |
|
493 if (cs == US_ASCII) { |
|
494 return encodeASCII(coder, val); |
|
495 } |
|
496 StringEncoder se = deref(encoder); |
|
497 if (se == null || !cs.name().equals(se.cs.name())) { |
|
498 se = new StringEncoder(cs, cs.name()); |
|
499 set(encoder, se); |
|
500 } |
|
501 return se.encode(coder, val); |
|
502 } |
|
503 |
|
/**
 * Print a message directly to stderr, bypassing all character conversion
 * methods. The implementation is native.
 *
 * @param msg message to print
 */
private static native void err(String msg);
|
510 |
|
511 /* The cached Result for each thread */ |
|
512 private static final ThreadLocal<StringCoding.Result> |
|
513 resultCached = new ThreadLocal<>() { |
|
514 protected StringCoding.Result initialValue() { |
|
515 return new StringCoding.Result(); |
|
516 }}; |
|
517 |
|
518 ////////////////////////// ascii ////////////////////////////// |
|
519 |
|
520 private static Result decodeASCII(byte[] ba, int off, int len) { |
|
521 Result result = resultCached.get(); |
|
522 if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) { |
|
523 return result.with(Arrays.copyOfRange(ba, off, off + len), |
|
524 LATIN1); |
|
525 } |
|
526 byte[] dst = new byte[len<<1]; |
|
527 int dp = 0; |
|
528 while (dp < len) { |
|
529 int b = ba[off++]; |
|
530 putChar(dst, dp++, (b >= 0) ? (char)b : repl); |
|
531 } |
|
532 return result.with(dst, UTF16); |
|
533 } |
|
534 |
|
535 private static byte[] encodeASCII(byte coder, byte[] val) { |
|
536 if (coder == LATIN1) { |
|
537 byte[] dst = new byte[val.length]; |
|
538 for (int i = 0; i < val.length; i++) { |
|
539 if (val[i] < 0) { |
|
540 dst[i] = '?'; |
|
541 } else { |
|
542 dst[i] = val[i]; |
|
543 } |
|
544 } |
|
545 return dst; |
|
546 } |
|
547 int len = val.length >> 1; |
|
548 byte[] dst = new byte[len]; |
|
549 int dp = 0; |
|
550 for (int i = 0; i < len; i++) { |
|
551 char c = StringUTF16.getChar(val, i); |
|
552 if (c < 0x80) { |
|
553 dst[dp++] = (byte)c; |
|
554 continue; |
|
555 } |
|
556 if (Character.isHighSurrogate(c) && i + 1 < len && |
|
557 Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) { |
|
558 i++; |
|
559 } |
|
560 dst[dp++] = '?'; |
|
561 } |
|
562 if (len == dp) { |
|
563 return dst; |
|
564 } |
|
565 return Arrays.copyOf(dst, dp); |
|
566 } |
|
567 |
|
568 ////////////////////////// latin1/8859_1 /////////////////////////// |
|
569 |
|
570 private static Result decodeLatin1(byte[] ba, int off, int len) { |
|
571 Result result = resultCached.get(); |
|
572 if (COMPACT_STRINGS) { |
|
573 return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1); |
|
574 } else { |
|
575 return result.with(StringLatin1.inflate(ba, off, len), UTF16); |
|
576 } |
|
577 } |
|
578 |
426 @HotSpotIntrinsicCandidate |
579 @HotSpotIntrinsicCandidate |
427 private static int implEncodeISOArray(byte[] sa, int sp, |
580 private static int implEncodeISOArray(byte[] sa, int sp, |
428 byte[] da, int dp, int len) { |
581 byte[] da, int dp, int len) { |
429 int i = 0; |
582 int i = 0; |
430 for (; i < len; i++) { |
583 for (; i < len; i++) { |
463 return dst; |
616 return dst; |
464 } |
617 } |
465 return Arrays.copyOf(dst, dp); |
618 return Arrays.copyOf(dst, dp); |
466 } |
619 } |
467 |
620 |
468 static byte[] encodeASCII(byte coder, byte[] val) { |
621 //////////////////////////////// utf8 //////////////////////////////////// |
469 if (coder == LATIN1) { |
622 |
470 byte[] dst = new byte[val.length]; |
623 private static boolean isNotContinuation(int b) { |
471 for (int i = 0; i < val.length; i++) { |
624 return (b & 0xc0) != 0x80; |
472 if (val[i] < 0) { |
625 } |
473 dst[i] = '?'; |
626 |
474 } else { |
627 private static boolean isMalformed3(int b1, int b2, int b3) { |
475 dst[i] = val[i]; |
628 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || |
476 } |
629 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; |
477 } |
630 } |
|
631 |
|
632 private static boolean isMalformed3_2(int b1, int b2) { |
|
633 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || |
|
634 (b2 & 0xc0) != 0x80; |
|
635 } |
|
636 |
|
637 private static boolean isMalformed4(int b2, int b3, int b4) { |
|
638 return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || |
|
639 (b4 & 0xc0) != 0x80; |
|
640 } |
|
641 |
|
642 private static boolean isMalformed4_2(int b1, int b2) { |
|
643 return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || |
|
644 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || |
|
645 (b2 & 0xc0) != 0x80; |
|
646 } |
|
647 |
|
648 private static boolean isMalformed4_3(int b3) { |
|
649 return (b3 & 0xc0) != 0x80; |
|
650 } |
|
651 |
|
652 // for nb == 3/4 |
|
653 private static int malformedN(byte[] src, int sp, int nb) { |
|
654 if (nb == 3) { |
|
655 int b1 = src[sp++]; |
|
656 int b2 = src[sp++]; // no need to lookup b3 |
|
657 return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || |
|
658 isNotContinuation(b2)) ? 1 : 2; |
|
659 } else if (nb == 4) { // we don't care the speed here |
|
660 int b1 = src[sp++] & 0xff; |
|
661 int b2 = src[sp++] & 0xff; |
|
662 if (b1 > 0xf4 || |
|
663 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || |
|
664 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || |
|
665 isNotContinuation(b2)) |
|
666 return 1; |
|
667 if (isNotContinuation(src[sp++])) |
|
668 return 2; |
|
669 return 3; |
|
670 } |
|
671 assert false; |
|
672 return -1; |
|
673 } |
|
674 |
|
675 private static void throwMalformed(int off, int nb) { |
|
676 throw new IllegalArgumentException("malformed input off : " + off + |
|
677 ", length : " + nb); |
|
678 } |
|
679 |
|
680 private static char repl = '\ufffd'; |
|
681 |
|
682 private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) { |
|
683 // ascii-bais, which has a relative impact to the non-ascii-only bytes |
|
684 if (COMPACT_STRINGS && !hasNegatives(src, sp, len)) |
|
685 return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len), |
|
686 LATIN1); |
|
687 return decodeUTF8_0(src, sp, len, doReplace); |
|
688 } |
|
689 |
|
/**
 * Decodes {@code len} UTF-8 bytes of {@code src}, starting at {@code sp},
 * into the per-thread {@code Result}.
 *
 * <p>With compact strings enabled, a first pass tries to produce a LATIN1
 * result: it accepts ASCII bytes and 2-byte sequences led by 0xC2/0xC3
 * (U+0080..U+00FF). If that pass consumes all input, the LATIN1 bytes are
 * returned. Otherwise what was decoded so far is inflated to UTF16 and a
 * second loop decodes the remaining input as general UTF-8.
 *
 * @param src       source bytes
 * @param sp        index of the first byte to decode
 * @param len       number of bytes to decode
 * @param doReplace if {@code true}, malformed input is replaced with
 *                  {@code repl}; if {@code false}, {@code throwMalformed}
 *                  is called instead
 * @return the per-thread {@code Result}, filled with the decoded value
 */
private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
    Result ret = resultCached.get();

    int sl = sp + len;               // exclusive end of the input
    int dp = 0;                      // output position (bytes for LATIN1, chars for UTF16)
    byte[] dst = new byte[len];

    if (COMPACT_STRINGS) {
        // Pass 1: optimistic LATIN1 decode.
        while (sp < sl) {
            int b1 = src[sp];
            if (b1 >= 0) {
                dst[dp++] = (byte)b1;
                sp++;
                continue;
            }
            // 0xC2/0xC3 followed by a continuation byte encodes U+0080..U+00FF.
            if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
                sp + 1 < sl) {
                int b2 = src[sp + 1];
                if (!isNotContinuation(b2)) {
                    dst[dp++] = (byte)(((b1 << 6) ^ b2)^
                                       (((byte) 0xC0 << 6) ^
                                       ((byte) 0x80 << 0)));
                    sp += 2;
                    continue;
                }
            }
            // anything not a latin1, including the repl
            // we have to go with the utf16
            break;
        }
        if (sp == sl) {
            // All input consumed as LATIN1; trim if 2-byte sequences shrank it.
            if (dp != dst.length) {
                dst = Arrays.copyOf(dst, dp);
            }
            return ret.with(dst, LATIN1);
        }
    }
    // Switch to UTF16 output: 2 bytes per char, at most len chars.
    if (dp == 0) {
        dst = new byte[len << 1];
    } else {
        // Carry over the LATIN1 prefix decoded in pass 1.
        byte[] buf = new byte[len << 1];
        StringLatin1.inflate(dst, 0, buf, 0, dp);
        dst = buf;
    }
    // Pass 2: general UTF-8 decode. b1 is a sign-extended byte, so the
    // shifted comparisons against -2 test the lead-byte bit pattern.
    while (sp < sl) {
        int b1 = src[sp++];
        if (b1 >= 0) {
            // 1 byte, 7 bits
            putChar(dst, dp++, (char) b1);
        } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
            // 2-byte lead (110xxxxx), excluding 0xC0/0xC1
            if (sp < sl) {
                int b2 = src[sp++];
                if (isNotContinuation(b2)) {
                    if (!doReplace) {
                        throwMalformed(sp - 1, 1);
                    }
                    putChar(dst, dp++, repl);
                    sp--;            // re-examine b2 as the start of the next sequence
                } else {
                    putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
                                              (((byte) 0xC0 << 6) ^
                                              ((byte) 0x80 << 0))));
                }
                continue;
            }
            // Input ends right after the lead byte.
            if (!doReplace) {
                throwMalformed(sp, 1);  // underflow()
            }
            putChar(dst, dp++, repl);
            break;
        } else if ((b1 >> 4) == -2) {
            // 3-byte lead (1110xxxx)
            if (sp + 1 < sl) {
                int b2 = src[sp++];
                int b3 = src[sp++];
                if (isMalformed3(b1, b2, b3)) {
                    if (!doReplace) {
                        throwMalformed(sp - 3, 3);
                    }
                    putChar(dst, dp++, repl);
                    // Rewind to the lead byte, then skip only the malformed prefix.
                    sp -= 3;
                    sp += malformedN(src, sp, 3);
                } else {
                    char c = (char)((b1 << 12) ^
                                    (b2 << 6) ^
                                    (b3 ^
                                     (((byte) 0xE0 << 12) ^
                                     ((byte) 0x80 << 6) ^
                                     ((byte) 0x80 << 0))));
                    // A surrogate encoded as a 3-byte sequence is treated as malformed.
                    if (isSurrogate(c)) {
                        if (!doReplace) {
                            throwMalformed(sp - 3, 3);
                        }
                        putChar(dst, dp++, repl);
                    } else {
                        putChar(dst, dp++, c);
                    }
                }
                continue;
            }
            // Fewer than two bytes follow the lead byte.
            if (sp < sl && isMalformed3_2(b1, src[sp])) {
                if (!doReplace) {
                    throwMalformed(sp - 1, 2);
                }
                putChar(dst, dp++, repl);
                continue;
            }
            if (!doReplace){
                throwMalformed(sp, 1);
            }
            putChar(dst, dp++, repl);
            break;
        } else if ((b1 >> 3) == -2) {
            // 4-byte lead (11110xxx)
            if (sp + 2 < sl) {
                int b2 = src[sp++];
                int b3 = src[sp++];
                int b4 = src[sp++];
                int uc = ((b1 << 18) ^
                          (b2 << 12) ^
                          (b3 << 6) ^
                          (b4 ^
                           (((byte) 0xF0 << 18) ^
                           ((byte) 0x80 << 12) ^
                           ((byte) 0x80 << 6) ^
                           ((byte) 0x80 << 0))));
                if (isMalformed4(b2, b3, b4) ||
                    !isSupplementaryCodePoint(uc)) { // shortest form check
                    if (!doReplace) {
                        throwMalformed(sp - 4, 4);
                    }
                    putChar(dst, dp++, repl);
                    // Rewind to the lead byte, then skip only the malformed prefix.
                    sp -= 4;
                    sp += malformedN(src, sp, 4);
                } else {
                    putChar(dst, dp++, highSurrogate(uc));
                    putChar(dst, dp++, lowSurrogate(uc));
                }
                continue;
            }
            // Fewer than three bytes follow the lead byte; from here on b1
            // is compared as an unsigned value.
            b1 &= 0xff;
            if (b1 > 0xf4 ||
                sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
                if (!doReplace) {
                    throwMalformed(sp - 1, 1);  // or 2
                }
                putChar(dst, dp++, repl);
                continue;
            }
            if (!doReplace) {
                throwMalformed(sp - 1, 1);
            }
            sp++;
            putChar(dst, dp++, repl);
            if (sp < sl && isMalformed4_3(src[sp])) {
                continue;
            }
            break;
        } else {
            // Not a valid lead byte.
            if (!doReplace) {
                throwMalformed(sp - 1, 1);
            }
            putChar(dst, dp++, repl);
        }
    }
    // dst holds room for len chars; trim to the dp chars actually written.
    if (dp != len) {
        dst = Arrays.copyOf(dst, dp << 1);
    }
    return ret.with(dst, UTF16);
}
|
857 |
|
858 private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { |
|
859 if (coder == UTF16) |
|
860 return encodeUTF8_UTF16(val, doReplace); |
|
861 |
|
862 if (!hasNegatives(val, 0, val.length)) |
|
863 return Arrays.copyOf(val, val.length); |
|
864 |
|
865 int dp = 0; |
|
866 byte[] dst = new byte[val.length << 1]; |
|
867 for (int sp = 0; sp < val.length; sp++) { |
|
868 byte c = val[sp]; |
|
869 if (c < 0) { |
|
870 dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6)); |
|
871 dst[dp++] = (byte)(0x80 | (c & 0x3f)); |
|
872 } else { |
|
873 dst[dp++] = c; |
|
874 } |
|
875 } |
|
876 if (dp == dst.length) |
478 return dst; |
877 return dst; |
479 } |
878 return Arrays.copyOf(dst, dp); |
480 int len = val.length >> 1; |
879 } |
481 byte[] dst = new byte[len]; |
880 |
|
881 private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) { |
482 int dp = 0; |
882 int dp = 0; |
483 for (int i = 0; i < len; i++) { |
883 int sp = 0; |
484 char c = StringUTF16.getChar(val, i); |
884 int sl = val.length >> 1; |
|
885 byte[] dst = new byte[sl * 3]; |
|
886 char c; |
|
887 while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') { |
|
888 // ascii fast loop; |
|
889 dst[dp++] = (byte)c; |
|
890 sp++; |
|
891 } |
|
892 while (sp < sl) { |
|
893 c = StringUTF16.getChar(val, sp++); |
485 if (c < 0x80) { |
894 if (c < 0x80) { |
486 dst[dp++] = (byte)c; |
895 dst[dp++] = (byte)c; |
487 continue; |
896 } else if (c < 0x800) { |
488 } |
897 dst[dp++] = (byte)(0xc0 | (c >> 6)); |
489 if (Character.isHighSurrogate(c) && i + 1 < len && |
898 dst[dp++] = (byte)(0x80 | (c & 0x3f)); |
490 Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) { |
899 } else if (Character.isSurrogate(c)) { |
491 i++; |
900 int uc = -1; |
492 } |
901 char c2; |
493 dst[dp++] = '?'; |
902 if (Character.isHighSurrogate(c) && sp < sl && |
494 } |
903 Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) { |
495 if (len == dp) { |
904 uc = Character.toCodePoint(c, c2); |
496 return dst; |
905 } |
497 } |
906 if (uc < 0) { |
498 return Arrays.copyOf(dst, dp); |
907 if (doReplace) { |
499 } |
|
500 |
|
501 static byte[] encodeUTF8(byte coder, byte[] val) { |
|
502 int dp = 0; |
|
503 byte[] dst; |
|
504 if (coder == LATIN1) { |
|
505 dst = new byte[val.length << 1]; |
|
506 for (int sp = 0; sp < val.length; sp++) { |
|
507 byte c = val[sp]; |
|
508 if (c < 0) { |
|
509 dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6)); |
|
510 dst[dp++] = (byte)(0x80 | (c & 0x3f)); |
|
511 } else { |
|
512 dst[dp++] = c; |
|
513 } |
|
514 } |
|
515 } else { |
|
516 int sp = 0; |
|
517 int sl = val.length >> 1; |
|
518 dst = new byte[sl * 3]; |
|
519 char c; |
|
520 while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') { |
|
521 // ascii fast loop; |
|
522 dst[dp++] = (byte)c; |
|
523 sp++; |
|
524 } |
|
525 while (sp < sl) { |
|
526 c = StringUTF16.getChar(val, sp++); |
|
527 if (c < 0x80) { |
|
528 dst[dp++] = (byte)c; |
|
529 } else if (c < 0x800) { |
|
530 dst[dp++] = (byte)(0xc0 | (c >> 6)); |
|
531 dst[dp++] = (byte)(0x80 | (c & 0x3f)); |
|
532 } else if (Character.isSurrogate(c)) { |
|
533 int uc = -1; |
|
534 char c2; |
|
535 if (Character.isHighSurrogate(c) && sp < sl && |
|
536 Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) { |
|
537 uc = Character.toCodePoint(c, c2); |
|
538 } |
|
539 if (uc < 0) { |
|
540 dst[dp++] = '?'; |
908 dst[dp++] = '?'; |
541 } else { |
909 } else { |
542 dst[dp++] = (byte)(0xf0 | ((uc >> 18))); |
910 throwMalformed(sp - 1, 1); // or 2, does not matter here |
543 dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); |
|
544 dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); |
|
545 dst[dp++] = (byte)(0x80 | (uc & 0x3f)); |
|
546 sp++; // 2 chars |
|
547 } |
911 } |
548 } else { |
912 } else { |
549 // 3 bytes, 16 bits |
913 dst[dp++] = (byte)(0xf0 | ((uc >> 18))); |
550 dst[dp++] = (byte)(0xe0 | ((c >> 12))); |
914 dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); |
551 dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); |
915 dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); |
552 dst[dp++] = (byte)(0x80 | (c & 0x3f)); |
916 dst[dp++] = (byte)(0x80 | (uc & 0x3f)); |
553 } |
917 sp++; // 2 chars |
|
918 } |
|
919 } else { |
|
920 // 3 bytes, 16 bits |
|
921 dst[dp++] = (byte)(0xe0 | ((c >> 12))); |
|
922 dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); |
|
923 dst[dp++] = (byte)(0x80 | (c & 0x3f)); |
554 } |
924 } |
555 } |
925 } |
556 if (dp == dst.length) { |
926 if (dp == dst.length) { |
557 return dst; |
927 return dst; |
558 } |
928 } |
559 return Arrays.copyOf(dst, dp); |
929 return Arrays.copyOf(dst, dp); |
560 } |
930 } |
561 |
931 |
562 static byte[] encode(String charsetName, byte coder, byte[] val) |
932 ////////////////////// for j.u.z.ZipCoder ////////////////////////// |
563 throws UnsupportedEncodingException |
933 |
564 { |
934 /* |
565 StringEncoder se = deref(encoder); |
935 * Throws iae, instead of replacing, if malformed or unmappable. |
566 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; |
|
567 if ((se == null) || !(csn.equals(se.requestedCharsetName()) |
|
568 || csn.equals(se.charsetName()))) { |
|
569 se = null; |
|
570 try { |
|
571 Charset cs = lookupCharset(csn); |
|
572 if (cs != null) { |
|
573 if (cs == UTF_8) { |
|
574 return encodeUTF8(coder, val); |
|
575 } else if (cs == ISO_8859_1) { |
|
576 return encode8859_1(coder, val); |
|
577 } else if (cs == US_ASCII) { |
|
578 return encodeASCII(coder, val); |
|
579 } |
|
580 se = new StringEncoder(cs, csn); |
|
581 } |
|
582 } catch (IllegalCharsetNameException x) {} |
|
583 if (se == null) { |
|
584 throw new UnsupportedEncodingException (csn); |
|
585 } |
|
586 set(encoder, se); |
|
587 } |
|
588 return se.encode(coder, val); |
|
589 } |
|
590 |
|
591 static byte[] encode(Charset cs, byte coder, byte[] val) { |
|
592 if (cs == UTF_8) { |
|
593 return encodeUTF8(coder, val); |
|
594 } else if (cs == ISO_8859_1) { |
|
595 return encode8859_1(coder, val); |
|
596 } else if (cs == US_ASCII) { |
|
597 return encodeASCII(coder, val); |
|
598 } |
|
599 CharsetEncoder ce = cs.newEncoder(); |
|
600 // fastpath for ascii compatible |
|
601 if (coder == LATIN1 && (((ce instanceof ArrayEncoder) && |
|
602 ((ArrayEncoder)ce).isASCIICompatible() && |
|
603 !hasNegatives(val, 0, val.length)))) { |
|
604 return Arrays.copyOf(val, val.length); |
|
605 } |
|
606 int len = val.length >> coder; // assume LATIN1=0/UTF16=1; |
|
607 int en = scale(len, ce.maxBytesPerChar()); |
|
608 byte[] ba = new byte[en]; |
|
609 if (len == 0) { |
|
610 return ba; |
|
611 } |
|
612 boolean isTrusted = cs.getClass().getClassLoader0() == null || |
|
613 System.getSecurityManager() == null; |
|
614 ce.onMalformedInput(CodingErrorAction.REPLACE) |
|
615 .onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
616 .reset(); |
|
617 if (ce instanceof ArrayEncoder) { |
|
618 if (!isTrusted) { |
|
619 val = Arrays.copyOf(val, val.length); |
|
620 } |
|
621 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba) |
|
622 : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba); |
|
623 if (blen != -1) { |
|
624 return safeTrim(ba, blen, isTrusted); |
|
625 } |
|
626 } |
|
627 char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) |
|
628 : StringUTF16.toChars(val); |
|
629 ByteBuffer bb = ByteBuffer.wrap(ba); |
|
630 CharBuffer cb = CharBuffer.wrap(ca, 0, len); |
|
631 try { |
|
632 CoderResult cr = ce.encode(cb, bb, true); |
|
633 if (!cr.isUnderflow()) |
|
634 cr.throwException(); |
|
635 cr = ce.flush(bb); |
|
636 if (!cr.isUnderflow()) |
|
637 cr.throwException(); |
|
638 } catch (CharacterCodingException x) { |
|
639 throw new Error(x); |
|
640 } |
|
641 return safeTrim(ba, bb.position(), isTrusted); |
|
642 } |
|
643 |
|
644 static byte[] encode(byte coder, byte[] val) { |
|
645 String csn = Charset.defaultCharset().name(); |
|
646 try { |
|
647 // use charset name encode() variant which provides caching. |
|
648 return encode(csn, coder, val); |
|
649 } catch (UnsupportedEncodingException x) { |
|
650 warnUnsupportedCharset(csn); |
|
651 } |
|
652 try { |
|
653 return encode("ISO-8859-1", coder, val); |
|
654 } catch (UnsupportedEncodingException x) { |
|
655 // If this code is hit during VM initialization, err(String) is |
|
656 // the only way we will be able to get any kind of error message. |
|
657 err("ISO-8859-1 charset not available: " + x.toString() + "\n"); |
|
658 // If we can not find ISO-8859-1 (a required encoding) then things |
|
659 // are seriously wrong with the installation. |
|
660 System.exit(1); |
|
661 return null; |
|
662 } |
|
663 } |
|
664 |
|
665 /** |
|
666 * Print a message directly to stderr, bypassing all character conversion |
|
667 * methods. |
|
668 * @param msg message to print |
|
669 */ |
936 */ |
670 private static native void err(String msg); |
937 static String newStringUTF8NoRepl(byte[] src, int off, int len) { |
|
938 if (COMPACT_STRINGS && !hasNegatives(src, off, len)) |
|
939 return new String(Arrays.copyOfRange(src, off, off + len), LATIN1); |
|
940 Result ret = decodeUTF8_0(src, off, len, false); |
|
941 return new String(ret.value, ret.coder); |
|
942 } |
|
943 |
|
944 /* |
|
945 * Throws iae, instead of replacing, if unmappble. |
|
946 */ |
|
947 static byte[] getBytesUTF8NoRepl(String s) { |
|
948 return encodeUTF8(s.coder(), s.value(), false); |
|
949 } |
671 } |
950 } |