jdk-sandbox: comparison src/java.base/share/classes/java/lang/StringCoding.java

equal deleted inserted replaced

-:43edfde828ab
+:daf3b49f4839
 /*
-* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 import sun.nio.cs.StandardCharsets;
 import static java.lang.String.LATIN1;
 import static java.lang.String.UTF16;
 import static java.lang.String.COMPACT_STRINGS;
+import static java.lang.Character.isSurrogate;
+import static java.lang.Character.highSurrogate;
+import static java.lang.Character.lowSurrogate;
+import static java.lang.Character.isSupplementaryCodePoint;
+import static java.lang.StringUTF16.putChar;
 /**
 * Utility class for string encoding and decoding.
 */
 private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
 private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
 private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
-private static boolean warnUnsupportedCharset = true;
 private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
 SoftReference<T> sr = tl.get();
 if (sr == null)
 return null;
 return sr.get();
 // Stores ob in the given thread-local slot, wrapped in a new SoftReference.
 private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
 tl.set(new SoftReference<>(ob));
 }
 // Trim the given byte array to the given length
-//
 private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
 if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
 return ba;
 else
 return Arrays.copyOf(ba, len);
 } catch (UnsupportedCharsetException x) {
 throw new Error(x);
 }
 }
 return null;
-}
-private static void warnUnsupportedCharset(String csn) {
-if (warnUnsupportedCharset) {
-// Use err(String) rather than the Logging API or System.err
-// since this method may be called during VM initialization
-// before either is available.
-err("WARNING: Default charset " + csn +
-" not supported, using ISO-8859-1 instead\n");
-warnUnsupportedCharset = false;
-}
 }
 static class Result {
 byte[] value;
 byte coder;
 }
 return result.with(ca, 0, cb.position());
 }
 }
-private static class StringDecoder8859_1 extends StringDecoder {
-StringDecoder8859_1(Charset cs, String rcn) {
-super(cs, rcn);
-}
-Result decode(byte[] ba, int off, int len) {
-if (COMPACT_STRINGS) {
-return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
-} else {
-return result.with(StringLatin1.inflate(ba, off, len), UTF16);
-}
-}
-}
 static Result decode(String charsetName, byte[] ba, int off, int len)
 throws UnsupportedEncodingException
 {
 StringDecoder sd = deref(decoder);
 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 sd = null;
 try {
 Charset cs = lookupCharset(csn);
 if (cs != null) {
 if (cs == UTF_8) {
-sd = new StringDecoderUTF8(cs, csn);
+return decodeUTF8(ba, off, len, true);
-} else if (cs == ISO_8859_1) {
+}
-sd = new StringDecoder8859_1(cs, csn);
+if (cs == ISO_8859_1) {
-} else {
+return decodeLatin1(ba, off, len);
-sd = new StringDecoder(cs, csn);
+}
-}
+if (cs == US_ASCII) {
+return decodeASCII(ba, off, len);
+}
+sd = new StringDecoder(cs, csn);
 }
 } catch (IllegalCharsetNameException x) {}
 if (sd == null)
 throw new UnsupportedEncodingException(csn);
 set(decoder, sd);
 }
 return sd.decode(ba, off, len);
 }
 static Result decode(Charset cs, byte[] ba, int off, int len) {
+if (cs == UTF_8) {
+return decodeUTF8(ba, off, len, true);
+}
+if (cs == ISO_8859_1) {
+return decodeLatin1(ba, off, len);
+}
+if (cs == US_ASCII) {
+return decodeASCII(ba, off, len);
+}
 // (1)We never cache the "external" cs, the only benefit of creating
 // an additional StringDe/Encoder object to wrap it is to share the
 // de/encode() method. These SD/E objects are short-lived, the young-gen
 // gc should be able to take care of them well. But the best approach
 // is still not to generate them if not really necessary.
 // possible that the SM==null for now but then SM is NOT null later
 // when safeTrim() is invoked...the "safe" way to do is to redundant
 // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
 // but it then can be argued that the SM is null when the operation
 // is started...
-if (cs == UTF_8) {
-return StringDecoderUTF8.decode(ba, off, len, new Result());
-}
 CharsetDecoder cd = cs.newDecoder();
 // ascii fastpath
-if (cs == ISO_8859_1 || ((cd instanceof ArrayDecoder) &&
+if ((cd instanceof ArrayDecoder) &&
-((ArrayDecoder)cd).isASCIICompatible() &&
+((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
-!hasNegatives(ba, off, len))) {
+return decodeLatin1(ba, off, len);
-if (COMPACT_STRINGS) {
-return new Result().with(Arrays.copyOfRange(ba, off, off + len),
-LATIN1);
-} else {
-return new Result().with(StringLatin1.inflate(ba, off, len), UTF16);
-}
 }
 int en = scale(len, cd.maxCharsPerByte());
 if (len == 0) {
 return new Result().with();
 }
-if (cs.getClass().getClassLoader0() != null &&
-System.getSecurityManager() != null) {
-ba =  Arrays.copyOfRange(ba, off, off + len);
-off = 0;
-}
 cd.onMalformedInput(CodingErrorAction.REPLACE)
 .onUnmappableCharacter(CodingErrorAction.REPLACE)
 .reset();
 char[] ca = new char[en];
 if (cd instanceof ArrayDecoder) {
 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
 return new Result().with(ca, 0, clen);
+}
+if (cs.getClass().getClassLoader0() != null &&
+System.getSecurityManager() != null) {
+ba = Arrays.copyOfRange(ba, off, off + len);
+off = 0;
 }
 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
 CharBuffer cb = CharBuffer.wrap(ca);
 try {
 CoderResult cr = cd.decode(bb, cb, true);
 }
 return new Result().with(ca, 0, cb.position());
 }
 static Result decode(byte[] ba, int off, int len) {
-String csn = Charset.defaultCharset().name();
+Charset cs = Charset.defaultCharset();
-try {
+if (cs == UTF_8) {
-// use charset name decode() variant which provides caching.
+return decodeUTF8(ba, off, len, true);
-return decode(csn, ba, off, len);
+}
-} catch (UnsupportedEncodingException x) {
+if (cs == ISO_8859_1) {
-warnUnsupportedCharset(csn);
+return decodeLatin1(ba, off, len);
 }
-try {
+if (cs == US_ASCII) {
-return decode("ISO-8859-1", ba, off, len);
+return decodeASCII(ba, off, len);
-} catch (UnsupportedEncodingException x) {
+}
-// If this code is hit during VM initialization, err(String) is
+StringDecoder sd = deref(decoder);
-// the only way we will be able to get any kind of error message.
+if (sd == null || !cs.name().equals(sd.cs.name())) {
-err("ISO-8859-1 charset not available: " + x.toString() + "\n");
+sd = new StringDecoder(cs, cs.name());
-// If we can not find ISO-8859-1 (a required encoding) then things
+set(decoder, sd);
-// are seriously wrong with the installation.
+}
-System.exit(1);
+return sd.decode(ba, off, len);
-return null;
-}
 }
 // -- Encoding --
 private static class StringEncoder {
 private Charset cs;
 byte[] ba = new byte[en];
 if (len == 0) {
 return ba;
 }
 if (ce instanceof ArrayEncoder) {
-if (!isTrusted) {
-val = Arrays.copyOf(val, val.length);
-}
 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
 : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
 if (blen != -1) {
 return safeTrim(ba, blen, isTrusted);
 }
 }
 return safeTrim(ba, bb.position(), isTrusted);
 }
 }
+/**
+* Encodes the string bytes {@code val}, stored with the given {@code coder},
+* into the charset named {@code charsetName}. A null name is treated as
+* "ISO-8859-1".
+*
+* The thread-local StringEncoder is reused when {@code csn} equals its
+* requestedCharsetName() or charsetName(). Otherwise the charset is looked
+* up again: UTF-8, ISO-8859-1 and US-ASCII return straight from their
+* dedicated encode methods without touching the cache, and any other
+* charset gets a new StringEncoder that replaces the cached one.
+*
+* @throws UnsupportedEncodingException if the name is illegal or
+*         lookupCharset returns null for it
+*/
+static byte[] encode(String charsetName, byte coder, byte[] val)
+throws UnsupportedEncodingException
+{
+StringEncoder se = deref(encoder);
+String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
+if ((se == null) || !(csn.equals(se.requestedCharsetName())
+|| csn.equals(se.charsetName()))) {
+// cached encoder is missing or for another charset: resolve csn again
+se = null;
+try {
+Charset cs = lookupCharset(csn);
+if (cs != null) {
+if (cs == UTF_8) {
+return encodeUTF8(coder, val, true);
+}
+if (cs == ISO_8859_1) {
+return encode8859_1(coder, val);
+}
+if (cs == US_ASCII) {
+return encodeASCII(coder, val);
+}
+se = new StringEncoder(cs, csn);
+}
+// an illegal name leaves se null and is reported just below
+} catch (IllegalCharsetNameException x) {}
+if (se == null) {
+throw new UnsupportedEncodingException (csn);
+}
+set(encoder, se);
+}
+return se.encode(coder, val);
+}
+/**
+* Encodes the string bytes {@code val}, stored with the given {@code coder},
+* using the charset {@code cs}.
+*
+* UTF-8, ISO-8859-1 and US-ASCII go to their dedicated encode methods.
+* For any other charset a new CharsetEncoder is created on every call
+* (nothing is cached here), configured to REPLACE malformed input and
+* unmappable characters.
+*
+* @throws Error wrapping a CharacterCodingException raised by the
+*         CharBuffer/ByteBuffer encoding path
+*/
+static byte[] encode(Charset cs, byte coder, byte[] val) {
+if (cs == UTF_8) {
+return encodeUTF8(coder, val, true);
+}
+if (cs == ISO_8859_1) {
+return encode8859_1(coder, val);
+}
+if (cs == US_ASCII) {
+return encodeASCII(coder, val);
+}
+CharsetEncoder ce = cs.newEncoder();
+// fastpath for ascii compatible
+if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
+((ArrayEncoder)ce).isASCIICompatible() &&
+!hasNegatives(val, 0, val.length)))) {
+return Arrays.copyOf(val, val.length);
+}
+int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
+int en = scale(len, ce.maxBytesPerChar());
+byte[] ba = new byte[en];
+if (len == 0) {
+return ba;
+}
+ce.onMalformedInput(CodingErrorAction.REPLACE)
+.onUnmappableCharacter(CodingErrorAction.REPLACE)
+.reset();
+if (ce instanceof ArrayEncoder) {
+int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
+: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
+// blen == -1 falls through to the buffer-based path below
+// NOTE(review): presumably -1 means the array encoder declined the input - confirm
+if (blen != -1) {
+return safeTrim(ba, blen, true);
+}
+}
+// trust is only evaluated for the buffer path; it is passed to safeTrim below
+boolean isTrusted = cs.getClass().getClassLoader0() == null ||
+System.getSecurityManager() == null;
+char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
+: StringUTF16.toChars(val);
+ByteBuffer bb = ByteBuffer.wrap(ba);
+CharBuffer cb = CharBuffer.wrap(ca, 0, len);
+try {
+CoderResult cr = ce.encode(cb, bb, true);
+if (!cr.isUnderflow())
+cr.throwException();
+cr = ce.flush(bb);
+if (!cr.isUnderflow())
+cr.throwException();
+} catch (CharacterCodingException x) {
+throw new Error(x);
+}
+return safeTrim(ba, bb.position(), isTrusted);
+}
+/**
+* Encodes the string bytes {@code val}, stored with the given {@code coder},
+* using Charset.defaultCharset().
+*
+* UTF-8, ISO-8859-1 and US-ASCII go to their dedicated encode methods.
+* Any other default charset uses the thread-local StringEncoder, which is
+* replaced when it is absent or its charset name differs from the default's.
+*/
+static byte[] encode(byte coder, byte[] val) {
+Charset cs = Charset.defaultCharset();
+if (cs == UTF_8) {
+return encodeUTF8(coder, val, true);
+}
+if (cs == ISO_8859_1) {
+return encode8859_1(coder, val);
+}
+if (cs == US_ASCII) {
+return encodeASCII(coder, val);
+}
+StringEncoder se = deref(encoder);
+if (se == null || !cs.name().equals(se.cs.name())) {
+se = new StringEncoder(cs, cs.name());
+set(encoder, se);
+}
+return se.encode(coder, val);
+}
+/**
+*  Prints a message directly to stderr, bypassing all character conversion
+*  methods. Implemented natively.
+*  @param msg  message to print
+*/
+private static native void err(String msg);
+/*
+* The cached Result for each thread. initialValue() supplies a new Result
+* the first time a thread calls get(); the decode methods in this class
+* fetch it with resultCached.get() and fill it via with(...).
+*/
+private static final ThreadLocal<StringCoding.Result>
+resultCached = new ThreadLocal<>() {
+protected StringCoding.Result initialValue() {
+return new StringCoding.Result();
+}};
+////////////////////////// ascii //////////////////////////////
+/**
+* Decodes ba[off, off+len) as US-ASCII into the thread's cached Result.
+*
+* With COMPACT_STRINGS on and hasNegatives reporting none for the range,
+* the bytes are copied unchanged and tagged LATIN1. Otherwise the result is
+* UTF16, with every negative byte (high bit set, so not ASCII) replaced by
+* repl.
+*/
+private static Result decodeASCII(byte[] ba, int off, int len) {
+Result result = resultCached.get();
+if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
+return result.with(Arrays.copyOfRange(ba, off, off + len),
+LATIN1);
+}
+// two bytes per UTF16 char
+byte[] dst = new byte[len<<1];
+int dp = 0;
+while (dp < len) {
+int b = ba[off++];
+putChar(dst, dp++, (b >= 0) ? (char)b : repl);
+}
+return result.with(dst, UTF16);
+}
+/**
+* Encodes string bytes to US-ASCII, writing '?' for anything not ASCII.
+*
+* LATIN1 input yields one output byte per input byte. For UTF16 input a
+* high surrogate immediately followed by a low surrogate is consumed as a
+* pair and produces a single '?', so the output can be shorter than the
+* char count and is trimmed in that case.
+*/
+private static byte[] encodeASCII(byte coder, byte[] val) {
+if (coder == LATIN1) {
+byte[] dst = new byte[val.length];
+for (int i = 0; i < val.length; i++) {
+// negative byte == value >= 0x80, not ASCII
+if (val[i] < 0) {
+dst[i] = '?';
+} else {
+dst[i] = val[i];
+}
+}
+return dst;
+}
+// UTF16: two bytes per char
+int len = val.length >> 1;
+byte[] dst = new byte[len];
+int dp = 0;
+for (int i = 0; i < len; i++) {
+char c = StringUTF16.getChar(val, i);
+if (c < 0x80) {
+dst[dp++] = (byte)c;
+continue;
+}
+// skip the low half of a valid surrogate pair: one '?' per pair
+if (Character.isHighSurrogate(c) && i + 1 < len &&
+Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
+i++;
+}
+dst[dp++] = '?';
+}
+if (len == dp) {
+return dst;
+}
+return Arrays.copyOf(dst, dp);
+}
+////////////////////////// latin1/8859_1 ///////////////////////////
+/**
+* Decodes ba[off, off+len) as ISO-8859-1 into the thread's cached Result:
+* a plain LATIN1 copy of the range when COMPACT_STRINGS is on, otherwise
+* the range inflated by StringLatin1.inflate and tagged UTF16.
+*/
+private static Result decodeLatin1(byte[] ba, int off, int len) {
+Result result = resultCached.get();
+if (COMPACT_STRINGS) {
+return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
+} else {
+return result.with(StringLatin1.inflate(ba, off, len), UTF16);
+}
+}
 @HotSpotIntrinsicCandidate
 private static int implEncodeISOArray(byte[] sa, int sp,
 byte[] da, int dp, int len) {
 int i = 0;
 for (; i < len; i++) {
 da[dp++] = (byte)c;
 }
 return i;
 }
-static byte[] encode8859_1(byte coder, byte[] val) {
+private static byte[] encode8859_1(byte coder, byte[] val) {
 if (coder == LATIN1) {
 return Arrays.copyOf(val, val.length);
 }
 int len = val.length >> 1;
 byte[] dst = new byte[len];
 return dst;
 }
 return Arrays.copyOf(dst, dp);
 }
-static byte[] encodeASCII(byte coder, byte[] val) {
+//////////////////////////////// utf8 ////////////////////////////////////
-if (coder == LATIN1) {
-byte[] dst = new byte[val.length];
+private static boolean isNotContinuation(int b) {
-for (int i = 0; i < val.length; i++) {
+return (b & 0xc0) != 0x80;
-if (val[i] < 0) {
+}
-dst[i] = '?';
-} else {
+private static boolean isMalformed3(int b1, int b2, int b3) {
-dst[i] = val[i];
+return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
-}
+(b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
 }
+// Checks the first two bytes of a 3-byte sequence (values as sign-extended
+// bytes). True when b1 is 0xe0 and b2 is in 0x80..0x9f, or when b2 is not a
+// continuation byte (10xxxxxx).
+private static boolean isMalformed3_2(int b1, int b2) {
+return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
+(b2 & 0xc0) != 0x80;
+}
+// True when any of the three trailing bytes of a 4-byte sequence is not a
+// continuation byte (10xxxxxx).
+private static boolean isMalformed4(int b2, int b3, int b4) {
+return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
+(b4 & 0xc0) != 0x80;
+}
+// Checks the first two bytes of a 4-byte sequence. Compares against
+// unsigned values, so callers must pass b1 and b2 masked with 0xff.
+// True when b1 is 0xf0 and b2 is outside 0x90..0xbf, when b1 is 0xf4 and
+// b2 is outside 0x80..0x8f, or when b2 is not a continuation byte.
+private static boolean isMalformed4_2(int b1, int b2) {
+return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
+(b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
+(b2 & 0xc0) != 0x80;
+}
+// True when the third byte of a 4-byte sequence is not a continuation
+// byte (10xxxxxx).
+private static boolean isMalformed4_3(int b3) {
+return (b3 & 0xc0) != 0x80;
+}
+// for nb == 3/4
+// Returns how many bytes of the malformed nb-byte sequence starting at
+// src[sp] the decoder should skip: it counts the bytes that were valid
+// before the first offending one. decodeUTF8_0 rewinds sp to the lead byte
+// and then advances it by this value. Any other nb trips the assert and
+// returns -1.
+private static int malformedN(byte[] src, int sp, int nb) {
+if (nb == 3) {
+int b1 = src[sp++];
+int b2 = src[sp++];    // no need to lookup b3
+return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
+isNotContinuation(b2)) ? 1 : 2;
+} else if (nb == 4) { // speed does not matter here
+int b1 = src[sp++] & 0xff;
+int b2 = src[sp++] & 0xff;
+if (b1 > 0xf4 ||
+(b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
+(b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
+isNotContinuation(b2))
+return 1;
+if (isNotContinuation(src[sp++]))
+return 2;
+return 3;
+}
+assert false;
+return -1;
+}
+// Always throws IllegalArgumentException; the message carries the given
+// offset and length of the malformed input.
+private static void throwMalformed(int off, int nb) {
+throw new IllegalArgumentException("malformed input off : " + off +
+", length : " + nb);
+}
+// U+FFFD, written by decodeASCII and decodeUTF8_0 in place of bytes they
+// cannot decode.
+private static char repl = '\ufffd';
+// Decodes src[sp, sp+len) as UTF-8. With COMPACT_STRINGS on and
+// hasNegatives reporting none for the range, the bytes are copied unchanged
+// as LATIN1; everything else is delegated to decodeUTF8_0.
+private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
+// ascii-bias: checked first, at some relative cost to input that is not ascii-only
+if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
+return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
+LATIN1);
+return decodeUTF8_0(src, sp, len, doReplace);
+}
+/**
+* General UTF-8 decoder for src[sp, sp+len), filling the thread's cached
+* Result.
+*
+* With COMPACT_STRINGS on, the first loop decodes optimistically into
+* Latin1: ASCII bytes and 2-byte sequences led by 0xc2/0xc3 (U+0080..U+00FF).
+* If the whole input fits, the LATIN1 result is returned. On the first byte
+* that does not fit, decoding switches to UTF16, carries over what was
+* already decoded, and resumes at that byte.
+*
+* Malformed input is replaced by repl when doReplace is true; otherwise
+* throwMalformed raises IllegalArgumentException.
+*/
+private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
+Result ret = resultCached.get();
+int sl = sp + len;
+int dp = 0;
+byte[] dst = new byte[len];
+if (COMPACT_STRINGS) {
+while (sp < sl) {
+int b1 = src[sp];
+if (b1 >= 0) {
+dst[dp++] = (byte)b1;
+sp++;
+continue;
+}
+if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
+sp + 1 < sl) {
+int b2 = src[sp + 1];
+if (!isNotContinuation(b2)) {
+// the XOR constant strips the 110xxxxx / 10xxxxxx marker bits
+dst[dp++] = (byte)(((b1 << 6) ^ b2)^
+(((byte) 0xC0 << 6) ^
+((byte) 0x80 << 0)));
+sp += 2;
+continue;
+}
+}
+// anything not a latin1, including the repl
+// we have to go with the utf16
+break;
+}
+if (sp == sl) {
+if (dp != dst.length) {
+dst = Arrays.copyOf(dst, dp);
+}
+return ret.with(dst, LATIN1);
+}
+}
+// switch to UTF16 (two bytes per char), keeping any Latin1 prefix
+// decoded above; sp still points at the first byte not yet consumed
+if (dp == 0) {
+dst = new byte[len << 1];
+} else {
+byte[] buf = new byte[len << 1];
+StringLatin1.inflate(dst, 0, buf, 0, dp);
+dst = buf;
+}
+while (sp < sl) {
+int b1 = src[sp++];
+if (b1 >= 0) {
+putChar(dst, dp++, (char) b1);
+} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
+// 2-byte lead (110xxxxx); the mask test rejects 0xc0/0xc1
+if (sp < sl) {
+int b2 = src[sp++];
+if (isNotContinuation(b2)) {
+if (!doReplace) {
+throwMalformed(sp - 1, 1);
+}
+putChar(dst, dp++, repl);
+// step back so b2 is examined again as a lead byte
+sp--;
+} else {
+putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
+(((byte) 0xC0 << 6) ^
+((byte) 0x80 << 0))));
+}
+continue;
+}
+if (!doReplace) {
+throwMalformed(sp, 1);  // underflow()
+}
+putChar(dst, dp++, repl);
+break;
+} else if ((b1 >> 4) == -2) {
+// 3-byte lead (1110xxxx)
+if (sp + 1 < sl) {
+int b2 = src[sp++];
+int b3 = src[sp++];
+if (isMalformed3(b1, b2, b3)) {
+if (!doReplace) {
+throwMalformed(sp - 3, 3);
+}
+putChar(dst, dp++, repl);
+// rewind to the lead byte, then skip only the malformed part
+sp -= 3;
+sp += malformedN(src, sp, 3);
+} else {
+char c = (char)((b1 << 12) ^
+(b2 <<  6) ^
+(b3 ^
+(((byte) 0xE0 << 12) ^
+((byte) 0x80 <<  6) ^
+((byte) 0x80 <<  0))));
+// a surrogate decoded from a 3-byte sequence is treated as malformed
+if (isSurrogate(c)) {
+if (!doReplace) {
+throwMalformed(sp - 3, 3);
+}
+putChar(dst, dp++, repl);
+} else {
+putChar(dst, dp++, c);
+}
+}
+continue;
+}
+// fewer than two bytes follow the lead: truncated 3-byte sequence
+if (sp  < sl && isMalformed3_2(b1, src[sp])) {
+if (!doReplace) {
+throwMalformed(sp - 1, 2);
+}
+putChar(dst, dp++, repl);
+continue;
+}
+if (!doReplace){
+throwMalformed(sp, 1);
+}
+putChar(dst, dp++, repl);
+break;
+} else if ((b1 >> 3) == -2) {
+// 4-byte lead (11110xxx)
+if (sp + 2 < sl) {
+int b2 = src[sp++];
+int b3 = src[sp++];
+int b4 = src[sp++];
+int uc = ((b1 << 18) ^
+(b2 << 12) ^
+(b3 <<  6) ^
+(b4 ^
+(((byte) 0xF0 << 18) ^
+((byte) 0x80 << 12) ^
+((byte) 0x80 <<  6) ^
+((byte) 0x80 <<  0))));
+if (isMalformed4(b2, b3, b4) ||
+!isSupplementaryCodePoint(uc)) { // shortest form check
+if (!doReplace) {
+throwMalformed(sp - 4, 4);
+}
+putChar(dst, dp++, repl);
+// rewind to the lead byte, then skip only the malformed part
+sp -= 4;
+sp += malformedN(src, sp, 4);
+} else {
+// a supplementary code point takes two UTF16 chars
+putChar(dst, dp++, highSurrogate(uc));
+putChar(dst, dp++, lowSurrogate(uc));
+}
+continue;
+}
+// fewer than three bytes follow the lead: truncated 4-byte sequence
+b1 &= 0xff;
+if (b1 > 0xf4 ||
+sp  < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
+if (!doReplace) {
+throwMalformed(sp - 1, 1);  // or 2
+}
+putChar(dst, dp++, repl);
+continue;
+}
+if (!doReplace) {
+throwMalformed(sp - 1, 1);
+}
+sp++;
+putChar(dst, dp++, repl);
+if (sp  < sl && isMalformed4_3(src[sp])) {
+continue;
+}
+break;
+} else {
+// a byte that matches none of the lead patterns above
+if (!doReplace) {
+throwMalformed(sp - 1, 1);
+}
+putChar(dst, dp++, repl);
+}
+}
+// trim to the dp chars actually written (two bytes each)
+if (dp != len) {
+dst = Arrays.copyOf(dst, dp << 1);
+}
+return ret.with(dst, UTF16);
+}
+private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
+if (coder == UTF16)
+return encodeUTF8_UTF16(val, doReplace);
+if (!hasNegatives(val, 0, val.length))
+return Arrays.copyOf(val, val.length);
+int dp = 0;
+byte[] dst = new byte[val.length << 1];
+for (int sp = 0; sp < val.length; sp++) {
+byte c = val[sp];
+if (c < 0) {
+dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
+dst[dp++] = (byte)(0x80 | (c & 0x3f));
+} else {
+dst[dp++] = c;
+}
+}
+if (dp == dst.length)
 return dst;
-}
+return Arrays.copyOf(dst, dp);
-int len = val.length >> 1;
+}
-byte[] dst = new byte[len];
+private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
 int dp = 0;
-for (int i = 0; i < len; i++) {
+int sp = 0;
-char c = StringUTF16.getChar(val, i);
+int sl = val.length >> 1;
+byte[] dst = new byte[sl * 3];
+char c;
+while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
+// ascii fast loop;
+dst[dp++] = (byte)c;
+sp++;
+}
+while (sp < sl) {
+c = StringUTF16.getChar(val, sp++);
 if (c < 0x80) {
 dst[dp++] = (byte)c;
-continue;
+} else if (c < 0x800) {
-}
+dst[dp++] = (byte)(0xc0 | (c >> 6));
-if (Character.isHighSurrogate(c) && i + 1 < len &&
+dst[dp++] = (byte)(0x80 | (c & 0x3f));
-Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
+} else if (Character.isSurrogate(c)) {
-i++;
+int uc = -1;
-}
+char c2;
-dst[dp++] = '?';
+if (Character.isHighSurrogate(c) && sp < sl &&
-}
+Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
-if (len == dp) {
+uc = Character.toCodePoint(c, c2);
-return dst;
+}
-}
+if (uc < 0) {
-return Arrays.copyOf(dst, dp);
+if (doReplace) {
-}
-static byte[] encodeUTF8(byte coder, byte[] val) {
-int dp = 0;
-byte[] dst;
-if (coder == LATIN1) {
-dst = new byte[val.length << 1];
-for (int sp = 0; sp < val.length; sp++) {
-byte c = val[sp];
-if (c < 0) {
-dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
-dst[dp++] = (byte)(0x80 | (c & 0x3f));
-} else {
-dst[dp++] = c;
-}
-}
-} else {
-int sp = 0;
-int sl = val.length >> 1;
-dst = new byte[sl * 3];
-char c;
-while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
-// ascii fast loop;
-dst[dp++] = (byte)c;
-sp++;
-}
-while (sp < sl) {
-c = StringUTF16.getChar(val, sp++);
-if (c < 0x80) {
-dst[dp++] = (byte)c;
-} else if (c < 0x800) {
-dst[dp++] = (byte)(0xc0 | (c >> 6));
-dst[dp++] = (byte)(0x80 | (c & 0x3f));
-} else if (Character.isSurrogate(c)) {
-int uc = -1;
-char c2;
-if (Character.isHighSurrogate(c) && sp < sl &&
-Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
-uc = Character.toCodePoint(c, c2);
-}
-if (uc < 0) {
 dst[dp++] = '?';
 } else {
-dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
+throwMalformed(sp - 1, 1); // or 2, does not matter here
-dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
-dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
-dst[dp++] = (byte)(0x80 | (uc & 0x3f));
-sp++;  // 2 chars
 }
 } else {
-// 3 bytes, 16 bits
+dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
-dst[dp++] = (byte)(0xe0 | ((c >> 12)));
+dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
-dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
+dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
-dst[dp++] = (byte)(0x80 | (c & 0x3f));
+dst[dp++] = (byte)(0x80 | (uc & 0x3f));
-}
+sp++;  // 2 chars
+}
+} else {
+// 3 bytes, 16 bits
+dst[dp++] = (byte)(0xe0 | ((c >> 12)));
+dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
+dst[dp++] = (byte)(0x80 | (c & 0x3f));
 }
 }
 if (dp == dst.length) {
 return dst;
 }
 return Arrays.copyOf(dst, dp);
 }
-static byte[] encode(String charsetName, byte coder, byte[] val)
+////////////////////// for j.u.z.ZipCoder //////////////////////////
-throws UnsupportedEncodingException
-{
+/*
-StringEncoder se = deref(encoder);
+* Throws IllegalArgumentException, instead of replacing, if malformed or unmappable.
-String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
-if ((se == null) || !(csn.equals(se.requestedCharsetName())
-|| csn.equals(se.charsetName()))) {
-se = null;
-try {
-Charset cs = lookupCharset(csn);
-if (cs != null) {
-if (cs == UTF_8) {
-return encodeUTF8(coder, val);
-} else if (cs == ISO_8859_1) {
-return encode8859_1(coder, val);
-} else if (cs == US_ASCII) {
-return encodeASCII(coder, val);
-}
-se = new StringEncoder(cs, csn);
-}
-} catch (IllegalCharsetNameException x) {}
-if (se == null) {
-throw new UnsupportedEncodingException (csn);
-}
-set(encoder, se);
-}
-return se.encode(coder, val);
-}
-static byte[] encode(Charset cs, byte coder, byte[] val) {
-if (cs == UTF_8) {
-return encodeUTF8(coder, val);
-} else if (cs == ISO_8859_1) {
-return encode8859_1(coder, val);
-} else if (cs == US_ASCII) {
-return encodeASCII(coder, val);
-}
-CharsetEncoder ce = cs.newEncoder();
-// fastpath for ascii compatible
-if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
-((ArrayEncoder)ce).isASCIICompatible() &&
-!hasNegatives(val, 0, val.length)))) {
-return Arrays.copyOf(val, val.length);
-}
-int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
-int en = scale(len, ce.maxBytesPerChar());
-byte[] ba = new byte[en];
-if (len == 0) {
-return ba;
-}
-boolean isTrusted = cs.getClass().getClassLoader0() == null ||
-System.getSecurityManager() == null;
-ce.onMalformedInput(CodingErrorAction.REPLACE)
-.onUnmappableCharacter(CodingErrorAction.REPLACE)
-.reset();
-if (ce instanceof ArrayEncoder) {
-if (!isTrusted) {
-val = Arrays.copyOf(val, val.length);
-}
-int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
-: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
-if (blen != -1) {
-return safeTrim(ba, blen, isTrusted);
-}
-}
-char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
-: StringUTF16.toChars(val);
-ByteBuffer bb = ByteBuffer.wrap(ba);
-CharBuffer cb = CharBuffer.wrap(ca, 0, len);
-try {
-CoderResult cr = ce.encode(cb, bb, true);
-if (!cr.isUnderflow())
-cr.throwException();
-cr = ce.flush(bb);
-if (!cr.isUnderflow())
-cr.throwException();
-} catch (CharacterCodingException x) {
-throw new Error(x);
-}
-return safeTrim(ba, bb.position(), isTrusted);
-}
-static byte[] encode(byte coder, byte[] val) {
-String csn = Charset.defaultCharset().name();
-try {
-// use charset name encode() variant which provides caching.
-return encode(csn, coder, val);
-} catch (UnsupportedEncodingException x) {
-warnUnsupportedCharset(csn);
-}
-try {
-return encode("ISO-8859-1", coder, val);
-} catch (UnsupportedEncodingException x) {
-// If this code is hit during VM initialization, err(String) is
-// the only way we will be able to get any kind of error message.
-err("ISO-8859-1 charset not available: " + x.toString() + "\n");
-// If we can not find ISO-8859-1 (a required encoding) then things
-// are seriously wrong with the installation.
-System.exit(1);
-return null;
-}
-}
-/**
-*  Print a message directly to stderr, bypassing all character conversion
-*  methods.
-*  @param msg  message to print
 */
-private static native void err(String msg);
+// Builds a String from the UTF-8 bytes src[off, off+len) without
+// replacement: decodeUTF8_0 is called with doReplace == false, so malformed
+// input ends in throwMalformed's IllegalArgumentException. With
+// COMPACT_STRINGS on and hasNegatives reporting none for the range, the
+// bytes are copied unchanged as LATIN1.
+static String newStringUTF8NoRepl(byte[] src, int off, int len) {
+if (COMPACT_STRINGS && !hasNegatives(src, off, len))
+return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
+Result ret = decodeUTF8_0(src, off, len, false);
+return new String(ret.value, ret.coder);
+}
+/*
+* Encodes s to UTF-8 by calling encodeUTF8 with doReplace == false.
+* Throws IllegalArgumentException, instead of replacing, if unmappable.
+*/
+static byte[] getBytesUTF8NoRepl(String s) {
+return encodeUTF8(s.coder(), s.value(), false);
+}
 }

changeset 48262	daf3b49f4839
parent 47216	71c04702a3d5
child 49443	e5679a6661d6