src/java.base/share/classes/java/lang/StringCoding.java
changeset 48262 daf3b49f4839
parent 47216 71c04702a3d5
child 49443 e5679a6661d6
equal deleted inserted replaced
48261:43edfde828ab 48262:daf3b49f4839
     1 /*
     1 /*
     2  * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
     2  * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     7  * published by the Free Software Foundation.  Oracle designates this
    45 import sun.nio.cs.StandardCharsets;
    45 import sun.nio.cs.StandardCharsets;
    46 
    46 
    47 import static java.lang.String.LATIN1;
    47 import static java.lang.String.LATIN1;
    48 import static java.lang.String.UTF16;
    48 import static java.lang.String.UTF16;
    49 import static java.lang.String.COMPACT_STRINGS;
    49 import static java.lang.String.COMPACT_STRINGS;
       
    50 import static java.lang.Character.isSurrogate;
       
    51 import static java.lang.Character.highSurrogate;
       
    52 import static java.lang.Character.lowSurrogate;
       
    53 import static java.lang.Character.isSupplementaryCodePoint;
       
    54 import static java.lang.StringUTF16.putChar;
    50 
    55 
    51 /**
    56 /**
    52  * Utility class for string encoding and decoding.
    57  * Utility class for string encoding and decoding.
    53  */
    58  */
    54 
    59 
    64 
    69 
    65     private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
    70     private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
    66     private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
    71     private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
    67     private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
    72     private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
    68 
    73 
    69     private static boolean warnUnsupportedCharset = true;
       
    70 
       
    71     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
    74     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
    72         SoftReference<T> sr = tl.get();
    75         SoftReference<T> sr = tl.get();
    73         if (sr == null)
    76         if (sr == null)
    74             return null;
    77             return null;
    75         return sr.get();
    78         return sr.get();
    78     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
    81     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
    79         tl.set(new SoftReference<>(ob));
    82         tl.set(new SoftReference<>(ob));
    80     }
    83     }
    81 
    84 
    82     // Trim the given byte array to the given length
    85     // Trim the given byte array to the given length
    83     //
       
    84     private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
    86     private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
    85         if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
    87         if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
    86             return ba;
    88             return ba;
    87         else
    89         else
    88             return Arrays.copyOf(ba, len);
    90             return Arrays.copyOf(ba, len);
   101             } catch (UnsupportedCharsetException x) {
   103             } catch (UnsupportedCharsetException x) {
   102                 throw new Error(x);
   104                 throw new Error(x);
   103             }
   105             }
   104         }
   106         }
   105         return null;
   107         return null;
   106     }
       
   107 
       
   108     private static void warnUnsupportedCharset(String csn) {
       
   109         if (warnUnsupportedCharset) {
       
   110             // Use err(String) rather than the Logging API or System.err
       
   111             // since this method may be called during VM initialization
       
   112             // before either is available.
       
   113             err("WARNING: Default charset " + csn +
       
   114                 " not supported, using ISO-8859-1 instead\n");
       
   115             warnUnsupportedCharset = false;
       
   116         }
       
   117     }
   108     }
   118 
   109 
   119     static class Result {
   110     static class Result {
   120         byte[] value;
   111         byte[] value;
   121         byte coder;
   112         byte coder;
   222             }
   213             }
   223             return result.with(ca, 0, cb.position());
   214             return result.with(ca, 0, cb.position());
   224         }
   215         }
   225     }
   216     }
   226 
   217 
   227     private static class StringDecoder8859_1 extends StringDecoder {
       
   228         StringDecoder8859_1(Charset cs, String rcn) {
       
   229             super(cs, rcn);
       
   230         }
       
   231         Result decode(byte[] ba, int off, int len) {
       
   232             if (COMPACT_STRINGS) {
       
   233                 return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
       
   234             } else {
       
   235                 return result.with(StringLatin1.inflate(ba, off, len), UTF16);
       
   236             }
       
   237         }
       
   238     }
       
   239 
       
   240     static Result decode(String charsetName, byte[] ba, int off, int len)
   218     static Result decode(String charsetName, byte[] ba, int off, int len)
   241         throws UnsupportedEncodingException
   219         throws UnsupportedEncodingException
   242     {
   220     {
   243         StringDecoder sd = deref(decoder);
   221         StringDecoder sd = deref(decoder);
   244         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
   222         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
   247             sd = null;
   225             sd = null;
   248             try {
   226             try {
   249                 Charset cs = lookupCharset(csn);
   227                 Charset cs = lookupCharset(csn);
   250                 if (cs != null) {
   228                 if (cs != null) {
   251                     if (cs == UTF_8) {
   229                     if (cs == UTF_8) {
   252                         sd = new StringDecoderUTF8(cs, csn);
   230                         return decodeUTF8(ba, off, len, true);
   253                     } else if (cs == ISO_8859_1) {
   231                     }
   254                         sd = new StringDecoder8859_1(cs, csn);
   232                     if (cs == ISO_8859_1) {
   255                     } else {
   233                         return decodeLatin1(ba, off, len);
   256                         sd = new StringDecoder(cs, csn);
   234                     }
   257                     }
   235                     if (cs == US_ASCII) {
       
   236                         return decodeASCII(ba, off, len);
       
   237                     }
       
   238                     sd = new StringDecoder(cs, csn);
   258                 }
   239                 }
   259             } catch (IllegalCharsetNameException x) {}
   240             } catch (IllegalCharsetNameException x) {}
   260             if (sd == null)
   241             if (sd == null)
   261                 throw new UnsupportedEncodingException(csn);
   242                 throw new UnsupportedEncodingException(csn);
   262             set(decoder, sd);
   243             set(decoder, sd);
   263         }
   244         }
   264         return sd.decode(ba, off, len);
   245         return sd.decode(ba, off, len);
   265     }
   246     }
   266 
   247 
   267     static Result decode(Charset cs, byte[] ba, int off, int len) {
   248     static Result decode(Charset cs, byte[] ba, int off, int len) {
       
   249         if (cs == UTF_8) {
       
   250             return decodeUTF8(ba, off, len, true);
       
   251         }
       
   252         if (cs == ISO_8859_1) {
       
   253             return decodeLatin1(ba, off, len);
       
   254         }
       
   255         if (cs == US_ASCII) {
       
   256             return decodeASCII(ba, off, len);
       
   257         }
       
   258 
   268         // (1)We never cache the "external" cs, the only benefit of creating
   259         // (1)We never cache the "external" cs, the only benefit of creating
   269         // an additional StringDe/Encoder object to wrap it is to share the
   260         // an additional StringDe/Encoder object to wrap it is to share the
   270         // de/encode() method. These SD/E objects are short-lived, the young-gen
   261         // de/encode() method. These SD/E objects are short-lived, the young-gen
   271         // gc should be able to take care of them well. But the best approach
   262         // gc should be able to take care of them well. But the best approach
   272         // is still not to generate them if not really necessary.
   263         // is still not to generate them if not really necessary.
   278         // possible that the SM==null for now but then SM is NOT null later
   269         // possible that the SM==null for now but then SM is NOT null later
    279         // when safeTrim() is invoked...the "safe" way to do it is to redundantly
    270         // when safeTrim() is invoked...the "safe" way to do it is to redundantly
    280         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
    271         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
   281         // but it then can be argued that the SM is null when the operation
   272         // but it then can be argued that the SM is null when the operation
   282         // is started...
   273         // is started...
   283         if (cs == UTF_8) {
       
   284             return StringDecoderUTF8.decode(ba, off, len, new Result());
       
   285         }
       
   286         CharsetDecoder cd = cs.newDecoder();
   274         CharsetDecoder cd = cs.newDecoder();
   287         // ascii fastpath
   275         // ascii fastpath
   288         if (cs == ISO_8859_1 || ((cd instanceof ArrayDecoder) &&
   276         if ((cd instanceof ArrayDecoder) &&
   289                                  ((ArrayDecoder)cd).isASCIICompatible() &&
   277             ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
   290                                  !hasNegatives(ba, off, len))) {
   278             return decodeLatin1(ba, off, len);
   291              if (COMPACT_STRINGS) {
       
   292                  return new Result().with(Arrays.copyOfRange(ba, off, off + len),
       
   293                                           LATIN1);
       
   294              } else {
       
   295                  return new Result().with(StringLatin1.inflate(ba, off, len), UTF16);
       
   296              }
       
   297         }
   279         }
   298         int en = scale(len, cd.maxCharsPerByte());
   280         int en = scale(len, cd.maxCharsPerByte());
   299         if (len == 0) {
   281         if (len == 0) {
   300             return new Result().with();
   282             return new Result().with();
   301         }
   283         }
   302         if (cs.getClass().getClassLoader0() != null &&
       
   303             System.getSecurityManager() != null) {
       
   304             ba =  Arrays.copyOfRange(ba, off, off + len);
       
   305             off = 0;
       
   306         }
       
   307         cd.onMalformedInput(CodingErrorAction.REPLACE)
   284         cd.onMalformedInput(CodingErrorAction.REPLACE)
   308           .onUnmappableCharacter(CodingErrorAction.REPLACE)
   285           .onUnmappableCharacter(CodingErrorAction.REPLACE)
   309           .reset();
   286           .reset();
   310 
       
   311         char[] ca = new char[en];
   287         char[] ca = new char[en];
   312         if (cd instanceof ArrayDecoder) {
   288         if (cd instanceof ArrayDecoder) {
   313             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
   289             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
   314             return new Result().with(ca, 0, clen);
   290             return new Result().with(ca, 0, clen);
       
   291         }
       
   292         if (cs.getClass().getClassLoader0() != null &&
       
   293             System.getSecurityManager() != null) {
       
   294             ba = Arrays.copyOfRange(ba, off, off + len);
       
   295             off = 0;
   315         }
   296         }
   316         ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
   297         ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
   317         CharBuffer cb = CharBuffer.wrap(ca);
   298         CharBuffer cb = CharBuffer.wrap(ca);
   318         try {
   299         try {
   319             CoderResult cr = cd.decode(bb, cb, true);
   300             CoderResult cr = cd.decode(bb, cb, true);
   329         }
   310         }
   330         return new Result().with(ca, 0, cb.position());
   311         return new Result().with(ca, 0, cb.position());
   331     }
   312     }
   332 
   313 
   333     static Result decode(byte[] ba, int off, int len) {
   314     static Result decode(byte[] ba, int off, int len) {
   334         String csn = Charset.defaultCharset().name();
   315         Charset cs = Charset.defaultCharset();
   335         try {
   316         if (cs == UTF_8) {
   336             // use charset name decode() variant which provides caching.
   317             return decodeUTF8(ba, off, len, true);
   337             return decode(csn, ba, off, len);
   318         }
   338         } catch (UnsupportedEncodingException x) {
   319         if (cs == ISO_8859_1) {
   339             warnUnsupportedCharset(csn);
   320             return decodeLatin1(ba, off, len);
   340         }
   321         }
   341         try {
   322         if (cs == US_ASCII) {
   342             return decode("ISO-8859-1", ba, off, len);
   323             return decodeASCII(ba, off, len);
   343         } catch (UnsupportedEncodingException x) {
   324         }
   344             // If this code is hit during VM initialization, err(String) is
   325         StringDecoder sd = deref(decoder);
   345             // the only way we will be able to get any kind of error message.
   326         if (sd == null || !cs.name().equals(sd.cs.name())) {
   346             err("ISO-8859-1 charset not available: " + x.toString() + "\n");
   327             sd = new StringDecoder(cs, cs.name());
   347             // If we can not find ISO-8859-1 (a required encoding) then things
   328             set(decoder, sd);
   348             // are seriously wrong with the installation.
   329         }
   349             System.exit(1);
   330         return sd.decode(ba, off, len);
   350             return null;
       
   351         }
       
   352     }
   331     }
   353 
   332 
   354     // -- Encoding --
   333     // -- Encoding --
   355     private static class StringEncoder {
   334     private static class StringEncoder {
   356         private Charset cs;
   335         private Charset cs;
   391             byte[] ba = new byte[en];
   370             byte[] ba = new byte[en];
   392             if (len == 0) {
   371             if (len == 0) {
   393                 return ba;
   372                 return ba;
   394             }
   373             }
   395             if (ce instanceof ArrayEncoder) {
   374             if (ce instanceof ArrayEncoder) {
   396                 if (!isTrusted) {
       
   397                     val = Arrays.copyOf(val, val.length);
       
   398                 }
       
   399                 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
   375                 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
   400                                               : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
   376                                               : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
   401                 if (blen != -1) {
   377                 if (blen != -1) {
   402                     return safeTrim(ba, blen, isTrusted);
   378                     return safeTrim(ba, blen, isTrusted);
   403                 }
   379                 }
   421             }
   397             }
   422             return safeTrim(ba, bb.position(), isTrusted);
   398             return safeTrim(ba, bb.position(), isTrusted);
   423         }
   399         }
   424     }
   400     }
   425 
   401 
       
   402     static byte[] encode(String charsetName, byte coder, byte[] val)
       
   403         throws UnsupportedEncodingException
       
   404     {
       
   405         StringEncoder se = deref(encoder);
       
   406         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
       
   407         if ((se == null) || !(csn.equals(se.requestedCharsetName())
       
   408                               || csn.equals(se.charsetName()))) {
       
   409             se = null;
       
   410             try {
       
   411                 Charset cs = lookupCharset(csn);
       
   412                 if (cs != null) {
       
   413                     if (cs == UTF_8) {
       
   414                         return encodeUTF8(coder, val, true);
       
   415                     }
       
   416                     if (cs == ISO_8859_1) {
       
   417                         return encode8859_1(coder, val);
       
   418                     }
       
   419                     if (cs == US_ASCII) {
       
   420                         return encodeASCII(coder, val);
       
   421                     }
       
   422                     se = new StringEncoder(cs, csn);
       
   423                 }
       
   424             } catch (IllegalCharsetNameException x) {}
       
   425             if (se == null) {
       
   426                 throw new UnsupportedEncodingException (csn);
       
   427             }
       
   428             set(encoder, se);
       
   429         }
       
   430         return se.encode(coder, val);
       
   431     }
       
   432 
       
   433     static byte[] encode(Charset cs, byte coder, byte[] val) {
       
   434         if (cs == UTF_8) {
       
   435             return encodeUTF8(coder, val, true);
       
   436         }
       
   437         if (cs == ISO_8859_1) {
       
   438             return encode8859_1(coder, val);
       
   439         }
       
   440         if (cs == US_ASCII) {
       
   441             return encodeASCII(coder, val);
       
   442         }
       
   443         CharsetEncoder ce = cs.newEncoder();
       
   444         // fastpath for ascii compatible
       
   445         if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
       
   446                                  ((ArrayEncoder)ce).isASCIICompatible() &&
       
   447                                  !hasNegatives(val, 0, val.length)))) {
       
   448             return Arrays.copyOf(val, val.length);
       
   449         }
       
   450         int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
       
   451         int en = scale(len, ce.maxBytesPerChar());
       
   452         byte[] ba = new byte[en];
       
   453         if (len == 0) {
       
   454             return ba;
       
   455         }
       
   456         ce.onMalformedInput(CodingErrorAction.REPLACE)
       
   457           .onUnmappableCharacter(CodingErrorAction.REPLACE)
       
   458           .reset();
       
   459         if (ce instanceof ArrayEncoder) {
       
   460             int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
       
   461                                           : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
       
   462             if (blen != -1) {
       
   463                 return safeTrim(ba, blen, true);
       
   464             }
       
   465         }
       
   466         boolean isTrusted = cs.getClass().getClassLoader0() == null ||
       
   467                             System.getSecurityManager() == null;
       
   468         char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
       
   469                                        : StringUTF16.toChars(val);
       
   470         ByteBuffer bb = ByteBuffer.wrap(ba);
       
   471         CharBuffer cb = CharBuffer.wrap(ca, 0, len);
       
   472         try {
       
   473             CoderResult cr = ce.encode(cb, bb, true);
       
   474             if (!cr.isUnderflow())
       
   475                 cr.throwException();
       
   476             cr = ce.flush(bb);
       
   477             if (!cr.isUnderflow())
       
   478                 cr.throwException();
       
   479         } catch (CharacterCodingException x) {
       
   480             throw new Error(x);
       
   481         }
       
   482         return safeTrim(ba, bb.position(), isTrusted);
       
   483     }
       
   484 
       
   485     static byte[] encode(byte coder, byte[] val) {
       
   486         Charset cs = Charset.defaultCharset();
       
   487         if (cs == UTF_8) {
       
   488             return encodeUTF8(coder, val, true);
       
   489         }
       
   490         if (cs == ISO_8859_1) {
       
   491             return encode8859_1(coder, val);
       
   492         }
       
   493         if (cs == US_ASCII) {
       
   494             return encodeASCII(coder, val);
       
   495         }
       
   496         StringEncoder se = deref(encoder);
       
   497         if (se == null || !cs.name().equals(se.cs.name())) {
       
   498             se = new StringEncoder(cs, cs.name());
       
   499             set(encoder, se);
       
   500         }
       
   501         return se.encode(coder, val);
       
   502     }
       
   503 
       
   504     /**
       
   505      *  Print a message directly to stderr, bypassing all character conversion
       
   506      *  methods.
       
   507      *  @param msg  message to print
       
   508      */
       
   509     private static native void err(String msg);
       
   510 
       
   511      /* The cached Result for each thread */
       
   512     private static final ThreadLocal<StringCoding.Result>
       
   513         resultCached = new ThreadLocal<>() {
       
   514             protected StringCoding.Result initialValue() {
       
   515                 return new StringCoding.Result();
       
   516             }};
       
   517 
       
   518     ////////////////////////// ascii //////////////////////////////
       
   519 
       
   520     private static Result decodeASCII(byte[] ba, int off, int len) {
       
   521         Result result = resultCached.get();
       
   522         if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
       
   523             return result.with(Arrays.copyOfRange(ba, off, off + len),
       
   524                                LATIN1);
       
   525         }
       
   526         byte[] dst = new byte[len<<1];
       
   527         int dp = 0;
       
   528         while (dp < len) {
       
   529             int b = ba[off++];
       
   530             putChar(dst, dp++, (b >= 0) ? (char)b : repl);
       
   531         }
       
   532         return result.with(dst, UTF16);
       
   533     }
       
   534 
       
   535     private static byte[] encodeASCII(byte coder, byte[] val) {
       
   536         if (coder == LATIN1) {
       
   537             byte[] dst = new byte[val.length];
       
   538             for (int i = 0; i < val.length; i++) {
       
   539                 if (val[i] < 0) {
       
   540                     dst[i] = '?';
       
   541                 } else {
       
   542                     dst[i] = val[i];
       
   543                 }
       
   544             }
       
   545             return dst;
       
   546         }
       
   547         int len = val.length >> 1;
       
   548         byte[] dst = new byte[len];
       
   549         int dp = 0;
       
   550         for (int i = 0; i < len; i++) {
       
   551             char c = StringUTF16.getChar(val, i);
       
   552             if (c < 0x80) {
       
   553                 dst[dp++] = (byte)c;
       
   554                 continue;
       
   555             }
       
   556             if (Character.isHighSurrogate(c) && i + 1 < len &&
       
   557                 Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
       
   558                 i++;
       
   559             }
       
   560             dst[dp++] = '?';
       
   561         }
       
   562         if (len == dp) {
       
   563             return dst;
       
   564         }
       
   565         return Arrays.copyOf(dst, dp);
       
   566     }
       
   567 
       
   568     ////////////////////////// latin1/8859_1 ///////////////////////////
       
   569 
       
   570     private static Result decodeLatin1(byte[] ba, int off, int len) {
       
   571        Result result = resultCached.get();
       
   572        if (COMPACT_STRINGS) {
       
   573            return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
       
   574        } else {
       
   575            return result.with(StringLatin1.inflate(ba, off, len), UTF16);
       
   576        }
       
   577     }
       
   578 
   426     @HotSpotIntrinsicCandidate
   579     @HotSpotIntrinsicCandidate
   427     private static int implEncodeISOArray(byte[] sa, int sp,
   580     private static int implEncodeISOArray(byte[] sa, int sp,
   428                                           byte[] da, int dp, int len) {
   581                                           byte[] da, int dp, int len) {
   429         int i = 0;
   582         int i = 0;
   430         for (; i < len; i++) {
   583         for (; i < len; i++) {
   434             da[dp++] = (byte)c;
   587             da[dp++] = (byte)c;
   435         }
   588         }
   436         return i;
   589         return i;
   437     }
   590     }
   438 
   591 
   439     static byte[] encode8859_1(byte coder, byte[] val) {
   592     private static byte[] encode8859_1(byte coder, byte[] val) {
   440         if (coder == LATIN1) {
   593         if (coder == LATIN1) {
   441             return Arrays.copyOf(val, val.length);
   594             return Arrays.copyOf(val, val.length);
   442         }
   595         }
   443         int len = val.length >> 1;
   596         int len = val.length >> 1;
   444         byte[] dst = new byte[len];
   597         byte[] dst = new byte[len];
   463             return dst;
   616             return dst;
   464         }
   617         }
   465         return Arrays.copyOf(dst, dp);
   618         return Arrays.copyOf(dst, dp);
   466     }
   619     }
   467 
   620 
   468     static byte[] encodeASCII(byte coder, byte[] val) {
   621     //////////////////////////////// utf8 ////////////////////////////////////
   469         if (coder == LATIN1) {
   622 
   470             byte[] dst = new byte[val.length];
   623     private static boolean isNotContinuation(int b) {
   471             for (int i = 0; i < val.length; i++) {
   624         return (b & 0xc0) != 0x80;
   472                 if (val[i] < 0) {
   625     }
   473                     dst[i] = '?';
   626 
   474                 } else {
   627     private static boolean isMalformed3(int b1, int b2, int b3) {
   475                     dst[i] = val[i];
   628         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
   476                 }
   629                (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
   477             }
   630     }
       
   631 
       
   632     private static boolean isMalformed3_2(int b1, int b2) {
       
   633         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
       
   634                (b2 & 0xc0) != 0x80;
       
   635     }
       
   636 
       
   637     private static boolean isMalformed4(int b2, int b3, int b4) {
       
   638         return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
       
   639                (b4 & 0xc0) != 0x80;
       
   640     }
       
   641 
       
   642     private static boolean isMalformed4_2(int b1, int b2) {
       
   643         return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
       
   644                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
       
   645                (b2 & 0xc0) != 0x80;
       
   646     }
       
   647 
       
   648     private static boolean isMalformed4_3(int b3) {
       
   649         return (b3 & 0xc0) != 0x80;
       
   650     }
       
   651 
       
   652     // for nb == 3/4
       
   653     private static int malformedN(byte[] src, int sp, int nb) {
       
   654         if (nb == 3) {
       
   655             int b1 = src[sp++];
       
   656             int b2 = src[sp++];    // no need to lookup b3
       
   657             return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
       
   658                     isNotContinuation(b2)) ? 1 : 2;
       
   659         } else if (nb == 4) { // we don't care the speed here
       
   660             int b1 = src[sp++] & 0xff;
       
   661             int b2 = src[sp++] & 0xff;
       
   662             if (b1 > 0xf4 ||
       
   663                 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
       
   664                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
       
   665                 isNotContinuation(b2))
       
   666                 return 1;
       
   667             if (isNotContinuation(src[sp++]))
       
   668                 return 2;
       
   669             return 3;
       
   670         }
       
   671         assert false;
       
   672         return -1;
       
   673     }
       
   674 
       
   675     private static void throwMalformed(int off, int nb) {
       
   676         throw new IllegalArgumentException("malformed input off : " + off +
       
   677                                            ", length : " + nb);
       
   678     }
       
   679 
       
   680     private static char repl = '\ufffd';
       
   681 
       
   682     private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
       
    683         // ascii-bias, which has a relative impact on the non-ascii-only bytes
       
   684         if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
       
   685             return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
       
   686                                            LATIN1);
       
   687         return decodeUTF8_0(src, sp, len, doReplace);
       
   688     }
       
   689 
       
   690     private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
               // General UTF-8 decoder for src[sp, sp + len).
               //
               // Phase 1 (only when COMPACT_STRINGS): decode into one byte per
               // char for as long as every character fits in Latin-1; if the
               // whole input is consumed that way the result is tagged LATIN1.
               // Phase 2: continue from where phase 1 stopped, writing two bytes
               // per char via putChar(); the result is tagged UTF16.
               //
               // Malformed input: when doReplace is true the replacement char
               // (repl) is written; otherwise throwMalformed() raises
               // IllegalArgumentException.

   691         Result ret = resultCached.get();

   692 

               // sl is the exclusive end index in src; dp is the write index
               // into dst (a byte index in phase 1, a char index in phase 2).
   693         int sl = sp + len;

   694         int dp = 0;

   695         byte[] dst = new byte[len];

   696 

   697         if (COMPACT_STRINGS) {

   698             while (sp < sl) {

   699                 int b1 = src[sp];

                       // non-negative byte: single-byte (ASCII) character
   700                 if (b1 >= 0) {

   701                     dst[dp++] = (byte)b1;

   702                     sp++;

   703                     continue;

   704                 }

                       // Lead bytes 0xC2/0xC3 followed by a continuation byte are
                       // the two-byte sequences whose value still fits in one byte.
   705                 if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&

   706                     sp + 1 < sl) {

   707                     int b2 = src[sp + 1];

   708                     if (!isNotContinuation(b2)) {

                               // The second XOR operand cancels the (sign-extended)
                               // marker bits of the lead and continuation bytes.
   709                         dst[dp++] = (byte)(((b1 << 6) ^ b2)^

   710                                            (((byte) 0xC0 << 6) ^

   711                                            ((byte) 0x80 << 0)));

   712                         sp += 2;

   713                         continue;

   714                     }

   715                 }

   716                 // anything that is not Latin-1, including the replacement char:

   717                 // we have to fall back to UTF-16

   718                 break;

   719             }

                   // Whole input consumed in phase 1: trim to the bytes actually
                   // written and return a LATIN1 result.
   720             if (sp == sl) {

   721                 if (dp != dst.length) {

   722                     dst = Arrays.copyOf(dst, dp);

   723                 }

   724                 return ret.with(dst, LATIN1);

   725             }

   726         }

               // Switch to two bytes per char. Anything already decoded in
               // phase 1 (dp bytes) is widened into the new buffer.
   727         if (dp == 0) {

   728             dst = new byte[len << 1];

   729         } else {

   730             byte[] buf = new byte[len << 1];

   731             StringLatin1.inflate(dst, 0, buf, 0, dp);

   732             dst = buf;

   733         }

   734         while (sp < sl) {

   735             int b1 = src[sp++];

   736             if (b1 >= 0) {

   737                 putChar(dst, dp++, (char) b1);

                   // b1 is a sign-extended byte: (b1 >> 5) == -2 matches 110xxxxx,
                   // and (b1 & 0x1e) != 0 excludes the lead bytes 0xC0 and 0xC1.
   738             } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {

   739                 if (sp < sl) {

   740                     int b2 = src[sp++];

   741                     if (isNotContinuation(b2)) {

   742                         if (!doReplace) {

   743                             throwMalformed(sp - 1, 1);

   744                         }

   745                         putChar(dst, dp++, repl);

                               // step back so b2 is examined again as a lead byte
   746                         sp--;

   747                     } else {

   748                         putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^

   749                                                   (((byte) 0xC0 << 6) ^

   750                                                   ((byte) 0x80 << 0))));

   751                     }

   752                     continue;

   753                 }

                       // lead byte was the last byte of the input
   754                 if (!doReplace) {

   755                     throwMalformed(sp, 1);  // underflow()

   756                 }

   757                 putChar(dst, dp++, repl);

   758                 break;

                   // (b1 >> 4) == -2 matches 1110xxxx: three-byte sequence
   759             } else if ((b1 >> 4) == -2) {

                       // both remaining bytes of the sequence are available
   760                 if (sp + 1 < sl) {

   761                     int b2 = src[sp++];

   762                     int b3 = src[sp++];

   763                     if (isMalformed3(b1, b2, b3)) {

   764                         if (!doReplace) {

   765                             throwMalformed(sp - 3, 3);

   766                         }

   767                         putChar(dst, dp++, repl);

                               // rewind to the lead byte, then skip the number of
                               // bytes malformedN() reports for this sequence
   768                         sp -= 3;

   769                         sp += malformedN(src, sp, 3);

   770                     } else {

   771                         char c = (char)((b1 << 12) ^

   772                                         (b2 <<  6) ^

   773                                         (b3 ^

   774                                          (((byte) 0xE0 << 12) ^

   775                                          ((byte) 0x80 <<  6) ^

   776                                          ((byte) 0x80 <<  0))));

                               // a three-byte sequence that decodes to a surrogate
                               // is handled as malformed input
   777                         if (isSurrogate(c)) {

   778                             if (!doReplace) {

   779                                 throwMalformed(sp - 3, 3);

   780                             }

   781                             putChar(dst, dp++, repl);

   782                         } else {

   783                             putChar(dst, dp++, c);

   784                         }

   785                     }

   786                     continue;

   787                 }

                       // Truncated sequence. If one more byte exists and
                       // isMalformed3_2() rejects it, emit repl and continue; sp is
                       // not advanced, so that byte is read again as a lead byte.
   788                 if (sp  < sl && isMalformed3_2(b1, src[sp])) {

   789                     if (!doReplace) {

   790                         throwMalformed(sp - 1, 2);

   791                     }

   792                     putChar(dst, dp++, repl);

   793                     continue;

   794                 }

                       // otherwise the input ends inside the sequence: emit repl
                       // once and stop decoding
   795                 if (!doReplace){

   796                     throwMalformed(sp, 1);

   797                 }

   798                 putChar(dst, dp++, repl);

   799                 break;

                   // (b1 >> 3) == -2 matches 11110xxx: four-byte sequence
   800             } else if ((b1 >> 3) == -2) {

                       // all three remaining bytes of the sequence are available
   801                 if (sp + 2 < sl) {

   802                     int b2 = src[sp++];

   803                     int b3 = src[sp++];

   804                     int b4 = src[sp++];

   805                     int uc = ((b1 << 18) ^

   806                               (b2 << 12) ^

   807                               (b3 <<  6) ^

   808                               (b4 ^

   809                                (((byte) 0xF0 << 18) ^

   810                                ((byte) 0x80 << 12) ^

   811                                ((byte) 0x80 <<  6) ^

   812                                ((byte) 0x80 <<  0))));

   813                     if (isMalformed4(b2, b3, b4) ||

   814                         !isSupplementaryCodePoint(uc)) { // shortest form check

   815                         if (!doReplace) {

   816                             throwMalformed(sp - 4, 4);

   817                         }

   818                         putChar(dst, dp++, repl);

                               // rewind to the lead byte, then skip the number of
                               // bytes malformedN() reports for this sequence
   819                         sp -= 4;

   820                         sp += malformedN(src, sp, 4);

   821                     } else {

                               // supplementary code point: stored as a surrogate pair
   822                         putChar(dst, dp++, highSurrogate(uc));

   823                         putChar(dst, dp++, lowSurrogate(uc));

   824                     }

   825                     continue;

   826                 }

                       // Truncated sequence. b1 is made unsigned so it can be
                       // compared against 0xf4.
   827                 b1 &= 0xff;

   828                 if (b1 > 0xf4 ||

   829                     sp  < sl && isMalformed4_2(b1, src[sp] & 0xff)) {

   830                     if (!doReplace) {

   831                         throwMalformed(sp - 1, 1);  // or 2

   832                     }

   833                     putChar(dst, dp++, repl);

   834                     continue;

   835                 }

   836                 if (!doReplace) {

   837                     throwMalformed(sp - 1, 1);

   838                 }

                       // step past the second byte, emit repl, and continue only if
                       // a third byte exists and isMalformed4_3() rejects it
   839                 sp++;

   840                 putChar(dst, dp++, repl);

   841                 if (sp  < sl && isMalformed4_3(src[sp])) {

   842                     continue;

   843                 }

   844                 break;

   845             } else {

                       // any other negative byte matched none of the lead-byte
                       // patterns above
   846                 if (!doReplace) {

   847                     throwMalformed(sp - 1, 1);

   848                 }

   849                 putChar(dst, dp++, repl);

   850             }

   851         }

               // dst holds len << 1 bytes; trim it to the dp chars written
               // (two bytes each) unless exactly len chars were produced.
   852         if (dp != len) {

   853             dst = Arrays.copyOf(dst, dp << 1);

   854         }

   855         return ret.with(dst, UTF16);

   856     }
       
   857 
       
   858     private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
               // Encodes a string's backing bytes (val, interpreted per coder)
               // to UTF-8. UTF16 input is delegated to encodeUTF8_UTF16 together
               // with doReplace; the remaining code handles one-byte-per-char
               // input and does not consult doReplace.

   859         if (coder == UTF16)

   860             return encodeUTF8_UTF16(val, doReplace);

   861 

               // presumably hasNegatives() reports whether any byte has its high
               // bit set - TODO confirm; if none does, the output is a plain copy.
   862         if (!hasNegatives(val, 0, val.length))

   863             return Arrays.copyOf(val, val.length);

   864 

   865         int dp = 0;

               // at most two output bytes per input byte
   866         byte[] dst = new byte[val.length << 1];

   867         for (int sp = 0; sp < val.length; sp++) {

   868             byte c = val[sp];

                   // negative byte (0x80..0xFF unsigned): emit a two-byte sequence,
                   // lead byte 0xc0 | top two bits, then 0x80 | low six bits
   869             if (c < 0) {

   870                 dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));

   871                 dst[dp++] = (byte)(0x80 | (c & 0x3f));

   872             } else {

   873                 dst[dp++] = c;

   874             }

   875         }

               // NOTE(review): the rows numbered 478-480 below look like they
               // belong to the other column of this listing, not to this method -
               // confirm before relying on them.
   876         if (dp == dst.length)
   478             return dst;
   877             return dst;
   479         }
   878         return Arrays.copyOf(dst, dp);
   480         int len = val.length >> 1;
   879     }
   481         byte[] dst = new byte[len];
   880 
       
   881     private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
   482         int dp = 0;
   882         int dp = 0;
   483         for (int i = 0; i < len; i++) {
   883         int sp = 0;
   484             char c = StringUTF16.getChar(val, i);
   884         int sl = val.length >> 1;
       
   885         byte[] dst = new byte[sl * 3];
       
   886         char c;
       
   887         while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
       
   888             // ascii fast loop;
       
   889             dst[dp++] = (byte)c;
       
   890             sp++;
       
   891         }
       
   892         while (sp < sl) {
       
   893             c = StringUTF16.getChar(val, sp++);
   485             if (c < 0x80) {
   894             if (c < 0x80) {
   486                 dst[dp++] = (byte)c;
   895                 dst[dp++] = (byte)c;
   487                 continue;
   896             } else if (c < 0x800) {
   488             }
   897                 dst[dp++] = (byte)(0xc0 | (c >> 6));
   489             if (Character.isHighSurrogate(c) && i + 1 < len &&
   898                 dst[dp++] = (byte)(0x80 | (c & 0x3f));
   490                 Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
   899             } else if (Character.isSurrogate(c)) {
   491                 i++;
   900                 int uc = -1;
   492             }
   901                 char c2;
   493             dst[dp++] = '?';
   902                 if (Character.isHighSurrogate(c) && sp < sl &&
   494         }
   903                     Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
   495         if (len == dp) {
   904                     uc = Character.toCodePoint(c, c2);
   496             return dst;
   905                 }
   497         }
   906                 if (uc < 0) {
   498         return Arrays.copyOf(dst, dp);
   907                     if (doReplace) {
   499     }
       
   500 
       
   501    static byte[] encodeUTF8(byte coder, byte[] val) {
       
   502         int dp = 0;
       
   503         byte[] dst;
       
   504         if (coder == LATIN1) {
       
   505             dst = new byte[val.length << 1];
       
   506             for (int sp = 0; sp < val.length; sp++) {
       
   507                 byte c = val[sp];
       
   508                 if (c < 0) {
       
   509                     dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
       
   510                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
       
   511                 } else {
       
   512                     dst[dp++] = c;
       
   513                 }
       
   514             }
       
   515         } else {
       
   516             int sp = 0;
       
   517             int sl = val.length >> 1;
       
   518             dst = new byte[sl * 3];
       
   519             char c;
       
   520             while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
       
   521                 // ascii fast loop;
       
   522                 dst[dp++] = (byte)c;
       
   523                 sp++;
       
   524             }
       
   525             while (sp < sl) {
       
   526                 c = StringUTF16.getChar(val, sp++);
       
   527                 if (c < 0x80) {
       
   528                     dst[dp++] = (byte)c;
       
   529                 } else if (c < 0x800) {
       
   530                     dst[dp++] = (byte)(0xc0 | (c >> 6));
       
   531                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
       
   532                 } else if (Character.isSurrogate(c)) {
       
   533                     int uc = -1;
       
   534                     char c2;
       
   535                     if (Character.isHighSurrogate(c) && sp < sl &&
       
   536                         Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
       
   537                         uc = Character.toCodePoint(c, c2);
       
   538                     }
       
   539                     if (uc < 0) {
       
   540                         dst[dp++] = '?';
   908                         dst[dp++] = '?';
   541                     } else {
   909                     } else {
   542                         dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
   910                         throwMalformed(sp - 1, 1); // or 2, does not matter here
   543                         dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
       
   544                         dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
       
   545                         dst[dp++] = (byte)(0x80 | (uc & 0x3f));
       
   546                         sp++;  // 2 chars
       
   547                     }
   911                     }
   548                 } else {
   912                 } else {
   549                     // 3 bytes, 16 bits
   913                     dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
   550                     dst[dp++] = (byte)(0xe0 | ((c >> 12)));
   914                     dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
   551                     dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
   915                     dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
   552                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
   916                     dst[dp++] = (byte)(0x80 | (uc & 0x3f));
   553                 }
   917                     sp++;  // 2 chars
       
   918                 }
       
   919             } else {
       
   920                 // 3 bytes, 16 bits
       
   921                 dst[dp++] = (byte)(0xe0 | ((c >> 12)));
       
   922                 dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
       
   923                 dst[dp++] = (byte)(0x80 | (c & 0x3f));
   554             }
   924             }
   555         }
   925         }
   556         if (dp == dst.length) {
   926         if (dp == dst.length) {
   557             return dst;
   927             return dst;
   558         }
   928         }
   559         return Arrays.copyOf(dst, dp);
   929         return Arrays.copyOf(dst, dp);
   560     }
   930     }
   561 
   931 
   562     static byte[] encode(String charsetName, byte coder, byte[] val)
   932     ////////////////////// for j.u.z.ZipCoder //////////////////////////
   563         throws UnsupportedEncodingException
   933 
   564     {
   934     /*
   565         StringEncoder se = deref(encoder);
   935      * Throws IllegalArgumentException, instead of replacing, if malformed or unmappable.
   566         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
       
   567         if ((se == null) || !(csn.equals(se.requestedCharsetName())
       
   568                               || csn.equals(se.charsetName()))) {
       
   569             se = null;
       
   570             try {
       
   571                 Charset cs = lookupCharset(csn);
       
   572                 if (cs != null) {
       
   573                     if (cs == UTF_8) {
       
   574                         return encodeUTF8(coder, val);
       
   575                     } else if (cs == ISO_8859_1) {
       
   576                         return encode8859_1(coder, val);
       
   577                     } else if (cs == US_ASCII) {
       
   578                         return encodeASCII(coder, val);
       
   579                     }
       
   580                     se = new StringEncoder(cs, csn);
       
   581                 }
       
   582             } catch (IllegalCharsetNameException x) {}
       
   583             if (se == null) {
       
   584                 throw new UnsupportedEncodingException (csn);
       
   585             }
       
   586             set(encoder, se);
       
   587         }
       
   588         return se.encode(coder, val);
       
   589     }
       
   590 
       
   591     static byte[] encode(Charset cs, byte coder, byte[] val) {
               // Encodes a string's backing bytes (val, interpreted per coder)
               // into the given charset. UTF_8, ISO_8859_1 and US_ASCII are
               // dispatched to dedicated encoders; every other charset goes
               // through a fresh CharsetEncoder configured to REPLACE malformed
               // and unmappable input.

   592         if (cs == UTF_8) {

   593             return encodeUTF8(coder, val);

   594         } else if (cs == ISO_8859_1) {

   595             return encode8859_1(coder, val);

   596         } else if (cs == US_ASCII) {

   597             return encodeASCII(coder, val);

   598         }

   599         CharsetEncoder ce = cs.newEncoder();

   600         // fastpath for ascii compatible

               // LATIN1 input with no negative bytes can be copied verbatim when
               // the encoder declares itself ASCII compatible
   601         if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&

   602                                  ((ArrayEncoder)ce).isASCIICompatible() &&

   603                                  !hasNegatives(val, 0, val.length)))) {

   604             return Arrays.copyOf(val, val.length);

   605         }

               // number of chars: val.length for LATIN1, val.length / 2 for UTF16
   606         int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;

               // presumably scale() yields an upper bound on the encoded size -
               // TODO confirm against its definition
   607         int en = scale(len, ce.maxBytesPerChar());

   608         byte[] ba = new byte[en];

   609         if (len == 0) {

   610             return ba;

   611         }

               // presumably getClassLoader0() == null means the charset class
               // comes from the boot loader - TODO confirm. The flag decides
               // below whether val is defensively copied, and is passed to safeTrim().
   612         boolean isTrusted = cs.getClass().getClassLoader0() == null ||

   613                             System.getSecurityManager() == null;

   614         ce.onMalformedInput(CodingErrorAction.REPLACE)

   615           .onUnmappableCharacter(CodingErrorAction.REPLACE)

   616           .reset();

   617         if (ce instanceof ArrayEncoder) {

                   // hand an untrusted encoder a copy rather than the original array
   618             if (!isTrusted) {

   619                 val = Arrays.copyOf(val, val.length);

   620             }

   621             int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)

   622                                           : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);

                   // a result of -1 falls through to the buffer-based path below
   623             if (blen != -1) {

   624                 return safeTrim(ba, blen, isTrusted);

   625             }

   626         }

               // generic path: expand to char[] and run the encoder over buffers
   627         char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)

   628                                        : StringUTF16.toChars(val);

   629         ByteBuffer bb = ByteBuffer.wrap(ba);

   630         CharBuffer cb = CharBuffer.wrap(ca, 0, len);

   631         try {

   632             CoderResult cr = ce.encode(cb, bb, true);

   633             if (!cr.isUnderflow())

   634                 cr.throwException();

   635             cr = ce.flush(bb);

   636             if (!cr.isUnderflow())

   637                 cr.throwException();

   638         } catch (CharacterCodingException x) {

                   // any coding failure is rethrown wrapped in an Error
   639             throw new Error(x);

   640         }

   641         return safeTrim(ba, bb.position(), isTrusted);

   642     }
       
   643 
       
   644     static byte[] encode(byte coder, byte[] val) {
               // Encodes using the default charset, looked up by name. If that
               // name is unsupported, a warning is issued and ISO-8859-1 is
               // tried; if even that fails, the error is reported via err() and
               // System.exit(1) is called.

   645         String csn = Charset.defaultCharset().name();

   646         try {

   647             // use charset name encode() variant which provides caching.

   648             return encode(csn, coder, val);

   649         } catch (UnsupportedEncodingException x) {

   650             warnUnsupportedCharset(csn);

   651         }

   652         try {

   653             return encode("ISO-8859-1", coder, val);

   654         } catch (UnsupportedEncodingException x) {

   655             // If this code is hit during VM initialization, err(String) is

   656             // the only way we will be able to get any kind of error message.

   657             err("ISO-8859-1 charset not available: " + x.toString() + "\n");

   658             // If we can not find ISO-8859-1 (a required encoding) then things

   659             // are seriously wrong with the installation.

   660             System.exit(1);

                   // follows System.exit(1); present so the method has a return
                   // statement on this path
   661             return null;

   662         }

   663     }
       
   664 
       
   665     /**
       
   666      *  Print a message directly to stderr, bypassing all character conversion
       
   667      *  methods.
       
   668      *  @param msg  message to print
       
   669      */
   936      */
   670     private static native void err(String msg);
   937     static String newStringUTF8NoRepl(byte[] src, int off, int len) {
               // Builds a String from the UTF-8 bytes src[off, off + len) without
               // replacement: decodeUTF8_0 is called with doReplace == false, so
               // malformed input raises IllegalArgumentException.

               // presumably hasNegatives() reports whether any byte has its high
               // bit set - TODO confirm; if none does, the bytes are copied
               // unchanged into a LATIN1 string.
   938         if (COMPACT_STRINGS && !hasNegatives(src, off, len))

   939             return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);

   940         Result ret = decodeUTF8_0(src, off, len, false);

   941         return new String(ret.value, ret.coder);

   942     }
       
   943 
       
   944     /*
       
   945      * Throws IllegalArgumentException, instead of replacing, if unmappable.
       
   946      */
       
   947     static byte[] getBytesUTF8NoRepl(String s) {
               // Encodes s to UTF-8 by calling encodeUTF8 with the string's own
               // coder and backing array and doReplace == false (no replacement).

   948         return encodeUTF8(s.coder(), s.value(), false);

   949     }
   671 }
   950 }