src/java.base/share/classes/java/lang/StringCoding.java
author aleonard
Thu, 10 Oct 2019 10:28:55 +0100
changeset 58561 3968bf3673c5
parent 51447 8dfed4387312
permissions -rw-r--r--
8231717: Improve performance of charset decoding when charset is always compactable Reviewed-by: rriggs, redestad, alanb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
58561
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
     2
 * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2294
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2294
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2294
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2294
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2294
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
package java.lang;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
import java.io.UnsupportedEncodingException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
import java.lang.ref.SoftReference;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
import java.nio.ByteBuffer;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
import java.nio.CharBuffer;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
import java.nio.charset.Charset;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
import java.nio.charset.CharsetDecoder;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
import java.nio.charset.CharsetEncoder;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
import java.nio.charset.CharacterCodingException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
import java.nio.charset.CoderResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
import java.nio.charset.CodingErrorAction;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
import java.nio.charset.IllegalCharsetNameException;
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
    39
import java.nio.charset.MalformedInputException;
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
    40
import java.nio.charset.UnmappableCharacterException;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
import java.nio.charset.UnsupportedCharsetException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
import java.util.Arrays;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
    43
import jdk.internal.HotSpotIntrinsicCandidate;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
import sun.nio.cs.HistoricallyNamedCharset;
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
    45
import sun.nio.cs.ArrayDecoder;
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
    46
import sun.nio.cs.ArrayEncoder;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
    48
import static java.lang.String.LATIN1;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
    49
import static java.lang.String.UTF16;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
    50
import static java.lang.String.COMPACT_STRINGS;
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
    51
import static java.lang.Character.isSurrogate;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
    52
import static java.lang.Character.highSurrogate;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
    53
import static java.lang.Character.lowSurrogate;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
    54
import static java.lang.Character.isSupplementaryCodePoint;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
    55
import static java.lang.StringUTF16.putChar;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
    56
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * Utility class for string encoding and decoding.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
class StringCoding {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
    private StringCoding() { }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
41
dfebd2609e77 6671834: (str) Eliminate StringCoding.java compile warnings
martin
parents: 2
diff changeset
    65
    /** The cached coders for each thread */
32649
2ee9017c7597 8136583: Core libraries should use blessed modifier order
martin
parents: 25991
diff changeset
    66
    private static final ThreadLocal<SoftReference<StringDecoder>> decoder =
7803
56bc97d69d93 6880112: Project Coin: Port JDK core library code to use diamond operator
smarks
parents: 5506
diff changeset
    67
        new ThreadLocal<>();
32649
2ee9017c7597 8136583: Core libraries should use blessed modifier order
martin
parents: 25991
diff changeset
    68
    private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
7803
56bc97d69d93 6880112: Project Coin: Port JDK core library code to use diamond operator
smarks
parents: 5506
diff changeset
    69
        new ThreadLocal<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
49556
809b178407cc 8201179: Regression due loading java.nio.charset.StandardCharsets during bootstrap
redestad
parents: 49443
diff changeset
    71
    private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
809b178407cc 8201179: Regression due loading java.nio.charset.StandardCharsets during bootstrap
redestad
parents: 49443
diff changeset
    72
    private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
809b178407cc 8201179: Regression due loading java.nio.charset.StandardCharsets during bootstrap
redestad
parents: 49443
diff changeset
    73
    private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
809b178407cc 8201179: Regression due loading java.nio.charset.StandardCharsets during bootstrap
redestad
parents: 49443
diff changeset
    74
41
dfebd2609e77 6671834: (str) Eliminate StringCoding.java compile warnings
martin
parents: 2
diff changeset
    75
    private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
dfebd2609e77 6671834: (str) Eliminate StringCoding.java compile warnings
martin
parents: 2
diff changeset
    76
        SoftReference<T> sr = tl.get();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
        if (sr == null)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
            return null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
        return sr.get();
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
41
dfebd2609e77 6671834: (str) Eliminate StringCoding.java compile warnings
martin
parents: 2
diff changeset
    82
    private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
22581
e868cde95050 8032779: Update code in java.lang to use newer language features
psandoz
parents: 14342
diff changeset
    83
        tl.set(new SoftReference<>(ob));
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
    // Trim the given byte array to the given length
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
    87
    private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
    88
        if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
            return ba;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
        else
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
            return Arrays.copyOf(ba, len);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    private static int scale(int len, float expansionFactor) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
        // We need to perform double, not float, arithmetic; otherwise
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
        // we lose low order bits when len is larger than 2**24.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
        return (int)(len * (double)expansionFactor);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
    private static Charset lookupCharset(String csn) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
        if (Charset.isSupported(csn)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
                return Charset.forName(csn);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
            } catch (UnsupportedCharsetException x) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
                throw new Error(x);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
        return null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   111
    static class Result {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   112
        byte[] value;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   113
        byte coder;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   114
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   115
        Result with() {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   116
            coder = COMPACT_STRINGS ? LATIN1 : UTF16;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   117
            value = new byte[0];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   118
            return this;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   119
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   120
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   121
        Result with(char[] val, int off, int len) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   122
            if (String.COMPACT_STRINGS) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   123
                byte[] bs = StringUTF16.compress(val, off, len);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   124
                if (bs != null) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   125
                    value = bs;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   126
                    coder = LATIN1;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   127
                    return this;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   128
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   129
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   130
            coder = UTF16;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   131
            value = StringUTF16.toBytes(val, off, len);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   132
            return this;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   133
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   134
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   135
        Result with(byte[] val, byte coder) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   136
            this.coder = coder;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   137
            value = val;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   138
            return this;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   139
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   140
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   141
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   142
    @HotSpotIntrinsicCandidate
36929
0dc62e7c6a29 8144693: Intrinsify StringCoding.hasNegatives() on SPARC
thartmann
parents: 34885
diff changeset
   143
    public static boolean hasNegatives(byte[] ba, int off, int len) {
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   144
        for (int i = off; i < off + len; i++) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   145
            if (ba[i] < 0) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   146
                return true;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   147
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   148
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   149
        return false;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   150
    }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
    // -- Decoding --
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   153
    static class StringDecoder {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
        private final String requestedCharsetName;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
        private final Charset cs;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   156
        private final boolean isASCIICompatible;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
        private final CharsetDecoder cd;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   158
        protected final Result result;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   160
        StringDecoder(Charset cs, String rcn) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
            this.requestedCharsetName = rcn;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
            this.cs = cs;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
            this.cd = cs.newDecoder()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
                .onMalformedInput(CodingErrorAction.REPLACE)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   166
            this.result = new Result();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   167
            this.isASCIICompatible = (cd instanceof ArrayDecoder) &&
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   168
                    ((ArrayDecoder)cd).isASCIICompatible();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
        String charsetName() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
            if (cs instanceof HistoricallyNamedCharset)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
                return ((HistoricallyNamedCharset)cs).historicalName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
            return cs.name();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
        final String requestedCharsetName() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
            return requestedCharsetName;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   181
        Result decode(byte[] ba, int off, int len) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   182
            if (len == 0) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   183
                return result.with();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   184
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   185
            // fastpath for ascii compatible
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   186
            if (isASCIICompatible && !hasNegatives(ba, off, len)) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   187
                if (COMPACT_STRINGS) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   188
                    return result.with(Arrays.copyOfRange(ba, off, off + len),
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   189
                                      LATIN1);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   190
                } else {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   191
                    return result.with(StringLatin1.inflate(ba, off, len), UTF16);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   192
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   193
            }
58561
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   194
            // fastpath for always Latin1 decodable single byte
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   195
            if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   196
                byte[] dst = new byte[len];
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   197
                ((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   198
                return result.with(dst, LATIN1);
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   199
            }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
            int en = scale(len, cd.maxCharsPerByte());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
            char[] ca = new char[en];
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   202
            if (cd instanceof ArrayDecoder) {
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   203
                int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   204
                return result.with(ca, 0, clen);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   205
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   206
            cd.reset();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   207
            ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   208
            CharBuffer cb = CharBuffer.wrap(ca);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   209
            try {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   210
                CoderResult cr = cd.decode(bb, cb, true);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   211
                if (!cr.isUnderflow())
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   212
                    cr.throwException();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   213
                cr = cd.flush(cb);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   214
                if (!cr.isUnderflow())
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   215
                    cr.throwException();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   216
            } catch (CharacterCodingException x) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   217
                // Substitution is always enabled,
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   218
                // so this shouldn't happen
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   219
                throw new Error(x);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   220
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   221
            return result.with(ca, 0, cb.position());
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   222
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   223
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   224
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   225
    static Result decode(String charsetName, byte[] ba, int off, int len)
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
        throws UnsupportedEncodingException
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
    {
41
dfebd2609e77 6671834: (str) Eliminate StringCoding.java compile warnings
martin
parents: 2
diff changeset
   228
        StringDecoder sd = deref(decoder);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
        if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
                              || csn.equals(sd.charsetName()))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
            sd = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
                Charset cs = lookupCharset(csn);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   235
                if (cs != null) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   236
                    if (cs == UTF_8) {
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   237
                        return decodeUTF8(ba, off, len, true);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   238
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   239
                    if (cs == ISO_8859_1) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   240
                        return decodeLatin1(ba, off, len);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   241
                    }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   242
                    if (cs == US_ASCII) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   243
                        return decodeASCII(ba, off, len);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   244
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   245
                    sd = new StringDecoder(cs, csn);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   246
                }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
            } catch (IllegalCharsetNameException x) {}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
            if (sd == null)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
                throw new UnsupportedEncodingException(csn);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
            set(decoder, sd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
        return sd.decode(ba, off, len);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   255
    static Result decode(Charset cs, byte[] ba, int off, int len) {
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   256
        if (cs == UTF_8) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   257
            return decodeUTF8(ba, off, len, true);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   258
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   259
        if (cs == ISO_8859_1) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   260
            return decodeLatin1(ba, off, len);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   261
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   262
        if (cs == US_ASCII) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   263
            return decodeASCII(ba, off, len);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   264
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   265
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   266
        // (1)We never cache the "external" cs, the only benefit of creating
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   267
        // an additional StringDe/Encoder object to wrap it is to share the
25979
42e5d9f8087e 8054857: Fix typos in java.lang.** packages
prappo
parents: 22581
diff changeset
   268
        // de/encode() method. These SD/E objects are short-lived, the young-gen
42e5d9f8087e 8054857: Fix typos in java.lang.** packages
prappo
parents: 22581
diff changeset
   269
        // gc should be able to take care of them well. But the best approach
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   270
        // is still not to generate them if not really necessary.
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   271
        // (2)The defensive copy of the input byte/char[] has a big performance
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   272
        // impact, as well as the outgoing result byte/char[]. Need to do the
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   273
        // optimization check of (sm==null && classLoader0==null) for both.
44477
3d2229b0037b 8177631: Outdated performance advice in StringCoding
redestad
parents: 43790
diff changeset
   274
        // (3)There might be a timing gap in isTrusted setting. getClassLoader0()
25979
42e5d9f8087e 8054857: Fix typos in java.lang.** packages
prappo
parents: 22581
diff changeset
   275
        // is only checked (and then isTrusted gets set) when (SM==null). It is
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   276
        // possible that the SM==null for now but then SM is NOT null later
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   277
        // when safeTrim() is invoked...the "safe" way to do is to redundant
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   278
        // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
25979
42e5d9f8087e 8054857: Fix typos in java.lang.** packages
prappo
parents: 22581
diff changeset
   279
        // but it then can be argued that the SM is null when the operation
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   280
        // is started...
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   281
        CharsetDecoder cd = cs.newDecoder();
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   282
        // ascii fastpath
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   283
        if ((cd instanceof ArrayDecoder) &&
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   284
            ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   285
            return decodeLatin1(ba, off, len);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   286
        }
58561
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   287
        // fastpath for always Latin1 decodable single byte
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   288
        if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   289
            byte[] dst = new byte[len];
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   290
            ((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   291
            return new Result().with(dst, LATIN1);
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   292
        }
3968bf3673c5 8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents: 51447
diff changeset
   293
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   294
        int en = scale(len, cd.maxCharsPerByte());
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   295
        if (len == 0) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   296
            return new Result().with();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   297
        }
9547
454881baaca0 7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents: 9035
diff changeset
   298
        cd.onMalformedInput(CodingErrorAction.REPLACE)
454881baaca0 7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents: 9035
diff changeset
   299
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
454881baaca0 7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents: 9035
diff changeset
   300
          .reset();
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   301
        char[] ca = new char[en];
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   302
        if (cd instanceof ArrayDecoder) {
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   303
            int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   304
            return new Result().with(ca, 0, clen);
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   305
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   306
        if (cs.getClass().getClassLoader0() != null &&
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   307
            System.getSecurityManager() != null) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   308
            ba = Arrays.copyOfRange(ba, off, off + len);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   309
            off = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   310
        }
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   311
        ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   312
        CharBuffer cb = CharBuffer.wrap(ca);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   313
        try {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   314
            CoderResult cr = cd.decode(bb, cb, true);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   315
            if (!cr.isUnderflow())
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   316
                cr.throwException();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   317
            cr = cd.flush(cb);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   318
            if (!cr.isUnderflow())
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   319
                cr.throwException();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   320
        } catch (CharacterCodingException x) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   321
            // Substitution is always enabled,
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   322
            // so this shouldn't happen
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   323
            throw new Error(x);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   324
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   325
        return new Result().with(ca, 0, cb.position());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   328
    static Result decode(byte[] ba, int off, int len) {
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   329
        Charset cs = Charset.defaultCharset();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   330
        if (cs == UTF_8) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   331
            return decodeUTF8(ba, off, len, true);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   332
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   333
        if (cs == ISO_8859_1) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   334
            return decodeLatin1(ba, off, len);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   336
        if (cs == US_ASCII) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   337
            return decodeASCII(ba, off, len);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   339
        StringDecoder sd = deref(decoder);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   340
        if (sd == null || !cs.name().equals(sd.cs.name())) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   341
            sd = new StringDecoder(cs, cs.name());
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   342
            set(decoder, sd);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   343
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   344
        return sd.decode(ba, off, len);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
    // -- Encoding --
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
    private static class StringEncoder {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
        private Charset cs;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
        private CharsetEncoder ce;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   351
        private final boolean isASCIICompatible;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
        private final String requestedCharsetName;
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   353
        private final boolean isTrusted;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        private StringEncoder(Charset cs, String rcn) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
            this.requestedCharsetName = rcn;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
            this.cs = cs;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
            this.ce = cs.newEncoder()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
                .onMalformedInput(CodingErrorAction.REPLACE)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   361
            this.isTrusted = (cs.getClass().getClassLoader0() == null);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   362
            this.isASCIICompatible = (ce instanceof ArrayEncoder) &&
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   363
                    ((ArrayEncoder)ce).isASCIICompatible();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
        String charsetName() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
            if (cs instanceof HistoricallyNamedCharset)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
                return ((HistoricallyNamedCharset)cs).historicalName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
            return cs.name();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
        final String requestedCharsetName() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
            return requestedCharsetName;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   376
        byte[] encode(byte coder, byte[] val) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   377
            // fastpath for ascii compatible
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   378
            if (coder == LATIN1 && isASCIICompatible &&
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   379
                !hasNegatives(val, 0, val.length)) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   380
                return Arrays.copyOf(val, val.length);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   381
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   382
            int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
            int en = scale(len, ce.maxBytesPerChar());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
            byte[] ba = new byte[en];
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   385
            if (len == 0) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
                return ba;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   387
            }
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   388
            if (ce instanceof ArrayEncoder) {
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   389
                int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   390
                                              : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   391
                if (blen != -1) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   392
                    return safeTrim(ba, blen, isTrusted);
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   393
                }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
            }
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   395
            char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   396
                                           : StringUTF16.toChars(val);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   397
            ce.reset();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   398
            ByteBuffer bb = ByteBuffer.wrap(ba);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   399
            CharBuffer cb = CharBuffer.wrap(ca, 0, len);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   400
            try {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   401
                CoderResult cr = ce.encode(cb, bb, true);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   402
                if (!cr.isUnderflow())
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   403
                    cr.throwException();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   404
                cr = ce.flush(bb);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   405
                if (!cr.isUnderflow())
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   406
                    cr.throwException();
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   407
            } catch (CharacterCodingException x) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   408
                // Substitution is always enabled,
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   409
                // so this shouldn't happen
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   410
                throw new Error(x);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   411
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   412
            return safeTrim(ba, bb.position(), isTrusted);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   416
    static byte[] encode(String charsetName, byte coder, byte[] val)
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   417
        throws UnsupportedEncodingException
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   418
    {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   419
        StringEncoder se = deref(encoder);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   420
        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   421
        if ((se == null) || !(csn.equals(se.requestedCharsetName())
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   422
                              || csn.equals(se.charsetName()))) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   423
            se = null;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   424
            try {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   425
                Charset cs = lookupCharset(csn);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   426
                if (cs != null) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   427
                    if (cs == UTF_8) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   428
                        return encodeUTF8(coder, val, true);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   429
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   430
                    if (cs == ISO_8859_1) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   431
                        return encode8859_1(coder, val);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   432
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   433
                    if (cs == US_ASCII) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   434
                        return encodeASCII(coder, val);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   435
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   436
                    se = new StringEncoder(cs, csn);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   437
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   438
            } catch (IllegalCharsetNameException x) {}
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   439
            if (se == null) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   440
                throw new UnsupportedEncodingException (csn);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   441
            }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   442
            set(encoder, se);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   443
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   444
        return se.encode(coder, val);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   445
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   446
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   447
    static byte[] encode(Charset cs, byte coder, byte[] val) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   448
        if (cs == UTF_8) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   449
            return encodeUTF8(coder, val, true);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   450
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   451
        if (cs == ISO_8859_1) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   452
            return encode8859_1(coder, val);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   453
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   454
        if (cs == US_ASCII) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   455
            return encodeASCII(coder, val);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   456
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   457
        CharsetEncoder ce = cs.newEncoder();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   458
        // fastpath for ascii compatible
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   459
        if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   460
                                 ((ArrayEncoder)ce).isASCIICompatible() &&
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   461
                                 !hasNegatives(val, 0, val.length)))) {
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   462
            return Arrays.copyOf(val, val.length);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   463
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   464
        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   465
        int en = scale(len, ce.maxBytesPerChar());
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   466
        byte[] ba = new byte[en];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   467
        if (len == 0) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   468
            return ba;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   469
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   470
        ce.onMalformedInput(CodingErrorAction.REPLACE)
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   471
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   472
          .reset();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   473
        if (ce instanceof ArrayEncoder) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   474
            int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   475
                                          : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   476
            if (blen != -1) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   477
                return safeTrim(ba, blen, true);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   478
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   479
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   480
        boolean isTrusted = cs.getClass().getClassLoader0() == null ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   481
                            System.getSecurityManager() == null;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   482
        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   483
                                       : StringUTF16.toChars(val);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   484
        ByteBuffer bb = ByteBuffer.wrap(ba);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   485
        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   486
        try {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   487
            CoderResult cr = ce.encode(cb, bb, true);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   488
            if (!cr.isUnderflow())
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   489
                cr.throwException();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   490
            cr = ce.flush(bb);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   491
            if (!cr.isUnderflow())
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   492
                cr.throwException();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   493
        } catch (CharacterCodingException x) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   494
            throw new Error(x);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   495
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   496
        return safeTrim(ba, bb.position(), isTrusted);
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   497
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   498
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   499
    static byte[] encode(byte coder, byte[] val) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   500
        Charset cs = Charset.defaultCharset();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   501
        if (cs == UTF_8) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   502
            return encodeUTF8(coder, val, true);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   503
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   504
        if (cs == ISO_8859_1) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   505
            return encode8859_1(coder, val);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   506
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   507
        if (cs == US_ASCII) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   508
            return encodeASCII(coder, val);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   509
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   510
        StringEncoder se = deref(encoder);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   511
        if (se == null || !cs.name().equals(se.cs.name())) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   512
            se = new StringEncoder(cs, cs.name());
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   513
            set(encoder, se);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   514
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   515
        return se.encode(coder, val);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   516
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   517
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   518
    /**
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   519
     *  Print a message directly to stderr, bypassing all character conversion
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   520
     *  methods.
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   521
     *  @param msg  message to print
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   522
     */
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   523
    // Declared native: there is no Java body here, the implementation is
    // supplied by native code outside this source file.
    private static native void err(String msg);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   524
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   525
     /* The cached Result for each thread */
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   526
    private static final ThreadLocal<StringCoding.Result>
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   527
        resultCached = new ThreadLocal<>() {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   528
            protected StringCoding.Result initialValue() {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   529
                return new StringCoding.Result();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   530
            }};
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   531
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   532
    ////////////////////////// ascii //////////////////////////////
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   533
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   534
    private static Result decodeASCII(byte[] ba, int off, int len) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   535
        Result result = resultCached.get();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   536
        if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   537
            return result.with(Arrays.copyOfRange(ba, off, off + len),
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   538
                               LATIN1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   539
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   540
        byte[] dst = new byte[len<<1];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   541
        int dp = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   542
        while (dp < len) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   543
            int b = ba[off++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   544
            putChar(dst, dp++, (b >= 0) ? (char)b : repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   545
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   546
        return result.with(dst, UTF16);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   547
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   548
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   549
    private static byte[] encodeASCII(byte coder, byte[] val) {
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   550
        if (coder == LATIN1) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   551
            byte[] dst = new byte[val.length];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   552
            for (int i = 0; i < val.length; i++) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   553
                if (val[i] < 0) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   554
                    dst[i] = '?';
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   555
                } else {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   556
                    dst[i] = val[i];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   557
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   558
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   559
            return dst;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   560
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   561
        int len = val.length >> 1;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   562
        byte[] dst = new byte[len];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   563
        int dp = 0;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   564
        for (int i = 0; i < len; i++) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   565
            char c = StringUTF16.getChar(val, i);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   566
            if (c < 0x80) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   567
                dst[dp++] = (byte)c;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   568
                continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   569
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   570
            if (Character.isHighSurrogate(c) && i + 1 < len &&
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   571
                Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   572
                i++;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   573
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   574
            dst[dp++] = '?';
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   575
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   576
        if (len == dp) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   577
            return dst;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   578
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   579
        return Arrays.copyOf(dst, dp);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   580
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   581
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   582
    ////////////////////////// latin1/8859_1 ///////////////////////////
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   583
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   584
    private static Result decodeLatin1(byte[] ba, int off, int len) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   585
       Result result = resultCached.get();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   586
       if (COMPACT_STRINGS) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   587
           return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   588
       } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   589
           return result.with(StringLatin1.inflate(ba, off, len), UTF16);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   590
       }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   591
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   592
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   593
    @HotSpotIntrinsicCandidate
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   594
    private static int implEncodeISOArray(byte[] sa, int sp,
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   595
                                          byte[] da, int dp, int len) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   596
        int i = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   597
        for (; i < len; i++) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   598
            char c = StringUTF16.getChar(sa, sp++);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   599
            if (c > '\u00FF')
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   600
                break;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   601
            da[dp++] = (byte)c;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   602
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   603
        return i;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   604
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   605
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   606
    private static byte[] encode8859_1(byte coder, byte[] val) {
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   607
        return encode8859_1(coder, val, true);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   608
    }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   609
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   610
    private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   611
        if (coder == LATIN1) {
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   612
            return Arrays.copyOf(val, val.length);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   613
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   614
        int len = val.length >> 1;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   615
        byte[] dst = new byte[len];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   616
        int dp = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   617
        int sp = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   618
        int sl = len;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   619
        while (sp < sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   620
            int ret = implEncodeISOArray(val, sp, dst, dp, len);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   621
            sp = sp + ret;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   622
            dp = dp + ret;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   623
            if (ret != len) {
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   624
                if (!doReplace) {
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   625
                    throwUnmappable(sp, 1);
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   626
                }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   627
                char c = StringUTF16.getChar(val, sp++);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   628
                if (Character.isHighSurrogate(c) && sp < sl &&
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   629
                    Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   630
                    sp++;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   631
                }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   632
                dst[dp++] = '?';
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   633
                len = sl - sp;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   634
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   635
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   636
        if (dp == dst.length) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   637
            return dst;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   638
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   639
        return Arrays.copyOf(dst, dp);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   640
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   641
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   642
    //////////////////////////////// utf8 ////////////////////////////////////
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   643
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   644
    private static boolean isNotContinuation(int b) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   645
        return (b & 0xc0) != 0x80;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   646
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   647
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   648
    private static boolean isMalformed3(int b1, int b2, int b3) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   649
        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   650
               (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   651
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   652
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   653
    private static boolean isMalformed3_2(int b1, int b2) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   654
        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   655
               (b2 & 0xc0) != 0x80;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   656
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   657
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   658
    private static boolean isMalformed4(int b2, int b3, int b4) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   659
        return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   660
               (b4 & 0xc0) != 0x80;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   661
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   662
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   663
    private static boolean isMalformed4_2(int b1, int b2) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   664
        return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   665
               (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   666
               (b2 & 0xc0) != 0x80;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   667
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   668
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   669
    private static boolean isMalformed4_3(int b3) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   670
        return (b3 & 0xc0) != 0x80;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   671
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   672
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   673
    // for nb == 3/4
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   674
    private static int malformedN(byte[] src, int sp, int nb) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   675
        if (nb == 3) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   676
            int b1 = src[sp++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   677
            int b2 = src[sp++];    // no need to lookup b3
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   678
            return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   679
                    isNotContinuation(b2)) ? 1 : 2;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   680
        } else if (nb == 4) { // we don't care the speed here
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   681
            int b1 = src[sp++] & 0xff;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   682
            int b2 = src[sp++] & 0xff;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   683
            if (b1 > 0xf4 ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   684
                (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   685
                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   686
                isNotContinuation(b2))
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   687
                return 1;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   688
            if (isNotContinuation(src[sp++]))
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   689
                return 2;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   690
            return 3;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   692
        assert false;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   693
        return -1;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   694
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   695
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   696
    private static void throwMalformed(int off, int nb) {
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   697
        String msg = "malformed input off : " + off + ", length : " + nb;
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   698
        throw new IllegalArgumentException(msg, new MalformedInputException(nb));
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   699
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   700
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   701
    private static void throwMalformed(byte[] val) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   702
        int dp = 0;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   703
        while (dp < val.length && val[dp] >=0) { dp++; }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   704
        throwMalformed(dp, 1);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   705
    }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   706
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   707
    private static void throwUnmappable(int off, int nb) {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   708
        String msg = "malformed input off : " + off + ", length : " + nb;
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   709
        throw new IllegalArgumentException(msg, new UnmappableCharacterException(nb));
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   710
    }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   711
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   712
    private static void throwUnmappable(byte[] val) {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   713
        int dp = 0;
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   714
        while (dp < val.length && val[dp] >=0) { dp++; }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   715
        throwUnmappable(dp, 1);
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   716
    }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   717
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   718
    // U+FFFD, the char substituted for input bytes that cannot be decoded
    // (decodeASCII writes it in place of every negative byte).
    private static char repl = '\ufffd';
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   719
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   720
    private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   721
        // ascii-bais, which has a relative impact to the non-ascii-only bytes
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   722
        if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   723
            return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   724
                                           LATIN1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   725
        return decodeUTF8_0(src, sp, len, doReplace);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   728
    private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   729
        Result ret = resultCached.get();
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   730
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   731
        int sl = sp + len;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   732
        int dp = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   733
        byte[] dst = new byte[len];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   734
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   735
        if (COMPACT_STRINGS) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   736
            while (sp < sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   737
                int b1 = src[sp];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   738
                if (b1 >= 0) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   739
                    dst[dp++] = (byte)b1;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   740
                    sp++;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   741
                    continue;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   742
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   743
                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   744
                    sp + 1 < sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   745
                    int b2 = src[sp + 1];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   746
                    if (!isNotContinuation(b2)) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   747
                        dst[dp++] = (byte)(((b1 << 6) ^ b2)^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   748
                                           (((byte) 0xC0 << 6) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   749
                                           ((byte) 0x80 << 0)));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   750
                        sp += 2;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   751
                        continue;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   752
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   753
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   754
                // anything not a latin1, including the repl
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   755
                // we have to go with the utf16
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   756
                break;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   757
            }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   758
            if (sp == sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   759
                if (dp != dst.length) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   760
                    dst = Arrays.copyOf(dst, dp);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   761
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   762
                return ret.with(dst, LATIN1);
2294
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   763
            }
4259115772f7 6636323: Optimize handling of builtin charsets
sherman
parents: 715
diff changeset
   764
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   765
        if (dp == 0) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   766
            dst = new byte[len << 1];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   767
        } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   768
            byte[] buf = new byte[len << 1];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   769
            StringLatin1.inflate(dst, 0, buf, 0, dp);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   770
            dst = buf;
33663
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents: 32649
diff changeset
   771
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   772
        while (sp < sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   773
            int b1 = src[sp++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   774
            if (b1 >= 0) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   775
                putChar(dst, dp++, (char) b1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   776
            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   777
                if (sp < sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   778
                    int b2 = src[sp++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   779
                    if (isNotContinuation(b2)) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   780
                        if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   781
                            throwMalformed(sp - 1, 1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   782
                        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   783
                        putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   784
                        sp--;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   785
                    } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   786
                        putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   787
                                                  (((byte) 0xC0 << 6) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   788
                                                  ((byte) 0x80 << 0))));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   789
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   790
                    continue;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   791
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   792
                if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   793
                    throwMalformed(sp, 1);  // underflow()
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   794
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   795
                putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   796
                break;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   797
            } else if ((b1 >> 4) == -2) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   798
                if (sp + 1 < sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   799
                    int b2 = src[sp++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   800
                    int b3 = src[sp++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   801
                    if (isMalformed3(b1, b2, b3)) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   802
                        if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   803
                            throwMalformed(sp - 3, 3);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   804
                        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   805
                        putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   806
                        sp -= 3;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   807
                        sp += malformedN(src, sp, 3);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   808
                    } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   809
                        char c = (char)((b1 << 12) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   810
                                        (b2 <<  6) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   811
                                        (b3 ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   812
                                         (((byte) 0xE0 << 12) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   813
                                         ((byte) 0x80 <<  6) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   814
                                         ((byte) 0x80 <<  0))));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   815
                        if (isSurrogate(c)) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   816
                            if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   817
                                throwMalformed(sp - 3, 3);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   818
                            }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   819
                            putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   820
                        } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   821
                            putChar(dst, dp++, c);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   822
                        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   823
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   824
                    continue;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   825
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   826
                if (sp  < sl && isMalformed3_2(b1, src[sp])) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   827
                    if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   828
                        throwMalformed(sp - 1, 2);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   829
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   830
                    putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   831
                    continue;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   832
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   833
                if (!doReplace){
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   834
                    throwMalformed(sp, 1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   835
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   836
                putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   837
                break;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   838
            } else if ((b1 >> 3) == -2) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   839
                if (sp + 2 < sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   840
                    int b2 = src[sp++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   841
                    int b3 = src[sp++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   842
                    int b4 = src[sp++];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   843
                    int uc = ((b1 << 18) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   844
                              (b2 << 12) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   845
                              (b3 <<  6) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   846
                              (b4 ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   847
                               (((byte) 0xF0 << 18) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   848
                               ((byte) 0x80 << 12) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   849
                               ((byte) 0x80 <<  6) ^
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   850
                               ((byte) 0x80 <<  0))));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   851
                    if (isMalformed4(b2, b3, b4) ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   852
                        !isSupplementaryCodePoint(uc)) { // shortest form check
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   853
                        if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   854
                            throwMalformed(sp - 4, 4);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   855
                        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   856
                        putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   857
                        sp -= 4;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   858
                        sp += malformedN(src, sp, 4);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   859
                    } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   860
                        putChar(dst, dp++, highSurrogate(uc));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   861
                        putChar(dst, dp++, lowSurrogate(uc));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   862
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   863
                    continue;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   864
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   865
                b1 &= 0xff;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   866
                if (b1 > 0xf4 ||
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   867
                    sp  < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   868
                    if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   869
                        throwMalformed(sp - 1, 1);  // or 2
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   870
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   871
                    putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   872
                    continue;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   873
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   874
                if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   875
                    throwMalformed(sp - 1, 1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   876
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   877
                sp++;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   878
                putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   879
                if (sp  < sl && isMalformed4_3(src[sp])) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   880
                    continue;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   881
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   882
                break;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   883
            } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   884
                if (!doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   885
                    throwMalformed(sp - 1, 1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   886
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   887
                putChar(dst, dp++, repl);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   888
            }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   889
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   890
        if (dp != len) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   891
            dst = Arrays.copyOf(dst, dp << 1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   892
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   893
        return ret.with(dst, UTF16);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   896
    private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   897
        if (coder == UTF16)
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   898
            return encodeUTF8_UTF16(val, doReplace);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   899
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   900
        if (!hasNegatives(val, 0, val.length))
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   901
            return Arrays.copyOf(val, val.length);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   902
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   903
        int dp = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   904
        byte[] dst = new byte[val.length << 1];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   905
        for (int sp = 0; sp < val.length; sp++) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   906
            byte c = val[sp];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   907
            if (c < 0) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   908
                dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   909
                dst[dp++] = (byte)(0x80 | (c & 0x3f));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   910
            } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   911
                dst[dp++] = c;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   912
            }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
        }
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   914
        if (dp == dst.length)
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   915
            return dst;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   916
        return Arrays.copyOf(dst, dp);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
    }
34885
63d4a8c733f8 8146484: Examine sun.misc.MessageUtils
chegar
parents: 33663
diff changeset
   918
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   919
    private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   920
        int dp = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   921
        int sp = 0;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   922
        int sl = val.length >> 1;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   923
        byte[] dst = new byte[sl * 3];
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   924
        char c;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   925
        while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   926
            // ascii fast loop;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   927
            dst[dp++] = (byte)c;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   928
            sp++;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   929
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   930
        while (sp < sl) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   931
            c = StringUTF16.getChar(val, sp++);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   932
            if (c < 0x80) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   933
                dst[dp++] = (byte)c;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   934
            } else if (c < 0x800) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   935
                dst[dp++] = (byte)(0xc0 | (c >> 6));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   936
                dst[dp++] = (byte)(0x80 | (c & 0x3f));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   937
            } else if (Character.isSurrogate(c)) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   938
                int uc = -1;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   939
                char c2;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   940
                if (Character.isHighSurrogate(c) && sp < sl &&
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   941
                    Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   942
                    uc = Character.toCodePoint(c, c2);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   943
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   944
                if (uc < 0) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   945
                    if (doReplace) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   946
                        dst[dp++] = '?';
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   947
                    } else {
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
   948
                        throwUnmappable(sp - 1, 1); // or 2, does not matter here
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   949
                    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   950
                } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   951
                    dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   952
                    dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   953
                    dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   954
                    dst[dp++] = (byte)(0x80 | (uc & 0x3f));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   955
                    sp++;  // 2 chars
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   956
                }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   957
            } else {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   958
                // 3 bytes, 16 bits
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   959
                dst[dp++] = (byte)(0xe0 | ((c >> 12)));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   960
                dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   961
                dst[dp++] = (byte)(0x80 | (c & 0x3f));
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   962
            }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   963
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   964
        if (dp == dst.length) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   965
            return dst;
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   966
        }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   967
        return Arrays.copyOf(dst, dp);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   968
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   969
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   970
    ////////////////////// for j.u.z.ZipCoder //////////////////////////
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   971
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   972
    /*
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   973
     * Throws iae, instead of replacing, if malformed or unmappable.
34885
63d4a8c733f8 8146484: Examine sun.misc.MessageUtils
chegar
parents: 33663
diff changeset
   974
     */
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   975
    static String newStringUTF8NoRepl(byte[] src, int off, int len) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   976
        if (COMPACT_STRINGS && !hasNegatives(src, off, len))
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   977
            return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   978
        Result ret = decodeUTF8_0(src, off, len, false);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   979
        return new String(ret.value, ret.coder);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   980
    }
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   981
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   982
    /*
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   983
     * Throws iae, instead of replacing, if unmappable.
48262
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   984
     */
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   985
    static byte[] getBytesUTF8NoRepl(String s) {
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   986
        return encodeUTF8(s.coder(), s.value(), false);
daf3b49f4839 8184947: ZipCoder performance improvements
sherman
parents: 47216
diff changeset
   987
    }
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   988
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   989
    ////////////////////// for j.n.f.Files //////////////////////////
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   990
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   991
    private static boolean isASCII(byte[] src) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   992
        return !hasNegatives(src, 0, src.length);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   993
    }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   994
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   995
    private static String newStringLatin1(byte[] src) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   996
        if (COMPACT_STRINGS)
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   997
           return new String(src, LATIN1);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   998
        return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
   999
    }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1000
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1001
    static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1002
        try {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1003
            return newStringNoRepl1(src, cs);
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1004
        } catch (IllegalArgumentException e) {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1005
            //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1006
            Throwable cause = e.getCause();
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1007
            if (cause instanceof MalformedInputException) {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1008
                throw (MalformedInputException)cause;
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1009
            }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1010
            throw (CharacterCodingException)cause;
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1011
        }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1012
    }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1013
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1014
    static String newStringNoRepl1(byte[] src, Charset cs) {
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1015
        if (cs == UTF_8) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1016
            if (COMPACT_STRINGS && isASCII(src))
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1017
                return new String(src, LATIN1);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1018
            Result ret = decodeUTF8_0(src, 0, src.length, false);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1019
            return new String(ret.value, ret.coder);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1020
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1021
        if (cs == ISO_8859_1) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1022
            return newStringLatin1(src);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1023
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1024
        if (cs == US_ASCII) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1025
            if (isASCII(src)) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1026
                return newStringLatin1(src);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1027
            } else {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1028
                throwMalformed(src);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1029
            }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1030
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1031
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1032
        CharsetDecoder cd = cs.newDecoder();
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1033
        // ascii fastpath
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1034
        if ((cd instanceof ArrayDecoder) &&
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1035
            ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1036
            return newStringLatin1(src);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1037
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1038
        int len = src.length;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1039
        if (len == 0) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1040
            return "";
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1041
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1042
        int en = scale(len, cd.maxCharsPerByte());
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1043
        char[] ca = new char[en];
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1044
        if (cs.getClass().getClassLoader0() != null &&
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1045
            System.getSecurityManager() != null) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1046
            src = Arrays.copyOf(src, len);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1047
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1048
        ByteBuffer bb = ByteBuffer.wrap(src);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1049
        CharBuffer cb = CharBuffer.wrap(ca);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1050
        try {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1051
            CoderResult cr = cd.decode(bb, cb, true);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1052
            if (!cr.isUnderflow())
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1053
                cr.throwException();
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1054
            cr = cd.flush(cb);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1055
            if (!cr.isUnderflow())
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1056
                cr.throwException();
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1057
        } catch (CharacterCodingException x) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1058
            throw new IllegalArgumentException(x);  // todo
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1059
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1060
        Result ret = resultCached.get().with(ca, 0, cb.position());
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1061
        return new String(ret.value, ret.coder);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1062
    }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1063
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1064
    /*
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1065
     * Throws CCE, instead of replacing, if unmappable.
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1066
     */
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1067
    static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1068
        try {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1069
            return getBytesNoRepl1(s, cs);
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1070
        } catch (IllegalArgumentException e) {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1071
            //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1072
            Throwable cause = e.getCause();
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1073
            if (cause instanceof UnmappableCharacterException) {
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1074
                throw (UnmappableCharacterException)cause;
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1075
            }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1076
            throw (CharacterCodingException)cause;
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1077
        }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1078
    }
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1079
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1080
    static byte[] getBytesNoRepl1(String s, Charset cs) {
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1081
        byte[] val = s.value();
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1082
        byte coder = s.coder();
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1083
        if (cs == UTF_8) {
51447
8dfed4387312 8209576: java.nio.file.Files.writeString writes garbled UTF-16 instead of UTF-8
joehw
parents: 50820
diff changeset
  1084
            if (coder == LATIN1 && isASCII(val)) {
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1085
                return val;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1086
            }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1087
            return encodeUTF8(coder, val, false);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1088
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1089
        if (cs == ISO_8859_1) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1090
            if (coder == LATIN1) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1091
                return val;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1092
            }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1093
            return encode8859_1(coder, val, false);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1094
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1095
        if (cs == US_ASCII) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1096
            if (coder == LATIN1) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1097
                if (isASCII(val)) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1098
                    return val;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1099
                } else {
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1100
                    throwUnmappable(val);
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1101
                }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1102
            }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1103
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1104
        CharsetEncoder ce = cs.newEncoder();
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1105
        // fastpath for ascii compatible
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1106
        if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1107
                                 ((ArrayEncoder)ce).isASCIICompatible() &&
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1108
                                 isASCII(val)))) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1109
            return val;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1110
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1111
        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1112
        int en = scale(len, ce.maxBytesPerChar());
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1113
        byte[] ba = new byte[en];
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1114
        if (len == 0) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1115
            return ba;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1116
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1117
        if (ce instanceof ArrayEncoder) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1118
            int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1119
                                          : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1120
            if (blen != -1) {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1121
                return safeTrim(ba, blen, true);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1122
            }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1123
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1124
        boolean isTrusted = cs.getClass().getClassLoader0() == null ||
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1125
                            System.getSecurityManager() == null;
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1126
        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1127
                                       : StringUTF16.toChars(val);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1128
        ByteBuffer bb = ByteBuffer.wrap(ba);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1129
        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1130
        try {
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1131
            CoderResult cr = ce.encode(cb, bb, true);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1132
            if (!cr.isUnderflow())
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1133
                cr.throwException();
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1134
            cr = ce.flush(bb);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1135
            if (!cr.isUnderflow())
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1136
                cr.throwException();
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1137
        } catch (CharacterCodingException x) {
50820
35f52a3cd6bd 8205058: (fs) Files read/writeString should throw CharacterCodingException instead of IOException with an IllegalArgumentException as cause
joehw
parents: 50552
diff changeset
  1138
            throw new IllegalArgumentException(x);
50552
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1139
        }
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1140
        return safeTrim(ba, bb.position(), isTrusted);
7439ceaae8e4 8201276: (fs) Add methods to Files for reading/writing a string from/to a file
joehw
parents: 49556
diff changeset
  1141
    }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
}