jdk/src/java.base/share/classes/java/lang/StringDecoderUTF8.java
author rriggs
Tue, 16 Feb 2016 11:36:20 -0500
changeset 35978 752d505da547
parent 33663 2cd62a4bd471
permissions -rw-r--r--
8148775: Spec for j.l.ProcessBuilder.Redirect.DISCARD need to be improved Reviewed-by: martin
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.lang;

import java.nio.charset.Charset;
import java.util.Arrays;

import static java.lang.String.LATIN1;
import static java.lang.String.UTF16;
import static java.lang.String.COMPACT_STRINGS;
import static java.lang.Character.isSurrogate;
import static java.lang.Character.highSurrogate;
import static java.lang.Character.lowSurrogate;
import static java.lang.Character.isSupplementaryCodePoint;
import static java.lang.StringUTF16.putChar;

class StringDecoderUTF8 extends StringCoding.StringDecoder {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    41
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    42
    StringDecoderUTF8(Charset cs, String rcn) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    43
        super(cs, rcn);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    44
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    45
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    46
    private static boolean isNotContinuation(int b) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    47
        return (b & 0xc0) != 0x80;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    48
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    49
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    50
    private static boolean isMalformed3(int b1, int b2, int b3) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    51
        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    52
               (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    53
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    54
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    55
    private static boolean isMalformed3_2(int b1, int b2) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    56
        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    57
               (b2 & 0xc0) != 0x80;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    58
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    59
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    60
    private static boolean isMalformed4(int b2, int b3, int b4) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    61
        return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    62
               (b4 & 0xc0) != 0x80;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    63
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    64
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    65
    private static boolean isMalformed4_2(int b1, int b2) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    66
        return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    67
               (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    68
               (b2 & 0xc0) != 0x80;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    69
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    70
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    71
    private static boolean isMalformed4_3(int b3) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    72
        return (b3 & 0xc0) != 0x80;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    73
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    74
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    75
    // for nb == 3/4
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    76
    private static int malformedN(byte[] src, int sp, int nb) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    77
        if (nb == 3) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    78
            int b1 = src[sp++];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    79
            int b2 = src[sp++];    // no need to lookup b3
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    80
            return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    81
                    isNotContinuation(b2)) ? 1 : 2;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    82
        } else if (nb == 4) { // we don't care the speed here
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    83
            int b1 = src[sp++] & 0xff;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    84
            int b2 = src[sp++] & 0xff;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    85
            if (b1 > 0xf4 ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    86
                (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    87
                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    88
                isNotContinuation(b2))
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    89
                return 1;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    90
            if (isNotContinuation(src[sp++]))
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    91
                return 2;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    92
            return 3;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    93
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    94
        assert false;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    95
        return -1;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    96
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    97
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    98
    private static char repl = '\ufffd';
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
    99
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   100
    StringCoding.Result decode(byte[] src, int sp, int len) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   101
        return decode(src, sp, len, result);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   102
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   103
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   104
    static StringCoding.Result decode(byte[] src, int sp, int len,
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   105
                                      StringCoding.Result ret) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   106
        int sl = sp + len;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   107
        byte[] dst = new byte[len];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   108
        int dp = 0;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   109
        if (COMPACT_STRINGS) {   // Latin1 only loop
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   110
            while (sp < sl) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   111
                int b1 = src[sp];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   112
                if (b1 >= 0) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   113
                    dst[dp++] = (byte)b1;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   114
                    sp++;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   115
                    continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   116
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   117
                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   118
                    sp + 1 < sl) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   119
                    int b2 = src[sp + 1];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   120
                    if (!isNotContinuation(b2)) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   121
                        dst[dp++] = (byte)(((b1 << 6) ^ b2)^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   122
                                           (((byte) 0xC0 << 6) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   123
                                           ((byte) 0x80 << 0)));
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   124
                        sp += 2;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   125
                        continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   126
                    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   127
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   128
                // anything not a latin1, including the repl
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   129
                // we have to go with the utf16
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   130
                break;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   131
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   132
            if (sp == sl) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   133
                if (dp != dst.length) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   134
                    dst = Arrays.copyOf(dst, dp);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   135
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   136
                return ret.with(dst, LATIN1);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   137
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   138
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   139
        if (dp == 0) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   140
            dst = new byte[len << 1];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   141
        } else {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   142
            byte[] buf = new byte[len << 1];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   143
            StringLatin1.inflate(dst, 0, buf, 0, dp);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   144
            dst = buf;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   145
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   146
        while (sp < sl) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   147
            int b1 = src[sp++];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   148
            if (b1 >= 0) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   149
                putChar(dst, dp++, (char) b1);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   150
            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   151
                if (sp < sl) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   152
                    int b2 = src[sp++];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   153
                    if (isNotContinuation(b2)) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   154
                        putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   155
                        sp--;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   156
                    } else {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   157
                        putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   158
                                                  (((byte) 0xC0 << 6) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   159
                                                  ((byte) 0x80 << 0))));
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   160
                    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   161
                    continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   162
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   163
                putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   164
                break;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   165
            } else if ((b1 >> 4) == -2) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   166
                if (sp + 1 < sl) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   167
                    int b2 = src[sp++];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   168
                    int b3 = src[sp++];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   169
                    if (isMalformed3(b1, b2, b3)) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   170
                        putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   171
                        sp -= 3;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   172
                        sp += malformedN(src, sp, 3);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   173
                    } else {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   174
                        char c = (char)((b1 << 12) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   175
                                        (b2 <<  6) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   176
                                        (b3 ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   177
                                         (((byte) 0xE0 << 12) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   178
                                         ((byte) 0x80 <<  6) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   179
                                         ((byte) 0x80 <<  0))));
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   180
                        putChar(dst, dp++, isSurrogate(c) ?  repl : c);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   181
                    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   182
                    continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   183
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   184
                if (sp  < sl && isMalformed3_2(b1, src[sp])) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   185
                    putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   186
                    continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   187
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   188
                putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   189
                break;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   190
            } else if ((b1 >> 3) == -2) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   191
                if (sp + 2 < sl) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   192
                    int b2 = src[sp++];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   193
                    int b3 = src[sp++];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   194
                    int b4 = src[sp++];
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   195
                    int uc = ((b1 << 18) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   196
                              (b2 << 12) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   197
                              (b3 <<  6) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   198
                              (b4 ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   199
                               (((byte) 0xF0 << 18) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   200
                               ((byte) 0x80 << 12) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   201
                               ((byte) 0x80 <<  6) ^
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   202
                               ((byte) 0x80 <<  0))));
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   203
                    if (isMalformed4(b2, b3, b4) ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   204
                        !isSupplementaryCodePoint(uc)) { // shortest form check
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   205
                        putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   206
                        sp -= 4;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   207
                        sp += malformedN(src, sp, 4);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   208
                    } else {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   209
                        putChar(dst, dp++, highSurrogate(uc));
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   210
                        putChar(dst, dp++, lowSurrogate(uc));
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   211
                    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   212
                    continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   213
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   214
                b1 &= 0xff;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   215
                if (b1 > 0xf4 ||
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   216
                    sp  < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   217
                    putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   218
                    continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   219
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   220
                sp++;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   221
                putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   222
                if (sp  < sl && isMalformed4_3(src[sp])) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   223
                    continue;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   224
                }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   225
                break;
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   226
            } else {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   227
                putChar(dst, dp++, repl);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   228
            }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   229
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   230
        if (dp != len) {
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   231
            dst = Arrays.copyOf(dst, dp << 1);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   232
        }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   233
        return ret.with(dst, UTF16);
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   234
    }
2cd62a4bd471 8141132: JEP 254: Compact Strings
thartmann
parents:
diff changeset
   235
}