src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64StringLatin1InflateOp.java
author dlong
Tue, 24 Sep 2019 12:47:15 -0400
changeset 58299 6df94ce3ab2f
parent 55631 1831d28d34bb
child 58679 9c3209ff7550
child 58877 aec7bf35d6f5
permissions -rw-r--r--
8229201: Update Graal Reviewed-by: kvn
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     1
/*
 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    23
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    24
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    25
package org.graalvm.compiler.lir.amd64;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    26
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    27
import static jdk.vm.ci.amd64.AMD64.k1;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    28
import static jdk.vm.ci.amd64.AMD64.k2;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    29
import static jdk.vm.ci.amd64.AMD64.rdi;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    30
import static jdk.vm.ci.amd64.AMD64.rdx;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    31
import static jdk.vm.ci.amd64.AMD64.rsi;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    32
import static jdk.vm.ci.code.ValueUtil.asRegister;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    33
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    34
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    35
import org.graalvm.compiler.asm.Label;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    36
import org.graalvm.compiler.asm.amd64.AMD64Address;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    37
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    38
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    39
import org.graalvm.compiler.core.common.LIRKind;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    40
import org.graalvm.compiler.lir.LIRInstructionClass;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    41
import org.graalvm.compiler.lir.Opcode;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    42
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    43
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    44
55631
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    45
import jdk.vm.ci.amd64.AMD64;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    46
import jdk.vm.ci.amd64.AMD64Kind;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    47
import jdk.vm.ci.code.Register;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    48
import jdk.vm.ci.meta.Value;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    49
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    50
@Opcode("AMD64_STRING_INFLATE")
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    51
public final class AMD64StringLatin1InflateOp extends AMD64LIRInstruction {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    52
    public static final LIRInstructionClass<AMD64StringLatin1InflateOp> TYPE = LIRInstructionClass.create(AMD64StringLatin1InflateOp.class);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    53
55631
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    54
    @Use({REG}) private Value rsrc;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    55
    @Use({REG}) private Value rdst;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    56
    @Use({REG}) private Value rlen;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    57
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    58
    @Temp({REG}) private Value rsrcTemp;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    59
    @Temp({REG}) private Value rdstTemp;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    60
    @Temp({REG}) private Value rlenTemp;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    61
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    62
    @Temp({REG}) private Value vtmp1;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    63
    @Temp({REG}) private Value rtmp2;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    64
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    65
    public AMD64StringLatin1InflateOp(LIRGeneratorTool tool, Value src, Value dst, Value len) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    66
        super(TYPE);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    67
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    68
        assert asRegister(src).equals(rsi);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    69
        assert asRegister(dst).equals(rdi);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    70
        assert asRegister(len).equals(rdx);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    71
55631
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    72
        rsrcTemp = rsrc = src;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    73
        rdstTemp = rdst = dst;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    74
        rlenTemp = rlen = len;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    75
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    76
        vtmp1 = tool.newVariable(LIRKind.value(AMD64Kind.V512_BYTE));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    77
        rtmp2 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    78
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    79
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    80
    @Override
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    81
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    82
        Register src = asRegister(rsrc);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    83
        Register dst = asRegister(rdst);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    84
        Register len = asRegister(rlen);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    85
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    86
        Register tmp1 = asRegister(vtmp1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    87
        Register tmp2 = asRegister(rtmp2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    88
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    89
        byteArrayInflate(masm, src, dst, len, tmp1, tmp2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    90
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    91
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    92
    /**
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    93
     * Inflate a Latin1 string using a byte[] array representation into a UTF16 string using a
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    94
     * char[] array representation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    95
     *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    96
     * @param masm the assembler
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    97
     * @param src (rsi) the start address of source byte[] to be inflated
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    98
     * @param dst (rdi) the start address of destination char[] array
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    99
     * @param len (rdx) the length
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   100
     * @param vtmp (xmm) temporary xmm register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   101
     * @param tmp (gpr) temporary gpr register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   102
     */
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   103
    private static void byteArrayInflate(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register vtmp, Register tmp) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   104
        assert vtmp.getRegisterCategory().equals(AMD64.XMM);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   105
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   106
        Label labelDone = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   107
        Label labelBelowThreshold = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   108
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   109
        assert src.number != dst.number && src.number != len.number && src.number != tmp.number;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   110
        assert dst.number != len.number && dst.number != tmp.number;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   111
        assert len.number != tmp.number;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   112
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   113
        if (masm.supports(AMD64.CPUFeature.AVX512BW) &&
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   114
                        masm.supports(AMD64.CPUFeature.AVX512VL) &&
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   115
                        masm.supports(AMD64.CPUFeature.BMI2)) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   116
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   117
            // If the length of the string is less than 16, we chose not to use the
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   118
            // AVX512 instructions.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   119
            masm.testl(len, -16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   120
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelBelowThreshold);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   121
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   122
            Label labelAvx512Tail = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   123
            // Test for suitable number chunks with respect to the size of the vector
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   124
            // operation, mask off remaining number of chars (bytes) to inflate (such
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   125
            // that 'len' will always hold the number of bytes left to inflate) after
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   126
            // committing to the vector loop.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   127
            // Adjust vector pointers to upper address bounds and inverse loop index.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   128
            // This will keep the loop condition simple.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   129
            //
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   130
            // NOTE: The above idiom/pattern is used in all the loops below.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   131
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   132
            masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   133
            masm.andl(tmp, -32);     // The vector count (in chars).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   134
            masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelAvx512Tail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   135
            masm.andl(len, 32 - 1);  // The tail count (in chars).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   136
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   137
            masm.leaq(src, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   138
            masm.leaq(dst, new AMD64Address(dst, tmp, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   139
            masm.negq(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   140
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   141
            Label labelAvx512Loop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   142
            // Inflate 32 chars per iteration, reading 256-bit compact vectors
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   143
            // and writing 512-bit inflated ditto.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   144
            masm.bind(labelAvx512Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   145
            masm.evpmovzxbw(vtmp, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   146
            masm.evmovdqu16(new AMD64Address(dst, tmp, AMD64Address.Scale.Times2), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   147
            masm.addq(tmp, 32);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   148
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelAvx512Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   149
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   150
            masm.bind(labelAvx512Tail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   151
            // All done if the tail count is zero.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   152
            masm.testl(len, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   153
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   154
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   155
            masm.kmovq(k2, k1);      // Save k1
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   156
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   157
            // Compute (1 << N) - 1 = ~(~0 << N), where N is the remaining number
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   158
            // of characters to process.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   159
            masm.movl(tmp, -1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   160
            masm.shlxl(tmp, tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   161
            masm.notl(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   162
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   163
            masm.kmovd(k1, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   164
            masm.evpmovzxbw(vtmp, k1, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   165
            masm.evmovdqu16(new AMD64Address(dst), k1, vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   166
            masm.kmovq(k1, k2);      // Restore k1
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   167
            masm.jmp(labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   168
        }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   169
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   170
        if (masm.supports(AMD64.CPUFeature.SSE4_1)) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   171
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   172
            Label labelSSETail = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   173
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   174
            if (masm.supports(AMD64.CPUFeature.AVX2)) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   175
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   176
                Label labelAvx2Tail = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   177
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   178
                masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   179
                masm.andl(tmp, -16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   180
                masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelAvx2Tail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   181
                masm.andl(len, 16 - 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   182
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   183
                masm.leaq(src, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   184
                masm.leaq(dst, new AMD64Address(dst, tmp, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   185
                masm.negq(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   186
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   187
                Label labelAvx2Loop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   188
                // Inflate 16 bytes (chars) per iteration, reading 128-bit compact vectors
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   189
                // and writing 256-bit inflated ditto.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   190
                masm.bind(labelAvx2Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   191
                masm.vpmovzxbw(vtmp, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   192
                masm.vmovdqu(new AMD64Address(dst, tmp, AMD64Address.Scale.Times2), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   193
                masm.addq(tmp, 16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   194
                masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelAvx2Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   195
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   196
                masm.bind(labelBelowThreshold);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   197
                masm.bind(labelAvx2Tail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   198
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   199
                masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   200
                masm.andl(tmp, -8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   201
                masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelSSETail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   202
                masm.andl(len, 8 - 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   203
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   204
                // Inflate another 8 bytes before final tail copy.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   205
                masm.pmovzxbw(vtmp, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   206
                masm.movdqu(new AMD64Address(dst), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   207
                masm.addq(src, 8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   208
                masm.addq(dst, 16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   209
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   210
                // Fall-through to labelSSETail.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   211
            } else {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   212
                // When there is no AVX2 support available, we use AVX/SSE support to
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   213
                // inflate into maximum 128-bits per operation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   214
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   215
                masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   216
                masm.andl(tmp, -8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   217
                masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelSSETail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   218
                masm.andl(len, 8 - 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   219
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   220
                masm.leaq(src, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   221
                masm.leaq(dst, new AMD64Address(dst, tmp, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   222
                masm.negq(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   223
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   224
                Label labelSSECopy8Loop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   225
                // Inflate 8 bytes (chars) per iteration, reading 64-bit compact vectors
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   226
                // and writing 128-bit inflated ditto.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   227
                masm.bind(labelSSECopy8Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   228
                masm.pmovzxbw(vtmp, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   229
                masm.movdqu(new AMD64Address(dst, tmp, AMD64Address.Scale.Times2), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   230
                masm.addq(tmp, 8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   231
                masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelSSECopy8Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   232
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   233
                // Fall-through to labelSSETail.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   234
            }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   235
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   236
            Label labelCopyChars = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   237
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   238
            masm.bind(labelSSETail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   239
            masm.cmpl(len, 4);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   240
            masm.jccb(AMD64Assembler.ConditionFlag.Less, labelCopyChars);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   241
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   242
            masm.movdl(vtmp, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   243
            masm.pmovzxbw(vtmp, vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   244
            masm.movq(new AMD64Address(dst), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   245
            masm.subq(len, 4);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   246
            masm.addq(src, 4);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   247
            masm.addq(dst, 8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   248
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   249
            masm.bind(labelCopyChars);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   250
        }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   251
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   252
        // Inflate any remaining characters (bytes) using a vanilla implementation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   253
        masm.testl(len, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   254
        masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   255
        masm.leaq(src, new AMD64Address(src, len, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   256
        masm.leaq(dst, new AMD64Address(dst, len, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   257
        masm.negq(len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   258
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   259
        Label labelCopyCharsLoop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   260
        // Inflate a single byte (char) per iteration.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   261
        masm.bind(labelCopyCharsLoop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   262
        masm.movzbl(tmp, new AMD64Address(src, len, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   263
        masm.movw(new AMD64Address(dst, len, AMD64Address.Scale.Times2), tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   264
        masm.incrementq(len, 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   265
        masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelCopyCharsLoop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   266
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   267
        masm.bind(labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   268
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   269
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   270
}