src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64StringLatin1InflateOp.java
author dlong
Thu, 14 Nov 2019 12:21:00 -0800
changeset 59095 03fbcd06b4c0
parent 58877 aec7bf35d6f5
permissions -rw-r--r--
8233841: Update Graal Reviewed-by: kvn
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     1
/*
58299
6df94ce3ab2f 8229201: Update Graal
dlong
parents: 55631
diff changeset
     2
 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     4
 *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     7
 * published by the Free Software Foundation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     8
 *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     9
 * This code is distributed in the hope that it will be useful, but WITHOUT
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    12
 * version 2 for more details (a copy is included in the LICENSE file that
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    13
 * accompanied this code).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    14
 *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    15
 * You should have received a copy of the GNU General Public License version
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    16
 * 2 along with this work; if not, write to the Free Software Foundation,
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    18
 *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    20
 * or visit www.oracle.com if you need additional information or have any
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    21
 * questions.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    22
 */
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    23
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    24
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    25
package org.graalvm.compiler.lir.amd64;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    26
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    27
import static jdk.vm.ci.amd64.AMD64.k1;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    28
import static jdk.vm.ci.amd64.AMD64.k2;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    29
import static jdk.vm.ci.amd64.AMD64.rdi;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    30
import static jdk.vm.ci.amd64.AMD64.rdx;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    31
import static jdk.vm.ci.amd64.AMD64.rsi;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    32
import static jdk.vm.ci.code.ValueUtil.asRegister;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    33
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    34
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    35
import java.util.EnumSet;
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    36
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    37
import org.graalvm.compiler.asm.Label;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    38
import org.graalvm.compiler.asm.amd64.AMD64Address;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    39
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    40
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    41
import org.graalvm.compiler.core.common.LIRKind;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    42
import org.graalvm.compiler.lir.LIRInstructionClass;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    43
import org.graalvm.compiler.lir.Opcode;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    44
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    45
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    46
55631
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    47
import jdk.vm.ci.amd64.AMD64;
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    48
import jdk.vm.ci.amd64.AMD64.CPUFeature;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    49
import jdk.vm.ci.amd64.AMD64Kind;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    50
import jdk.vm.ci.code.Register;
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    51
import jdk.vm.ci.code.TargetDescription;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    52
import jdk.vm.ci.meta.Value;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    53
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    54
@Opcode("AMD64_STRING_INFLATE")
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    55
public final class AMD64StringLatin1InflateOp extends AMD64LIRInstruction {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    56
    public static final LIRInstructionClass<AMD64StringLatin1InflateOp> TYPE = LIRInstructionClass.create(AMD64StringLatin1InflateOp.class);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    57
55631
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    58
    @Use({REG}) private Value rsrc;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    59
    @Use({REG}) private Value rdst;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    60
    @Use({REG}) private Value rlen;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    61
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    62
    @Temp({REG}) private Value rsrcTemp;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    63
    @Temp({REG}) private Value rdstTemp;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    64
    @Temp({REG}) private Value rlenTemp;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    65
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    66
    @Temp({REG}) private Value vtmp1;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    67
    @Temp({REG}) private Value rtmp2;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    68
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    69
    public AMD64StringLatin1InflateOp(LIRGeneratorTool tool, Value src, Value dst, Value len) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    70
        super(TYPE);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    71
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    72
        assert asRegister(src).equals(rsi);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    73
        assert asRegister(dst).equals(rdi);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    74
        assert asRegister(len).equals(rdx);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    75
55631
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    76
        rsrcTemp = rsrc = src;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    77
        rdstTemp = rdst = dst;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    78
        rlenTemp = rlen = len;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    79
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    80
        vtmp1 = useAVX512ForStringInflateCompress(tool.target()) ? tool.newVariable(LIRKind.value(AMD64Kind.V512_BYTE)) : tool.newVariable(LIRKind.value(AMD64Kind.V128_BYTE));
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    81
        rtmp2 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    82
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    83
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    84
    @Override
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    85
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    86
        Register src = asRegister(rsrc);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    87
        Register dst = asRegister(rdst);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    88
        Register len = asRegister(rlen);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    89
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    90
        Register tmp1 = asRegister(vtmp1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    91
        Register tmp2 = asRegister(rtmp2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    92
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    93
        byteArrayInflate(masm, src, dst, len, tmp1, tmp2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    94
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    95
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    96
    public static boolean useAVX512ForStringInflateCompress(TargetDescription target) {
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    97
        EnumSet<CPUFeature> features = ((AMD64) target.arch).getFeatures();
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    98
        return features.contains(AMD64.CPUFeature.AVX512BW) &&
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    99
                        features.contains(AMD64.CPUFeature.AVX512VL) &&
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
   100
                        features.contains(AMD64.CPUFeature.BMI2);
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
   101
    }
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
   102
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   103
    /**
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   104
     * Inflate a Latin1 string using a byte[] array representation into a UTF16 string using a
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   105
     * char[] array representation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   106
     *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   107
     * @param masm the assembler
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   108
     * @param src (rsi) the start address of source byte[] to be inflated
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   109
     * @param dst (rdi) the start address of destination char[] array
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   110
     * @param len (rdx) the length
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   111
     * @param vtmp (xmm) temporary xmm register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   112
     * @param tmp (gpr) temporary gpr register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   113
     */
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   114
    private static void byteArrayInflate(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register vtmp, Register tmp) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   115
        assert vtmp.getRegisterCategory().equals(AMD64.XMM);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   116
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   117
        Label labelDone = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   118
        Label labelBelowThreshold = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   119
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   120
        assert src.number != dst.number && src.number != len.number && src.number != tmp.number;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   121
        assert dst.number != len.number && dst.number != tmp.number;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   122
        assert len.number != tmp.number;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   123
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
   124
        if (useAVX512ForStringInflateCompress(masm.target)) {
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   125
            // If the length of the string is less than 16, we chose not to use the
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   126
            // AVX512 instructions.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   127
            masm.testl(len, -16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   128
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelBelowThreshold);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   129
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   130
            Label labelAvx512Tail = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   131
            // Test for suitable number chunks with respect to the size of the vector
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   132
            // operation, mask off remaining number of chars (bytes) to inflate (such
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   133
            // that 'len' will always hold the number of bytes left to inflate) after
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   134
            // committing to the vector loop.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   135
            // Adjust vector pointers to upper address bounds and inverse loop index.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   136
            // This will keep the loop condition simple.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   137
            //
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   138
            // NOTE: The above idiom/pattern is used in all the loops below.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   139
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   140
            masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   141
            masm.andl(tmp, -32);     // The vector count (in chars).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   142
            masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelAvx512Tail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   143
            masm.andl(len, 32 - 1);  // The tail count (in chars).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   144
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   145
            masm.leaq(src, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   146
            masm.leaq(dst, new AMD64Address(dst, tmp, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   147
            masm.negq(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   148
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   149
            Label labelAvx512Loop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   150
            // Inflate 32 chars per iteration, reading 256-bit compact vectors
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   151
            // and writing 512-bit inflated ditto.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   152
            masm.bind(labelAvx512Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   153
            masm.evpmovzxbw(vtmp, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   154
            masm.evmovdqu16(new AMD64Address(dst, tmp, AMD64Address.Scale.Times2), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   155
            masm.addq(tmp, 32);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   156
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelAvx512Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   157
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   158
            masm.bind(labelAvx512Tail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   159
            // All done if the tail count is zero.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   160
            masm.testl(len, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   161
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   162
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   163
            masm.kmovq(k2, k1);      // Save k1
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   164
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   165
            // Compute (1 << N) - 1 = ~(~0 << N), where N is the remaining number
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   166
            // of characters to process.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   167
            masm.movl(tmp, -1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   168
            masm.shlxl(tmp, tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   169
            masm.notl(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   170
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   171
            masm.kmovd(k1, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   172
            masm.evpmovzxbw(vtmp, k1, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   173
            masm.evmovdqu16(new AMD64Address(dst), k1, vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   174
            masm.kmovq(k1, k2);      // Restore k1
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   175
            masm.jmp(labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   176
        }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   177
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   178
        if (masm.supports(AMD64.CPUFeature.SSE4_1)) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   179
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   180
            Label labelSSETail = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   181
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   182
            if (masm.supports(AMD64.CPUFeature.AVX2)) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   183
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   184
                Label labelAvx2Tail = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   185
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   186
                masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   187
                masm.andl(tmp, -16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   188
                masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelAvx2Tail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   189
                masm.andl(len, 16 - 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   190
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   191
                masm.leaq(src, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   192
                masm.leaq(dst, new AMD64Address(dst, tmp, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   193
                masm.negq(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   194
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   195
                Label labelAvx2Loop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   196
                // Inflate 16 bytes (chars) per iteration, reading 128-bit compact vectors
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   197
                // and writing 256-bit inflated ditto.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   198
                masm.bind(labelAvx2Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   199
                masm.vpmovzxbw(vtmp, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   200
                masm.vmovdqu(new AMD64Address(dst, tmp, AMD64Address.Scale.Times2), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   201
                masm.addq(tmp, 16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   202
                masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelAvx2Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   203
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   204
                masm.bind(labelBelowThreshold);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   205
                masm.bind(labelAvx2Tail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   206
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   207
                masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   208
                masm.andl(tmp, -8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   209
                masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelSSETail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   210
                masm.andl(len, 8 - 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   211
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   212
                // Inflate another 8 bytes before final tail copy.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   213
                masm.pmovzxbw(vtmp, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   214
                masm.movdqu(new AMD64Address(dst), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   215
                masm.addq(src, 8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   216
                masm.addq(dst, 16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   217
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   218
                // Fall-through to labelSSETail.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   219
            } else {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   220
                // When there is no AVX2 support available, we use AVX/SSE support to
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   221
                // inflate into maximum 128-bits per operation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   222
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   223
                masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   224
                masm.andl(tmp, -8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   225
                masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelSSETail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   226
                masm.andl(len, 8 - 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   227
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   228
                masm.leaq(src, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   229
                masm.leaq(dst, new AMD64Address(dst, tmp, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   230
                masm.negq(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   231
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   232
                Label labelSSECopy8Loop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   233
                // Inflate 8 bytes (chars) per iteration, reading 64-bit compact vectors
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   234
                // and writing 128-bit inflated ditto.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   235
                masm.bind(labelSSECopy8Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   236
                masm.pmovzxbw(vtmp, new AMD64Address(src, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   237
                masm.movdqu(new AMD64Address(dst, tmp, AMD64Address.Scale.Times2), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   238
                masm.addq(tmp, 8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   239
                masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelSSECopy8Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   240
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   241
                // Fall-through to labelSSETail.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   242
            }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   243
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   244
            Label labelCopyChars = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   245
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   246
            masm.bind(labelSSETail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   247
            masm.cmpl(len, 4);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   248
            masm.jccb(AMD64Assembler.ConditionFlag.Less, labelCopyChars);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   249
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   250
            masm.movdl(vtmp, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   251
            masm.pmovzxbw(vtmp, vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   252
            masm.movq(new AMD64Address(dst), vtmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   253
            masm.subq(len, 4);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   254
            masm.addq(src, 4);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   255
            masm.addq(dst, 8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   256
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   257
            masm.bind(labelCopyChars);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   258
        }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   259
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   260
        // Inflate any remaining characters (bytes) using a vanilla implementation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   261
        masm.testl(len, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   262
        masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   263
        masm.leaq(src, new AMD64Address(src, len, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   264
        masm.leaq(dst, new AMD64Address(dst, len, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   265
        masm.negq(len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   266
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   267
        Label labelCopyCharsLoop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   268
        // Inflate a single byte (char) per iteration.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   269
        masm.bind(labelCopyCharsLoop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   270
        masm.movzbl(tmp, new AMD64Address(src, len, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   271
        masm.movw(new AMD64Address(dst, len, AMD64Address.Scale.Times2), tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   272
        masm.incrementq(len, 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   273
        masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelCopyCharsLoop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   274
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   275
        masm.bind(labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   276
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   277
59095
03fbcd06b4c0 8233841: Update Graal
dlong
parents: 58877
diff changeset
   278
    @Override
03fbcd06b4c0 8233841: Update Graal
dlong
parents: 58877
diff changeset
   279
    public boolean needsClearUpperVectorRegisters() {
03fbcd06b4c0 8233841: Update Graal
dlong
parents: 58877
diff changeset
   280
        return true;
03fbcd06b4c0 8233841: Update Graal
dlong
parents: 58877
diff changeset
   281
    }
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   282
}