src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64StringUTF16CompressOp.java
author dlong
Thu, 14 Nov 2019 12:21:00 -0800
changeset 59095 03fbcd06b4c0
parent 58877 aec7bf35d6f5
permissions -rw-r--r--
8233841: Update Graal Reviewed-by: kvn
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     1
/*
58299
6df94ce3ab2f 8229201: Update Graal
dlong
parents: 55631
diff changeset
     2
 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     4
 *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     7
 * published by the Free Software Foundation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     8
 *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
     9
 * This code is distributed in the hope that it will be useful, but WITHOUT
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    12
 * version 2 for more details (a copy is included in the LICENSE file that
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    13
 * accompanied this code).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    14
 *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    15
 * You should have received a copy of the GNU General Public License version
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    16
 * 2 along with this work; if not, write to the Free Software Foundation,
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    18
 *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    20
 * or visit www.oracle.com if you need additional information or have any
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    21
 * questions.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    22
 */
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    23
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    24
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    25
package org.graalvm.compiler.lir.amd64;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    26
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    27
import static jdk.vm.ci.amd64.AMD64.k1;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    28
import static jdk.vm.ci.amd64.AMD64.k2;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    29
import static jdk.vm.ci.amd64.AMD64.k3;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    30
import static jdk.vm.ci.amd64.AMD64.rax;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    31
import static jdk.vm.ci.amd64.AMD64.rdi;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    32
import static jdk.vm.ci.amd64.AMD64.rdx;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    33
import static jdk.vm.ci.amd64.AMD64.rsi;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    34
import static jdk.vm.ci.amd64.AMD64.rsp;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    35
import static jdk.vm.ci.code.ValueUtil.asRegister;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    36
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    37
import static org.graalvm.compiler.lir.amd64.AMD64StringLatin1InflateOp.useAVX512ForStringInflateCompress;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    38
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    39
import org.graalvm.compiler.asm.Label;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    40
import org.graalvm.compiler.asm.amd64.AMD64Address;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    41
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    42
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    43
import org.graalvm.compiler.core.common.LIRKind;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    44
import org.graalvm.compiler.lir.LIRInstructionClass;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    45
import org.graalvm.compiler.lir.Opcode;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    46
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    47
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    48
55631
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    49
import jdk.vm.ci.amd64.AMD64;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    50
import jdk.vm.ci.amd64.AMD64Kind;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    51
import jdk.vm.ci.code.Register;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    52
import jdk.vm.ci.meta.Value;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    53
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    54
@Opcode("AMD64_STRING_COMPRESS")
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    55
public final class AMD64StringUTF16CompressOp extends AMD64LIRInstruction {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    56
    public static final LIRInstructionClass<AMD64StringUTF16CompressOp> TYPE = LIRInstructionClass.create(AMD64StringUTF16CompressOp.class);

    // Result operand; the constructor asserts that it lives in rax.
    @Def({REG}) private Value rres;

    // Input operands; the constructor asserts rsi (source), rdi (destination) and rdx (length).
    @Use({REG}) private Value rsrc, rdst, rlen;

    // The constructor stores the very same input values here, so each input is
    // declared both as a use and as a temp of this instruction.
    @Temp({REG}) private Value rsrcTemp, rdstTemp, rlenTemp;

    // Scratch operands: vtmp1..vtmp4 are created with a vector kind and rtmp5
    // with a DWORD kind (see the constructor).
    @Temp({REG}) private Value vtmp1, vtmp2, vtmp3, vtmp4;
    @Temp({REG}) private Value rtmp5;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    72
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    73
    public AMD64StringUTF16CompressOp(LIRGeneratorTool tool, Value res, Value src, Value dst, Value len) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    74
        super(TYPE);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    75
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    76
        assert asRegister(src).equals(rsi);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    77
        assert asRegister(dst).equals(rdi);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    78
        assert asRegister(len).equals(rdx);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    79
        assert asRegister(res).equals(rax);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    80
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    81
        rres = res;
55631
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    82
        rsrcTemp = rsrc = src;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    83
        rdstTemp = rdst = dst;
1831d28d34bb 8227034: Graal crash with gcbasher
never
parents: 52578
diff changeset
    84
        rlenTemp = rlen = len;
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    85
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
    86
        LIRKind vkind = useAVX512ForStringInflateCompress(tool.target()) ? LIRKind.value(AMD64Kind.V512_BYTE) : LIRKind.value(AMD64Kind.V128_BYTE);
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    87
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    88
        vtmp1 = tool.newVariable(vkind);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    89
        vtmp2 = tool.newVariable(vkind);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    90
        vtmp3 = tool.newVariable(vkind);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    91
        vtmp4 = tool.newVariable(vkind);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    92
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    93
        rtmp5 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    94
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    95
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    96
    @Override
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    97
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    98
        Register res = asRegister(rres);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
    99
        Register src = asRegister(rsrc);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   100
        Register dst = asRegister(rdst);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   101
        Register len = asRegister(rlen);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   102
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   103
        Register tmp1 = asRegister(vtmp1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   104
        Register tmp2 = asRegister(vtmp2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   105
        Register tmp3 = asRegister(vtmp3);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   106
        Register tmp4 = asRegister(vtmp4);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   107
        Register tmp5 = asRegister(rtmp5);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   108
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   109
        charArrayCompress(masm, src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp5, res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   110
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   111
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   112
    /**
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   113
     * Compress a UTF16 string which de facto is a Latin1 string into a byte array representation
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   114
     * (buffer).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   115
     *
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   116
     * @param masm the assembler
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   117
     * @param src (rsi) the start address of source char[] to be compressed
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   118
     * @param dst (rdi) the start address of destination byte[] vector
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   119
     * @param len (rdx) the length
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   120
     * @param tmp1 (xmm) temporary xmm register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   121
     * @param tmp2 (xmm) temporary xmm register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   122
     * @param tmp3 (xmm) temporary xmm register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   123
     * @param tmp4 (xmm) temporary xmm register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   124
     * @param tmp (gpr) temporary gpr register
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   125
     * @param res (rax) the result code (length on success, zero otherwise)
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   126
     */
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   127
    private static void charArrayCompress(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp1,
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   128
                    Register tmp2, Register tmp3, Register tmp4, Register tmp, Register res) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   129
        assert tmp1.getRegisterCategory().equals(AMD64.XMM);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   130
        assert tmp2.getRegisterCategory().equals(AMD64.XMM);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   131
        assert tmp3.getRegisterCategory().equals(AMD64.XMM);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   132
        assert tmp4.getRegisterCategory().equals(AMD64.XMM);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   133
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   134
        Label labelReturnLength = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   135
        Label labelReturnZero = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   136
        Label labelDone = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   137
        Label labelBelowThreshold = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   138
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   139
        assert len.number != res.number;
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   140
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   141
        masm.push(len);      // Save length for return.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   142
58877
aec7bf35d6f5 8233273: Update Graal
dlong
parents: 58299
diff changeset
   143
        if (useAVX512ForStringInflateCompress(masm.target)) {
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   144
            Label labelRestoreK1ReturnZero = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   145
            Label labelAvxPostAlignment = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   146
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   147
            // If the length of the string is less than 32, we chose not to use the
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   148
            // AVX512 instructions.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   149
            masm.testl(len, -32);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   150
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelBelowThreshold);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   151
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   152
            // First check whether a character is compressible (<= 0xff).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   153
            // Create mask to test for Unicode chars inside (zmm) vector.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   154
            masm.movl(res, 0x00ff);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   155
            masm.evpbroadcastw(tmp2, res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   156
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   157
            masm.kmovq(k3, k1);      // Save k1
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   158
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   159
            masm.testl(len, -64);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   160
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelAvxPostAlignment);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   161
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   162
            masm.movl(tmp, dst);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   163
            masm.andl(tmp, (32 - 1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   164
            masm.negl(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   165
            masm.andl(tmp, (32 - 1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   166
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   167
            // bail out when there is nothing to be done
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   168
            masm.testl(tmp, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   169
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelAvxPostAlignment);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   170
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   171
            // Compute (1 << N) - 1 = ~(~0 << N), where N is the remaining number
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   172
            // of characters to process.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   173
            masm.movl(res, -1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   174
            masm.shlxl(res, res, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   175
            masm.notl(res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   176
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   177
            masm.kmovd(k1, res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   178
            masm.evmovdqu16(tmp1, k1, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   179
            masm.evpcmpuw(k2, k1, tmp1, tmp2, 2 /* le */);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   180
            masm.ktestd(k2, k1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   181
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelRestoreK1ReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   182
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   183
            masm.evpmovwb(new AMD64Address(dst), k1, tmp1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   184
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   185
            masm.addq(src, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   186
            masm.addq(src, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   187
            masm.addq(dst, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   188
            masm.subl(len, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   189
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   190
            masm.bind(labelAvxPostAlignment);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   191
            // end of alignment
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   192
            Label labelAvx512LoopTail = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   193
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   194
            masm.movl(tmp, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   195
            masm.andl(tmp, -32);         // The vector count (in chars).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   196
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelAvx512LoopTail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   197
            masm.andl(len, 32 - 1);      // The tail count (in chars).
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   198
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   199
            masm.leaq(src, new AMD64Address(src, tmp, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   200
            masm.leaq(dst, new AMD64Address(dst, tmp, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   201
            masm.negq(tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   202
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   203
            Label labelAvx512Loop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   204
            // Test and compress 32 chars per iteration, reading 512-bit vectors and
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   205
            // writing 256-bit compressed ditto.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   206
            masm.bind(labelAvx512Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   207
            masm.evmovdqu16(tmp1, new AMD64Address(src, tmp, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   208
            masm.evpcmpuw(k2, tmp1, tmp2, 2 /* le */);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   209
            masm.kortestd(k2, k2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   210
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelRestoreK1ReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   211
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   212
            // All 32 chars in the current vector (chunk) are valid for compression,
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   213
            // write truncated byte elements to memory.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   214
            masm.evpmovwb(new AMD64Address(dst, tmp, AMD64Address.Scale.Times1), tmp1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   215
            masm.addq(tmp, 32);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   216
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelAvx512Loop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   217
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   218
            masm.bind(labelAvx512LoopTail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   219
            masm.kmovq(k1, k3);      // Restore k1
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   220
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   221
            // All done if the tail count is zero.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   222
            masm.testl(len, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   223
            masm.jcc(AMD64Assembler.ConditionFlag.Zero, labelReturnLength);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   224
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   225
            // Compute (1 << N) - 1 = ~(~0 << N), where N is the remaining number
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   226
            // of characters to process.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   227
            masm.movl(res, -1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   228
            masm.shlxl(res, res, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   229
            masm.notl(res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   230
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   231
            masm.kmovd(k1, res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   232
            masm.evmovdqu16(tmp1, k1, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   233
            masm.evpcmpuw(k2, k1, tmp1, tmp2, 2 /* le */);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   234
            masm.ktestd(k2, k1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   235
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelRestoreK1ReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   236
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   237
            masm.evpmovwb(new AMD64Address(dst), k1, tmp1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   238
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   239
            masm.kmovq(k1, k3);      // Restore k1
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   240
            masm.jmp(labelReturnLength);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   241
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   242
            masm.bind(labelRestoreK1ReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   243
            masm.kmovq(k1, k3);      // Restore k1
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   244
            masm.jmp(labelReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   245
        }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   246
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   247
        if (masm.supports(AMD64.CPUFeature.SSE4_2)) {
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   248
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   249
            Label labelSSETail = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   250
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   251
            masm.bind(labelBelowThreshold);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   252
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   253
            masm.movl(tmp, 0xff00ff00);  // Create mask to test for Unicode chars in vectors.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   254
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   255
            masm.movl(res, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   256
            masm.andl(res, -16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   257
            masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelSSETail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   258
            masm.andl(len, 16 - 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   259
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   260
            // Compress 16 chars per iteration.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   261
            masm.movdl(tmp1, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   262
            masm.pshufd(tmp1, tmp1, 0);    // Store Unicode mask in 'vtmp1'.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   263
            masm.pxor(tmp4, tmp4);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   264
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   265
            masm.leaq(src, new AMD64Address(src, res, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   266
            masm.leaq(dst, new AMD64Address(dst, res, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   267
            masm.negq(res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   268
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   269
            Label lSSELoop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   270
            // Test and compress 16 chars per iteration, reading 128-bit vectors and
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   271
            // writing 64-bit compressed ditto.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   272
            masm.bind(lSSELoop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   273
            masm.movdqu(tmp2, new AMD64Address(src, res, AMD64Address.Scale.Times2));     // load
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   274
                                                                                          // 1st 8
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   275
                                                                                          // characters
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   276
            masm.movdqu(tmp3, new AMD64Address(src, res, AMD64Address.Scale.Times2, 16)); // load
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   277
                                                                                          // next 8
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   278
                                                                                          // characters
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   279
            masm.por(tmp4, tmp2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   280
            masm.por(tmp4, tmp3);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   281
            masm.ptest(tmp4, tmp1);        // Check for Unicode chars in vector.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   282
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   283
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   284
            masm.packuswb(tmp2, tmp3);     // Only ASCII chars; compress each to a byte.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   285
            masm.movdqu(new AMD64Address(dst, res, AMD64Address.Scale.Times1), tmp2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   286
            masm.addq(res, 16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   287
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, lSSELoop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   288
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   289
            Label labelCopyChars = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   290
            // Test and compress another 8 chars before final tail copy.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   291
            masm.bind(labelSSETail);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   292
            masm.movl(res, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   293
            masm.andl(res, -8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   294
            masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelCopyChars);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   295
            masm.andl(len, 8 - 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   296
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   297
            masm.movdl(tmp1, tmp);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   298
            masm.pshufd(tmp1, tmp1, 0);    // Store Unicode mask in 'vtmp1'.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   299
            masm.pxor(tmp3, tmp3);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   300
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   301
            masm.movdqu(tmp2, new AMD64Address(src));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   302
            masm.ptest(tmp2, tmp1);        // Check for Unicode chars in vector.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   303
            masm.jccb(AMD64Assembler.ConditionFlag.NotZero, labelReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   304
            masm.packuswb(tmp2, tmp3);     // Only ASCII chars; compress each to a byte.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   305
            masm.movq(new AMD64Address(dst), tmp2);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   306
            masm.addq(src, 16);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   307
            masm.addq(dst, 8);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   308
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   309
            masm.bind(labelCopyChars);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   310
        }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   311
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   312
        // Compress any remaining characters using a vanilla implementation.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   313
        masm.testl(len, len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   314
        masm.jccb(AMD64Assembler.ConditionFlag.Zero, labelReturnLength);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   315
        masm.leaq(src, new AMD64Address(src, len, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   316
        masm.leaq(dst, new AMD64Address(dst, len, AMD64Address.Scale.Times1));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   317
        masm.negq(len);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   318
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   319
        Label labelCopyCharsLoop = new Label();
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   320
        // Compress a single character per iteration.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   321
        masm.bind(labelCopyCharsLoop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   322
        masm.movzwl(res, new AMD64Address(src, len, AMD64Address.Scale.Times2));
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   323
        masm.testl(res, 0xff00);     // Check if Unicode character.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   324
        masm.jccb(AMD64Assembler.ConditionFlag.NotZero, labelReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   325
        // An ASCII character; compress to a byte.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   326
        masm.movb(new AMD64Address(dst, len, AMD64Address.Scale.Times1), res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   327
        masm.incrementq(len, 1);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   328
        masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelCopyCharsLoop);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   329
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   330
        // If compression succeeded, return the length.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   331
        masm.bind(labelReturnLength);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   332
        masm.pop(res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   333
        masm.jmpb(labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   334
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   335
        // If compression failed, return 0.
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   336
        masm.bind(labelReturnZero);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   337
        masm.xorl(res, res);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   338
        masm.addq(rsp, 8 /* wordSize */);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   339
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   340
        masm.bind(labelDone);
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   341
    }
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   342
59095
03fbcd06b4c0 8233841: Update Graal
dlong
parents: 58877
diff changeset
   343
    /**
     * Always returns {@code true} for this op.
     *
     * NOTE(review): judging by the name, this presumably tells the LIR framework that the upper
     * parts of the vector registers have to be cleared after this op's code has run; the actual
     * contract is defined by the overridden declaration, which is not visible here -- confirm
     * there.
     */
    @Override
03fbcd06b4c0 8233841: Update Graal
dlong
parents: 58877
diff changeset
   344
    public boolean needsClearUpperVectorRegisters() {
03fbcd06b4c0 8233841: Update Graal
dlong
parents: 58877
diff changeset
   345
        return true;
03fbcd06b4c0 8233841: Update Graal
dlong
parents: 58877
diff changeset
   346
    }
52578
7dd81e82d083 8210777: Update Graal
dlong
parents:
diff changeset
   347
}