--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayCompareToOp.java Fri Mar 16 22:59:32 2018 -0700
@@ -0,0 +1,595 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.lir.amd64;
+
+import static jdk.vm.ci.amd64.AMD64.k7;
+import static jdk.vm.ci.amd64.AMD64.rax;
+import static jdk.vm.ci.amd64.AMD64.rcx;
+import static jdk.vm.ci.amd64.AMD64.rdx;
+import static jdk.vm.ci.code.ValueUtil.asRegister;
+import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL;
+import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
+
+import java.lang.reflect.Array;
+import java.lang.reflect.Field;
+import java.util.EnumSet;
+
+import org.graalvm.compiler.asm.Label;
+import org.graalvm.compiler.asm.amd64.AMD64Address;
+import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
+import org.graalvm.compiler.asm.amd64.AMD64Assembler.AvxVectorLen;
+import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
+import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
+import org.graalvm.compiler.core.common.LIRKind;
+import org.graalvm.compiler.lir.LIRInstructionClass;
+import org.graalvm.compiler.lir.Opcode;
+import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
+import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
+
+import jdk.vm.ci.amd64.AMD64;
+import jdk.vm.ci.amd64.AMD64.CPUFeature;
+import jdk.vm.ci.amd64.AMD64Kind;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.code.TargetDescription;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.Value;
+import sun.misc.Unsafe;
+
+/**
+ * Emits code which compares two arrays lexicographically. If the CPU supports any vector
+ * instructions, specialized code is emitted to leverage them.
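+ * <p>
+ * The code mirrors HotSpot's {@code MacroAssembler::string_compare}: the commented-out
+ * {@code StrIntrinsicNode} and {@code VM_Version} conditions are the guards from the original
+ * C++ code. The result follows {@code String.compareTo} semantics: the difference of the first
+ * mismatching elements, or, if one array is a prefix of the other, the difference of the lengths.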
+ */
+@Opcode("ARRAY_COMPARE_TO")
+public final class AMD64ArrayCompareToOp extends AMD64LIRInstruction {
+ public static final LIRInstructionClass<AMD64ArrayCompareToOp> TYPE = LIRInstructionClass.create(AMD64ArrayCompareToOp.class);
+
+ private final JavaKind kind1;
+ private final JavaKind kind2;
+ private final int array1BaseOffset;
+ private final int array2BaseOffset;
+
+ @Def({REG}) protected Value resultValue;
+ @Alive({REG}) protected Value array1Value;
+ @Alive({REG}) protected Value array2Value;
+ @Alive({REG}) protected Value length1Value;
+ @Alive({REG}) protected Value length2Value;
+ @Temp({REG}) protected Value temp1;
+ @Temp({REG}) protected Value temp2;
+
+ @Temp({REG, ILLEGAL}) protected Value vectorTemp1;
+
+ public AMD64ArrayCompareToOp(LIRGeneratorTool tool, JavaKind kind1, JavaKind kind2, Value result, Value array1, Value array2, Value length1, Value length2) {
+ super(TYPE);
+ this.kind1 = kind1;
+ this.kind2 = kind2;
+
+ // Both offsets should be the same, but better safe than sorry.
+ Class<?> array1Class = Array.newInstance(kind1.toJavaClass(), 0).getClass();
+ Class<?> array2Class = Array.newInstance(kind2.toJavaClass(), 0).getClass();
+ this.array1BaseOffset = UNSAFE.arrayBaseOffset(array1Class);
+ this.array2BaseOffset = UNSAFE.arrayBaseOffset(array2Class);
+
+ this.resultValue = result;
+ this.array1Value = array1;
+ this.array2Value = array2;
+ this.length1Value = length1;
+ this.length2Value = length2;
+
+ // Allocate some temporaries.
+ this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind()));
+ this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind()));
+
+ // We only need the vector temporaries if we generate SSE code.
+ if (supportsSSE42(tool.target())) {
+ this.vectorTemp1 = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
+ } else {
+ this.vectorTemp1 = Value.ILLEGAL;
+ }
+ }
+
+ private static boolean supportsSSE42(TargetDescription target) {
+ AMD64 arch = (AMD64) target.arch;
+ return arch.getFeatures().contains(CPUFeature.SSE4_2);
+ }
+
+ private static boolean supportsAVX2(TargetDescription target) {
+ AMD64 arch = (AMD64) target.arch;
+ return arch.getFeatures().contains(CPUFeature.AVX2);
+ }
+
+ private static boolean supportsAVX512VLBW(TargetDescription target) {
+ AMD64 arch = (AMD64) target.arch;
+ EnumSet<CPUFeature> features = arch.getFeatures();
+ return features.contains(CPUFeature.AVX512BW) && features.contains(CPUFeature.AVX512VL);
+ }
+
+ @Override
+ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
+ Register result = asRegister(resultValue);
+ Register str1 = asRegister(temp1);
+ Register str2 = asRegister(temp2);
+
+ // Load array base addresses.
+ masm.leaq(str1, new AMD64Address(asRegister(array1Value), array1BaseOffset));
+ masm.leaq(str2, new AMD64Address(asRegister(array2Value), array2BaseOffset));
+ Register cnt1 = asRegister(length1Value);
+ Register cnt2 = asRegister(length2Value);
+
+ // Checkstyle: stop
+ Label LENGTH_DIFF_LABEL = new Label();
+ Label POP_LABEL = new Label();
+ Label DONE_LABEL = new Label();
+ Label WHILE_HEAD_LABEL = new Label();
+ Label COMPARE_WIDE_VECTORS_LOOP_FAILED = new Label(); // used only _LP64 && AVX3
+ int stride, stride2;
+ int adr_stride = -1;
+ int adr_stride1 = -1;
+ int adr_stride2 = -1;
+ // Checkstyle: resume
+ int stride2x2 = 0x40;
+ AMD64Address.Scale scale = null;
+ AMD64Address.Scale scale1 = null;
+ AMD64Address.Scale scale2 = null;
+
+ // if (ae != StrIntrinsicNode::LL) {
+ if (kind1 != JavaKind.Byte || kind2 != JavaKind.Byte) {
+ stride2x2 = 0x20;
+ }
+
+ // if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+ if (kind1 != kind2) {
+ masm.shrl(cnt2, 1);
+ }
+ // Compute the minimum of the string lengths and push the
+ // difference of the string lengths onto the stack.
+ masm.movl(result, cnt1);
+ masm.subl(cnt1, cnt2);
+ masm.push(cnt1);
+ masm.cmovl(ConditionFlag.LessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2)
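+ // The pushed difference (cnt1 - cnt2) is popped as the result at LENGTH_DIFF_LABEL,
+ // or discarded at POP_LABEL once a mismatching element decides the comparison.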
+
+ // Is the minimum length zero?
+ masm.testl(cnt2, cnt2);
+ masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
+ // if (ae == StrIntrinsicNode::LL) {
+ if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
+ // Load first bytes
+ masm.movzbl(result, new AMD64Address(str1, 0)); // result = str1[0]
+ masm.movzbl(cnt1, new AMD64Address(str2, 0)); // cnt1 = str2[0]
+ // } else if (ae == StrIntrinsicNode::UU) {
+ } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
+ // Load first characters
+ masm.movzwl(result, new AMD64Address(str1, 0));
+ masm.movzwl(cnt1, new AMD64Address(str2, 0));
+ } else {
+ masm.movzbl(result, new AMD64Address(str1, 0));
+ masm.movzwl(cnt1, new AMD64Address(str2, 0));
+ }
+ masm.subl(result, cnt1);
+ masm.jcc(ConditionFlag.NotZero, POP_LABEL);
+
+ // if (ae == StrIntrinsicNode::UU) {
+ if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
+ // Divide length by 2 to get number of chars
+ masm.shrl(cnt2, 1);
+ }
+ masm.cmpl(cnt2, 1);
+ masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
+
+ // Check if the strings start at the same location and setup scale and stride
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.cmpptr(str1, str2);
+ masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
+ // if (ae == StrIntrinsicNode::LL) {
+ if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
+ scale = AMD64Address.Scale.Times1;
+ stride = 16;
+ } else {
+ scale = AMD64Address.Scale.Times2;
+ stride = 8;
+ }
+ } else {
+ scale1 = AMD64Address.Scale.Times1;
+ scale2 = AMD64Address.Scale.Times2;
+ // scale not used
+ stride = 8;
+ }
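+ // 'stride' is the element count per 16-byte SSE vector: 16 for bytes, 8 for chars,
+ // and 8 for mixed kinds, where bytes are zero-extended to chars before comparing.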
+
+ // if (UseAVX >= 2 && UseSSE42Intrinsics) {
+ if (supportsAVX2(crb.target) && supportsSSE42(crb.target)) {
+ Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
+
+ // Checkstyle: stop
+ Label COMPARE_WIDE_VECTORS = new Label();
+ Label VECTOR_NOT_EQUAL = new Label();
+ Label COMPARE_WIDE_TAIL = new Label();
+ Label COMPARE_SMALL_STR = new Label();
+ Label COMPARE_WIDE_VECTORS_LOOP = new Label();
+ Label COMPARE_16_CHARS = new Label();
+ Label COMPARE_INDEX_CHAR = new Label();
+ Label COMPARE_WIDE_VECTORS_LOOP_AVX2 = new Label();
+ Label COMPARE_TAIL_LONG = new Label();
+ Label COMPARE_WIDE_VECTORS_LOOP_AVX3 = new Label(); // used only _LP64 && AVX3
+ // Checkstyle: resume
+
+ int pcmpmask = 0x19;
+ // if (ae == StrIntrinsicNode::LL) {
+ if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
+ pcmpmask &= ~0x01;
+ }
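+ // pcmpestri imm8 0x19 = 0b011001: unsigned words, "equal each" aggregation, negated
+ // result (a set bit marks a mismatch); clearing bit 0 selects unsigned bytes instead.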
+
+ // Set up to compare 16-char (32-byte) vectors,
+ // starting from the first character again because it has an aligned address.
+ // if (ae == StrIntrinsicNode::LL) {
+ if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
+ stride2 = 32;
+ } else {
+ stride2 = 16;
+ }
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ adr_stride = stride << scale.log2;
+ } else {
+ adr_stride1 = 8; // stride << scale1;
+ adr_stride2 = 16; // stride << scale2;
+ }
+
+ assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
+ // rax and rdx are used by pcmpestri as elements counters
+ masm.movl(result, cnt2);
+ masm.andl(cnt2, ~(stride2 - 1)); // cnt2 holds the vector count
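+ // result keeps the full minimum length while cnt2 is rounded down to a multiple of stride2.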
+ masm.jcc(ConditionFlag.Zero, COMPARE_TAIL_LONG);
+
+ // Fast path: compare the first two 8-char vectors.
+ masm.bind(COMPARE_16_CHARS);
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.movdqu(vec1, new AMD64Address(str1, 0));
+ } else {
+ masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
+ }
+ masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
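+ // CF is set iff a mismatching element was found; its index is left in cnt1 (rcx).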
+ masm.jccb(ConditionFlag.Below, COMPARE_INDEX_CHAR);
+
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.movdqu(vec1, new AMD64Address(str1, adr_stride));
+ masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride), pcmpmask);
+ } else {
+ masm.pmovzxbw(vec1, new AMD64Address(str1, adr_stride1));
+ masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride2), pcmpmask);
+ }
+ masm.jccb(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS);
+ masm.addl(cnt1, stride);
+
+ // Compare the characters at the index in cnt1
+ masm.bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
+ loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
+ masm.subl(result, cnt2);
+ masm.jmp(POP_LABEL);
+
+ // Setup the registers to start vector comparison loop
+ masm.bind(COMPARE_WIDE_VECTORS);
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.leaq(str1, new AMD64Address(str1, result, scale));
+ masm.leaq(str2, new AMD64Address(str2, result, scale));
+ } else {
+ masm.leaq(str1, new AMD64Address(str1, result, scale1));
+ masm.leaq(str2, new AMD64Address(str2, result, scale2));
+ }
+ masm.subl(result, stride2);
+ masm.subl(cnt2, stride2);
+ masm.jcc(ConditionFlag.Zero, COMPARE_WIDE_TAIL);
+ masm.negq(result);
+
+ // In a loop, compare 16 chars (32 bytes) at a time using vpxor + vptest.
+ masm.bind(COMPARE_WIDE_VECTORS_LOOP);
+
+ // if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
+ if (supportsAVX512VLBW(crb.target)) {
+ masm.cmpl(cnt2, stride2x2);
+ masm.jccb(ConditionFlag.Below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
+ masm.testl(cnt2, stride2x2 - 1); // cnt2 holds the vector count
+ // means we cannot subtract by 0x40
+ masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX2);
+
+ masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.evmovdquq(vec1, new AMD64Address(str1, result, scale), AvxVectorLen.AVX_512bit);
+ // k7 == 11..11, if operands equal, otherwise k7 has some 0
+ masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale), AvxVectorLen.AVX_512bit);
+ } else {
+ masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_512bit);
+ // k7 == 11..11, if operands equal, otherwise k7 has some 0
+ masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale2), AvxVectorLen.AVX_512bit);
+ }
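+ // kortestql ORs the mask with itself: CF == 1 iff k7 is all ones, i.e. all 64 bytes matched.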
+ masm.kortestql(k7, k7);
+ masm.jcc(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
+ masm.addq(result, stride2x2); // update since we already compared at this addr
+ masm.subl(cnt2, stride2x2); // and sub the size too
+ masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX3);
+
+ masm.vpxor(vec1, vec1, vec1);
+ masm.jmpb(COMPARE_WIDE_TAIL);
+ }
+
+ masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.vmovdqu(vec1, new AMD64Address(str1, result, scale));
+ masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale));
+ } else {
+ masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_256bit);
+ masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2));
+ }
+ masm.vptest(vec1, vec1);
+ masm.jcc(ConditionFlag.NotZero, VECTOR_NOT_EQUAL);
+ masm.addq(result, stride2);
+ masm.subl(cnt2, stride2);
+ masm.jcc(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP);
+ // clean upper bits of YMM registers
+ masm.vpxor(vec1, vec1, vec1);
+
+ // compare wide vectors tail
+ masm.bind(COMPARE_WIDE_TAIL);
+ masm.testq(result, result);
+ masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
+
+ masm.movl(result, stride2);
+ masm.movl(cnt2, result);
+ masm.negq(result);
+ masm.jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
+
+ // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
+ masm.bind(VECTOR_NOT_EQUAL);
+ // clean upper bits of YMM registers
+ masm.vpxor(vec1, vec1, vec1);
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.leaq(str1, new AMD64Address(str1, result, scale));
+ masm.leaq(str2, new AMD64Address(str2, result, scale));
+ } else {
+ masm.leaq(str1, new AMD64Address(str1, result, scale1));
+ masm.leaq(str2, new AMD64Address(str2, result, scale2));
+ }
+ masm.jmp(COMPARE_16_CHARS);
+
+ // Compare tail chars, length between 1 and 15 chars
+ masm.bind(COMPARE_TAIL_LONG);
+ masm.movl(cnt2, result);
+ masm.cmpl(cnt2, stride);
+ masm.jcc(ConditionFlag.Less, COMPARE_SMALL_STR);
+
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.movdqu(vec1, new AMD64Address(str1, 0));
+ } else {
+ masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
+ }
+ masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
+ masm.jcc(ConditionFlag.Below, COMPARE_INDEX_CHAR);
+ masm.subq(cnt2, stride);
+ masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.leaq(str1, new AMD64Address(str1, result, scale));
+ masm.leaq(str2, new AMD64Address(str2, result, scale));
+ } else {
+ masm.leaq(str1, new AMD64Address(str1, result, scale1));
+ masm.leaq(str2, new AMD64Address(str2, result, scale2));
+ }
+ masm.negq(cnt2);
+ masm.jmpb(WHILE_HEAD_LABEL);
+
+ masm.bind(COMPARE_SMALL_STR);
+ } else if (supportsSSE42(crb.target)) {
+ Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
+
+ // Checkstyle: stop
+ Label COMPARE_WIDE_VECTORS = new Label();
+ Label VECTOR_NOT_EQUAL = new Label();
+ Label COMPARE_TAIL = new Label();
+ // Checkstyle: resume
+ int pcmpmask = 0x19;
+ // Set up to compare 8-char (16-byte) vectors,
+ // starting from the first character again because it has an aligned address.
+ masm.movl(result, cnt2);
+ masm.andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
+ // if (ae == StrIntrinsicNode::LL) {
+ if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
+ pcmpmask &= ~0x01;
+ }
+ masm.jcc(ConditionFlag.Zero, COMPARE_TAIL);
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.leaq(str1, new AMD64Address(str1, result, scale));
+ masm.leaq(str2, new AMD64Address(str2, result, scale));
+ } else {
+ masm.leaq(str1, new AMD64Address(str1, result, scale1));
+ masm.leaq(str2, new AMD64Address(str2, result, scale2));
+ }
+ masm.negq(result);
+
+ // pcmpestri
+ // inputs:
+ // vec1 - substring
+ // rax - negative string length (elements count)
+ // mem - scanned string
+ // rdx - string length (elements count)
+ // pcmpmask - cmp mode: 11000 (string compare with negated result)
+ // + 00 (unsigned bytes) or + 01 (unsigned shorts)
+ // outputs:
+ // rcx - first mismatched element index
+ assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
+
+ masm.bind(COMPARE_WIDE_VECTORS);
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.movdqu(vec1, new AMD64Address(str1, result, scale));
+ masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
+ } else {
+ masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
+ masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
+ }
+ // After pcmpestri, cnt1 (rcx) contains the mismatched element index
+
+ masm.jccb(ConditionFlag.Below, VECTOR_NOT_EQUAL); // CF==1
+ masm.addq(result, stride);
+ masm.subq(cnt2, stride);
+ masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS);
+
+ // compare wide vectors tail
+ masm.testq(result, result);
+ masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
+
+ masm.movl(cnt2, stride);
+ masm.movl(result, stride);
+ masm.negq(result);
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.movdqu(vec1, new AMD64Address(str1, result, scale));
+ masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
+ } else {
+ masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
+ masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
+ }
+ masm.jccb(ConditionFlag.AboveEqual, LENGTH_DIFF_LABEL);
+
+ // Mismatched characters in the vectors
+ masm.bind(VECTOR_NOT_EQUAL);
+ masm.addq(cnt1, result);
+ loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
+ masm.subl(result, cnt2);
+ masm.jmpb(POP_LABEL);
+
+ masm.bind(COMPARE_TAIL); // limit is zero
+ masm.movl(cnt2, result);
+ // Fallthru to tail compare
+ }
+
+ // Shift str2 and str1 to the end of the arrays, negate min
+ // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+ if (kind1 == kind2) {
+ masm.leaq(str1, new AMD64Address(str1, cnt2, scale));
+ masm.leaq(str2, new AMD64Address(str2, cnt2, scale));
+ } else {
+ masm.leaq(str1, new AMD64Address(str1, cnt2, scale1));
+ masm.leaq(str2, new AMD64Address(str2, cnt2, scale2));
+ }
+ masm.decrementl(cnt2); // first character was compared already
+ masm.negq(cnt2);
+
+ // Compare the rest of the elements
+ masm.bind(WHILE_HEAD_LABEL);
+ loadNextElements(masm, result, cnt1, str1, str2, scale, scale1, scale2, cnt2);
+ masm.subl(result, cnt1);
+ masm.jccb(ConditionFlag.NotZero, POP_LABEL);
+ masm.incrementq(cnt2, 1);
+ masm.jccb(ConditionFlag.NotZero, WHILE_HEAD_LABEL);
+
+ // Strings are equal up to min length. Return the length difference.
+ masm.bind(LENGTH_DIFF_LABEL);
+ masm.pop(result);
+ // if (ae == StrIntrinsicNode::UU) {
+ if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
+ // Divide diff by 2 to get number of chars
+ masm.sarl(result, 1);
+ }
+ masm.jmpb(DONE_LABEL);
+
+ // if (VM_Version::supports_avx512vlbw()) {
+ if (supportsAVX512VLBW(crb.target)) {
+ masm.bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
+
+ masm.kmovql(cnt1, k7);
+ masm.notq(cnt1);
+ masm.bsfq(cnt2, cnt1);
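+ // k7 has a 1-bit for every equal byte; after notq, the first set bit in cnt1 is the
+ // byte offset of the first mismatch within the 64-byte block.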
+ // if (ae != StrIntrinsicNode::LL) {
+ if (kind1 != JavaKind.Byte || kind2 != JavaKind.Byte) {
+ // Divide diff by 2 to get number of chars
+ masm.sarl(cnt2, 1);
+ }
+ masm.addq(result, cnt2);
+ // if (ae == StrIntrinsicNode::LL) {
+ if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
+ masm.movzbl(cnt1, new AMD64Address(str2, result, Scale.Times1));
+ masm.movzbl(result, new AMD64Address(str1, result, Scale.Times1));
+ } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
+ masm.movzwl(cnt1, new AMD64Address(str2, result, scale));
+ masm.movzwl(result, new AMD64Address(str1, result, scale));
+ } else {
+ masm.movzwl(cnt1, new AMD64Address(str2, result, scale2));
+ masm.movzbl(result, new AMD64Address(str1, result, scale1));
+ }
+ masm.subl(result, cnt1);
+ masm.jmpb(POP_LABEL);
+ }
+
+ // Discard the stored length difference
+ masm.bind(POP_LABEL);
+ masm.pop(cnt1);
+
+ // That's it
+ masm.bind(DONE_LABEL);
+ // if (ae == StrIntrinsicNode::UL) {
+ if (kind1 == JavaKind.Char && kind2 == JavaKind.Byte) {
+ masm.negl(result);
+ }
+ }
+
+ private void loadNextElements(AMD64MacroAssembler masm, Register elem1, Register elem2, Register str1, Register str2,
+ AMD64Address.Scale scale, AMD64Address.Scale scale1,
+ AMD64Address.Scale scale2, Register index) {
+ // if (ae == StrIntrinsicNode::LL) {
+ if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
+ masm.movzbl(elem1, new AMD64Address(str1, index, scale, 0));
+ masm.movzbl(elem2, new AMD64Address(str2, index, scale, 0));
+ // } else if (ae == StrIntrinsicNode::UU) {
+ } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
+ masm.movzwl(elem1, new AMD64Address(str1, index, scale, 0));
+ masm.movzwl(elem2, new AMD64Address(str2, index, scale, 0));
+ } else {
+ masm.movzbl(elem1, new AMD64Address(str1, index, scale1, 0));
+ masm.movzwl(elem2, new AMD64Address(str2, index, scale2, 0));
+ }
+ }
+
+ private static final Unsafe UNSAFE = initUnsafe();
+
+ private static Unsafe initUnsafe() {
+ try {
+ return Unsafe.getUnsafe();
+ } catch (SecurityException se) {
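+ // Unsafe.getUnsafe() is caller-sensitive and rejects callers outside the boot
+ // class path; fall back to reading the field reflectively.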
+ try {
+ Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
+ theUnsafe.setAccessible(true);
+ return (Unsafe) theUnsafe.get(Unsafe.class);
+ } catch (Exception e) {
+ throw new RuntimeException("exception while trying to get Unsafe", e);
+ }
+ }
+ }
+}