author | dlong |
Thu, 14 Nov 2019 12:21:00 -0800 | |
changeset 59095 | 03fbcd06b4c0 |
parent 58299 | 6df94ce3ab2f |
permissions | -rw-r--r-- |
51436 | 1 |
/* |
54204 | 2 |
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. |
51436 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. |
|
8 |
* |
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
13 |
* accompanied this code). |
|
14 |
* |
|
15 |
* You should have received a copy of the GNU General Public License version |
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
* |
|
19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
20 |
* or visit www.oracle.com if you need additional information or have any |
|
21 |
* questions. |
|
22 |
*/ |
|
23 |
||
24 |
||
25 |
package org.graalvm.compiler.lir.amd64; |
|
26 |
||
55509 | 27 |
import static jdk.vm.ci.code.ValueUtil.asRegister; |
28 |
import static jdk.vm.ci.code.ValueUtil.isRegister; |
|
29 |
import static jdk.vm.ci.code.ValueUtil.isStackSlot; |
|
30 |
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.CONST; |
|
31 |
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; |
|
32 |
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; |
|
33 |
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; |
|
34 |
||
35 |
import java.util.Objects; |
|
36 |
||
51436 | 37 |
import org.graalvm.compiler.asm.Label; |
38 |
import org.graalvm.compiler.asm.amd64.AMD64Address; |
|
55509 | 39 |
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; |
51436 | 40 |
import org.graalvm.compiler.asm.amd64.AMD64Assembler; |
55509 | 41 |
import org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64RMOp; |
51436 | 42 |
import org.graalvm.compiler.asm.amd64.AMD64Assembler.VexMoveOp; |
43 |
import org.graalvm.compiler.asm.amd64.AMD64Assembler.VexRMIOp; |
|
44 |
import org.graalvm.compiler.asm.amd64.AMD64Assembler.VexRMOp; |
|
45 |
import org.graalvm.compiler.asm.amd64.AMD64Assembler.VexRVMOp; |
|
55509 | 46 |
import org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.OperandSize; |
51436 | 47 |
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; |
48 |
import org.graalvm.compiler.asm.amd64.AVXKind; |
|
49 |
import org.graalvm.compiler.core.common.LIRKind; |
|
55509 | 50 |
import org.graalvm.compiler.core.common.NumUtil; |
51 |
import org.graalvm.compiler.lir.ConstantValue; |
|
51436 | 52 |
import org.graalvm.compiler.lir.LIRInstructionClass; |
53 |
import org.graalvm.compiler.lir.Opcode; |
|
54 |
import org.graalvm.compiler.lir.asm.CompilationResultBuilder; |
|
55 |
import org.graalvm.compiler.lir.gen.LIRGeneratorTool; |
|
56 |
||
55509 | 57 |
import jdk.vm.ci.amd64.AMD64; |
58 |
import jdk.vm.ci.amd64.AMD64.CPUFeature; |
|
59 |
import jdk.vm.ci.amd64.AMD64Kind; |
|
60 |
import jdk.vm.ci.code.Register; |
|
61 |
import jdk.vm.ci.meta.JavaConstant; |
|
62 |
import jdk.vm.ci.meta.JavaKind; |
|
63 |
import jdk.vm.ci.meta.Value; |
|
51436 | 64 |
|
65 |
/** |
|
66 |
*/ |
|
67 |
@Opcode("AMD64_ARRAY_INDEX_OF") |
|
68 |
public final class AMD64ArrayIndexOfOp extends AMD64LIRInstruction { |
|
69 |
public static final LIRInstructionClass<AMD64ArrayIndexOfOp> TYPE = LIRInstructionClass.create(AMD64ArrayIndexOfOp.class); |
|
70 |
||
55509 | 71 |
private final JavaKind valueKind; |
52578 | 72 |
private final int nValues; |
73 |
private final boolean findTwoConsecutive; |
|
74 |
private final AMD64Kind vectorKind; |
|
55509 | 75 |
private final int arrayBaseOffset; |
76 |
private final Scale arrayIndexScale; |
|
51436 | 77 |
|
78 |
@Def({REG}) protected Value resultValue; |
|
52578 | 79 |
@Alive({REG}) protected Value arrayPtrValue; |
55509 | 80 |
@Alive({REG}) protected Value arrayLengthValue; |
81 |
@Use({REG}) protected Value fromIndexValue; |
|
82 |
@Alive({REG, STACK, CONST}) protected Value searchValue1; |
|
83 |
@Alive({REG, STACK, CONST, ILLEGAL}) protected Value searchValue2; |
|
84 |
@Alive({REG, STACK, CONST, ILLEGAL}) protected Value searchValue3; |
|
85 |
@Alive({REG, STACK, CONST, ILLEGAL}) protected Value searchValue4; |
|
51436 | 86 |
@Temp({REG}) protected Value comparisonResult1; |
55509 | 87 |
@Temp({REG, ILLEGAL}) protected Value comparisonResult2; |
52578 | 88 |
@Temp({REG, ILLEGAL}) protected Value vectorCompareVal1; |
89 |
@Temp({REG, ILLEGAL}) protected Value vectorCompareVal2; |
|
90 |
@Temp({REG, ILLEGAL}) protected Value vectorCompareVal3; |
|
91 |
@Temp({REG, ILLEGAL}) protected Value vectorCompareVal4; |
|
51436 | 92 |
@Temp({REG, ILLEGAL}) protected Value vectorArray1; |
93 |
@Temp({REG, ILLEGAL}) protected Value vectorArray2; |
|
94 |
@Temp({REG, ILLEGAL}) protected Value vectorArray3; |
|
95 |
@Temp({REG, ILLEGAL}) protected Value vectorArray4; |
|
96 |
||
55509 | 97 |
public AMD64ArrayIndexOfOp(JavaKind arrayKind, JavaKind valueKind, boolean findTwoConsecutive, int maxVectorSize, LIRGeneratorTool tool, |
98 |
Value result, Value arrayPtr, Value arrayLength, Value fromIndex, Value... searchValues) { |
|
51436 | 99 |
super(TYPE); |
55509 | 100 |
this.valueKind = valueKind; |
101 |
this.arrayBaseOffset = tool.getProviders().getMetaAccess().getArrayBaseOffset(arrayKind); |
|
102 |
this.arrayIndexScale = Objects.requireNonNull(Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(valueKind))); |
|
52578 | 103 |
this.findTwoConsecutive = findTwoConsecutive; |
104 |
assert 0 < searchValues.length && searchValues.length <= 4; |
|
55509 | 105 |
assert byteMode(valueKind) || charMode(valueKind); |
52578 | 106 |
assert supports(tool, CPUFeature.SSE2) || supports(tool, CPUFeature.AVX) || supportsAVX2(tool); |
107 |
nValues = searchValues.length; |
|
108 |
assert !findTwoConsecutive || nValues == 1; |
|
51436 | 109 |
resultValue = result; |
52578 | 110 |
arrayPtrValue = arrayPtr; |
111 |
arrayLengthValue = arrayLength; |
|
55509 | 112 |
fromIndexValue = fromIndex; |
52578 | 113 |
searchValue1 = searchValues[0]; |
114 |
searchValue2 = nValues > 1 ? searchValues[1] : Value.ILLEGAL; |
|
115 |
searchValue3 = nValues > 2 ? searchValues[2] : Value.ILLEGAL; |
|
116 |
searchValue4 = nValues > 3 ? searchValues[3] : Value.ILLEGAL; |
|
55509 | 117 |
comparisonResult1 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind())); |
118 |
comparisonResult2 = findTwoConsecutive ? tool.newVariable(LIRKind.value(tool.target().arch.getWordKind())) : Value.ILLEGAL; |
|
119 |
vectorKind = supportsAVX2(tool) && (maxVectorSize < 0 || maxVectorSize >= 32) ? byteMode(valueKind) ? AMD64Kind.V256_BYTE : AMD64Kind.V256_WORD |
|
120 |
: byteMode(valueKind) ? AMD64Kind.V128_BYTE : AMD64Kind.V128_WORD; |
|
52578 | 121 |
vectorCompareVal1 = tool.newVariable(LIRKind.value(vectorKind)); |
122 |
vectorCompareVal2 = nValues > 1 ? tool.newVariable(LIRKind.value(vectorKind)) : Value.ILLEGAL; |
|
123 |
vectorCompareVal3 = nValues > 2 ? tool.newVariable(LIRKind.value(vectorKind)) : Value.ILLEGAL; |
|
124 |
vectorCompareVal4 = nValues > 3 ? tool.newVariable(LIRKind.value(vectorKind)) : Value.ILLEGAL; |
|
125 |
vectorArray1 = tool.newVariable(LIRKind.value(vectorKind)); |
|
126 |
vectorArray2 = tool.newVariable(LIRKind.value(vectorKind)); |
|
127 |
vectorArray3 = tool.newVariable(LIRKind.value(vectorKind)); |
|
128 |
vectorArray4 = tool.newVariable(LIRKind.value(vectorKind)); |
|
51436 | 129 |
} |
130 |
||
52578 | 131 |
private static boolean byteMode(JavaKind kind) { |
51436 | 132 |
return kind == JavaKind.Byte; |
133 |
} |
|
134 |
||
52578 | 135 |
private static boolean charMode(JavaKind kind) { |
51436 | 136 |
return kind == JavaKind.Char; |
137 |
} |
|
138 |
||
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
139 |
private JavaKind getComparisonKind() { |
55509 | 140 |
return findTwoConsecutive ? (byteMode(valueKind) ? JavaKind.Char : JavaKind.Int) : valueKind; |
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
141 |
} |
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
142 |
|
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
143 |
private AVXKind.AVXSize getVectorSize() { |
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
144 |
return AVXKind.getDataSize(vectorKind); |
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
145 |
} |
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
146 |
|
51436 | 147 |
@Override |
148 |
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler asm) { |
|
55509 | 149 |
int nVectors = nValues == 1 ? 4 : nValues == 2 ? 2 : 1; |
52578 | 150 |
Register arrayPtr = asRegister(arrayPtrValue); |
151 |
Register arrayLength = asRegister(arrayLengthValue); |
|
55509 | 152 |
Register fromIndex = asRegister(fromIndexValue); |
153 |
Register index = asRegister(resultValue); |
|
154 |
Value[] searchValue = { |
|
155 |
nValues > 0 ? searchValue1 : null, |
|
156 |
nValues > 1 ? searchValue2 : null, |
|
157 |
nValues > 2 ? searchValue3 : null, |
|
158 |
nValues > 3 ? searchValue4 : null, |
|
52578 | 159 |
}; |
160 |
Register[] vecCmp = { |
|
161 |
nValues > 0 ? asRegister(vectorCompareVal1) : null, |
|
162 |
nValues > 1 ? asRegister(vectorCompareVal2) : null, |
|
163 |
nValues > 2 ? asRegister(vectorCompareVal3) : null, |
|
164 |
nValues > 3 ? asRegister(vectorCompareVal4) : null, |
|
165 |
}; |
|
166 |
Register[] vecArray = { |
|
167 |
asRegister(vectorArray1), |
|
168 |
asRegister(vectorArray2), |
|
169 |
asRegister(vectorArray3), |
|
170 |
asRegister(vectorArray4), |
|
171 |
}; |
|
172 |
Register[] cmpResult = { |
|
173 |
asRegister(comparisonResult1), |
|
55509 | 174 |
findTwoConsecutive ? asRegister(comparisonResult2) : null, |
52578 | 175 |
}; |
55509 | 176 |
Label ret = new Label(); |
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
177 |
|
52578 | 178 |
Label bulkVectorLoop = new Label(); |
179 |
Label singleVectorLoop = new Label(); |
|
180 |
Label[] vectorFound = { |
|
181 |
new Label(), |
|
182 |
new Label(), |
|
183 |
new Label(), |
|
184 |
new Label(), |
|
185 |
}; |
|
55509 | 186 |
Label runVectorized = new Label(); |
187 |
Label elementWiseLoop = new Label(); |
|
188 |
Label elementWiseFound = new Label(); |
|
189 |
Label elementWiseNotFound = new Label(); |
|
190 |
Label skipBulkVectorLoop = new Label(); |
|
191 |
int vectorSize = getVectorSize().getBytes() / valueKind.getByteCount(); |
|
192 |
int bulkSize = vectorSize * nVectors; |
|
193 |
JavaKind vectorCompareKind = valueKind; |
|
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
194 |
if (findTwoConsecutive) { |
52578 | 195 |
bulkSize /= 2; |
55509 | 196 |
vectorCompareKind = byteMode(valueKind) ? JavaKind.Char : JavaKind.Int; |
197 |
} |
|
198 |
// index = fromIndex + vectorSize (+1 if findTwoConsecutive) |
|
199 |
// important: this must be the first register manipulation, since fromIndex is |
|
200 |
// annotated with @Use |
|
201 |
asm.leaq(index, new AMD64Address(fromIndex, vectorSize + (findTwoConsecutive ? 1 : 0))); |
|
202 |
||
203 |
// check if vector vector load is in bounds |
|
204 |
asm.cmpq(index, arrayLength); |
|
205 |
asm.jccb(AMD64Assembler.ConditionFlag.LessEqual, runVectorized); |
|
206 |
||
207 |
// search range is smaller than vector size, do element-wise comparison |
|
208 |
||
209 |
// index = fromIndex (+ 1 if findTwoConsecutive) |
|
210 |
asm.subq(index, vectorSize); |
|
211 |
// check if enough array slots remain |
|
212 |
asm.cmpq(index, arrayLength); |
|
213 |
asm.jccb(AMD64Assembler.ConditionFlag.GreaterEqual, elementWiseNotFound); |
|
214 |
// compare one-by-one |
|
215 |
asm.bind(elementWiseLoop); |
|
216 |
// check for match |
|
217 |
OperandSize cmpSize = getOpSize(getComparisonKind()); |
|
218 |
// address = findTwoConsecutive ? array[index - 1] : array[index] |
|
219 |
AMD64Address arrayAddr = new AMD64Address(arrayPtr, index, arrayIndexScale, arrayBaseOffset - (findTwoConsecutive ? valueKind.getByteCount() : 0)); |
|
220 |
boolean valuesOnStack = searchValuesOnStack(searchValue); |
|
221 |
if (valuesOnStack) { |
|
222 |
(cmpSize == OperandSize.BYTE ? AMD64RMOp.MOVB : AMD64RMOp.MOV).emit(asm, cmpSize, cmpResult[0], arrayAddr); |
|
223 |
for (int i = 0; i < nValues; i++) { |
|
224 |
if (isConstant(searchValue[i])) { |
|
225 |
int imm = asConstant(searchValue[i]).asInt(); |
|
226 |
AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(cmpSize, NumUtil.isByte(imm)).emit(asm, cmpSize, cmpResult[0], imm); |
|
227 |
} else if (isStackSlot(searchValue[i])) { |
|
228 |
AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(cmpSize).emit(asm, cmpSize, cmpResult[0], (AMD64Address) crb.asAddress(searchValue[i])); |
|
229 |
} else { |
|
230 |
AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(cmpSize).emit(asm, cmpSize, cmpResult[0], asRegister(searchValue[i])); |
|
231 |
} |
|
232 |
asm.jccb(AMD64Assembler.ConditionFlag.Equal, elementWiseFound); |
|
233 |
} |
|
52578 | 234 |
} else { |
55509 | 235 |
for (int i = 0; i < nValues; i++) { |
236 |
if (isConstant(searchValue[i])) { |
|
237 |
int imm = asConstant(searchValue[i]).asInt(); |
|
238 |
AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(cmpSize, NumUtil.isByte(imm)).emit(asm, cmpSize, arrayAddr, imm); |
|
239 |
} else { |
|
240 |
AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(cmpSize).emit(asm, cmpSize, asRegister(searchValue[i]), arrayAddr); |
|
241 |
} |
|
242 |
asm.jccb(AMD64Assembler.ConditionFlag.Equal, elementWiseFound); |
|
243 |
} |
|
244 |
} |
|
245 |
// adjust index |
|
246 |
asm.incrementq(index, 1); |
|
247 |
// continue loop |
|
248 |
asm.cmpq(index, arrayLength); |
|
249 |
asm.jccb(AMD64Assembler.ConditionFlag.Less, elementWiseLoop); |
|
250 |
||
251 |
asm.bind(elementWiseNotFound); |
|
252 |
asm.xorq(index, index); |
|
253 |
||
254 |
if (findTwoConsecutive) { |
|
255 |
asm.bind(elementWiseFound); |
|
256 |
asm.decrementq(index, 1); |
|
257 |
} else { |
|
258 |
asm.decrementq(index, 1); |
|
259 |
asm.bind(elementWiseFound); |
|
260 |
} |
|
261 |
asm.jmp(ret); |
|
262 |
||
263 |
// vectorized implementation |
|
264 |
asm.bind(runVectorized); |
|
265 |
||
266 |
// move search values to vectors |
|
267 |
for (int i = 0; i < nValues; i++) { |
|
268 |
// fill comparison vector with copies of the search value |
|
269 |
broadcastSearchValue(crb, asm, vecCmp[i], searchValue[i], cmpResult[0], vecArray[0]); |
|
52578 | 270 |
} |
271 |
||
55509 | 272 |
// do one unaligned vector comparison pass and adjust alignment afterwards |
273 |
emitVectorCompare(asm, vectorCompareKind, findTwoConsecutive ? 2 : 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, false, false); |
|
51436 | 274 |
|
55509 | 275 |
// adjust index to vector size alignment |
276 |
asm.leaq(cmpResult[0], new AMD64Address(arrayPtr, arrayBaseOffset)); |
|
277 |
if (charMode(valueKind)) { |
|
278 |
asm.shrq(cmpResult[0], 1); |
|
279 |
} |
|
280 |
asm.addq(index, cmpResult[0]); |
|
281 |
// adjust to next lower multiple of vector size |
|
282 |
asm.andq(index, ~(vectorSize - 1)); |
|
283 |
asm.subq(index, cmpResult[0]); |
|
284 |
// add bulk size |
|
285 |
asm.addq(index, bulkSize); |
|
286 |
||
51436 | 287 |
// check if there are enough array slots remaining for the bulk loop |
55509 | 288 |
asm.cmpq(index, arrayLength); |
289 |
asm.jccb(AMD64Assembler.ConditionFlag.Greater, skipBulkVectorLoop); |
|
51436 | 290 |
|
291 |
emitAlign(crb, asm); |
|
292 |
asm.bind(bulkVectorLoop); |
|
293 |
// memory-aligned bulk comparison |
|
55509 | 294 |
emitVectorCompare(asm, vectorCompareKind, nVectors, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, false, !findTwoConsecutive); |
295 |
// adjust index |
|
296 |
asm.addq(index, bulkSize); |
|
51436 | 297 |
// check if there are enough array slots remaining for the bulk loop |
55509 | 298 |
asm.cmpq(index, arrayLength); |
299 |
asm.jccb(AMD64Assembler.ConditionFlag.LessEqual, bulkVectorLoop); |
|
51436 | 300 |
|
55509 | 301 |
asm.bind(skipBulkVectorLoop); |
302 |
if ((findTwoConsecutive && nVectors == 2) || nVectors == 1) { |
|
303 |
// do last load from end of array |
|
304 |
asm.movq(index, arrayLength); |
|
305 |
// compare |
|
306 |
emitVectorCompare(asm, vectorCompareKind, findTwoConsecutive ? 2 : 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, true, false); |
|
307 |
} else { |
|
308 |
// remove bulk offset |
|
309 |
asm.subq(index, bulkSize); |
|
52578 | 310 |
emitAlign(crb, asm); |
311 |
// same loop as bulkVectorLoop, with only one vector |
|
312 |
asm.bind(singleVectorLoop); |
|
55509 | 313 |
// add vector size |
314 |
asm.addq(index, vectorSize); |
|
315 |
// check if vector load is in bounds |
|
316 |
asm.cmpq(index, arrayLength); |
|
317 |
// if load would be over bounds, set the load to the end of the array |
|
318 |
asm.cmovq(AMD64Assembler.ConditionFlag.Greater, index, arrayLength); |
|
52578 | 319 |
// compare |
55509 | 320 |
emitVectorCompare(asm, vectorCompareKind, findTwoConsecutive ? 2 : 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, true, false); |
321 |
// check if there are enough array slots remaining for the loop |
|
322 |
asm.cmpq(index, arrayLength); |
|
323 |
asm.jccb(AMD64Assembler.ConditionFlag.Less, singleVectorLoop); |
|
51436 | 324 |
} |
52578 | 325 |
|
55509 | 326 |
asm.movl(index, -1); |
327 |
asm.jmpb(ret); |
|
328 |
||
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
329 |
if (findTwoConsecutive) { |
55509 | 330 |
Label vectorFound2Done = new Label(); |
331 |
||
332 |
// vectorFound[0] and vectorFound[2] behave like the single-char case |
|
333 |
asm.bind(vectorFound[2]); |
|
334 |
// add static offset |
|
335 |
asm.subq(index, getResultIndexDelta(2)); |
|
336 |
asm.jmpb(vectorFound2Done); |
|
337 |
||
338 |
asm.bind(vectorFound[0]); |
|
339 |
// add static offset |
|
340 |
asm.subq(index, getResultIndexDelta(0)); |
|
341 |
asm.bind(vectorFound2Done); |
|
342 |
// find offset |
|
343 |
asm.bsfq(cmpResult[0], cmpResult[0]); |
|
344 |
if (charMode(valueKind)) { |
|
345 |
// convert byte offset to chars |
|
346 |
asm.shrl(cmpResult[0], 1); |
|
347 |
} |
|
348 |
// add offset to index |
|
349 |
asm.addq(index, cmpResult[0]); |
|
350 |
asm.jmpb(ret); |
|
351 |
||
352 |
Label minResult = new Label(); |
|
353 |
Label minResultDone = new Label(); |
|
354 |
||
355 |
// in vectorFound[1] and vectorFound[3], we have to check the results 0 and 2 as well |
|
356 |
if (nVectors > 2) { |
|
357 |
asm.bind(vectorFound[3]); |
|
358 |
// add offset |
|
359 |
asm.subq(index, getResultIndexDelta(3)); |
|
360 |
asm.jmpb(minResult); |
|
361 |
} |
|
362 |
||
363 |
asm.bind(vectorFound[1]); |
|
364 |
// add offset |
|
365 |
asm.subq(index, getResultIndexDelta(1)); |
|
366 |
||
367 |
asm.bind(minResult); |
|
368 |
// find offset 0 |
|
369 |
asm.bsfq(cmpResult[1], cmpResult[1]); |
|
370 |
// check if second result is also a match |
|
371 |
asm.testq(cmpResult[0], cmpResult[0]); |
|
372 |
asm.jccb(AMD64Assembler.ConditionFlag.Zero, minResultDone); |
|
373 |
// find offset 1 |
|
374 |
asm.bsfq(cmpResult[0], cmpResult[0]); |
|
375 |
asm.addq(cmpResult[0], valueKind.getByteCount()); |
|
376 |
// if first result is greater than second, replace it with the second result |
|
377 |
asm.cmpq(cmpResult[1], cmpResult[0]); |
|
378 |
asm.cmovq(AMD64Assembler.ConditionFlag.Greater, cmpResult[1], cmpResult[0]); |
|
379 |
asm.bind(minResultDone); |
|
380 |
if (charMode(valueKind)) { |
|
381 |
// convert byte offset to chars |
|
382 |
asm.shrl(cmpResult[1], 1); |
|
383 |
} |
|
384 |
// add offset to index |
|
385 |
asm.addq(index, cmpResult[1]); |
|
386 |
} else { |
|
387 |
Label end = new Label(); |
|
388 |
for (int i = 0; i < nVectors; i++) { |
|
389 |
asm.bind(vectorFound[i]); |
|
390 |
// add static offset |
|
391 |
asm.subq(index, getResultIndexDelta(i)); |
|
392 |
if (i < nVectors - 1) { |
|
393 |
asm.jmpb(end); |
|
394 |
} |
|
395 |
} |
|
396 |
asm.bind(end); |
|
397 |
// find offset |
|
398 |
asm.bsfq(cmpResult[0], cmpResult[0]); |
|
399 |
if (charMode(valueKind)) { |
|
400 |
// convert byte offset to chars |
|
401 |
asm.shrl(cmpResult[0], 1); |
|
402 |
} |
|
403 |
// add offset to index |
|
404 |
asm.addq(index, cmpResult[0]); |
|
52578 | 405 |
} |
55509 | 406 |
asm.bind(ret); |
407 |
} |
|
51436 | 408 |
|
55509 | 409 |
private boolean searchValuesOnStack(Value[] searchValue) { |
410 |
for (int i = 0; i < nValues; i++) { |
|
411 |
if (isStackSlot(searchValue[i])) { |
|
412 |
return true; |
|
52578 | 413 |
} |
51436 | 414 |
} |
55509 | 415 |
return false; |
416 |
} |
|
51436 | 417 |
|
55509 | 418 |
private int getResultIndexDelta(int i) { |
419 |
return (((findTwoConsecutive ? i / 2 : i) + 1) * (getVectorSize().getBytes() / valueKind.getByteCount())) + (findTwoConsecutive ? (i & 1) : 0); |
|
420 |
} |
|
51436 | 421 |
|
55509 | 422 |
private int getVectorOffset(int i) { |
423 |
return arrayBaseOffset - getResultIndexDelta(i) * valueKind.getByteCount(); |
|
424 |
} |
|
425 |
||
426 |
private void broadcastSearchValue(CompilationResultBuilder crb, AMD64MacroAssembler asm, Register dst, Value srcVal, Register tmpReg, Register tmpVector) { |
|
427 |
Register src = asRegOrTmpReg(crb, asm, srcVal, tmpReg); |
|
428 |
if (asm.supports(CPUFeature.AVX)) { |
|
429 |
VexMoveOp.VMOVD.emit(asm, AVXKind.AVXSize.DWORD, dst, src); |
|
52578 | 430 |
} else { |
55509 | 431 |
asm.movdl(dst, src); |
52578 | 432 |
} |
55509 | 433 |
emitBroadcast(asm, getComparisonKind(), dst, tmpVector, getVectorSize()); |
52578 | 434 |
} |
51436 | 435 |
|
55509 | 436 |
private static boolean isConstant(Value val) { |
437 |
assert !(val instanceof ConstantValue) || ((ConstantValue) val).isJavaConstant(); |
|
438 |
return val instanceof ConstantValue; |
|
439 |
} |
|
440 |
||
441 |
private static JavaConstant asConstant(Value val) { |
|
442 |
return ((ConstantValue) val).getJavaConstant(); |
|
443 |
} |
|
444 |
||
445 |
private static Register asRegOrTmpReg(CompilationResultBuilder crb, AMD64MacroAssembler asm, Value val, Register tmpReg) { |
|
446 |
if (isRegister(val)) { |
|
447 |
return asRegister(val); |
|
448 |
} else if (isStackSlot(val)) { |
|
449 |
asm.movl(tmpReg, (AMD64Address) crb.asAddress(val)); |
|
450 |
return tmpReg; |
|
451 |
} else { |
|
452 |
assert isConstant(val); |
|
453 |
asm.movl(tmpReg, asConstant(val).asInt()); |
|
454 |
return tmpReg; |
|
455 |
} |
|
51436 | 456 |
} |
457 |
||
458 |
private static void emitAlign(CompilationResultBuilder crb, AMD64MacroAssembler asm) { |
|
459 |
asm.align(crb.target.wordSize * 2); |
|
460 |
} |
|
461 |
||
462 |
/** |
|
52578 | 463 |
* Fills {@code vecDst} with copies of its lowest byte, word or dword. |
51436 | 464 |
*/ |
52578 | 465 |
private static void emitBroadcast(AMD64MacroAssembler asm, JavaKind kind, Register vecDst, Register vecTmp, AVXKind.AVXSize vectorSize) { |
466 |
switch (kind) { |
|
467 |
case Byte: |
|
468 |
if (asm.supports(CPUFeature.AVX2)) { |
|
469 |
VexRMOp.VPBROADCASTB.emit(asm, vectorSize, vecDst, vecDst); |
|
470 |
} else if (asm.supports(CPUFeature.AVX)) { |
|
471 |
VexRVMOp.VPXOR.emit(asm, vectorSize, vecTmp, vecTmp, vecTmp); |
|
472 |
VexRVMOp.VPSHUFB.emit(asm, vectorSize, vecDst, vecDst, vecTmp); |
|
473 |
} else if (asm.supports(CPUFeature.SSSE3)) { |
|
474 |
asm.pxor(vecTmp, vecTmp); |
|
475 |
asm.pshufb(vecDst, vecTmp); |
|
476 |
} else { // SSE2 |
|
477 |
asm.punpcklbw(vecDst, vecDst); |
|
478 |
asm.punpcklbw(vecDst, vecDst); |
|
479 |
asm.pshufd(vecDst, vecDst, 0); |
|
480 |
} |
|
481 |
break; |
|
482 |
case Short: |
|
483 |
case Char: |
|
484 |
if (asm.supports(CPUFeature.AVX2)) { |
|
485 |
VexRMOp.VPBROADCASTW.emit(asm, vectorSize, vecDst, vecDst); |
|
486 |
} else if (asm.supports(CPUFeature.AVX)) { |
|
487 |
VexRMIOp.VPSHUFLW.emit(asm, vectorSize, vecDst, vecDst, 0); |
|
488 |
VexRMIOp.VPSHUFD.emit(asm, vectorSize, vecDst, vecDst, 0); |
|
489 |
} else { // SSE |
|
490 |
asm.pshuflw(vecDst, vecDst, 0); |
|
491 |
asm.pshufd(vecDst, vecDst, 0); |
|
492 |
} |
|
493 |
break; |
|
494 |
case Int: |
|
495 |
if (asm.supports(CPUFeature.AVX2)) { |
|
496 |
VexRMOp.VPBROADCASTD.emit(asm, vectorSize, vecDst, vecDst); |
|
497 |
} else if (asm.supports(CPUFeature.AVX)) { |
|
498 |
VexRMIOp.VPSHUFD.emit(asm, vectorSize, vecDst, vecDst, 0); |
|
499 |
} else { // SSE |
|
500 |
asm.pshufd(vecDst, vecDst, 0); |
|
501 |
} |
|
502 |
break; |
|
503 |
default: |
|
504 |
throw new UnsupportedOperationException(); |
|
51436 | 505 |
} |
506 |
} |
|
507 |
||
55509 | 508 |
private void emitVectorCompare(AMD64MacroAssembler asm, |
52578 | 509 |
JavaKind kind, |
510 |
int nVectors, |
|
51436 | 511 |
Register arrayPtr, |
55509 | 512 |
Register index, |
52578 | 513 |
Register[] vecCmp, |
514 |
Register[] vecArray, |
|
515 |
Register[] cmpResult, |
|
516 |
Label[] vectorFound, |
|
55509 | 517 |
boolean shortJmp, |
51436 | 518 |
boolean alignedLoad) { |
519 |
// load array contents into vectors |
|
55509 | 520 |
for (int i = 0; i < nVectors; i++) { |
521 |
int base = i * nValues; |
|
522 |
for (int j = 0; j < nValues; j++) { |
|
523 |
emitArrayLoad(asm, getVectorSize(), vecArray[base + j], arrayPtr, index, getVectorOffset(nVectors - (i + 1)), alignedLoad); |
|
52578 | 524 |
} |
525 |
} |
|
51436 | 526 |
// compare all loaded bytes to the search value. |
527 |
// matching bytes are set to 0xff, non-matching bytes are set to 0x00. |
|
55509 | 528 |
if (!findTwoConsecutive) { |
529 |
for (int i = 0; i < nVectors; i++) { |
|
530 |
int base = i * nValues; |
|
531 |
for (int j = 0; j < nValues; j++) { |
|
532 |
emitVectorCompareInst(asm, kind, getVectorSize(), vecArray[base + j], vecCmp[j]); |
|
533 |
if ((j & 1) == 1) { |
|
534 |
emitPOR(asm, getVectorSize(), vecArray[base + j - 1], vecArray[base + j]); |
|
535 |
} |
|
536 |
} |
|
537 |
if (nValues > 2) { |
|
538 |
emitPOR(asm, getVectorSize(), vecArray[base], vecArray[base + 2]); |
|
539 |
} |
|
540 |
emitMOVMSK(asm, getVectorSize(), cmpResult[0], vecArray[base]); |
|
541 |
emitJnz(asm, cmpResult[0], vectorFound[nVectors - (i + 1)], shortJmp); |
|
52578 | 542 |
} |
55509 | 543 |
} else { |
544 |
for (int i = 0; i < nVectors; i += 2) { |
|
545 |
emitVectorCompareInst(asm, kind, getVectorSize(), vecArray[i], vecCmp[0]); |
|
546 |
emitVectorCompareInst(asm, kind, getVectorSize(), vecArray[i + 1], vecCmp[0]); |
|
547 |
emitMOVMSK(asm, getVectorSize(), cmpResult[1], vecArray[i]); |
|
548 |
emitMOVMSK(asm, getVectorSize(), cmpResult[0], vecArray[i + 1]); |
|
549 |
emitJnz(asm, cmpResult[1], vectorFound[nVectors - (i + 1)], shortJmp); |
|
550 |
emitJnz(asm, cmpResult[0], vectorFound[nVectors - (i + 2)], shortJmp); |
|
52578 | 551 |
} |
552 |
} |
|
51436 | 553 |
} |
554 |
||
55509 | 555 |
private static void emitJnz(AMD64MacroAssembler asm, Register cond, Label tgt, boolean shortJmp) { |
556 |
asm.testl(cond, cond); |
|
557 |
if (shortJmp) { |
|
558 |
asm.jccb(AMD64Assembler.ConditionFlag.NotZero, tgt); |
|
559 |
} else { |
|
560 |
asm.jcc(AMD64Assembler.ConditionFlag.NotZero, tgt); |
|
51436 | 561 |
} |
562 |
} |
|
563 |
||
55509 | 564 |
private void emitArrayLoad(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register vecDst, Register arrayPtr, Register index, int offset, boolean alignedLoad) { |
565 |
AMD64Address src = new AMD64Address(arrayPtr, index, arrayIndexScale, offset); |
|
51436 | 566 |
if (asm.supports(CPUFeature.AVX)) { |
58299 | 567 |
VexMoveOp loadOp = alignedLoad ? VexMoveOp.VMOVDQA32 : VexMoveOp.VMOVDQU32; |
51436 | 568 |
loadOp.emit(asm, vectorSize, vecDst, src); |
569 |
} else { |
|
570 |
// SSE |
|
571 |
asm.movdqu(vecDst, src); |
|
572 |
} |
|
573 |
} |
|
574 |
||
52578 | 575 |
/** |
576 |
* Compares all packed bytes/words/dwords in {@code vecArray} to {@code vecCmp}. Matching values |
|
577 |
* are set to all ones (0xff, 0xffff, ...), non-matching values are set to zero. |
|
578 |
*/ |
|
579 |
private static void emitVectorCompareInst(AMD64MacroAssembler asm, JavaKind kind, AVXKind.AVXSize vectorSize, Register vecArray, Register vecCmp) { |
|
580 |
switch (kind) { |
|
581 |
case Byte: |
|
582 |
if (asm.supports(CPUFeature.AVX)) { |
|
583 |
VexRVMOp.VPCMPEQB.emit(asm, vectorSize, vecArray, vecCmp, vecArray); |
|
584 |
} else { // SSE |
|
585 |
asm.pcmpeqb(vecArray, vecCmp); |
|
586 |
} |
|
587 |
break; |
|
588 |
case Short: |
|
589 |
case Char: |
|
590 |
if (asm.supports(CPUFeature.AVX)) { |
|
591 |
VexRVMOp.VPCMPEQW.emit(asm, vectorSize, vecArray, vecCmp, vecArray); |
|
592 |
} else { // SSE |
|
593 |
asm.pcmpeqw(vecArray, vecCmp); |
|
594 |
} |
|
595 |
break; |
|
596 |
case Int: |
|
597 |
if (asm.supports(CPUFeature.AVX)) { |
|
598 |
VexRVMOp.VPCMPEQD.emit(asm, vectorSize, vecArray, vecCmp, vecArray); |
|
599 |
} else { // SSE |
|
600 |
asm.pcmpeqd(vecArray, vecCmp); |
|
601 |
} |
|
602 |
break; |
|
603 |
default: |
|
604 |
throw new UnsupportedOperationException(); |
|
51436 | 605 |
} |
606 |
} |
|
607 |
||
55509 | 608 |
private static void emitPOR(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register dst, Register vecSrc) { |
609 |
if (asm.supports(CPUFeature.AVX)) { |
|
610 |
VexRVMOp.VPOR.emit(asm, vectorSize, dst, dst, vecSrc); |
|
611 |
} else { |
|
612 |
// SSE |
|
613 |
asm.por(dst, vecSrc); |
|
614 |
} |
|
615 |
} |
|
616 |
||
51436 | 617 |
private static void emitMOVMSK(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register dst, Register vecSrc) { |
618 |
if (asm.supports(CPUFeature.AVX)) { |
|
619 |
VexRMOp.VPMOVMSKB.emit(asm, vectorSize, dst, vecSrc); |
|
620 |
} else { |
|
621 |
// SSE |
|
622 |
asm.pmovmskb(dst, vecSrc); |
|
623 |
} |
|
624 |
} |
|
625 |
||
55509 | 626 |
private static OperandSize getOpSize(JavaKind kind) { |
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
627 |
switch (kind) { |
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
628 |
case Byte: |
55509 | 629 |
return OperandSize.BYTE; |
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
630 |
case Short: |
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
631 |
case Char: |
55509 | 632 |
return OperandSize.WORD; |
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
633 |
case Int: |
55509 | 634 |
return OperandSize.DWORD; |
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
635 |
default: |
55509 | 636 |
return OperandSize.QWORD; |
53290
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
637 |
} |
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
638 |
} |
b685bc048276
8215313: [AOT] java/lang/String/Split.java fails with AOTed java.base
dnsimon
parents:
52578
diff
changeset
|
639 |
|
51436 | 640 |
private static boolean supportsAVX2(LIRGeneratorTool tool) { |
641 |
return supports(tool, CPUFeature.AVX2); |
|
642 |
} |
|
643 |
||
644 |
private static boolean supports(LIRGeneratorTool tool, CPUFeature cpuFeature) { |
|
645 |
return ((AMD64) tool.target().arch).getFeatures().contains(cpuFeature); |
|
646 |
} |
|
59095 | 647 |
|
648 |
@Override |
|
649 |
public boolean needsClearUpperVectorRegisters() { |
|
650 |
return true; |
|
651 |
} |
|
51436 | 652 |
} |