--- a/hotspot/src/cpu/x86/vm/x86_32.ad Wed Feb 03 12:28:30 2010 -0800
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad Wed Feb 03 15:56:37 2010 -0800
@@ -235,6 +235,11 @@
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
+source_hpp %{
+// Declared here because the instruct predicates that call it must be visible to the DFA in dfa_x86_32.cpp.
+extern bool is_operand_hi32_zero(Node* n);
+%}
+
source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand
@@ -1485,6 +1490,21 @@
return EBP_REG_mask;
}
+// Tells whether the upper 32 bits of the long value produced by n are statically known to be zero:
+// n is either an Op_LoadUI2L node, or an Op_AndL node whose second input is a constant with no bit set above bit 31.
+bool is_operand_hi32_zero(Node* n) {
+  switch (n->Opcode()) {
+    case Op_LoadUI2L:
+      return true;
+    case Op_AndL: {
+      Node* mask = n->in(2);
+      return mask->is_Con() && (mask->get_long() & 0xFFFFFFFF00000000LL) == 0LL;
+    }
+    default:
+      return false;
+  }
+}
+
%}
//----------ENCODING BLOCK-----------------------------------------------------
@@ -8599,6 +8619,63 @@
ins_pipe( pipe_slow );
%}
+// Multiply Register Long where the left operand's (dst, node input 1) high 32 bits are known to be zero
+instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+ predicate(is_operand_hi32_zero(n->in(1)));
+ match(Set dst (MulL dst src));
+ effect(KILL cr, TEMP tmp);
+ ins_cost(2*100+2*400);
+// Basic idea, with x = dst (EDX:EAX) and y = src: lo(result) = lo(x_lo * y_lo)
+// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi); the lo(x_hi * y_lo) term is omitted because x_hi = 0
+ format %{ "MOV $tmp,$src.hi\n\t"
+ "IMUL $tmp,EAX\n\t"
+ "MUL EDX:EAX,$src.lo\n\t"
+ "ADD EDX,$tmp" %}
+ ins_encode %{
+ __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); // tmp = y_hi
+ __ imull($tmp$$Register, rax); // tmp = lo(y_hi * x_lo); must read EAX before MUL overwrites it
+ __ mull($src$$Register); // EDX:EAX = x_lo * y_lo
+ __ addl(rdx, $tmp$$Register); // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi)
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Multiply Register Long where the right operand's (src, node input 2) high 32 bits are known to be zero
+instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+ predicate(is_operand_hi32_zero(n->in(2)));
+ match(Set dst (MulL dst src));
+ effect(KILL cr, TEMP tmp);
+ ins_cost(2*100+2*400);
+// Basic idea, with x = dst (EDX:EAX) and y = src: lo(result) = lo(x_lo * y_lo)
+// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo); the lo(x_lo * y_hi) term is omitted because y_hi = 0
+ format %{ "MOV $tmp,$src.lo\n\t"
+ "IMUL $tmp,EDX\n\t"
+ "MUL EDX:EAX,$src.lo\n\t"
+ "ADD EDX,$tmp" %}
+ ins_encode %{
+ __ movl($tmp$$Register, $src$$Register); // tmp = y_lo
+ __ imull($tmp$$Register, rdx); // tmp = lo(y_lo * x_hi); must read EDX before MUL overwrites it
+ __ mull($src$$Register); // EDX:EAX = x_lo * y_lo
+ __ addl(rdx, $tmp$$Register); // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo)
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Multiply Register Long where both the left (dst) and the right (src) operands' high 32 bits are known to be zero
+instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
+ predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
+ match(Set dst (MulL dst src));
+ effect(KILL cr);
+ ins_cost(1*400); // below the 2*100+2*400 of the one-sided rules; presumably so this rule wins when both predicates hold
+// Basic idea: lo(result) = lo(x_lo * y_lo) and hi(result) = hi(x_lo * y_lo); both cross terms
+// lo(x_hi * y_lo) and lo(x_lo * y_hi) are zero because x_hi = 0 and y_hi = 0, so no temporary is needed
+ format %{ "MUL EDX:EAX,$src.lo\n\t" %}
+ ins_encode %{
+ __ mull($src$$Register); // EDX:EAX = x_lo * y_lo
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
match(Set dst (MulL dst src));
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6921969/TestMultiplyLongHiZero.java Wed Feb 03 15:56:37 2010 -0800
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2010 Google, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 6921969
+ * @summary Tests shorter long multiply sequences when the high 32 bits of long operands are known to be zero on x86_32
+ * @run main/othervm -Xbatch -XX:-Inline -XX:CompileOnly=.testNormal,.testLeftOptimized,.testRightOptimized,.testOptimized,.testLeftOptimized_LoadUI2L,.testRightOptimized_LoadUI2L,.testOptimized_LoadUI2L TestMultiplyLongHiZero
+ */
+
+// This test must run without any command line arguments.
+
+public class TestMultiplyLongHiZero { // checks long products of int factors, with and without a low-32-bit mask, against expected constants
+ // Throws RuntimeException unless both optimizedProduct and a freshly computed leftFactor * rightFactor equal constantProduct.
+ private static void check(long leftFactor, long rightFactor, long optimizedProduct, long constantProduct) {
+ long normalProduct = leftFactor * rightFactor; // unaffected by the new optimization
+ if (optimizedProduct != constantProduct || normalProduct != constantProduct) {
+ throw new RuntimeException("Not all three products are equal: " +
+ Long.toHexString(normalProduct) + ", " +
+ Long.toHexString(optimizedProduct) + ", " +
+ Long.toHexString(constantProduct));
+ }
+ }
+ // Returns args[0] parsed as an int when an argument is present and parses cleanly; otherwise returns the default v.
+ private static int initInt(String[] args, int v) {
+ if (args.length > 0) {
+ try {
+ return Integer.valueOf(args[0]);
+ } catch (NumberFormatException e) { } // unparsable argument: fall through to the default
+ }
+ return v;
+ }
+ // Only the low 32 bits set: and-ing an int with it widens the int to long and then clears the upper 32 bits.
+ private static final long mask32 = 0x00000000FFFFFFFFL;
+ // Baseline variant: both factors are plain int-to-long casts, neither is masked.
+ private static void testNormal(int leftFactor, int rightFactor, long constantProduct) {
+ check((long) leftFactor,
+ (long) rightFactor,
+ (long) leftFactor * (long) rightFactor, // unaffected by the new optimization
+ constantProduct);
+ }
+ // Left factor is masked with mask32 inside the multiply expression; right factor is a plain cast.
+ private static void testLeftOptimized(int leftFactor, int rightFactor, long constantProduct) {
+ check((leftFactor & mask32),
+ (long) rightFactor,
+ (leftFactor & mask32) * (long) rightFactor, // left factor optimized
+ constantProduct);
+ }
+ // Right factor is masked with mask32 inside the multiply expression; left factor is a plain cast.
+ private static void testRightOptimized(int leftFactor, int rightFactor, long constantProduct) {
+ check((long) leftFactor,
+ (rightFactor & mask32),
+ (long) leftFactor * (rightFactor & mask32), // right factor optimized
+ constantProduct);
+ }
+ // Both factors are masked with mask32 inside the multiply expression.
+ private static void testOptimized(int leftFactor, int rightFactor, long constantProduct) {
+ check((leftFactor & mask32),
+ (rightFactor & mask32),
+ (leftFactor & mask32) * (rightFactor & mask32), // both factors optimized
+ constantProduct);
+ }
+ // Like testLeftOptimized, but the masked left factor is read from factors[0] (presumably to exercise the LoadUI2L case - confirm).
+ private static void testLeftOptimized_LoadUI2L(int leftFactor, int rightFactor, long constantProduct, int[] factors) {
+ check((leftFactor & mask32),
+ (long) rightFactor,
+ (factors[0] & mask32) * (long) rightFactor, // left factor optimized
+ constantProduct);
+ }
+ // Like testRightOptimized, but the masked right factor is read from factors[1].
+ private static void testRightOptimized_LoadUI2L(int leftFactor, int rightFactor, long constantProduct, int[] factors) {
+ check((long) leftFactor,
+ (rightFactor & mask32),
+ (long) leftFactor * (factors[1] & mask32), // right factor optimized
+ constantProduct);
+ }
+ // Like testOptimized, but both masked factors are read from the factors array.
+ private static void testOptimized_LoadUI2L(int leftFactor, int rightFactor, long constantProduct, int[] factors) {
+ check((leftFactor & mask32),
+ (rightFactor & mask32),
+ (factors[0] & mask32) * (factors[1] & mask32), // both factors optimized
+ constantProduct);
+ }
+ // Runs all seven variants on one factor pair, each against the expected product for its masking pattern.
+ private static void test(int leftFactor, int rightFactor,
+ long normalConstantProduct,
+ long leftOptimizedConstantProduct,
+ long rightOptimizedConstantProduct,
+ long optimizedConstantProduct) {
+ int[] factors = new int[2]; // same pair, stored in an array for the *_LoadUI2L variants
+ factors[0] = leftFactor;
+ factors[1] = rightFactor;
+ testNormal(leftFactor, rightFactor, normalConstantProduct);
+ testLeftOptimized(leftFactor, rightFactor, leftOptimizedConstantProduct);
+ testRightOptimized(leftFactor, rightFactor, rightOptimizedConstantProduct);
+ testOptimized(leftFactor, rightFactor, optimizedConstantProduct);
+ testLeftOptimized_LoadUI2L(leftFactor, rightFactor, leftOptimizedConstantProduct, factors);
+ testRightOptimized_LoadUI2L(leftFactor, rightFactor, rightOptimizedConstantProduct, factors);
+ testOptimized_LoadUI2L(leftFactor, rightFactor, optimizedConstantProduct, factors);
+ }
+ // Expected products below are only valid for the default factors 1, 3, -1 and 0x7FFFFFFF, i.e. when no argument is given.
+ public static void main(String[] args) {
+ for (int i = 0; i < 100000; ++i) { // Trigger compilation
+ int i0 = initInt(args, 1);
+ int i1 = initInt(args, 3);
+ int i2 = initInt(args, -1);
+ int i3 = initInt(args, 0x7FFFFFFF);
+ test(i0, i1, 3L, 3L, 3L, 3L);
+ test(i0, i2, -1L, -1L, 0xFFFFFFFFL, 0xFFFFFFFFL); // masking the right factor -1 yields 0xFFFFFFFF
+ test(i0, i3, 0x7FFFFFFFL, 0x7FFFFFFFL, 0x7FFFFFFFL, 0x7FFFFFFFL);
+ test(i1, i2, -3L, -3L, 0x2FFFFFFFDL, 0x2FFFFFFFDL);
+ test(i1, i3, 0x17FFFFFFDL, 0x17FFFFFFDL, 0x17FFFFFFDL, 0x17FFFFFFDL);
+ test(i2, i3, 0xFFFFFFFF80000001L, 0x7FFFFFFE80000001L, // masking the left factor -1 yields 0xFFFFFFFF
+ 0xFFFFFFFF80000001L, 0x7FFFFFFE80000001L);
+ }
+ }
+}