8217519: Improve RegMask population count calculation
Reviewed-by: thartmann, neliasso, kvn
--- a/src/hotspot/share/opto/regmask.cpp Wed Jan 23 10:23:05 2019 +0100
+++ b/src/hotspot/share/opto/regmask.cpp Wed Jan 23 10:01:21 2019 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
#include "opto/matcher.hpp"
#include "opto/node.hpp"
#include "opto/regmask.hpp"
+#include "utilities/population_count.hpp"
#define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */
@@ -62,7 +63,7 @@
}
// Find highest 1, or return 32 if empty
-int find_hihghest_bit( uint32_t mask ) {
+int find_highest_bit( uint32_t mask ) {
int n = 0;
if( mask > 0xffff ) {
mask >>= 16;
@@ -140,21 +141,6 @@
return 1;
}
-//------------------------------find_first_pair--------------------------------
-// Find the lowest-numbered register pair in the mask. Return the
-// HIGHEST register number in the pair, or BAD if no pairs.
-OptoReg::Name RegMask::find_first_pair() const {
- verify_pairs();
- for( int i = 0; i < RM_SIZE; i++ ) {
- if( _A[i] ) { // Found some bits
- int bit = _A[i] & -_A[i]; // Extract low bit
- // Convert to bit number, return hi bit in pair
- return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+1);
- }
- }
- return OptoReg::Bad;
-}
-
//------------------------------ClearToPairs-----------------------------------
// Clear out partial bits; leave only bit pairs
void RegMask::clear_to_pairs() {
@@ -167,18 +153,6 @@
verify_pairs();
}
-//------------------------------SmearToPairs-----------------------------------
-// Smear out partial bits; leave only bit pairs
-void RegMask::smear_to_pairs() {
- for( int i = 0; i < RM_SIZE; i++ ) {
- int bits = _A[i];
- bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
- bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
- _A[i] = bits;
- }
- verify_pairs();
-}
-
//------------------------------is_aligned_pairs-------------------------------
bool RegMask::is_aligned_pairs() const {
// Assert that the register mask contains only bit pairs.
@@ -389,14 +363,10 @@
//------------------------------Size-------------------------------------------
// Compute size of register mask in bits
uint RegMask::Size() const {
- extern uint8_t bitsInByte[BITS_IN_BYTE_ARRAY_SIZE];
uint sum = 0;
- for( int i = 0; i < RM_SIZE; i++ )
- sum +=
- bitsInByte[(_A[i]>>24) & 0xff] +
- bitsInByte[(_A[i]>>16) & 0xff] +
- bitsInByte[(_A[i]>> 8) & 0xff] +
- bitsInByte[ _A[i] & 0xff];
+ for (int i = 0; i < RM_SIZE; i++) {
+ sum += population_count(_A[i]); // add the number of set bits in this _A[] element
+ }
return sum;
}
--- a/src/hotspot/share/opto/regmask.hpp Wed Jan 23 10:23:05 2019 +0100
+++ b/src/hotspot/share/opto/regmask.hpp Wed Jan 23 10:01:21 2019 +0100
@@ -48,7 +48,7 @@
// Find lowest 1, or return 32 if empty
int find_lowest_bit( uint32_t mask );
// Find highest 1, or return 32 if empty
-int find_hihghest_bit( uint32_t mask );
+int find_highest_bit( uint32_t mask );
//------------------------------RegMask----------------------------------------
// The ADL file describes how to print the machine-specific registers, as well
@@ -170,18 +170,11 @@
FORALL_BODY
# undef BODY
{ base = OptoReg::Bad; bits = 1<<0; }
- return OptoReg::Name(base + find_hihghest_bit(bits));
+ return OptoReg::Name(base + find_highest_bit(bits));
}
- // Find the lowest-numbered register pair in the mask. Return the
- // HIGHEST register number in the pair, or BAD if no pairs.
- // Assert that the mask contains only bit pairs.
- OptoReg::Name find_first_pair() const;
-
// Clear out partial bits; leave only aligned adjacent bit pairs.
void clear_to_pairs();
- // Smear out partial bits; leave only aligned adjacent bit pairs.
- void smear_to_pairs();
// Verify that the mask contains only aligned adjacent bit pairs
void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
// Test that the mask contains only aligned adjacent bit pairs
@@ -218,9 +211,6 @@
// Test that the mask contains only aligned adjacent bit sets
bool is_aligned_sets(const int size) const;
- // mask is a set of misaligned registers
- bool is_misaligned_set(int size) const { return (int)Size()==size && !is_aligned_sets(size);}
-
// Test for a single adjacent set
int is_bound_set(const int size) const;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/utilities/population_count.hpp Wed Jan 23 10:01:21 2019 +0100
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_UTILITIES_POPULATION_COUNT_HPP
+#define SHARE_UTILITIES_POPULATION_COUNT_HPP
+
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Returns the population count of x, i.e., the number of bits set in x.
+//
+// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2.
+//
+// Ideally this should be dispatched per platform to use optimized
+// instructions when available, such as POPCNT on modern x86/AMD. Our builds
+// still target and support older architectures that might lack support for
+// these, however. For example, with current build configurations,
+// __builtin_popcount(x) would generate a call to a similar but slower 64-bit
+// version of this 32-bit implementation.
+static uint32_t population_count(uint32_t x) { // NOTE(review): 'static' in a header gives each including file its own copy; consider 'inline'
+ x -= ((x >> 1) & 0x55555555); // every 2-bit field now holds the count of its own two bits
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333); // add neighbouring 2-bit counts into 4-bit fields
+ return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; // form per-byte counts, sum all four into the top byte, extract it
+}
+
+#endif // SHARE_UTILITIES_POPULATION_COUNT_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/gtest/utilities/test_population_count.cpp Wed Jan 23 10:01:21 2019 +0100
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "libadt/vectset.hpp"
+#include "runtime/os.hpp"
+#include "utilities/population_count.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "unittest.hpp"
+
+
+TEST(population_count, sparse) {
+ extern uint8_t bitsInByte[BITS_IN_BYTE_ARRAY_SIZE];
+ // Step through the entire input range from a random starting point,
+ // verify population_count return values against the lookup table
+ // approach used historically
+ uint32_t step = 4711; // stride between sampled values; only one input per stride is checked
+ for (uint32_t value = os::random() % step; value < UINT_MAX - step; value += step) { // bound keeps value += step from wrapping around
+ uint32_t lookup = bitsInByte[(value >> 24) & 0xff] +
+ bitsInByte[(value >> 16) & 0xff] +
+ bitsInByte[(value >> 8) & 0xff] +
+ bitsInByte[ value & 0xff];
+
+ EXPECT_EQ(lookup, population_count(value))
+ << "value = " << value;
+ }
+
+ // Test a few edge cases: no bits set, a single low bit, all bits set, all bits but the lowest
+ EXPECT_EQ(0u, population_count(0u))
+ << "value = " << 0;
+ EXPECT_EQ(1u, population_count(1u))
+ << "value = " << 1;
+ EXPECT_EQ(1u, population_count(2u))
+ << "value = " << 2;
+ EXPECT_EQ(32u, population_count(UINT_MAX))
+ << "value = " << UINT_MAX;
+ EXPECT_EQ(31u, population_count(UINT_MAX - 1))
+ << "value = " << (UINT_MAX - 1);
+}