8220159: Optimize various RegMask operations by introducing watermarks
authorredestad
Tue, 05 Mar 2019 16:39:18 +0100
changeset 54021 6347ffe2c3c7
parent 54020 c112c2d5a856
child 54022 ff399127078a
8220159: Optimize various RegMask operations by introducing watermarks Reviewed-by: neliasso, thartmann
src/hotspot/share/opto/chaitin.hpp
src/hotspot/share/opto/regmask.cpp
src/hotspot/share/opto/regmask.hpp
--- a/src/hotspot/share/opto/chaitin.hpp	Thu Mar 07 07:19:49 2019 -0500
+++ b/src/hotspot/share/opto/chaitin.hpp	Tue Mar 05 16:39:18 2019 +0100
@@ -114,9 +114,9 @@
     _msize_valid=1;
     if (_is_vector) {
       assert(!_fat_proj, "sanity");
-      _mask.verify_sets(_num_regs);
+      assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
     } else if (_num_regs == 2 && !_fat_proj) {
-      _mask.verify_pairs();
+      assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs");
     }
 #endif
   }
--- a/src/hotspot/share/opto/regmask.cpp	Thu Mar 07 07:19:49 2019 -0500
+++ b/src/hotspot/share/opto/regmask.cpp	Tue Mar 05 16:39:18 2019 +0100
@@ -82,69 +82,62 @@
     return 1;
 }
 
-//------------------------------ClearToPairs-----------------------------------
 // Clear out partial bits; leave only bit pairs
 void RegMask::clear_to_pairs() {
-  for( int i = 0; i < RM_SIZE; i++ ) {
+  assert(valid_watermarks(), "sanity");
+  for (int i = _lwm; i <= _hwm; i++) {
     int bits = _A[i];
     bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
     bits |= (bits>>1);          // Smear 1 hi-bit into a pair
     _A[i] = bits;
   }
-  verify_pairs();
+  assert(is_aligned_pairs(), "mask is not aligned, adjacent pairs");
 }
 
-//------------------------------is_aligned_pairs-------------------------------
+bool RegMask::is_misaligned_pair() const {
+  return Size() == 2 && !is_aligned_pairs();
+}
+
 bool RegMask::is_aligned_pairs() const {
   // Assert that the register mask contains only bit pairs.
-  for( int i = 0; i < RM_SIZE; i++ ) {
+  assert(valid_watermarks(), "sanity");
+  for (int i = _lwm; i <= _hwm; i++) {
     int bits = _A[i];
-    while( bits ) {             // Check bits for pairing
+    while (bits) {              // Check bits for pairing
       int bit = bits & -bits;   // Extract low bit
       // Low bit is not odd means its mis-aligned.
-      if( (bit & 0x55555555) == 0 ) return false;
+      if ((bit & 0x55555555) == 0) return false;
       bits -= bit;              // Remove bit from mask
       // Check for aligned adjacent bit
-      if( (bits & (bit<<1)) == 0 ) return false;
+      if ((bits & (bit<<1)) == 0) return false;
       bits -= (bit<<1);         // Remove other halve of pair
     }
   }
   return true;
 }
 
-//------------------------------is_bound1--------------------------------------
 // Return TRUE if the mask contains a single bit
-int RegMask::is_bound1() const {
-  if( is_AllStack() ) return false;
-  int bit = -1;                 // Set to hold the one bit allowed
-  for( int i = 0; i < RM_SIZE; i++ ) {
-    if( _A[i] ) {               // Found some bits
-      if( bit != -1 ) return false; // Already had bits, so fail
-      bit = _A[i] & -_A[i];     // Extract 1 bit from mask
-      if( bit != _A[i] ) return false; // Found many bits, so fail
-    }
-  }
-  // True for both the empty mask and for a single bit
-  return true;
+bool RegMask::is_bound1() const {
+  if (is_AllStack()) return false;
+  return Size() == 1;
 }
 
-//------------------------------is_bound2--------------------------------------
 // Return TRUE if the mask contains an adjacent pair of bits and no other bits.
-int RegMask::is_bound_pair() const {
-  if( is_AllStack() ) return false;
-
+bool RegMask::is_bound_pair() const {
+  if (is_AllStack()) return false;
   int bit = -1;                 // Set to hold the one bit allowed
-  for( int i = 0; i < RM_SIZE; i++ ) {
-    if( _A[i] ) {               // Found some bits
-      if( bit != -1 ) return false; // Already had bits, so fail
-      bit = _A[i] & -(_A[i]);   // Extract 1 bit from mask
-      if( (bit << 1) != 0 ) {   // Bit pair stays in same word?
-        if( (bit | (bit<<1)) != _A[i] )
-          return false;         // Require adjacent bit pair and no more bits
-      } else {                  // Else its a split-pair case
-        if( bit != _A[i] ) return false; // Found many bits, so fail
-        i++;                    // Skip iteration forward
-        if( i >= RM_SIZE || _A[i] != 1 )
+  assert(valid_watermarks(), "sanity");
+  for (int i = _lwm; i <= _hwm; i++) {
+    if (_A[i]) {                   // Found some bits
+      if (bit != -1) return false; // Already had bits, so fail
+      bit = _A[i] & -(_A[i]);      // Extract 1 bit from mask
+      if ((bit << 1) != 0) {       // Bit pair stays in same word?
+        if ((bit | (bit<<1)) != _A[i])
+          return false;            // Require adjacent bit pair and no more bits
+      } else {                     // Else its a split-pair case
+        if(bit != _A[i]) return false; // Found many bits, so fail
+        i++;                       // Skip iteration forward
+        if (i > _hwm || _A[i] != 1)
           return false; // Require 1 lo bit in next word
       }
     }
@@ -153,32 +146,43 @@
   return true;
 }
 
+// Test for a single adjacent set of ideal register's size.
+bool RegMask::is_bound(uint ireg) const {
+  if (is_vector(ireg)) {
+    if (is_bound_set(num_registers(ireg)))
+      return true;
+  } else if (is_bound1() || is_bound_pair()) {
+    return true;
+  }
+  return false;
+}
+
 // only indicies of power 2 are accessed, so index 3 is only filled in for storage.
 static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 };
-//------------------------------find_first_set---------------------------------
+
 // Find the lowest-numbered register set in the mask.  Return the
 // HIGHEST register number in the set, or BAD if no sets.
 // Works also for size 1.
 OptoReg::Name RegMask::find_first_set(const int size) const {
-  verify_sets(size);
-  for (int i = 0; i < RM_SIZE; i++) {
+  assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
+  assert(valid_watermarks(), "sanity");
+  for (int i = _lwm; i <= _hwm; i++) {
     if (_A[i]) {                // Found some bits
-      int bit = _A[i] & -_A[i]; // Extract low bit
       // Convert to bit number, return hi bit in pair
-      return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
+      return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(_A[i]) + (size - 1));
     }
   }
   return OptoReg::Bad;
 }
 
-//------------------------------clear_to_sets----------------------------------
 // Clear out partial bits; leave only aligned adjacent bit pairs
 void RegMask::clear_to_sets(const int size) {
   if (size == 1) return;
   assert(2 <= size && size <= 16, "update low bits table");
   assert(is_power_of_2(size), "sanity");
+  assert(valid_watermarks(), "sanity");
   int low_bits_mask = low_bits[size>>2];
-  for (int i = 0; i < RM_SIZE; i++) {
+  for (int i = _lwm; i <= _hwm; i++) {
     int bits = _A[i];
     int sets = (bits & low_bits_mask);
     for (int j = 1; j < size; j++) {
@@ -196,17 +200,17 @@
     }
     _A[i] = sets;
   }
-  verify_sets(size);
+  assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
 }
 
-//------------------------------smear_to_sets----------------------------------
 // Smear out partial bits to aligned adjacent bit sets
 void RegMask::smear_to_sets(const int size) {
   if (size == 1) return;
   assert(2 <= size && size <= 16, "update low bits table");
   assert(is_power_of_2(size), "sanity");
+  assert(valid_watermarks(), "sanity");
   int low_bits_mask = low_bits[size>>2];
-  for (int i = 0; i < RM_SIZE; i++) {
+  for (int i = _lwm; i <= _hwm; i++) {
     int bits = _A[i];
     int sets = 0;
     for (int j = 0; j < size; j++) {
@@ -225,17 +229,17 @@
     }
     _A[i] = sets;
   }
-  verify_sets(size);
+  assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
 }
 
-//------------------------------is_aligned_set--------------------------------
+// Assert that the register mask contains only bit sets.
 bool RegMask::is_aligned_sets(const int size) const {
   if (size == 1) return true;
   assert(2 <= size && size <= 16, "update low bits table");
   assert(is_power_of_2(size), "sanity");
   int low_bits_mask = low_bits[size>>2];
-  // Assert that the register mask contains only bit sets.
-  for (int i = 0; i < RM_SIZE; i++) {
+  assert(valid_watermarks(), "sanity");
+  for (int i = _lwm; i <= _hwm; i++) {
     int bits = _A[i];
     while (bits) {              // Check bits for pairing
       int bit = bits & -bits;   // Extract low bit
@@ -252,14 +256,14 @@
   return true;
 }
 
-//------------------------------is_bound_set-----------------------------------
 // Return TRUE if the mask contains one adjacent set of bits and no other bits.
 // Works also for size 1.
 int RegMask::is_bound_set(const int size) const {
-  if( is_AllStack() ) return false;
+  if (is_AllStack()) return false;
   assert(1 <= size && size <= 16, "update low bits table");
+  assert(valid_watermarks(), "sanity");
   int bit = -1;                 // Set to hold the one bit allowed
-  for (int i = 0; i < RM_SIZE; i++) {
+  for (int i = _lwm; i <= _hwm; i++) {
     if (_A[i] ) {               // Found some bits
       if (bit != -1)
        return false;            // Already had bits, so fail
@@ -279,7 +283,7 @@
         int set = bit>>clear_bit_size;
         set = set & -set; // Remove sign extension.
         set = (((set << size) - 1) >> shift_back_size);
-        if (i >= RM_SIZE || _A[i] != set)
+        if (i > _hwm || _A[i] != set)
           return false; // Require expected low bits in next word
       }
     }
@@ -288,31 +292,29 @@
   return true;
 }
 
-//------------------------------is_UP------------------------------------------
 // UP means register only, Register plus stack, or stack only is DOWN
 bool RegMask::is_UP() const {
   // Quick common case check for DOWN (any stack slot is legal)
-  if( is_AllStack() )
+  if (is_AllStack())
     return false;
   // Slower check for any stack bits set (also DOWN)
-  if( overlap(Matcher::STACK_ONLY_mask) )
+  if (overlap(Matcher::STACK_ONLY_mask))
     return false;
   // Not DOWN, so must be UP
   return true;
 }
 
-//------------------------------Size-------------------------------------------
 // Compute size of register mask in bits
 uint RegMask::Size() const {
   uint sum = 0;
-  for (int i = 0; i < RM_SIZE; i++) {
+  assert(valid_watermarks(), "sanity");
+  for (int i = _lwm; i <= _hwm; i++) {
     sum += population_count(_A[i]);
   }
   return sum;
 }
 
 #ifndef PRODUCT
-//------------------------------print------------------------------------------
 void RegMask::dump(outputStream *st) const {
   st->print("[");
   RegMask rm = *this;           // Structure copy into local temp
--- a/src/hotspot/share/opto/regmask.hpp	Thu Mar 07 07:19:49 2019 -0500
+++ b/src/hotspot/share/opto/regmask.hpp	Tue Mar 05 16:39:18 2019 +0100
@@ -30,22 +30,6 @@
 #include "utilities/count_leading_zeros.hpp"
 #include "utilities/count_trailing_zeros.hpp"
 
-// Some fun naming (textual) substitutions:
-//
-// RegMask::get_low_elem() ==> RegMask::find_first_elem()
-// RegMask::Special        ==> RegMask::Empty
-// RegMask::_flags         ==> RegMask::is_AllStack()
-// RegMask::operator<<=()  ==> RegMask::Insert()
-// RegMask::operator>>=()  ==> RegMask::Remove()
-// RegMask::Union()        ==> RegMask::OR
-// RegMask::Inter()        ==> RegMask::AND
-//
-// OptoRegister::RegName   ==> OptoReg::Name
-//
-// OptoReg::stack0()       ==> _last_Mach_Reg  or ZERO in core version
-//
-// numregs in chaitin      ==> proper degree in chaitin
-
 //-------------Non-zero bit search methods used by RegMask---------------------
 // Find lowest 1, undefined if empty/0
 static int find_lowest_bit(uint32_t mask) {
@@ -78,6 +62,12 @@
     // is something like 90+ parameters.
     int _A[RM_SIZE];
   };
+  // The low and high water marks represents the lowest and highest word
+  // that might contain set register mask bits, respectively. We guarantee
+  // that there are no bits in words outside this range, but any word at
+  // and between the two marks can still be 0.
+  int _lwm;
+  int _hwm;
 
   enum {
     _WordBits    = BitsPerInt,
@@ -85,7 +75,7 @@
     _RM_SIZE     = RM_SIZE   // local constant, imported, then hidden by #undef
   };
 
-public:
+ public:
   enum { CHUNK_SIZE = RM_SIZE*_WordBits };
 
   // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
@@ -113,28 +103,41 @@
 #   define BODY(I) int a##I,
     FORALL_BODY
 #   undef BODY
-    int dummy = 0 ) {
+    int dummy = 0) {
 #   define BODY(I) _A[I] = a##I;
     FORALL_BODY
 #   undef BODY
+    _lwm = 0;
+    _hwm = RM_SIZE - 1;
+    while (_hwm > 0 && _A[_hwm] == 0) _hwm--;
+    while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++;
+    assert(valid_watermarks(), "post-condition");
   }
 
   // Handy copying constructor
-  RegMask( RegMask *rm ) {
-#   define BODY(I) _A[I] = rm->_A[I];
-    FORALL_BODY
-#   undef BODY
+  RegMask(RegMask *rm) {
+    _hwm = rm->_hwm;
+    _lwm = rm->_lwm;
+    for (int i = 0; i < RM_SIZE; i++) {
+      _A[i] = rm->_A[i];
+    }
+    assert(valid_watermarks(), "post-condition");
   }
 
   // Construct an empty mask
-  RegMask( ) { Clear(); }
+  RegMask() {
+    Clear();
+  }
 
   // Construct a mask with a single bit
-  RegMask( OptoReg::Name reg ) { Clear(); Insert(reg); }
+  RegMask(OptoReg::Name reg) {
+    Clear();
+    Insert(reg);
+  }
 
   // Check for register being in mask
-  int Member( OptoReg::Name reg ) const {
-    assert( reg < CHUNK_SIZE, "" );
+  int Member(OptoReg::Name reg) const {
+    assert(reg < CHUNK_SIZE, "");
     return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1)));
   }
 
@@ -152,56 +155,69 @@
   void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); }
 
   // Test for being a not-empty mask.
-  int is_NotEmpty( ) const {
+  int is_NotEmpty() const {
+    assert(valid_watermarks(), "sanity");
     int tmp = 0;
-#   define BODY(I) tmp |= _A[I];
-    FORALL_BODY
-#   undef BODY
+    for (int i = _lwm; i <= _hwm; i++) {
+      tmp |= _A[i];
+    }
     return tmp;
   }
 
   // Find lowest-numbered register from mask, or BAD if mask is empty.
   OptoReg::Name find_first_elem() const {
-    int base, bits;
-#   define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
-    FORALL_BODY
-#   undef BODY
-      { base = OptoReg::Bad; bits = 1<<0; }
-    return OptoReg::Name(base + find_lowest_bit(bits));
+    assert(valid_watermarks(), "sanity");
+    for (int i = _lwm; i <= _hwm; i++) {
+      int bits = _A[i];
+      if (bits) {
+        return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits));
+      }
+    }
+    return OptoReg::Name(OptoReg::Bad);
   }
+
   // Get highest-numbered register from mask, or BAD if mask is empty.
   OptoReg::Name find_last_elem() const {
-    int base, bits;
-#   define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
-    FORALL_BODY
-#   undef BODY
-      { base = OptoReg::Bad; bits = 1<<0; }
-    return OptoReg::Name(base + find_highest_bit(bits));
+    assert(valid_watermarks(), "sanity");
+    for (int i = _hwm; i >= _lwm; i--) {
+      int bits = _A[i];
+      if (bits) {
+        return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits));
+      }
+    }
+    return OptoReg::Name(OptoReg::Bad);
   }
 
   // Clear out partial bits; leave only aligned adjacent bit pairs.
   void clear_to_pairs();
-  // Verify that the mask contains only aligned adjacent bit pairs
-  void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
+
+#ifdef ASSERT
+  // Verify watermarks are sane, i.e., within bounds and that no
+  // register words below or above the watermarks have bits set.
+  bool valid_watermarks() const {
+    assert(_hwm >= 0 && _hwm < RM_SIZE, "_hwm out of range: %d", _hwm);
+    assert(_lwm >= 0 && _lwm < RM_SIZE, "_lwm out of range: %d", _lwm);
+    for (int i = 0; i < _lwm; i++) {
+      assert(_A[i] == 0, "_lwm too high: %d regs at: %d", _lwm, i);
+    }
+    for (int i = _hwm + 1; i < RM_SIZE; i++) {
+      assert(_A[i] == 0, "_hwm too low: %d regs at: %d", _hwm, i);
+    }
+    return true;
+  }
+#endif // !ASSERT
+
   // Test that the mask contains only aligned adjacent bit pairs
   bool is_aligned_pairs() const;
 
   // mask is a pair of misaligned registers
-  bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
+  bool is_misaligned_pair() const;
   // Test for single register
-  int is_bound1() const;
+  bool is_bound1() const;
   // Test for a single adjacent pair
-  int is_bound_pair() const;
+  bool is_bound_pair() const;
   // Test for a single adjacent set of ideal register's size.
-  int is_bound(uint ireg) const {
-    if (is_vector(ireg)) {
-      if (is_bound_set(num_registers(ireg)))
-        return true;
-    } else if (is_bound1() || is_bound_pair()) {
-      return true;
-    }
-    return false;
-  }
+  bool is_bound(uint ireg) const;
 
   // Find the lowest-numbered register set in the mask.  Return the
   // HIGHEST register number in the set, or BAD if no sets.
@@ -212,8 +228,6 @@
   void clear_to_sets(const int size);
   // Smear out partial bits to aligned adjacent bit sets.
   void smear_to_sets(const int size);
-  // Verify that the mask contains only aligned adjacent bit sets
-  void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
   // Test that the mask contains only aligned adjacent bit sets
   bool is_aligned_sets(const int size) const;
 
@@ -224,12 +238,15 @@
   static int num_registers(uint ireg);
 
   // Fast overlap test.  Non-zero if any registers in common.
-  int overlap( const RegMask &rm ) const {
-    return
-#   define BODY(I) (_A[I] & rm._A[I]) |
-    FORALL_BODY
-#   undef BODY
-    0 ;
+  int overlap(const RegMask &rm) const {
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    int hwm = MIN2(_hwm, rm._hwm);
+    int lwm = MAX2(_lwm, rm._lwm);
+    int result = 0;
+    for (int i = lwm; i <= hwm; i++) {
+      result |= _A[i] & rm._A[i];
+    }
+    return result;
   }
 
   // Special test for register pressure based splitting
@@ -237,50 +254,72 @@
   bool is_UP() const;
 
   // Clear a register mask
-  void Clear( ) {
-#   define BODY(I) _A[I] = 0;
-    FORALL_BODY
-#   undef BODY
+  void Clear() {
+    _lwm = RM_SIZE - 1;
+    _hwm = 0;
+    memset(_A, 0, sizeof(int)*RM_SIZE);
+    assert(valid_watermarks(), "sanity");
   }
 
   // Fill a register mask with 1's
-  void Set_All( ) {
-#   define BODY(I) _A[I] = -1;
-    FORALL_BODY
-#   undef BODY
+  void Set_All() {
+    _lwm = 0;
+    _hwm = RM_SIZE - 1;
+    memset(_A, 0xFF, sizeof(int)*RM_SIZE);
+    assert(valid_watermarks(), "sanity");
   }
 
   // Insert register into mask
-  void Insert( OptoReg::Name reg ) {
-    assert( reg < CHUNK_SIZE, "" );
-    _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
+  void Insert(OptoReg::Name reg) {
+    assert(reg < CHUNK_SIZE, "sanity");
+    assert(valid_watermarks(), "pre-condition");
+    int index = reg>>_LogWordBits;
+    if (index > _hwm) _hwm = index;
+    if (index < _lwm) _lwm = index;
+    _A[index] |= (1<<(reg&(_WordBits-1)));
+    assert(valid_watermarks(), "post-condition");
   }
 
   // Remove register from mask
-  void Remove( OptoReg::Name reg ) {
-    assert( reg < CHUNK_SIZE, "" );
+  void Remove(OptoReg::Name reg) {
+    assert(reg < CHUNK_SIZE, "");
     _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1)));
   }
 
   // OR 'rm' into 'this'
-  void OR( const RegMask &rm ) {
-#   define BODY(I) this->_A[I] |= rm._A[I];
-    FORALL_BODY
-#   undef BODY
+  void OR(const RegMask &rm) {
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    // OR widens the live range
+    if (_lwm > rm._lwm) _lwm = rm._lwm;
+    if (_hwm < rm._hwm) _hwm = rm._hwm;
+    for (int i = _lwm; i <= _hwm; i++) {
+      _A[i] |= rm._A[i];
+    }
+    assert(valid_watermarks(), "sanity");
   }
 
   // AND 'rm' into 'this'
-  void AND( const RegMask &rm ) {
-#   define BODY(I) this->_A[I] &= rm._A[I];
-    FORALL_BODY
-#   undef BODY
+  void AND(const RegMask &rm) {
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    // Do not evaluate words outside the current watermark range, as they are
+    // already zero and an &= would not change that
+    for (int i = _lwm; i <= _hwm; i++) {
+      _A[i] &= rm._A[i];
+    }
+    // Narrow the watermarks if &rm spans a narrower range.
+    // Update after to ensure non-overlapping words are zeroed out.
+    if (_lwm < rm._lwm) _lwm = rm._lwm;
+    if (_hwm > rm._hwm) _hwm = rm._hwm;
   }
 
   // Subtract 'rm' from 'this'
-  void SUBTRACT( const RegMask &rm ) {
-#   define BODY(I) _A[I] &= ~rm._A[I];
-    FORALL_BODY
-#   undef BODY
+  void SUBTRACT(const RegMask &rm) {
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    int hwm = MIN2(_hwm, rm._hwm);
+    int lwm = MAX2(_lwm, rm._lwm);
+    for (int i = lwm; i <= hwm; i++) {
+      _A[i] &= ~rm._A[i];
+    }
   }
 
   // Compute size of register mask: number of bits