# HG changeset patch # User redestad # Date 1551800358 -3600 # Node ID 6347ffe2c3c7ef750ec92781d89950607bf4b73d # Parent c112c2d5a856a7254cfb46393f87995942eef6eb 8220159: Optimize various RegMask operations by introducing watermarks Reviewed-by: neliasso, thartmann diff -r c112c2d5a856 -r 6347ffe2c3c7 src/hotspot/share/opto/chaitin.hpp --- a/src/hotspot/share/opto/chaitin.hpp Thu Mar 07 07:19:49 2019 -0500 +++ b/src/hotspot/share/opto/chaitin.hpp Tue Mar 05 16:39:18 2019 +0100 @@ -114,9 +114,9 @@ _msize_valid=1; if (_is_vector) { assert(!_fat_proj, "sanity"); - _mask.verify_sets(_num_regs); + assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); } else if (_num_regs == 2 && !_fat_proj) { - _mask.verify_pairs(); + assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); } #endif } diff -r c112c2d5a856 -r 6347ffe2c3c7 src/hotspot/share/opto/regmask.cpp --- a/src/hotspot/share/opto/regmask.cpp Thu Mar 07 07:19:49 2019 -0500 +++ b/src/hotspot/share/opto/regmask.cpp Tue Mar 05 16:39:18 2019 +0100 @@ -82,69 +82,62 @@ return 1; } -//------------------------------ClearToPairs----------------------------------- // Clear out partial bits; leave only bit pairs void RegMask::clear_to_pairs() { - for( int i = 0; i < RM_SIZE; i++ ) { + assert(valid_watermarks(), "sanity"); + for (int i = _lwm; i <= _hwm; i++) { int bits = _A[i]; bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair bits |= (bits>>1); // Smear 1 hi-bit into a pair _A[i] = bits; } - verify_pairs(); + assert(is_aligned_pairs(), "mask is not aligned, adjacent pairs"); } -//------------------------------is_aligned_pairs------------------------------- +bool RegMask::is_misaligned_pair() const { + return Size() == 2 && !is_aligned_pairs(); +} + bool RegMask::is_aligned_pairs() const { // Assert that the register mask contains only bit pairs. - for( int i = 0; i < RM_SIZE; i++ ) { + assert(valid_watermarks(), "sanity"); + for (int i = _lwm; i <= _hwm; i++) { int bits = _A[i]; - while( bits ) { // Check bits for pairing + while (bits) { // Check bits for pairing int bit = bits & -bits; // Extract low bit // Low bit is not odd means its mis-aligned. - if( (bit & 0x55555555) == 0 ) return false; + if ((bit & 0x55555555) == 0) return false; bits -= bit; // Remove bit from mask // Check for aligned adjacent bit - if( (bits & (bit<<1)) == 0 ) return false; + if ((bits & (bit<<1)) == 0) return false; bits -= (bit<<1); // Remove other halve of pair } } return true; } -//------------------------------is_bound1-------------------------------------- // Return TRUE if the mask contains a single bit -int RegMask::is_bound1() const { - if( is_AllStack() ) return false; - int bit = -1; // Set to hold the one bit allowed - for( int i = 0; i < RM_SIZE; i++ ) { - if( _A[i] ) { // Found some bits - if( bit != -1 ) return false; // Already had bits, so fail - bit = _A[i] & -_A[i]; // Extract 1 bit from mask - if( bit != _A[i] ) return false; // Found many bits, so fail - } - } - // True for both the empty mask and for a single bit - return true; +bool RegMask::is_bound1() const { + if (is_AllStack()) return false; + return Size() == 1; } -//------------------------------is_bound2-------------------------------------- // Return TRUE if the mask contains an adjacent pair of bits and no other bits. -int RegMask::is_bound_pair() const { - if( is_AllStack() ) return false; - +bool RegMask::is_bound_pair() const { + if (is_AllStack()) return false; int bit = -1; // Set to hold the one bit allowed - for( int i = 0; i < RM_SIZE; i++ ) { - if( _A[i] ) { // Found some bits - if( bit != -1 ) return false; // Already had bits, so fail - bit = _A[i] & -(_A[i]); // Extract 1 bit from mask - if( (bit << 1) != 0 ) { // Bit pair stays in same word? - if( (bit | (bit<<1)) != _A[i] ) - return false; // Require adjacent bit pair and no more bits - } else { // Else its a split-pair case - if( bit != _A[i] ) return false; // Found many bits, so fail - i++; // Skip iteration forward - if( i >= RM_SIZE || _A[i] != 1 ) + assert(valid_watermarks(), "sanity"); + for (int i = _lwm; i <= _hwm; i++) { + if (_A[i]) { // Found some bits + if (bit != -1) return false; // Already had bits, so fail + bit = _A[i] & -(_A[i]); // Extract 1 bit from mask + if ((bit << 1) != 0) { // Bit pair stays in same word? + if ((bit | (bit<<1)) != _A[i]) + return false; // Require adjacent bit pair and no more bits + } else { // Else its a split-pair case + if(bit != _A[i]) return false; // Found many bits, so fail + i++; // Skip iteration forward + if (i > _hwm || _A[i] != 1) return false; // Require 1 lo bit in next word } } @@ -153,32 +146,43 @@ return true; } +// Test for a single adjacent set of ideal register's size. +bool RegMask::is_bound(uint ireg) const { + if (is_vector(ireg)) { + if (is_bound_set(num_registers(ireg))) + return true; + } else if (is_bound1() || is_bound_pair()) { + return true; + } + return false; +} + // only indicies of power 2 are accessed, so index 3 is only filled in for storage. static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; -//------------------------------find_first_set--------------------------------- + // Find the lowest-numbered register set in the mask. Return the // HIGHEST register number in the set, or BAD if no sets. // Works also for size 1. OptoReg::Name RegMask::find_first_set(const int size) const { - verify_sets(size); - for (int i = 0; i < RM_SIZE; i++) { + assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); + assert(valid_watermarks(), "sanity"); + for (int i = _lwm; i <= _hwm; i++) { if (_A[i]) { // Found some bits - int bit = _A[i] & -_A[i]; // Extract low bit // Convert to bit number, return hi bit in pair - return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1)); + return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(_A[i]) + (size - 1)); } } return OptoReg::Bad; } -//------------------------------clear_to_sets---------------------------------- // Clear out partial bits; leave only aligned adjacent bit pairs void RegMask::clear_to_sets(const int size) { if (size == 1) return; assert(2 <= size && size <= 16, "update low bits table"); assert(is_power_of_2(size), "sanity"); + assert(valid_watermarks(), "sanity"); int low_bits_mask = low_bits[size>>2]; - for (int i = 0; i < RM_SIZE; i++) { + for (int i = _lwm; i <= _hwm; i++) { int bits = _A[i]; int sets = (bits & low_bits_mask); for (int j = 1; j < size; j++) { @@ -196,17 +200,17 @@ } _A[i] = sets; } - verify_sets(size); + assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); } -//------------------------------smear_to_sets---------------------------------- // Smear out partial bits to aligned adjacent bit sets void RegMask::smear_to_sets(const int size) { if (size == 1) return; assert(2 <= size && size <= 16, "update low bits table"); assert(is_power_of_2(size), "sanity"); + assert(valid_watermarks(), "sanity"); int low_bits_mask = low_bits[size>>2]; - for (int i = 0; i < RM_SIZE; i++) { + for (int i = _lwm; i <= _hwm; i++) { int bits = _A[i]; int sets = 0; for (int j = 0; j < size; j++) { @@ -225,17 +229,17 @@ } _A[i] = sets; } - verify_sets(size); + assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); } -//------------------------------is_aligned_set-------------------------------- +// Assert that the register mask contains only bit sets. bool RegMask::is_aligned_sets(const int size) const { if (size == 1) return true; assert(2 <= size && size <= 16, "update low bits table"); assert(is_power_of_2(size), "sanity"); int low_bits_mask = low_bits[size>>2]; - // Assert that the register mask contains only bit sets. - for (int i = 0; i < RM_SIZE; i++) { + assert(valid_watermarks(), "sanity"); + for (int i = _lwm; i <= _hwm; i++) { int bits = _A[i]; while (bits) { // Check bits for pairing int bit = bits & -bits; // Extract low bit @@ -252,14 +256,14 @@ return true; } -//------------------------------is_bound_set----------------------------------- // Return TRUE if the mask contains one adjacent set of bits and no other bits. // Works also for size 1. int RegMask::is_bound_set(const int size) const { - if( is_AllStack() ) return false; + if (is_AllStack()) return false; assert(1 <= size && size <= 16, "update low bits table"); + assert(valid_watermarks(), "sanity"); int bit = -1; // Set to hold the one bit allowed - for (int i = 0; i < RM_SIZE; i++) { + for (int i = _lwm; i <= _hwm; i++) { if (_A[i] ) { // Found some bits if (bit != -1) return false; // Already had bits, so fail @@ -279,7 +283,7 @@ int set = bit>>clear_bit_size; set = set & -set; // Remove sign extension. set = (((set << size) - 1) >> shift_back_size); - if (i >= RM_SIZE || _A[i] != set) + if (i > _hwm || _A[i] != set) return false; // Require expected low bits in next word } } @@ -288,31 +292,29 @@ return true; } -//------------------------------is_UP------------------------------------------ // UP means register only, Register plus stack, or stack only is DOWN bool RegMask::is_UP() const { // Quick common case check for DOWN (any stack slot is legal) - if( is_AllStack() ) + if (is_AllStack()) return false; // Slower check for any stack bits set (also DOWN) - if( overlap(Matcher::STACK_ONLY_mask) ) + if (overlap(Matcher::STACK_ONLY_mask)) return false; // Not DOWN, so must be UP return true; } -//------------------------------Size------------------------------------------- // Compute size of register mask in bits uint RegMask::Size() const { uint sum = 0; - for (int i = 0; i < RM_SIZE; i++) { + assert(valid_watermarks(), "sanity"); + for (int i = _lwm; i <= _hwm; i++) { sum += population_count(_A[i]); } return sum; } #ifndef PRODUCT -//------------------------------print------------------------------------------ void RegMask::dump(outputStream *st) const { st->print("["); RegMask rm = *this; // Structure copy into local temp diff -r c112c2d5a856 -r 6347ffe2c3c7 src/hotspot/share/opto/regmask.hpp --- a/src/hotspot/share/opto/regmask.hpp Thu Mar 07 07:19:49 2019 -0500 +++ b/src/hotspot/share/opto/regmask.hpp Tue Mar 05 16:39:18 2019 +0100 @@ -30,22 +30,6 @@ #include "utilities/count_leading_zeros.hpp" #include "utilities/count_trailing_zeros.hpp" -// Some fun naming (textual) substitutions: -// -// RegMask::get_low_elem() ==> RegMask::find_first_elem() -// RegMask::Special ==> RegMask::Empty -// RegMask::_flags ==> RegMask::is_AllStack() -// RegMask::operator<<=() ==> RegMask::Insert() -// RegMask::operator>>=() ==> RegMask::Remove() -// RegMask::Union() ==> RegMask::OR -// RegMask::Inter() ==> RegMask::AND -// -// OptoRegister::RegName ==> OptoReg::Name -// -// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version -// -// numregs in chaitin ==> proper degree in chaitin - //-------------Non-zero bit search methods used by RegMask--------------------- // Find lowest 1, undefined if empty/0 static int find_lowest_bit(uint32_t mask) { @@ -78,6 +62,12 @@ // is something like 90+ parameters. int _A[RM_SIZE]; }; + // The low and high water marks represents the lowest and highest word + // that might contain set register mask bits, respectively. We guarantee + // that there are no bits in words outside this range, but any word at + // and between the two marks can still be 0. + int _lwm; + int _hwm; enum { _WordBits = BitsPerInt, @@ -85,7 +75,7 @@ _RM_SIZE = RM_SIZE // local constant, imported, then hidden by #undef }; -public: + public: enum { CHUNK_SIZE = RM_SIZE*_WordBits }; // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits. @@ -113,28 +103,41 @@ # define BODY(I) int a##I, FORALL_BODY # undef BODY - int dummy = 0 ) { + int dummy = 0) { # define BODY(I) _A[I] = a##I; FORALL_BODY # undef BODY + _lwm = 0; + _hwm = RM_SIZE - 1; + while (_hwm > 0 && _A[_hwm] == 0) _hwm--; + while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++; + assert(valid_watermarks(), "post-condition"); } // Handy copying constructor - RegMask( RegMask *rm ) { -# define BODY(I) _A[I] = rm->_A[I]; - FORALL_BODY -# undef BODY + RegMask(RegMask *rm) { + _hwm = rm->_hwm; + _lwm = rm->_lwm; + for (int i = 0; i < RM_SIZE; i++) { + _A[i] = rm->_A[i]; + } + assert(valid_watermarks(), "post-condition"); } // Construct an empty mask - RegMask( ) { Clear(); } + RegMask() { + Clear(); + } // Construct a mask with a single bit - RegMask( OptoReg::Name reg ) { Clear(); Insert(reg); } + RegMask(OptoReg::Name reg) { + Clear(); + Insert(reg); + } // Check for register being in mask - int Member( OptoReg::Name reg ) const { - assert( reg < CHUNK_SIZE, "" ); + int Member(OptoReg::Name reg) const { + assert(reg < CHUNK_SIZE, ""); return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1))); } @@ -152,56 +155,69 @@ void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); } // Test for being a not-empty mask. - int is_NotEmpty( ) const { + int is_NotEmpty() const { + assert(valid_watermarks(), "sanity"); int tmp = 0; -# define BODY(I) tmp |= _A[I]; - FORALL_BODY -# undef BODY + for (int i = _lwm; i <= _hwm; i++) { + tmp |= _A[i]; + } return tmp; } // Find lowest-numbered register from mask, or BAD if mask is empty. OptoReg::Name find_first_elem() const { - int base, bits; -# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else - FORALL_BODY -# undef BODY - { base = OptoReg::Bad; bits = 1<<0; } - return OptoReg::Name(base + find_lowest_bit(bits)); + assert(valid_watermarks(), "sanity"); + for (int i = _lwm; i <= _hwm; i++) { + int bits = _A[i]; + if (bits) { + return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits)); + } + } + return OptoReg::Name(OptoReg::Bad); } + // Get highest-numbered register from mask, or BAD if mask is empty. OptoReg::Name find_last_elem() const { - int base, bits; -# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else - FORALL_BODY -# undef BODY - { base = OptoReg::Bad; bits = 1<<0; } - return OptoReg::Name(base + find_highest_bit(bits)); + assert(valid_watermarks(), "sanity"); + for (int i = _hwm; i >= _lwm; i--) { + int bits = _A[i]; + if (bits) { + return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits)); + } + } + return OptoReg::Name(OptoReg::Bad); } // Clear out partial bits; leave only aligned adjacent bit pairs. void clear_to_pairs(); - // Verify that the mask contains only aligned adjacent bit pairs - void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); } + +#ifdef ASSERT + // Verify watermarks are sane, i.e., within bounds and that no + // register words below or above the watermarks have bits set. + bool valid_watermarks() const { + assert(_hwm >= 0 && _hwm < RM_SIZE, "_hwm out of range: %d", _hwm); + assert(_lwm >= 0 && _lwm < RM_SIZE, "_lwm out of range: %d", _lwm); + for (int i = 0; i < _lwm; i++) { + assert(_A[i] == 0, "_lwm too high: %d regs at: %d", _lwm, i); + } + for (int i = _hwm + 1; i < RM_SIZE; i++) { + assert(_A[i] == 0, "_hwm too low: %d regs at: %d", _hwm, i); + } + return true; + } +#endif // !ASSERT + // Test that the mask contains only aligned adjacent bit pairs bool is_aligned_pairs() const; // mask is a pair of misaligned registers - bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); } + bool is_misaligned_pair() const; // Test for single register - int is_bound1() const; + bool is_bound1() const; // Test for a single adjacent pair - int is_bound_pair() const; + bool is_bound_pair() const; // Test for a single adjacent set of ideal register's size. - int is_bound(uint ireg) const { - if (is_vector(ireg)) { - if (is_bound_set(num_registers(ireg))) - return true; - } else if (is_bound1() || is_bound_pair()) { - return true; - } - return false; - } + bool is_bound(uint ireg) const; // Find the lowest-numbered register set in the mask. Return the // HIGHEST register number in the set, or BAD if no sets. @@ -212,8 +228,6 @@ void clear_to_sets(const int size); // Smear out partial bits to aligned adjacent bit sets. void smear_to_sets(const int size); - // Verify that the mask contains only aligned adjacent bit sets - void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); } // Test that the mask contains only aligned adjacent bit sets bool is_aligned_sets(const int size) const; @@ -224,12 +238,15 @@ static int num_registers(uint ireg); // Fast overlap test. Non-zero if any registers in common. - int overlap( const RegMask &rm ) const { - return -# define BODY(I) (_A[I] & rm._A[I]) | - FORALL_BODY -# undef BODY - 0 ; + int overlap(const RegMask &rm) const { + assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); + int hwm = MIN2(_hwm, rm._hwm); + int lwm = MAX2(_lwm, rm._lwm); + int result = 0; + for (int i = lwm; i <= hwm; i++) { + result |= _A[i] & rm._A[i]; + } + return result; } // Special test for register pressure based splitting @@ -237,50 +254,72 @@ bool is_UP() const; // Clear a register mask - void Clear( ) { -# define BODY(I) _A[I] = 0; - FORALL_BODY -# undef BODY + void Clear() { + _lwm = RM_SIZE - 1; + _hwm = 0; + memset(_A, 0, sizeof(int)*RM_SIZE); + assert(valid_watermarks(), "sanity"); } // Fill a register mask with 1's - void Set_All( ) { -# define BODY(I) _A[I] = -1; - FORALL_BODY -# undef BODY + void Set_All() { + _lwm = 0; + _hwm = RM_SIZE - 1; + memset(_A, 0xFF, sizeof(int)*RM_SIZE); + assert(valid_watermarks(), "sanity"); } // Insert register into mask - void Insert( OptoReg::Name reg ) { - assert( reg < CHUNK_SIZE, "" ); - _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1))); + void Insert(OptoReg::Name reg) { + assert(reg < CHUNK_SIZE, "sanity"); + assert(valid_watermarks(), "pre-condition"); + int index = reg>>_LogWordBits; + if (index > _hwm) _hwm = index; + if (index < _lwm) _lwm = index; + _A[index] |= (1<<(reg&(_WordBits-1))); + assert(valid_watermarks(), "post-condition"); } // Remove register from mask - void Remove( OptoReg::Name reg ) { - assert( reg < CHUNK_SIZE, "" ); + void Remove(OptoReg::Name reg) { + assert(reg < CHUNK_SIZE, ""); _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1))); } // OR 'rm' into 'this' - void OR( const RegMask &rm ) { -# define BODY(I) this->_A[I] |= rm._A[I]; - FORALL_BODY -# undef BODY + void OR(const RegMask &rm) { + assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); + // OR widens the live range + if (_lwm > rm._lwm) _lwm = rm._lwm; + if (_hwm < rm._hwm) _hwm = rm._hwm; + for (int i = _lwm; i <= _hwm; i++) { + _A[i] |= rm._A[i]; + } + assert(valid_watermarks(), "sanity"); } // AND 'rm' into 'this' - void AND( const RegMask &rm ) { -# define BODY(I) this->_A[I] &= rm._A[I]; - FORALL_BODY -# undef BODY + void AND(const RegMask &rm) { + assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); + // Do not evaluate words outside the current watermark range, as they are + // already zero and an &= would not change that + for (int i = _lwm; i <= _hwm; i++) { + _A[i] &= rm._A[i]; + } + // Narrow the watermarks if &rm spans a narrower range. + // Update after to ensure non-overlapping words are zeroed out. + if (_lwm < rm._lwm) _lwm = rm._lwm; + if (_hwm > rm._hwm) _hwm = rm._hwm; } // Subtract 'rm' from 'this' - void SUBTRACT( const RegMask &rm ) { -# define BODY(I) _A[I] &= ~rm._A[I]; - FORALL_BODY -# undef BODY + void SUBTRACT(const RegMask &rm) { + assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); + int hwm = MIN2(_hwm, rm._hwm); + int lwm = MAX2(_lwm, rm._lwm); + for (int i = lwm; i <= hwm; i++) { + _A[i] &= ~rm._A[i]; + } } // Compute size of register mask: number of bits