8179527: Implement intrinsic code for reverseBytes with load/store
author mdoerr
Fri, 02 Jun 2017 16:32:39 +0200
changeset 46521 17e8acfe1db8
parent 46520 de5cb3eed39b
child 46522 86b13b03a053
8179527: Implement intrinsic code for reverseBytes with load/store
Reviewed-by: simonis, mdoerr
Contributed-by: Michihiro Horie <horie@jp.ibm.com>
hotspot/src/cpu/ppc/vm/assembler_ppc.hpp
hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp
hotspot/src/cpu/ppc/vm/ppc.ad
hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Fri Jun 02 13:48:01 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Fri Jun 02 16:32:39 2017 +0200
@@ -376,10 +376,12 @@
     STWX_OPCODE  = (31u << OPCODE_SHIFT | 151u << 1),
     STWU_OPCODE  = (37u << OPCODE_SHIFT),
     STWUX_OPCODE = (31u << OPCODE_SHIFT | 183u << 1),
+    STWBRX_OPCODE = (31u << OPCODE_SHIFT | 662u << 1),
 
     STH_OPCODE   = (44u << OPCODE_SHIFT),
     STHX_OPCODE  = (31u << OPCODE_SHIFT | 407u << 1),
     STHU_OPCODE  = (45u << OPCODE_SHIFT),
+    STHBRX_OPCODE = (31u << OPCODE_SHIFT | 918u << 1),
 
     STB_OPCODE   = (38u << OPCODE_SHIFT),
     STBX_OPCODE  = (31u << OPCODE_SHIFT | 215u << 1),
@@ -401,11 +403,13 @@
     LD_OPCODE     = (58u << OPCODE_SHIFT |   0u << XO_30_31_SHIFT), // DS-FORM
     LDU_OPCODE    = (58u << OPCODE_SHIFT |   1u << XO_30_31_SHIFT), // DS-FORM
     LDX_OPCODE    = (31u << OPCODE_SHIFT |  21u << XO_21_30_SHIFT), // X-FORM
+    LDBRX_OPCODE  = (31u << OPCODE_SHIFT | 532u << 1),              // X-FORM
 
     STD_OPCODE    = (62u << OPCODE_SHIFT |   0u << XO_30_31_SHIFT), // DS-FORM
     STDU_OPCODE   = (62u << OPCODE_SHIFT |   1u << XO_30_31_SHIFT), // DS-FORM
-    STDUX_OPCODE  = (31u << OPCODE_SHIFT | 181u << 1),                  // X-FORM
+    STDUX_OPCODE  = (31u << OPCODE_SHIFT | 181u << 1),              // X-FORM
     STDX_OPCODE   = (31u << OPCODE_SHIFT | 149u << XO_21_30_SHIFT), // X-FORM
+    STDBRX_OPCODE = (31u << OPCODE_SHIFT | 660u << 1),              // X-FORM
 
     RLDICR_OPCODE = (30u << OPCODE_SHIFT |   1u << XO_27_29_SHIFT), // MD-FORM
     RLDICL_OPCODE = (30u << OPCODE_SHIFT |   0u << XO_27_29_SHIFT), // MD-FORM
@@ -1552,6 +1556,9 @@
   inline void ld(   Register d, int si16,    Register s1);
   inline void ldu(  Register d, int si16,    Register s1);
 
+  // 8 bytes reversed
+  inline void ldbrx( Register d, Register s1, Register s2);
+
   // For convenience. Load pointer into d from b+s1.
   inline void ld_ptr(Register d, int b, Register s1);
   DEBUG_ONLY(inline void ld_ptr(Register d, ByteSize b, Register s1);)
@@ -1560,10 +1567,12 @@
   inline void stwx( Register d, Register s1, Register s2);
   inline void stw(  Register d, int si16,    Register s1);
   inline void stwu( Register d, int si16,    Register s1);
+  inline void stwbrx( Register d, Register s1, Register s2);
 
   inline void sthx( Register d, Register s1, Register s2);
   inline void sth(  Register d, int si16,    Register s1);
   inline void sthu( Register d, int si16,    Register s1);
+  inline void sthbrx( Register d, Register s1, Register s2);
 
   inline void stbx( Register d, Register s1, Register s2);
   inline void stb(  Register d, int si16,    Register s1);
@@ -1573,6 +1582,7 @@
   inline void std(  Register d, int si16,    Register s1);
   inline void stdu( Register d, int si16,    Register s1);
   inline void stdux(Register s, Register a,  Register b);
+  inline void stdbrx( Register d, Register s1, Register s2);
 
   inline void st_ptr(Register d, int si16,    Register s1);
   DEBUG_ONLY(inline void st_ptr(Register d, ByteSize b, Register s1);)
@@ -2182,14 +2192,18 @@
   inline void lbz(  Register d, int si16);
   inline void ldx(  Register d, Register s2);
   inline void ld(   Register d, int si16);
+  inline void ldbrx(Register d, Register s2);
   inline void stwx( Register d, Register s2);
   inline void stw(  Register d, int si16);
+  inline void stwbrx( Register d, Register s2);
   inline void sthx( Register d, Register s2);
   inline void sth(  Register d, int si16);
+  inline void sthbrx( Register d, Register s2);
   inline void stbx( Register d, Register s2);
   inline void stb(  Register d, int si16);
   inline void stdx( Register d, Register s2);
   inline void std(  Register d, int si16);
+  inline void stdbrx( Register d, Register s2);
 
   // PPC 2, section 3.2.1 Instruction Cache Instructions
   inline void icbi(    Register s2);
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Fri Jun 02 13:48:01 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Fri Jun 02 16:32:39 2017 +0200
@@ -327,6 +327,7 @@
 inline void Assembler::ld(   Register d, int si16,    Register s1) { emit_int32(LD_OPCODE  | rt(d) | ds(si16)   | ra0mem(s1));}
 inline void Assembler::ldx(  Register d, Register s1, Register s2) { emit_int32(LDX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::ldu(  Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LDU_OPCODE | rt(d) | ds(si16) | rta0mem(s1));}
+inline void Assembler::ldbrx( Register d, Register s1, Register s2) { emit_int32(LDBRX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
 
 inline void Assembler::ld_ptr(Register d, int b, Register s1) { ld(d, b, s1); }
 DEBUG_ONLY(inline void Assembler::ld_ptr(Register d, ByteSize b, Register s1) { ld(d, in_bytes(b), s1); })
@@ -335,10 +336,12 @@
 inline void Assembler::stwx( Register d, Register s1, Register s2) { emit_int32(STWX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::stw(  Register d, int si16,    Register s1) { emit_int32(STW_OPCODE  | rs(d) | d1(si16)   | ra0mem(s1));}
 inline void Assembler::stwu( Register d, int si16,    Register s1) { emit_int32(STWU_OPCODE | rs(d) | d1(si16)   | rta0mem(s1));}
+inline void Assembler::stwbrx( Register d, Register s1, Register s2) { emit_int32(STWBRX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
 
 inline void Assembler::sthx( Register d, Register s1, Register s2) { emit_int32(STHX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::sth(  Register d, int si16,    Register s1) { emit_int32(STH_OPCODE  | rs(d) | d1(si16)   | ra0mem(s1));}
 inline void Assembler::sthu( Register d, int si16,    Register s1) { emit_int32(STHU_OPCODE | rs(d) | d1(si16)   | rta0mem(s1));}
+inline void Assembler::sthbrx( Register d, Register s1, Register s2) { emit_int32(STHBRX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
 
 inline void Assembler::stbx( Register d, Register s1, Register s2) { emit_int32(STBX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::stb(  Register d, int si16,    Register s1) { emit_int32(STB_OPCODE  | rs(d) | d1(si16)   | ra0mem(s1));}
@@ -348,6 +351,7 @@
 inline void Assembler::stdx( Register d, Register s1, Register s2) { emit_int32(STDX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::stdu( Register d, int si16,    Register s1) { emit_int32(STDU_OPCODE | rs(d) | ds(si16)   | rta0mem(s1));}
 inline void Assembler::stdux(Register s, Register a,  Register b)  { emit_int32(STDUX_OPCODE| rs(s) | rta0mem(a) | rb(b));}
+inline void Assembler::stdbrx( Register d, Register s1, Register s2) { emit_int32(STDBRX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
 
 inline void Assembler::st_ptr(Register d, int b, Register s1) { std(d, b, s1); }
 DEBUG_ONLY(inline void Assembler::st_ptr(Register d, ByteSize b, Register s1) { std(d, in_bytes(b), s1); })
@@ -944,14 +948,18 @@
 inline void Assembler::lbz(  Register d, int si16   ) { emit_int32( LBZ_OPCODE  | rt(d) | d1(si16));}
 inline void Assembler::ld(   Register d, int si16   ) { emit_int32( LD_OPCODE   | rt(d) | ds(si16));}
 inline void Assembler::ldx(  Register d, Register s2) { emit_int32( LDX_OPCODE  | rt(d) | rb(s2));}
+inline void Assembler::ldbrx(Register d, Register s2) { emit_int32( LDBRX_OPCODE| rt(d) | rb(s2));}
 inline void Assembler::stwx( Register d, Register s2) { emit_int32( STWX_OPCODE | rs(d) | rb(s2));}
 inline void Assembler::stw(  Register d, int si16   ) { emit_int32( STW_OPCODE  | rs(d) | d1(si16));}
+inline void Assembler::stwbrx(Register d, Register s2){ emit_int32(STWBRX_OPCODE| rs(d) | rb(s2));}
 inline void Assembler::sthx( Register d, Register s2) { emit_int32( STHX_OPCODE | rs(d) | rb(s2));}
 inline void Assembler::sth(  Register d, int si16   ) { emit_int32( STH_OPCODE  | rs(d) | d1(si16));}
+inline void Assembler::sthbrx(Register d, Register s2){ emit_int32(STHBRX_OPCODE| rs(d) | rb(s2));}
 inline void Assembler::stbx( Register d, Register s2) { emit_int32( STBX_OPCODE | rs(d) | rb(s2));}
 inline void Assembler::stb(  Register d, int si16   ) { emit_int32( STB_OPCODE  | rs(d) | d1(si16));}
 inline void Assembler::std(  Register d, int si16   ) { emit_int32( STD_OPCODE  | rs(d) | ds(si16));}
 inline void Assembler::stdx( Register d, Register s2) { emit_int32( STDX_OPCODE | rs(d) | rb(s2));}
+inline void Assembler::stdbrx(Register d, Register s2){ emit_int32(STDBRX_OPCODE| rs(d) | rb(s2));}
 
 // ra0 version
 inline void Assembler::icbi(    Register s2)          { emit_int32( ICBI_OPCODE   | rb(s2)           ); }
--- a/hotspot/src/cpu/ppc/vm/ppc.ad	Fri Jun 02 13:48:01 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad	Fri Jun 02 16:32:39 2017 +0200
@@ -5842,6 +5842,16 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct rldicl(iRegLdst dst, iRegLsrc src, immI16 shift, immI16 mask_begin) %{
+  effect(DEF dst, USE src, USE shift, USE mask_begin);
+
+  size(4);
+  ins_encode %{
+    __ rldicl($dst$$Register, $src$$Register, $shift$$constant, $mask_begin$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // Needed to postalloc expand loadConN: ConN is loaded as ConI
 // leaving the upper 32 bits with sign-extension bits.
 // This clears these bits: dst = src & 0xFFFFFFFF.
@@ -10519,6 +10529,16 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct extsh(iRegIdst dst, iRegIsrc src) %{
+  effect(DEF dst, USE src);
+
+  size(4);
+  ins_encode %{
+    __ extsh($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // LShiftI 16 + RShiftI 16 converts short to int.
 instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
   match(Set dst (RShiftI (LShiftI src amount) amount));
@@ -12682,8 +12702,7 @@
 // Just slightly faster than java implementation.
 instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
   match(Set dst (ReverseBytesI src));
-  predicate(UseCountLeadingZerosInstructionsPPC64);
-  ins_cost(DEFAULT_COST);
+  ins_cost(7*DEFAULT_COST);
 
   expand %{
     immI16 imm24 %{ (int) 24 %}
@@ -12705,6 +12724,172 @@
   %}
 %}
 
+instruct bytes_reverse_long_Ex(iRegLdst dst, iRegLsrc src) %{
+  match(Set dst (ReverseBytesL src));
+  ins_cost(15*DEFAULT_COST);
+
+  expand %{
+    immI16 imm56 %{ (int) 56 %}
+    immI16 imm48 %{ (int) 48 %}
+    immI16 imm40 %{ (int) 40 %}
+    immI16 imm32 %{ (int) 32 %}
+    immI16 imm24 %{ (int) 24 %}
+    immI16 imm16 %{ (int) 16 %}
+    immI16  imm8 %{ (int)  8 %}
+    immI16  imm0 %{ (int)  0 %}
+    iRegLdst tmpL1;
+    iRegLdst tmpL2;
+    iRegLdst tmpL3;
+    iRegLdst tmpL4;
+    iRegLdst tmpL5;
+    iRegLdst tmpL6;
+
+                                        // src   : |a|b|c|d|e|f|g|h|
+    rldicl(tmpL1, src, imm8, imm24);    // tmpL1 : | | | |e|f|g|h|a|
+    rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |a| | | |e|
+    rldicl(tmpL3, tmpL2, imm32, imm0);  // tmpL3 : | | | |e| | | |a|
+    rldicl(tmpL1, src, imm16, imm24);   // tmpL1 : | | | |f|g|h|a|b|
+    rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |b| | | |f|
+    rldicl(tmpL4, tmpL2, imm40, imm0);  // tmpL4 : | | |f| | | |b| |
+    orL_reg_reg(tmpL5, tmpL3, tmpL4);   // tmpL5 : | | |f|e| | |b|a|
+    rldicl(tmpL1, src, imm24, imm24);   // tmpL1 : | | | |g|h|a|b|c|
+    rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |c| | | |g|
+    rldicl(tmpL3, tmpL2, imm48, imm0);  // tmpL3 : | |g| | | |c| | |
+    rldicl(tmpL1, src, imm32, imm24);   // tmpL1 : | | | |h|a|b|c|d|
+    rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |d| | | |h|
+    rldicl(tmpL4, tmpL2, imm56, imm0);  // tmpL4 : |h| | | |d| | | |
+    orL_reg_reg(tmpL6, tmpL3, tmpL4);   // tmpL6 : |h|g| | |d|c| | |
+    orL_reg_reg(dst, tmpL5, tmpL6);     // dst   : |h|g|f|e|d|c|b|a|
+  %}
+%}
+
+instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
+  match(Set dst (ReverseBytesUS src));
+  ins_cost(2*DEFAULT_COST);
+
+  expand %{
+    immI16  imm16 %{ (int) 16 %}
+    immI16   imm8 %{ (int)  8 %}
+
+    urShiftI_reg_imm(dst, src, imm8);
+    insrwi(dst, src, imm16, imm8);
+  %}
+%}
+
+instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
+  match(Set dst (ReverseBytesS src));
+  ins_cost(3*DEFAULT_COST);
+
+  expand %{
+    immI16  imm16 %{ (int) 16 %}
+    immI16   imm8 %{ (int)  8 %}
+    iRegLdst tmpI1;
+
+    urShiftI_reg_imm(tmpI1, src, imm8);
+    insrwi(tmpI1, src, imm16, imm8);
+    extsh(dst, tmpI1);
+  %}
+%}
+
+// Load Integer reversed byte order
+instruct loadI_reversed(iRegIdst dst, indirect mem) %{
+  match(Set dst (ReverseBytesI (LoadI mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  ins_encode %{
+    __ lwbrx($dst$$Register, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Load Long - aligned and reversed
+instruct loadL_reversed(iRegLdst dst, indirect mem) %{
+  match(Set dst (ReverseBytesL (LoadL mem)));
+  predicate(VM_Version::has_ldbrx());
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  ins_encode %{
+    __ ldbrx($dst$$Register, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Load unsigned short / char reversed byte order
+instruct loadUS_reversed(iRegIdst dst, indirect mem) %{
+  match(Set dst (ReverseBytesUS (LoadUS mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  ins_encode %{
+    __ lhbrx($dst$$Register, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Load short reversed byte order
+instruct loadS_reversed(iRegIdst dst, indirect mem) %{
+  match(Set dst (ReverseBytesS (LoadS mem)));
+  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
+
+  size(8);
+  ins_encode %{
+    __ lhbrx($dst$$Register, $mem$$Register);
+    __ extsh($dst$$Register, $dst$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Store Integer reversed byte order
+instruct storeI_reversed(iRegIsrc src, indirect mem) %{
+  match(Set mem (StoreI mem (ReverseBytesI src)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  ins_encode %{
+    __ stwbrx($src$$Register, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Store Long reversed byte order
+instruct storeL_reversed(iRegLsrc src, indirect mem) %{
+  match(Set mem (StoreL mem (ReverseBytesL src)));
+  predicate(VM_Version::has_stdbrx());
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  ins_encode %{
+    __ stdbrx($src$$Register, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Store unsigned short / char reversed byte order
+instruct storeUS_reversed(iRegIsrc src, indirect mem) %{
+  match(Set mem (StoreC mem (ReverseBytesUS src)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  ins_encode %{
+    __ sthbrx($src$$Register, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Store short reversed byte order
+instruct storeS_reversed(iRegIsrc src, indirect mem) %{
+  match(Set mem (StoreC mem (ReverseBytesS src)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  ins_encode %{
+    __ sthbrx($src$$Register, $mem$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 //---------- Replicate Vector Instructions ------------------------------------
 
 // Insrdi does replicate if src == dst.
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Fri Jun 02 13:48:01 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Fri Jun 02 16:32:39 2017 +0200
@@ -111,7 +111,7 @@
   // Create and print feature-string.
   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
   jio_snprintf(buf, sizeof(buf),
-               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_fsqrt()   ? " fsqrt"   : ""),
                (has_isel()    ? " isel"    : ""),
                (has_lxarxeh() ? " lxarxeh" : ""),
@@ -126,7 +126,9 @@
                (has_vpmsumb() ? " vpmsumb" : ""),
                (has_tcheck()  ? " tcheck"  : ""),
                (has_mfdscr()  ? " mfdscr"  : ""),
-               (has_vsx()     ? " vsx"     : "")
+               (has_vsx()     ? " vsx"     : ""),
+               (has_ldbrx()   ? " ldbrx"   : ""),
+               (has_stdbrx()  ? " stdbrx"  : "")
                // Make sure number of %s matches num_features!
               );
   _features_string = os::strdup(buf);
@@ -663,6 +665,8 @@
   a->tcheck(0);                                // code[12] -> tcheck
   a->mfdscr(R0);                               // code[13] -> mfdscr
   a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
+  a->ldbrx(R7, R3_ARG1, R4_ARG2);              // code[15] -> ldbrx
+  a->stdbrx(R7, R3_ARG1, R4_ARG2);             // code[16] -> stdbrx
   a->blr();
 
   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -712,6 +716,8 @@
   if (code[feature_cntr++]) features |= tcheck_m;
   if (code[feature_cntr++]) features |= mfdscr_m;
   if (code[feature_cntr++]) features |= vsx_m;
+  if (code[feature_cntr++]) features |= ldbrx_m;
+  if (code[feature_cntr++]) features |= stdbrx_m;
 
   // Print the detection code.
   if (PrintAssembly) {
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp	Fri Jun 02 13:48:01 2017 +0200
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp	Fri Jun 02 16:32:39 2017 +0200
@@ -47,6 +47,8 @@
     tcheck,
     mfdscr,
     vsx,
+    ldbrx,
+    stdbrx,
     num_features // last entry to count features
   };
   enum Feature_Flag_Set {
@@ -66,6 +68,8 @@
     tcheck_m              = (1 << tcheck ),
     mfdscr_m              = (1 << mfdscr ),
     vsx_m                 = (1 << vsx    ),
+    ldbrx_m               = (1 << ldbrx  ),
+    stdbrx_m              = (1 << stdbrx ),
     all_features_m        = (unsigned long)-1
   };
 
@@ -100,6 +104,8 @@
   static bool has_tcheck()  { return (_features & tcheck_m) != 0; }
   static bool has_mfdscr()  { return (_features & mfdscr_m) != 0; }
   static bool has_vsx()     { return (_features & vsx_m) != 0; }
+  static bool has_ldbrx()   { return (_features & ldbrx_m) != 0; }
+  static bool has_stdbrx()  { return (_features & stdbrx_m) != 0; }
 
   // Assembler testing
   static void allow_all();