Merge
author roland
Fri, 21 Dec 2012 01:39:34 -0800
changeset 14838 6e41d19e827b
parent 14821 334d055fba97 (current diff)
parent 14837 a75c3082d106 (diff)
child 14844 c92bd8f2109d
Merge
hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp
hotspot/src/share/vm/classfile/classLoaderData.cpp
hotspot/src/share/vm/opto/library_call.cpp
--- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -298,7 +298,7 @@
     for (int i = 0; i < _bytes_to_copy; i++) {
       address ptr = (address)(_pc_start + i);
       int a_byte = (*ptr) & 0xFF;
-      __ a_byte (a_byte);
+      __ emit_int8 (a_byte);
     }
   }
 
@@ -340,10 +340,10 @@
   int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
 
   // Emit the patch record.  We need to emit a full word, so emit an extra empty byte
-  __ a_byte(0);
-  __ a_byte(being_initialized_entry_offset);
-  __ a_byte(bytes_to_skip);
-  __ a_byte(_bytes_to_copy);
+  __ emit_int8(0);
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
   address patch_info_pc = __ pc();
   assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
 
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -100,34 +100,6 @@
 bool AbstractAssembler::pd_check_instruction_mark() { return false; }
 #endif
 
-
-void MacroAssembler::print_instruction(int inst) {
-  const char* s;
-  switch (inv_op(inst)) {
-  default:         s = "????"; break;
-  case call_op:    s = "call"; break;
-  case branch_op:
-    switch (inv_op2(inst)) {
-      case fb_op2:     s = "fb";   break;
-      case fbp_op2:    s = "fbp";  break;
-      case br_op2:     s = "br";   break;
-      case bp_op2:     s = "bp";   break;
-      case cb_op2:     s = "cb";   break;
-      case bpr_op2: {
-        if (is_cbcond(inst)) {
-          s = is_cxb(inst) ? "cxb" : "cwb";
-        } else {
-          s = "bpr";
-        }
-        break;
-      }
-      default:         s = "????"; break;
-    }
-  }
-  ::tty->print("%s", s);
-}
-
-
 // Patch instruction inst at offset inst_pos to refer to dest_pos
 // and return the resulting instruction.
 // We should have pcs, not offsets, but since all is relative, it will work out
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -603,7 +603,6 @@
   friend class Label;
 
  protected:
-  static void print_instruction(int inst);
   static int  patched_branch(int dest_pos, int inst, int inst_pos);
   static int  branch_destination(int inst, int pos);
 
@@ -759,9 +758,6 @@
   // Required platform-specific helpers for Label::patch_instructions.
   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
   void pd_patch_instruction(address branch, address target);
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch);
-#endif
 
   // sethi Macro handles optimizations and relocations
 private:
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -43,14 +43,6 @@
   stub_inst = patched_branch(target - branch, stub_inst, 0);
 }
 
-#ifndef PRODUCT
-inline void MacroAssembler::pd_print_patched_instruction(address branch) {
-  jint stub_inst = *(jint*) branch;
-  print_instruction(stub_inst);
-  ::tty->print("%s", " (unresolved)");
-}
-#endif // PRODUCT
-
 // Use the right loads/stores for the platform
 inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) {
 #ifdef _LP64
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Fri Dec 21 01:39:34 2012 -0800
@@ -10224,7 +10224,7 @@
 
 //---------- Zeros Count Instructions ------------------------------------------
 
-instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
+instruct countLeadingZerosI(iRegIsafe dst, iRegI src, iRegI tmp, flagsReg cr) %{
   predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
   match(Set dst (CountLeadingZerosI src));
   effect(TEMP dst, TEMP tmp, KILL cr);
@@ -10321,7 +10321,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
+instruct countTrailingZerosI(iRegIsafe dst, iRegI src, flagsReg cr) %{
   predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
   match(Set dst (CountTrailingZerosI src));
   effect(TEMP dst, KILL cr);
@@ -10364,19 +10364,21 @@
 
 //---------- Population Count Instructions -------------------------------------
 
-instruct popCountI(iRegI dst, iRegI src) %{
+instruct popCountI(iRegIsafe dst, iRegI src) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountI src));
 
-  format %{ "POPC   $src, $dst" %}
-  ins_encode %{
-    __ popc($src$$Register, $dst$$Register);
+  format %{ "SRL    $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t"
+            "POPC   $dst, $dst" %}
+  ins_encode %{
+    __ srl($src$$Register, G0, $dst$$Register);
+    __ popc($dst$$Register, $dst$$Register);
   %}
   ins_pipe(ialu_reg);
 %}
 
 // Note: Long.bitCount(long) returns an int.
-instruct popCountL(iRegI dst, iRegL src) %{
+instruct popCountL(iRegIsafe dst, iRegL src) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountL src));
 
--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -434,7 +434,7 @@
 
   // the frame is greater than one page in size, so check against
   // the bottom of the stack
-  __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
+  __ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check);
 
   // the stack will overflow, throw an exception
 
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -226,9 +226,9 @@
   assert(isByte(op1) && isByte(op2), "wrong opcode");
   assert(isByte(imm8), "not a byte");
   assert((op1 & 0x01) == 0, "should be 8bit operation");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst));
-  emit_byte(imm8);
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst));
+  emit_int8(imm8);
 }
 
 
@@ -237,12 +237,12 @@
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   if (is8bit(imm32)) {
-    emit_byte(op1 | 0x02); // set sign bit
-    emit_byte(op2 | encode(dst));
-    emit_byte(imm32 & 0xFF);
+    emit_int8(op1 | 0x02); // set sign bit
+    emit_int8(op2 | encode(dst));
+    emit_int8(imm32 & 0xFF);
   } else {
-    emit_byte(op1);
-    emit_byte(op2 | encode(dst));
+    emit_int8(op1);
+    emit_int8(op2 | encode(dst));
     emit_long(imm32);
   }
 }
@@ -252,8 +252,8 @@
   assert(isByte(op1) && isByte(op2), "wrong opcode");
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst));
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst));
   emit_long(imm32);
 }
 
@@ -262,11 +262,11 @@
   assert((op1 & 0x01) == 1, "should be 32bit operation");
   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
   if (is8bit(imm32)) {
-    emit_byte(op1 | 0x02); // set sign bit
+    emit_int8(op1 | 0x02); // set sign bit
     emit_operand(rm, adr, 1);
-    emit_byte(imm32 & 0xFF);
+    emit_int8(imm32 & 0xFF);
   } else {
-    emit_byte(op1);
+    emit_int8(op1);
     emit_operand(rm, adr, 4);
     emit_long(imm32);
   }
@@ -275,8 +275,8 @@
 
 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
   assert(isByte(op1) && isByte(op2), "wrong opcode");
-  emit_byte(op1);
-  emit_byte(op2 | encode(dst) << 3 | encode(src));
+  emit_int8(op1);
+  emit_int8(op2 | encode(dst) << 3 | encode(src));
 }
 
 
@@ -301,21 +301,21 @@
         // [base + index*scale]
         // [00 reg 100][ss index base]
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x04 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
+        emit_int8(0x04 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [base + index*scale + imm8]
         // [01 reg 100][ss index base] imm8
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x44 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x44 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
+        emit_int8(disp & 0xFF);
       } else {
         // [base + index*scale + disp32]
         // [10 reg 100][ss index base] disp32
         assert(index != rsp, "illegal addressing mode");
-        emit_byte(0x84 | regenc);
-        emit_byte(scale << 6 | indexenc | baseenc);
+        emit_int8(0x84 | regenc);
+        emit_int8(scale << 6 | indexenc | baseenc);
         emit_data(disp, rspec, disp32_operand);
       }
     } else if (base == rsp LP64_ONLY(|| base == r12)) {
@@ -323,19 +323,19 @@
       if (disp == 0 && rtype == relocInfo::none) {
         // [rsp]
         // [00 reg 100][00 100 100]
-        emit_byte(0x04 | regenc);
-        emit_byte(0x24);
+        emit_int8(0x04 | regenc);
+        emit_int8(0x24);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [rsp + imm8]
         // [01 reg 100][00 100 100] disp8
-        emit_byte(0x44 | regenc);
-        emit_byte(0x24);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x44 | regenc);
+        emit_int8(0x24);
+        emit_int8(disp & 0xFF);
       } else {
         // [rsp + imm32]
         // [10 reg 100][00 100 100] disp32
-        emit_byte(0x84 | regenc);
-        emit_byte(0x24);
+        emit_int8(0x84 | regenc);
+        emit_int8(0x24);
         emit_data(disp, rspec, disp32_operand);
       }
     } else {
@@ -345,16 +345,16 @@
           base != rbp LP64_ONLY(&& base != r13)) {
         // [base]
         // [00 reg base]
-        emit_byte(0x00 | regenc | baseenc);
+        emit_int8(0x00 | regenc | baseenc);
       } else if (is8bit(disp) && rtype == relocInfo::none) {
         // [base + disp8]
         // [01 reg base] disp8
-        emit_byte(0x40 | regenc | baseenc);
-        emit_byte(disp & 0xFF);
+        emit_int8(0x40 | regenc | baseenc);
+        emit_int8(disp & 0xFF);
       } else {
         // [base + disp32]
         // [10 reg base] disp32
-        emit_byte(0x80 | regenc | baseenc);
+        emit_int8(0x80 | regenc | baseenc);
         emit_data(disp, rspec, disp32_operand);
       }
     }
@@ -364,14 +364,14 @@
       // [index*scale + disp]
       // [00 reg 100][ss index 101] disp32
       assert(index != rsp, "illegal addressing mode");
-      emit_byte(0x04 | regenc);
-      emit_byte(scale << 6 | indexenc | 0x05);
+      emit_int8(0x04 | regenc);
+      emit_int8(scale << 6 | indexenc | 0x05);
       emit_data(disp, rspec, disp32_operand);
     } else if (rtype != relocInfo::none ) {
       // [disp] (64bit) RIP-RELATIVE (32bit) abs
       // [00 000 101] disp32
 
-      emit_byte(0x05 | regenc);
+      emit_int8(0x05 | regenc);
       // Note that the RIP-rel. correction applies to the generated
       // disp field, but _not_ to the target address in the rspec.
 
@@ -391,8 +391,8 @@
       // 32bit never did this, did everything as the rip-rel/disp code above
       // [disp] ABSOLUTE
       // [00 reg 100][00 100 101] disp32
-      emit_byte(0x04 | regenc);
-      emit_byte(0x25);
+      emit_int8(0x04 | regenc);
+      emit_int8(0x25);
       emit_data(disp, rspec, disp32_operand);
     }
   }
@@ -883,8 +883,8 @@
 void Assembler::emit_farith(int b1, int b2, int i) {
   assert(isByte(b1) && isByte(b2), "wrong opcode");
   assert(0 <= i &&  i < 8, "illegal stack offset");
-  emit_byte(b1);
-  emit_byte(b2 + i);
+  emit_int8(b1);
+  emit_int8(b2 + i);
 }
 
 
@@ -899,7 +899,7 @@
 void Assembler::adcl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
@@ -911,7 +911,7 @@
 void Assembler::adcl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x13);
+  emit_int8(0x13);
   emit_operand(dst, src);
 }
 
@@ -929,7 +929,7 @@
 void Assembler::addl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x01);
+  emit_int8(0x01);
   emit_operand(src, dst);
 }
 
@@ -941,7 +941,7 @@
 void Assembler::addl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x03);
+  emit_int8(0x03);
   emit_operand(dst, src);
 }
 
@@ -953,38 +953,40 @@
 void Assembler::addr_nop_4() {
   assert(UseAddressNop, "no CPU support");
   // 4 bytes: NOP DWORD PTR [EAX+0]
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
-  emit_byte(0);    // 8-bits offset (1 byte)
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
+  emit_int8(0);    // 8-bits offset (1 byte)
 }
 
 void Assembler::addr_nop_5() {
   assert(UseAddressNop, "no CPU support");
   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
-  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
-  emit_byte(0);    // 8-bits offset (1 byte)
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
+  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
+  emit_int8(0);    // 8-bits offset (1 byte)
 }
 
 void Assembler::addr_nop_7() {
   assert(UseAddressNop, "no CPU support");
   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8((unsigned char)0x80);
+                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
   emit_long(0);    // 32-bits offset (4 bytes)
 }
 
 void Assembler::addr_nop_8() {
   assert(UseAddressNop, "no CPU support");
   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
-  emit_byte(0x0F);
-  emit_byte(0x1F);
-  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
-  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
+  emit_int8(0x0F);
+  emit_int8(0x1F);
+  emit_int8((unsigned char)0x84);
+                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
+  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
   emit_long(0);    // 32-bits offset (4 bytes)
 }
 
@@ -1012,67 +1014,67 @@
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xde);
+  emit_int8((unsigned char)0xDE);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xde);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xDE);
+  emit_int8(0xC0 | encode);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdf);
+  emit_int8((unsigned char)0xDF);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdf);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::aesenc(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdc);
+  emit_int8((unsigned char)0xDC);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdc);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xDC);
+  emit_int8(0xC0 | encode);
 }
 
 void Assembler::aesenclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdd);
+  emit_int8((unsigned char)0xDD);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0xdd);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xDD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 void Assembler::andl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rsp, dst, 4);
   emit_long(imm32);
 }
@@ -1085,7 +1087,7 @@
 void Assembler::andl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x23);
+  emit_int8(0x23);
   emit_operand(dst, src);
 }
 
@@ -1096,23 +1098,23 @@
 
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBC);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bsrl(Register dst, Register src) {
   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bswapl(Register reg) { // bswap
   int encode = prefix_and_encode(reg->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xC8 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::call(Label& L, relocInfo::relocType rtype) {
@@ -1125,36 +1127,36 @@
     assert(offs <= 0, "assembler error");
     InstructionMark im(this);
     // 1110 1000 #32-bit disp
-    emit_byte(0xE8);
+    emit_int8((unsigned char)0xE8);
     emit_data(offs - long_size, rtype, operand);
   } else {
     InstructionMark im(this);
     // 1110 1000 #32-bit disp
     L.add_patch_at(code(), locator());
 
-    emit_byte(0xE8);
+    emit_int8((unsigned char)0xE8);
     emit_data(int(0), rtype, operand);
   }
 }
 
 void Assembler::call(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xD0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 
 void Assembler::call(Address adr) {
   InstructionMark im(this);
   prefix(adr);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rdx, adr);
 }
 
 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
   assert(entry != NULL, "call most probably wrong");
   InstructionMark im(this);
-  emit_byte(0xE8);
+  emit_int8((unsigned char)0xE8);
   intptr_t disp = entry - (pc() + sizeof(int32_t));
   assert(is_simm32(disp), "must be 32bit offset (call2)");
   // Technically, should use call32_operand, but this format is
@@ -1165,42 +1167,42 @@
 }
 
 void Assembler::cdql() {
-  emit_byte(0x99);
+  emit_int8((unsigned char)0x99);
 }
 
 void Assembler::cld() {
-  emit_byte(0xfc);
+  emit_int8((unsigned char)0xFC);
 }
 
 void Assembler::cmovl(Condition cc, Register dst, Register src) {
   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 void Assembler::cmovl(Condition cc, Register dst, Address src) {
   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpb(Address dst, int imm8) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x80);
+  emit_int8((unsigned char)0x80);
   emit_operand(rdi, dst, 1);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 void Assembler::cmpl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 4);
   emit_long(imm32);
 }
@@ -1219,17 +1221,17 @@
 void Assembler::cmpl(Register dst, Address  src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x3B);
+  emit_int8((unsigned char)0x3B);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpw(Address dst, int imm16) {
   InstructionMark im(this);
   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
-  emit_byte(0x66);
-  emit_byte(0x81);
+  emit_int8(0x66);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 2);
-  emit_word(imm16);
+  emit_int16(imm16);
 }
 
 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
@@ -1238,8 +1240,8 @@
 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
   InstructionMark im(this);
   prefix(adr, reg);
-  emit_byte(0x0F);
-  emit_byte(0xB1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB1);
   emit_operand(reg, adr);
 }
 
@@ -1266,8 +1268,8 @@
 }
 
 void Assembler::cpuid() {
-  emit_byte(0x0F);
-  emit_byte(0xA2);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xA2);
 }
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
@@ -1293,8 +1295,8 @@
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
@@ -1305,8 +1307,8 @@
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
@@ -1328,22 +1330,22 @@
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::decl(Address dst) {
   // Don't use it directly. Use MacroAssembler::decrement() instead.
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rcx, dst);
 }
 
@@ -1369,43 +1371,43 @@
 
 void Assembler::emms() {
   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
-  emit_byte(0x0F);
-  emit_byte(0x77);
+  emit_int8(0x0F);
+  emit_int8(0x77);
 }
 
 void Assembler::hlt() {
-  emit_byte(0xF4);
+  emit_int8((unsigned char)0xF4);
 }
 
 void Assembler::idivl(Register src) {
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::divl(Register src) { // Unsigned
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF0 | encode));
 }
 
 void Assembler::imull(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xAF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 void Assembler::imull(Register dst, Register src, int value) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   if (is8bit(value)) {
-    emit_byte(0x6B);
-    emit_byte(0xC0 | encode);
-    emit_byte(value & 0xFF);
+    emit_int8(0x6B);
+    emit_int8((unsigned char)(0xC0 | encode));
+    emit_int8(value & 0xFF);
   } else {
-    emit_byte(0x69);
-    emit_byte(0xC0 | encode);
+    emit_int8(0x69);
+    emit_int8((unsigned char)(0xC0 | encode));
     emit_long(value);
   }
 }
@@ -1414,7 +1416,7 @@
   // Don't use it directly. Use MacroAssembler::increment() instead.
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rax, dst);
 }
 
@@ -1430,14 +1432,14 @@
     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
     if (maybe_short && is8bit(offs - short_size)) {
       // 0111 tttn #8-bit disp
-      emit_byte(0x70 | cc);
-      emit_byte((offs - short_size) & 0xFF);
+      emit_int8(0x70 | cc);
+      emit_int8((offs - short_size) & 0xFF);
     } else {
       // 0000 1111 1000 tttn #32-bit disp
       assert(is_simm32(offs - long_size),
              "must be 32bit offset (call4)");
-      emit_byte(0x0F);
-      emit_byte(0x80 | cc);
+      emit_int8(0x0F);
+      emit_int8((unsigned char)(0x80 | cc));
       emit_long(offs - long_size);
     }
   } else {
@@ -1446,8 +1448,8 @@
     // Note: use jccb() if label to be bound is very close to get
     //       an 8-bit displacement
     L.add_patch_at(code(), locator());
-    emit_byte(0x0F);
-    emit_byte(0x80 | cc);
+    emit_int8(0x0F);
+    emit_int8((unsigned char)(0x80 | cc));
     emit_long(0);
   }
 }
@@ -1466,20 +1468,20 @@
 #endif
     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
     // 0111 tttn #8-bit disp
-    emit_byte(0x70 | cc);
-    emit_byte((offs - short_size) & 0xFF);
+    emit_int8(0x70 | cc);
+    emit_int8((offs - short_size) & 0xFF);
   } else {
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0x70 | cc);
-    emit_byte(0);
+    emit_int8(0x70 | cc);
+    emit_int8(0);
   }
 }
 
 void Assembler::jmp(Address adr) {
   InstructionMark im(this);
   prefix(adr);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsp, adr);
 }
 
@@ -1492,10 +1494,10 @@
     const int long_size = 5;
     intptr_t offs = entry - pc();
     if (maybe_short && is8bit(offs - short_size)) {
-      emit_byte(0xEB);
-      emit_byte((offs - short_size) & 0xFF);
+      emit_int8((unsigned char)0xEB);
+      emit_int8((offs - short_size) & 0xFF);
     } else {
-      emit_byte(0xE9);
+      emit_int8((unsigned char)0xE9);
       emit_long(offs - long_size);
     }
   } else {
@@ -1505,20 +1507,20 @@
     // force an 8-bit displacement.
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0xE9);
+    emit_int8((unsigned char)0xE9);
     emit_long(0);
   }
 }
 
 void Assembler::jmp(Register entry) {
   int encode = prefix_and_encode(entry->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0xE9);
+  emit_int8((unsigned char)0xE9);
   assert(dest != NULL, "must have a target");
   intptr_t disp = dest - (pc() + sizeof(int32_t));
   assert(is_simm32(disp), "must be 32bit offset (jmp)");
@@ -1539,13 +1541,13 @@
     assert(is8bit(dist), "Dispacement too large for a short jmp");
 #endif
     intptr_t offs = entry - pc();
-    emit_byte(0xEB);
-    emit_byte((offs - short_size) & 0xFF);
+    emit_int8((unsigned char)0xEB);
+    emit_int8((offs - short_size) & 0xFF);
   } else {
     InstructionMark im(this);
     L.add_patch_at(code(), locator());
-    emit_byte(0xEB);
-    emit_byte(0);
+    emit_int8((unsigned char)0xEB);
+    emit_int8(0);
   }
 }
 
@@ -1553,46 +1555,46 @@
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(2), src);
 }
 
 void Assembler::leal(Register dst, Address src) {
   InstructionMark im(this);
 #ifdef _LP64
-  emit_byte(0x67); // addr32
+  emit_int8(0x67); // addr32
   prefix(src, dst);
 #endif // LP64
-  emit_byte(0x8D);
+  emit_int8((unsigned char)0x8D);
   emit_operand(dst, src);
 }
 
 void Assembler::lfence() {
-  emit_byte(0x0F);
-  emit_byte(0xAE);
-  emit_byte(0xE8);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
+  emit_int8((unsigned char)0xE8);
 }
 
 void Assembler::lock() {
-  emit_byte(0xF0);
+  emit_int8((unsigned char)0xF0);
 }
 
 void Assembler::lzcntl(Register dst, Register src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Emit mfence instruction
 void Assembler::mfence() {
   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
-  emit_byte( 0x0F );
-  emit_byte( 0xAE );
-  emit_byte( 0xF0 );
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
+  emit_int8((unsigned char)0xF0);
 }
 
 void Assembler::mov(Register dst, Register src) {
@@ -1612,15 +1614,15 @@
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
-  emit_byte(0x16);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x16);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movb(Register dst, Address src) {
   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
   InstructionMark im(this);
   prefix(src, dst, true);
-  emit_byte(0x8A);
+  emit_int8((unsigned char)0x8A);
   emit_operand(dst, src);
 }
 
@@ -1628,9 +1630,9 @@
 void Assembler::movb(Address dst, int imm8) {
   InstructionMark im(this);
    prefix(dst);
-  emit_byte(0xC6);
+  emit_int8((unsigned char)0xC6);
   emit_operand(rax, dst, 1);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 
@@ -1638,30 +1640,30 @@
   assert(src->has_byte_register(), "must have byte register");
   InstructionMark im(this);
   prefix(dst, src, true);
-  emit_byte(0x88);
+  emit_int8((unsigned char)0x88);
   emit_operand(src, dst);
 }
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
-  emit_byte(0x7E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x7E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
+  emit_int8(0x6E);
   emit_operand(dst, src);
 }
 
@@ -1669,7 +1671,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x7E);
+  emit_int8(0x7E);
   emit_operand(src, dst);
 }
 
@@ -1692,7 +1694,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x7F);
+  emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
@@ -1701,8 +1703,8 @@
   assert(UseAVX, "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmovdqu(XMMRegister dst, Address src) {
@@ -1710,7 +1712,7 @@
   InstructionMark im(this);
   bool vector256 = true;
   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
-  emit_byte(0x6F);
+  emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
@@ -1721,7 +1723,7 @@
   // swap src<->dst for encoding
   assert(src != xnoreg, "sanity");
   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
-  emit_byte(0x7F);
+  emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
@@ -1729,27 +1731,27 @@
 
 void Assembler::movl(Register dst, int32_t imm32) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_long(imm32);
 }
 
 void Assembler::movl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x8B);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x8B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
   emit_long(imm32);
 }
@@ -1757,7 +1759,7 @@
 void Assembler::movl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
@@ -1771,15 +1773,15 @@
 
 void Assembler::movq( MMXRegister dst, Address src ) {
   assert( VM_Version::supports_mmx(), "" );
-  emit_byte(0x0F);
-  emit_byte(0x6F);
+  emit_int8(0x0F);
+  emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::movq( Address dst, MMXRegister src ) {
   assert( VM_Version::supports_mmx(), "" );
-  emit_byte(0x0F);
-  emit_byte(0x7F);
+  emit_int8(0x0F);
+  emit_int8(0x7F);
   // workaround gcc (3.2.1-7a) bug
   // In that version of gcc with only an emit_operand(MMX, Address)
   // gcc will tail jump and try and reverse the parameters completely
@@ -1793,7 +1795,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x7E);
+  emit_int8(0x7E);
   emit_operand(dst, src);
 }
 
@@ -1801,24 +1803,24 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0xD6);
+  emit_int8((unsigned char)0xD6);
   emit_operand(src, dst);
 }
 
 void Assembler::movsbl(Register dst, Address src) { // movsxb
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
   emit_operand(dst, src);
 }
 
 void Assembler::movsbl(Register dst, Register src) { // movsxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
@@ -1835,7 +1837,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
@@ -1853,93 +1855,93 @@
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x11);
+  emit_int8(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movswl(Register dst, Address src) { // movsxw
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBF);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
   emit_operand(dst, src);
 }
 
 void Assembler::movswl(Register dst, Register src) { // movsxw
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movw(Address dst, int imm16) {
   InstructionMark im(this);
 
-  emit_byte(0x66); // switch to 16-bit mode
+  emit_int8(0x66); // switch to 16-bit mode
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 2);
-  emit_word(imm16);
+  emit_int16(imm16);
 }
 
 void Assembler::movw(Register dst, Address src) {
   InstructionMark im(this);
-  emit_byte(0x66);
+  emit_int8(0x66);
   prefix(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movw(Address dst, Register src) {
   InstructionMark im(this);
-  emit_byte(0x66);
+  emit_int8(0x66);
   prefix(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
 void Assembler::movzbl(Register dst, Address src) { // movzxb
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB6);
   emit_operand(dst, src);
 }
 
 void Assembler::movzbl(Register dst, Register src) { // movzxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F); // register form of movzxb: emits 0x0F, 0xB6, then (0xC0 | encode)
+  emit_int8((unsigned char)0xB6);
+  emit_int8((unsigned char)(0xC0 | encode)); // narrow the whole OR (value > 0x7F), as the sibling register-form emitters do
 }
 
 void Assembler::movzwl(Register dst, Address src) { // movzxw
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB7);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB7);
   emit_operand(dst, src);
 }
 
 void Assembler::movzwl(Register dst, Register src) { // movzxw
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB7);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F); // register form of movzxw: emits 0x0F, 0xB7, then (0xC0 | encode)
+  emit_int8((unsigned char)0xB7);
+  emit_int8((unsigned char)(0xC0 | encode)); // narrow the whole OR (value > 0x7F), as the sibling register-form emitters do
 }
 
 void Assembler::mull(Address src) {
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0xF7);
+  emit_int8((unsigned char)0xF7);
   emit_operand(rsp, src);
 }
 
 void Assembler::mull(Register src) {
   int encode = prefix_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
@@ -1964,8 +1966,8 @@
 
 void Assembler::negl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD8 | encode));
 }
 
 void Assembler::nop(int i) {
@@ -1976,7 +1978,7 @@
   // speed is not an issue so simply use the single byte traditional nop
   // to do alignment.
 
-  for (; i > 0 ; i--) emit_byte(0x90);
+  for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
   return;
 
 #endif // ASSERT
@@ -2006,33 +2008,35 @@
     while(i >= 15) {
       // For Intel don't generate consecutive address nops (mix with regular nops)
       i -= 15;
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
       addr_nop_8();
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x66);   // size prefix
-      emit_byte(0x90);   // nop
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8(0x66);   // size prefix
+      emit_int8((unsigned char)0x90);
+                         // nop
     }
     switch (i) {
       case 14:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 13:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 12:
         addr_nop_8();
-        emit_byte(0x66); // size prefix
-        emit_byte(0x66); // size prefix
-        emit_byte(0x66); // size prefix
-        emit_byte(0x90); // nop
+        emit_int8(0x66); // size prefix
+        emit_int8(0x66); // size prefix
+        emit_int8(0x66); // size prefix
+        emit_int8((unsigned char)0x90);
+                         // nop
         break;
       case 11:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 10:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 9:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 8:
         addr_nop_8();
         break;
@@ -2040,7 +2044,7 @@
         addr_nop_7();
         break;
       case 6:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 5:
         addr_nop_5();
         break;
@@ -2049,11 +2053,12 @@
         break;
       case 3:
         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 2:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 1:
-        emit_byte(0x90); // nop
+        emit_int8((unsigned char)0x90);
+                         // nop
         break;
       default:
         assert(i == 0, " ");
@@ -2086,24 +2091,24 @@
 
     while(i >= 22) {
       i -= 11;
-      emit_byte(0x66); // size prefix
-      emit_byte(0x66); // size prefix
-      emit_byte(0x66); // size prefix
+      emit_int8(0x66); // size prefix
+      emit_int8(0x66); // size prefix
+      emit_int8(0x66); // size prefix
       addr_nop_8();
     }
     // Generate first nop for size between 21-12
     switch (i) {
       case 21:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 20:
       case 19:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 18:
       case 17:
         i -= 1;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 16:
       case 15:
         i -= 8;
@@ -2116,7 +2121,7 @@
         break;
       case 12:
         i -= 6;
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
         addr_nop_5();
         break;
       default:
@@ -2126,11 +2131,11 @@
     // Generate second nop for size between 11-1
     switch (i) {
       case 11:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 10:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 9:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 8:
         addr_nop_8();
         break;
@@ -2138,7 +2143,7 @@
         addr_nop_7();
         break;
       case 6:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 5:
         addr_nop_5();
         break;
@@ -2147,11 +2152,12 @@
         break;
       case 3:
         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 2:
-        emit_byte(0x66); // size prefix
+        emit_int8(0x66); // size prefix
       case 1:
-        emit_byte(0x90); // nop
+        emit_int8((unsigned char)0x90);
+                         // nop
         break;
       default:
         assert(i == 0, " ");
@@ -2174,42 +2180,43 @@
   //
   while(i > 12) {
     i -= 4;
-    emit_byte(0x66); // size prefix
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90); // nop
+    emit_int8(0x66); // size prefix
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90);
+                     // nop
   }
   // 1 - 12 nops
   if(i > 8) {
     if(i > 9) {
       i -= 1;
-      emit_byte(0x66);
+      emit_int8(0x66);
     }
     i -= 3;
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90);
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90);
   }
   // 1 - 8 nops
   if(i > 4) {
     if(i > 6) {
       i -= 1;
-      emit_byte(0x66);
+      emit_int8(0x66);
     }
     i -= 3;
-    emit_byte(0x66);
-    emit_byte(0x66);
-    emit_byte(0x90);
+    emit_int8(0x66);
+    emit_int8(0x66);
+    emit_int8((unsigned char)0x90);
   }
   switch (i) {
     case 4:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 3:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 2:
-      emit_byte(0x66);
+      emit_int8(0x66);
     case 1:
-      emit_byte(0x90);
+      emit_int8((unsigned char)0x90);
       break;
     default:
       assert(i == 0, " ");
@@ -2218,8 +2225,8 @@
 
 void Assembler::notl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD0 | encode );
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 void Assembler::orl(Address dst, int32_t imm32) {
@@ -2236,7 +2243,7 @@
 void Assembler::orl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x0B);
+  emit_int8(0x0B);
   emit_operand(dst, src);
 }
 
@@ -2260,61 +2267,61 @@
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
-  emit_byte(0x61);
+  emit_int8(0x61);
   emit_operand(dst, src);
-  emit_byte(imm8);
+  emit_int8(imm8);
 }
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
-  emit_byte(0x61);
-  emit_byte(0xC0 | encode);
-  emit_byte(imm8);
+  emit_int8(0x61);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x30);
+  emit_int8(0x30);
   emit_operand(dst, src);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x30);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x30);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // generic
 void Assembler::pop(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0x58 | encode);
+  emit_int8(0x58 | encode);
 }
 
 void Assembler::popcntl(Register dst, Address src) {
   assert(VM_Version::supports_popcnt(), "must support");
   InstructionMark im(this);
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB8);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB8);
   emit_operand(dst, src);
 }
 
 void Assembler::popcntl(Register dst, Register src) {
   assert(VM_Version::supports_popcnt(), "must support");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB8);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::popf() {
-  emit_byte(0x9D);
+  emit_int8((unsigned char)0x9D);
 }
 
 #ifndef _LP64 // no 32bit push/pop on amd64
@@ -2322,21 +2329,21 @@
   // NOTE: this will adjust stack by 8byte on 64bits
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x8F);
+  emit_int8((unsigned char)0x8F);
   emit_operand(rax, dst);
 }
 #endif
 
 void Assembler::prefetch_prefix(Address src) {
   prefix(src);
-  emit_byte(0x0F);
+  emit_int8(0x0F);
 }
 
 void Assembler::prefetchnta(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rax, src); // 0, src
 }
 
@@ -2344,7 +2351,7 @@
   assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x0D);
+  emit_int8(0x0D);
   emit_operand(rax, src); // 0, src
 }
 
@@ -2352,7 +2359,7 @@
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rcx, src); // 1, src
 }
 
@@ -2360,7 +2367,7 @@
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rdx, src); // 2, src
 }
 
@@ -2368,7 +2375,7 @@
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(rbx, src); // 3, src
 }
 
@@ -2376,27 +2383,26 @@
   assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
   prefetch_prefix(src);
-  emit_byte(0x0D);
+  emit_int8(0x0D);
   emit_operand(rcx, src); // 1, src
 }
 
 void Assembler::prefix(Prefix p) {
-  a_byte(p);
+  emit_int8(p);
 }
 
 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_ssse3(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x00);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x00);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pshufb(XMMRegister dst, Address src) {
   assert(VM_Version::supports_ssse3(), "");
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x00);
+  emit_int8(0x00);
   emit_operand(dst, src);
 }
 
@@ -2404,7 +2410,7 @@
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 
 }
 
@@ -2414,16 +2420,16 @@
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x70);
+  emit_int8(0x70);
   emit_operand(dst, src);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
@@ -2432,18 +2438,18 @@
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x70);
+  emit_int8(0x70);
   emit_operand(dst, src);
-  emit_byte(mode & 0xFF);
+  emit_int8(mode & 0xFF);
 }
 
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift);
+  emit_int8(0x73); // NOTE(review): xmm3 above presumably selects the /3 opcode extension, like the "XMM6 is for /6" shifts - confirm
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift); // byte count is emitted unmasked here (the psllw/pslld/psllq emitters use shift & 0xFF)
 }
 
 void Assembler::ptest(XMMRegister dst, Address src) {
@@ -2451,15 +2457,15 @@
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x17);
+  emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x17);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x17);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
@@ -2492,18 +2498,18 @@
 void Assembler::push(int32_t imm32) {
   // in 64bits we push 64bits onto the stack but only
   // take a 32bit immediate
-  emit_byte(0x68);
+  emit_int8(0x68);
   emit_long(imm32);
 }
 
 void Assembler::push(Register src) {
   int encode = prefix_and_encode(src->encoding());
 
-  emit_byte(0x50 | encode);
+  emit_int8(0x50 | encode);
 }
 
 void Assembler::pushf() {
-  emit_byte(0x9C);
+  emit_int8((unsigned char)0x9C);
 }
 
 #ifndef _LP64 // no 32bit push/pop on amd64
@@ -2511,7 +2517,7 @@
   // Note this will push 64bit on 64bit
   InstructionMark im(this);
   prefix(src);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsi, src);
 }
 #endif
@@ -2520,58 +2526,58 @@
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xD0 | encode);
+    emit_int8((unsigned char)0xD1); // count of 1: two-byte form, no immediate follows
+    emit_int8((unsigned char)(0xD0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xD0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1); // any other count: imm8 is emitted as a third byte
+    emit_int8((unsigned char)(0xD0 | encode)); // cast the whole OR; a cast binds tighter than '|', so the parentheses are required
+    emit_int8(imm8);
   }
 }
 
 // copies data from [esi] to [edi] using rcx pointer sized words
 // generic
 void Assembler::rep_mov() {
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   // MOVSQ
   LP64_ONLY(prefix(REX_W));
-  emit_byte(0xA5);
+  emit_int8((unsigned char)0xA5);
 }
 
 // sets rcx pointer sized words with rax, value at [edi]
 // generic
 void Assembler::rep_set() { // rep_set
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   // STOSQ
   LP64_ONLY(prefix(REX_W));
-  emit_byte(0xAB);
+  emit_int8((unsigned char)0xAB);
 }
 
 // scans rcx pointer sized words at [edi] for occurrence of rax,
 // generic
 void Assembler::repne_scan() { // repne_scan
-  emit_byte(0xF2);
+  emit_int8((unsigned char)0xF2);
   // SCASQ
   LP64_ONLY(prefix(REX_W));
-  emit_byte(0xAF);
+  emit_int8((unsigned char)0xAF);
 }
 
 #ifdef _LP64
 // scans rcx 4 byte words at [edi] for occurrence of rax,
 // generic
 void Assembler::repne_scanl() { // repne_scan
-  emit_byte(0xF2);
+  emit_int8((unsigned char)0xF2);
   // SCASL
-  emit_byte(0xAF);
+  emit_int8((unsigned char)0xAF);
 }
 #endif
 
 void Assembler::ret(int imm16) {
   if (imm16 == 0) {
-    emit_byte(0xC3);
+    emit_int8((unsigned char)0xC3);
   } else {
-    emit_byte(0xC2);
-    emit_word(imm16);
+    emit_int8((unsigned char)0xC2);
+    emit_int16(imm16);
   }
 }
 
@@ -2580,26 +2586,26 @@
   // Not supported in 64bit mode
   ShouldNotReachHere();
 #endif
-  emit_byte(0x9E);
+  emit_int8((unsigned char)0x9E);
 }
 
 void Assembler::sarl(Register dst, int imm8) {
   int encode = prefix_and_encode(dst->encoding());
   assert(isShiftCount(imm8), "illegal shift count");
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xF8 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xF8 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xF8 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xF8 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::sarl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::sbbl(Address dst, int32_t imm32) {
@@ -2617,7 +2623,7 @@
 void Assembler::sbbl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x1B);
+  emit_int8(0x1B);
   emit_operand(dst, src);
 }
 
@@ -2629,47 +2635,47 @@
 void Assembler::setb(Condition cc, Register dst) {
   assert(0 <= cc && cc < 16, "illegal cc");
   int encode = prefix_and_encode(dst->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0x90 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F); // emits 0x0F, (0x90 | cc), then (0xC0 | encode)
+  emit_int8((unsigned char)(0x90 | cc)); // cast the whole OR; a cast binds tighter than '|', so the parentheses are required
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::shll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
   if (imm8 == 1 ) {
-    emit_byte(0xD1);
-    emit_byte(0xE0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xE0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xE0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xE0 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::shll(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::shrl(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xC1);
-  emit_byte(0xE8 | encode);
-  emit_byte(imm8);
+  emit_int8((unsigned char)0xC1);
+  emit_int8((unsigned char)(0xE8 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::shrl(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE8 | encode));
 }
 
 // copies a single word from [esi] to [edi]
 void Assembler::smovl() {
-  emit_byte(0xA5);
+  emit_int8((unsigned char)0xA5);
 }
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
@@ -2688,7 +2694,7 @@
 }
 
 void Assembler::std() {
-  emit_byte(0xfd);
+  emit_int8((unsigned char)0xFD);
 }
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
@@ -2700,8 +2706,8 @@
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(3), dst);
 }
 
@@ -2714,7 +2720,7 @@
 void Assembler::subl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x29);
+  emit_int8(0x29);
   emit_operand(src, dst);
 }
 
@@ -2732,7 +2738,7 @@
 void Assembler::subl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x2B);
+  emit_int8(0x2B);
   emit_operand(dst, src);
 }
 
@@ -2773,11 +2779,11 @@
   // 8bit operands
   int encode = dst->encoding();
   if (encode == 0) {
-    emit_byte(0xA9);
+    emit_int8((unsigned char)0xA9);
   } else {
     encode = prefix_and_encode(encode);
-    emit_byte(0xF7);
-    emit_byte(0xC0 | encode);
+    emit_int8((unsigned char)0xF7);
+    emit_int8((unsigned char)(0xC0 | encode));
   }
   emit_long(imm32);
 }
@@ -2790,7 +2796,7 @@
 void Assembler::testl(Register dst, Address  src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x85);
+  emit_int8((unsigned char)0x85);
   emit_operand(dst, src);
 }
 
@@ -2818,28 +2824,28 @@
 void Assembler::xaddl(Address dst, Register src) {
   InstructionMark im(this);
   prefix(dst, src);
-  emit_byte(0x0F);
-  emit_byte(0xC1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC1);
   emit_operand(src, dst);
 }
 
 void Assembler::xchgl(Register dst, Address src) { // xchg
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x87);
+  emit_int8((unsigned char)0x87);
   emit_operand(dst, src);
 }
 
 void Assembler::xchgl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x87);
-  emit_byte(0xc0 | encode);
+  emit_int8((unsigned char)0x87);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xgetbv() {
-  emit_byte(0x0F);
-  emit_byte(0x01);
-  emit_byte(0xD0);
+  emit_int8(0x0F);
+  emit_int8(0x01);
+  emit_int8((unsigned char)0xD0);
 }
 
 void Assembler::xorl(Register dst, int32_t imm32) {
@@ -2850,7 +2856,7 @@
 void Assembler::xorl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
-  emit_byte(0x33);
+  emit_int8(0x33);
   emit_operand(dst, src);
 }
 
@@ -3276,8 +3282,8 @@
 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
-  emit_byte(0x40);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x40);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
@@ -3288,8 +3294,8 @@
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
-  emit_byte(0x40);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x40);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
@@ -3303,7 +3309,7 @@
   int dst_enc = dst->encoding();
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
-  emit_byte(0x40);
+  emit_int8(0x40);
   emit_operand(dst, src);
 }
 
@@ -3312,27 +3318,27 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::pslld(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllq(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
@@ -3354,21 +3360,21 @@
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
   emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3391,18 +3397,18 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrld(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlq(XMMRegister dst, int shift) {
@@ -3411,9 +3417,9 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
@@ -3435,21 +3441,21 @@
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
   emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3472,18 +3478,18 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
-  emit_byte(0x71);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrad(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
-  emit_byte(0x72);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift & 0xFF);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
@@ -3500,14 +3506,14 @@
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
-  emit_byte(shift & 0xFF);
+  emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
@@ -3572,11 +3578,11 @@
   assert(VM_Version::supports_avx(), "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
-  emit_byte(0x18);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x18);
+  emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
@@ -3587,10 +3593,10 @@
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x18);
+  emit_int8(0x18);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
@@ -3600,21 +3606,21 @@
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x19);
+  emit_int8(0x19);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
-  emit_byte(0x38);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x38);
+  emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, Address src) {
@@ -3625,10 +3631,10 @@
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x38);
+  emit_int8(0x38);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vextracti128h(Address dst, XMMRegister src) {
@@ -3638,16 +3644,16 @@
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
-  emit_byte(0x39);
+  emit_int8(0x39);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
-  emit_byte(0x01);
+  emit_int8(0x01);
 }
 
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
-  emit_byte(0x77);
+  emit_int8(0x77);
 }
 
 
@@ -3657,15 +3663,15 @@
 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
   // NO PREFIX AS NEVER 64BIT
   InstructionMark im(this);
-  emit_byte(0x81);
-  emit_byte(0xF8 | src1->encoding());
+  emit_int8((unsigned char)0x81);
+  emit_int8((unsigned char)(0xF8 | src1->encoding()));
   emit_data(imm32, rspec, 0);
 }
 
 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
   InstructionMark im(this);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, src1);
   emit_data(imm32, rspec, 0);
 }
@@ -3675,14 +3681,14 @@
 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
 void Assembler::cmpxchg8(Address adr) {
   InstructionMark im(this);
-  emit_byte(0x0F);
-  emit_byte(0xc7);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rcx, adr);
 }
 
 void Assembler::decl(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementl() instead.
- emit_byte(0x48 | dst->encoding());
+ emit_int8(0x48 | dst->encoding());
 }
 
 #endif // _LP64
@@ -3690,8 +3696,8 @@
 // 64bit typically doesn't use the x87 but needs to for the trig funcs
 
 void Assembler::fabs() {
-  emit_byte(0xD9);
-  emit_byte(0xE1);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE1);
 }
 
 void Assembler::fadd(int i) {
@@ -3700,13 +3706,13 @@
 
 void Assembler::fadd_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rax, src);
 }
 
 void Assembler::fadd_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rax, src);
 }
 
@@ -3719,8 +3725,8 @@
 }
 
 void Assembler::fchs() {
-  emit_byte(0xD9);
-  emit_byte(0xE0);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE0);
 }
 
 void Assembler::fcom(int i) {
@@ -3733,29 +3739,29 @@
 
 void Assembler::fcomp_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rbx, src);
 }
 
 void Assembler::fcomp_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rbx, src);
 }
 
 void Assembler::fcompp() {
-  emit_byte(0xDE);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xDE);
+  emit_int8((unsigned char)0xD9);
 }
 
 void Assembler::fcos() {
-  emit_byte(0xD9);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFF);
 }
 
 void Assembler::fdecstp() {
-  emit_byte(0xD9);
-  emit_byte(0xF6);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF6);
 }
 
 void Assembler::fdiv(int i) {
@@ -3764,13 +3770,13 @@
 
 void Assembler::fdiv_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rsi, src);
 }
 
 void Assembler::fdiv_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rsi, src);
 }
 
@@ -3791,13 +3797,13 @@
 
 void Assembler::fdivr_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rdi, src);
 }
 
 void Assembler::fdivr_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rdi, src);
 }
 
@@ -3815,59 +3821,59 @@
 
 void Assembler::fild_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDF);
+  emit_int8((unsigned char)0xDF);
   emit_operand32(rbp, adr);
 }
 
 void Assembler::fild_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rax, adr);
 }
 
 void Assembler::fincstp() {
-  emit_byte(0xD9);
-  emit_byte(0xF7);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF7);
 }
 
 void Assembler::finit() {
-  emit_byte(0x9B);
-  emit_byte(0xDB);
-  emit_byte(0xE3);
+  emit_int8((unsigned char)0x9B);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)0xE3);
 }
 
 void Assembler::fist_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fistp_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDF);
+  emit_int8((unsigned char)0xDF);
   emit_operand32(rdi, adr);
 }
 
 void Assembler::fistp_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rbx, adr);
 }
 
 void Assembler::fld1() {
-  emit_byte(0xD9);
-  emit_byte(0xE8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE8);
 }
 
 void Assembler::fld_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rax, adr);
 }
 
 void Assembler::fld_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rax, adr);
 }
 
@@ -3878,35 +3884,35 @@
 
 void Assembler::fld_x(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rbp, adr);
 }
 
 void Assembler::fldcw(Address src) {
   InstructionMark im(this);
-  emit_byte(0xd9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rbp, src);
 }
 
 void Assembler::fldenv(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fldlg2() {
-  emit_byte(0xD9);
-  emit_byte(0xEC);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEC);
 }
 
 void Assembler::fldln2() {
-  emit_byte(0xD9);
-  emit_byte(0xED);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xED);
 }
 
 void Assembler::fldz() {
-  emit_byte(0xD9);
-  emit_byte(0xEE);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEE);
 }
 
 void Assembler::flog() {
@@ -3927,13 +3933,13 @@
 
 void Assembler::fmul_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rcx, src);
 }
 
 void Assembler::fmul_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rcx, src);
 }
 
@@ -3947,63 +3953,63 @@
 
 void Assembler::fnsave(Address dst) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rsi, dst);
 }
 
 void Assembler::fnstcw(Address src) {
   InstructionMark im(this);
-  emit_byte(0x9B);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0x9B);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rdi, src);
 }
 
 void Assembler::fnstsw_ax() {
-  emit_byte(0xdF);
-  emit_byte(0xE0);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)0xE0);
 }
 
 void Assembler::fprem() {
-  emit_byte(0xD9);
-  emit_byte(0xF8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF8);
 }
 
 void Assembler::fprem1() {
-  emit_byte(0xD9);
-  emit_byte(0xF5);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF5);
 }
 
 void Assembler::frstor(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fsin() {
-  emit_byte(0xD9);
-  emit_byte(0xFE);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFE);
 }
 
 void Assembler::fsqrt() {
-  emit_byte(0xD9);
-  emit_byte(0xFA);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFA);
 }
 
 void Assembler::fst_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fst_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rdx, adr);
 }
 
 void Assembler::fstp_d(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDD);
+  emit_int8((unsigned char)0xDD);
   emit_operand32(rbx, adr);
 }
 
@@ -4013,13 +4019,13 @@
 
 void Assembler::fstp_s(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xD9);
+  emit_int8((unsigned char)0xD9);
   emit_operand32(rbx, adr);
 }
 
 void Assembler::fstp_x(Address adr) {
   InstructionMark im(this);
-  emit_byte(0xDB);
+  emit_int8((unsigned char)0xDB);
   emit_operand32(rdi, adr);
 }
 
@@ -4029,13 +4035,13 @@
 
 void Assembler::fsub_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rsp, src);
 }
 
 void Assembler::fsub_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rsp, src);
 }
 
@@ -4053,13 +4059,13 @@
 
 void Assembler::fsubr_d(Address src) {
   InstructionMark im(this);
-  emit_byte(0xDC);
+  emit_int8((unsigned char)0xDC);
   emit_operand32(rbp, src);
 }
 
 void Assembler::fsubr_s(Address src) {
   InstructionMark im(this);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD8);
   emit_operand32(rbp, src);
 }
 
@@ -4072,15 +4078,15 @@
 }
 
 void Assembler::ftan() {
-  emit_byte(0xD9);
-  emit_byte(0xF2);
-  emit_byte(0xDD);
-  emit_byte(0xD8);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)0xDD);
+  emit_int8((unsigned char)0xD8);
 }
 
 void Assembler::ftst() {
-  emit_byte(0xD9);
-  emit_byte(0xE4);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xE4);
 }
 
 void Assembler::fucomi(int i) {
@@ -4096,7 +4102,7 @@
 }
 
 void Assembler::fwait() {
-  emit_byte(0x9B);
+  emit_int8((unsigned char)0x9B);
 }
 
 void Assembler::fxch(int i) {
@@ -4104,23 +4110,23 @@
 }
 
 void Assembler::fyl2x() {
-  emit_byte(0xD9);
-  emit_byte(0xF1);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF1);
 }
 
 void Assembler::frndint() {
-  emit_byte(0xD9);
-  emit_byte(0xFC);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xFC);
 }
 
 void Assembler::f2xm1() {
-  emit_byte(0xD9);
-  emit_byte(0xF0);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xF0);
 }
 
 void Assembler::fldl2e() {
-  emit_byte(0xD9);
-  emit_byte(0xEA);
+  emit_int8((unsigned char)0xD9);
+  emit_int8((unsigned char)0xEA);
 }
 
 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
@@ -4131,7 +4137,7 @@
 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
   if (pre > 0) {
-    emit_byte(simd_pre[pre]);
+    emit_int8(simd_pre[pre]);
   }
   if (rex_w) {
     prefixq(adr, xreg);
@@ -4139,25 +4145,25 @@
     prefix(adr, xreg);
   }
   if (opc > 0) {
-    emit_byte(0x0F);
+    emit_int8(0x0F);
     int opc2 = simd_opc[opc];
     if (opc2 > 0) {
-      emit_byte(opc2);
+      emit_int8(opc2);
     }
   }
 }
 
 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
   if (pre > 0) {
-    emit_byte(simd_pre[pre]);
+    emit_int8(simd_pre[pre]);
   }
   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                           prefix_and_encode(dst_enc, src_enc);
   if (opc > 0) {
-    emit_byte(0x0F);
+    emit_int8(0x0F);
     int opc2 = simd_opc[opc];
     if (opc2 > 0) {
-      emit_byte(opc2);
+      emit_int8(opc2);
     }
   }
   return encode;
@@ -4171,11 +4177,11 @@
     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
     byte1 = (~byte1) & 0xE0;
     byte1 |= opc;
-    a_byte(byte1);
+    emit_int8(byte1);
 
     int byte2 = ((~nds_enc) & 0xf) << 3;
     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
-    emit_byte(byte2);
+    emit_int8(byte2);
   } else {
     prefix(VEX_2bytes);
 
@@ -4183,7 +4189,7 @@
     byte1 = (~byte1) & 0x80;
     byte1 |= ((~nds_enc) & 0xf) << 3;
     byte1 |= (vector256 ? 4 : 0) | pre;
-    emit_byte(byte1);
+    emit_int8(byte1);
   }
 }
 
@@ -4229,28 +4235,28 @@
 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
   InstructionMark im(this);
   simd_prefix(dst, dst, src, pre);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
   int encode = simd_prefix_and_encode(dst, dst, src, pre);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Versions with no second source register (non-destructive source).
 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
   InstructionMark im(this);
   simd_prefix(dst, xnoreg, src, pre);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // 3-operands AVX instructions
@@ -4258,22 +4264,22 @@
                                Address src, VexSimdPrefix pre, bool vector256) {
   InstructionMark im(this);
   vex_prefix(dst, nds, src, pre, vector256);
-  emit_byte(opcode);
+  emit_int8(opcode);
   emit_operand(dst, src);
 }
 
 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                                XMMRegister src, VexSimdPrefix pre, bool vector256) {
   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
-  emit_byte(opcode);
-  emit_byte(0xC0 | encode);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementl() instead.
-  emit_byte(0x40 | dst->encoding());
+  emit_int8(0x40 | dst->encoding());
 }
 
 void Assembler::lea(Register dst, Address src) {
@@ -4282,7 +4288,7 @@
 
 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst);
   emit_data((int)imm32, rspec, 0);
 }
@@ -4290,49 +4296,49 @@
 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_data((int)imm32, rspec, 0);
 }
 
 void Assembler::popa() { // 32bit
-  emit_byte(0x61);
+  emit_int8(0x61);
 }
 
 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
-  emit_byte(0x68);
+  emit_int8(0x68);
   emit_data(imm32, rspec, 0);
 }
 
 void Assembler::pusha() { // 32bit
-  emit_byte(0x60);
+  emit_int8(0x60);
 }
 
 void Assembler::set_byte_if_not_zero(Register dst) {
-  emit_byte(0x0F);
-  emit_byte(0x95);
-  emit_byte(0xE0 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0x95);
+  emit_int8((unsigned char)(0xE0 | dst->encoding()));
 }
 
 void Assembler::shldl(Register dst, Register src) {
-  emit_byte(0x0F);
-  emit_byte(0xA5);
-  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xA5);
+  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
 }
 
 void Assembler::shrdl(Register dst, Register src) {
-  emit_byte(0x0F);
-  emit_byte(0xAD);
-  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAD);
+  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
 }
 
 #else // LP64
 
 void Assembler::set_byte_if_not_zero(Register dst) {
   int enc = prefix_and_encode(dst->encoding(), true);
-  emit_byte(0x0F);
-  emit_byte(0x95);
-  emit_byte(0xE0 | enc);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0x95);
+  emit_int8((unsigned char)(0xE0 | enc));
 }
 
 // 64bit only pieces of the assembler
@@ -4670,7 +4676,7 @@
 void Assembler::adcq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x13);
+  emit_int8(0x13);
   emit_operand(dst, src);
 }
 
@@ -4688,7 +4694,7 @@
 void Assembler::addq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x01);
+  emit_int8(0x01);
   emit_operand(src, dst);
 }
 
@@ -4700,7 +4706,7 @@
 void Assembler::addq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x03);
+  emit_int8(0x03);
   emit_operand(dst, src);
 }
 
@@ -4712,7 +4718,7 @@
 void Assembler::andq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rsp, dst, 4);
   emit_long(imm32);
 }
@@ -4725,7 +4731,7 @@
 void Assembler::andq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x23);
+  emit_int8(0x23);
   emit_operand(dst, src);
 }
 
@@ -4736,56 +4742,56 @@
 
 void Assembler::bsfq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBC);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bsrq(Register dst, Register src) {
   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::bswapq(Register reg) {
   int encode = prefixq_and_encode(reg->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xC8 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::cdqq() {
   prefix(REX_W);
-  emit_byte(0x99);
+  emit_int8((unsigned char)0x99);
 }
 
 void Assembler::clflush(Address adr) {
   prefix(adr);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(rdi, adr);
 }
 
 void Assembler::cmovq(Condition cc, Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cmovq(Condition cc, Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x40 | cc);
+  emit_int8(0x0F);
+  emit_int8(0x40 | cc);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rdi, dst, 4);
   emit_long(imm32);
 }
@@ -4798,7 +4804,7 @@
 void Assembler::cmpq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x3B);
+  emit_int8(0x3B);
   emit_operand(src, dst);
 }
 
@@ -4810,122 +4816,122 @@
 void Assembler::cmpq(Register dst, Address  src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x3B);
+  emit_int8(0x3B);
   emit_operand(dst, src);
 }
 
 void Assembler::cmpxchgq(Register reg, Address adr) {
   InstructionMark im(this);
   prefixq(adr, reg);
-  emit_byte(0x0F);
-  emit_byte(0xB1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB1);
   emit_operand(reg, adr);
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
+  emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
+  emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
-  emit_byte(0x2C);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x2C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::decl(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementl() instead.
   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC8 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC8 | encode));
 }
 
 void Assembler::decq(Register dst) {
   // Don't use it directly. Use MacroAssembler::decrementq() instead.
   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC8 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC8 | encode));  // value is >= 0x80: cast like the sibling decl/incq emitters
 }
 
 void Assembler::decq(Address dst) {
   // Don't use it directly. Use MacroAssembler::decrementq() instead.
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rcx, dst);
 }
 
 void Assembler::fxrstor(Address src) {
   prefixq(src);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(1), src);
 }
 
 void Assembler::fxsave(Address dst) {
   prefixq(dst);
-  emit_byte(0x0F);
-  emit_byte(0xAE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAE);
   emit_operand(as_Register(0), dst);
 }
 
 void Assembler::idivq(Register src) {
   int encode = prefixq_and_encode(src->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::imulq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xAF);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xAF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::imulq(Register dst, Register src, int value) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
   if (is8bit(value)) {
-    emit_byte(0x6B);
-    emit_byte(0xC0 | encode);
-    emit_byte(value & 0xFF);
+    emit_int8(0x6B);
+    emit_int8((unsigned char)(0xC0 | encode));
+    emit_int8(value & 0xFF);
   } else {
-    emit_byte(0x69);
-    emit_byte(0xC0 | encode);
+    emit_int8(0x69);
+    emit_int8((unsigned char)(0xC0 | encode));
     emit_long(value);
   }
 }
@@ -4934,23 +4940,23 @@
   // Don't use it directly. Use MacroAssembler::incrementl() instead.
   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::incq(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementq() instead.
   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xFF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0xFF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::incq(Address dst) {
   // Don't use it directly. Use MacroAssembler::incrementq() instead.
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rax, dst);
 }
 
@@ -4961,35 +4967,35 @@
 void Assembler::leaq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x8D);
+  emit_int8((unsigned char)0x8D);
   emit_operand(dst, src);
 }
 
 void Assembler::mov64(Register dst, int64_t imm64) {
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_int64(imm64);
 }
 
 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8(0xB8 | encode);
   emit_data64(imm64, rspec);
 }
 
 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(dst->encoding());
-  emit_byte(0xB8 | encode);
+  emit_int8((unsigned char)(0xB8 | encode));
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
@@ -4997,34 +5003,34 @@
 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   int encode = prefix_and_encode(src1->encoding());
-  emit_byte(0x81);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0x81);
+  emit_int8((unsigned char)(0xF8 | encode));
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   prefix(src1);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rax, src1, 4);
   emit_data((int)imm32, rspec, narrow_oop_operand);
 }
 
 void Assembler::lzcntq(Register dst, Register src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBD);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
-  emit_byte(0x6E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x6E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdq(Register dst, XMMRegister src) {
@@ -5032,43 +5038,43 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
-  emit_byte(0x7E);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x7E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x8B);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x8B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x8B);
+  emit_int8((unsigned char)0x8B);
   emit_operand(dst, src);
 }
 
 void Assembler::movq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x89);
+  emit_int8((unsigned char)0x89);
   emit_operand(src, dst);
 }
 
 void Assembler::movsbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBE);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
   emit_operand(dst, src);
 }
 
 void Assembler::movsbq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBE);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movslq(Register dst, int32_t imm32) {
@@ -5078,7 +5084,7 @@
   ShouldNotReachHere();
   InstructionMark im(this);
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xC7 | encode);
+  emit_int8((unsigned char)(0xC7 | encode));
   emit_long(imm32);
 }
 
@@ -5086,7 +5092,7 @@
   assert(is_simm32(imm32), "lost bits");
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0xC7);
+  emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst, 4);
   emit_long(imm32);
 }
@@ -5094,77 +5100,77 @@
 void Assembler::movslq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x63);
+  emit_int8(0x63);
   emit_operand(dst, src);
 }
 
 void Assembler::movslq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x63);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x63);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movswq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xBF);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBF);
   emit_operand(dst, src);
 }
 
 void Assembler::movswq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xBF);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xBF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movzbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB6);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB6);
   emit_operand(dst, src);
 }
 
 void Assembler::movzbq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB6);
-  emit_byte(0xC0 | encode);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xB6);
+  emit_int8(0xC0 | encode);
 }
 
 void Assembler::movzwq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB7);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB7);
   emit_operand(dst, src);
 }
 
 void Assembler::movzwq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB7);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB7);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::negq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD8 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD8 | encode));
 }
 
 void Assembler::notq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xF7);
-  emit_byte(0xD0 | encode);
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xD0 | encode));
 }
 
 void Assembler::orq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x81);
+  emit_int8((unsigned char)0x81);
   emit_operand(rcx, dst, 4);
   emit_long(imm32);
 }
@@ -5177,7 +5183,7 @@
 void Assembler::orq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x0B);
+  emit_int8(0x0B);
   emit_operand(dst, src);
 }
 
@@ -5210,26 +5216,26 @@
 void Assembler::popcntq(Register dst, Address src) {
   assert(VM_Version::supports_popcnt(), "must support");
   InstructionMark im(this);
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   prefixq(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0xB8);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB8);
   emit_operand(dst, src);
 }
 
 void Assembler::popcntq(Register dst, Register src) {
   assert(VM_Version::supports_popcnt(), "must support");
-  emit_byte(0xF3);
+  emit_int8((unsigned char)0xF3);
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0xB8);
-  emit_byte(0xC0 | encode);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0xB8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::popq(Address dst) {
   InstructionMark im(this);
   prefixq(dst);
-  emit_byte(0x8F);
+  emit_int8((unsigned char)0x8F);
   emit_operand(rax, dst);
 }
 
@@ -5261,7 +5267,7 @@
 void Assembler::pushq(Address src) {
   InstructionMark im(this);
   prefixq(src);
-  emit_byte(0xFF);
+  emit_int8((unsigned char)0xFF);
   emit_operand(rsi, src);
 }
 
@@ -5269,31 +5275,31 @@
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xD0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xD0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xD0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xD0 | encode));
+    emit_int8(imm8);
   }
 }
 void Assembler::sarq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xF8 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xF8 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xF8 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xF8 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::sarq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xF8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xF8 | encode));
 }
 
 void Assembler::sbbq(Address dst, int32_t imm32) {
@@ -5310,7 +5316,7 @@
 void Assembler::sbbq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x1B);
+  emit_int8(0x1B);
   emit_operand(dst, src);
 }
 
@@ -5323,33 +5329,33 @@
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
   if (imm8 == 1) {
-    emit_byte(0xD1);
-    emit_byte(0xE0 | encode);
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xE0 | encode));
   } else {
-    emit_byte(0xC1);
-    emit_byte(0xE0 | encode);
-    emit_byte(imm8);
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xE0 | encode));
+    emit_int8(imm8);
   }
 }
 
 void Assembler::shlq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE0 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xE0 | encode));
 }
 
 void Assembler::shrq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xC1);
-  emit_byte(0xE8 | encode);
-  emit_byte(imm8);
+  emit_int8((unsigned char)0xC1);
+  emit_int8((unsigned char)(0xE8 | encode));
+  emit_int8(imm8);
 }
 
 void Assembler::shrq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
-  emit_byte(0xD3);
-  emit_byte(0xE8 | encode);
+  emit_int8((unsigned char)0xD3);
+  emit_int8(0xE8 | encode);
 }
 
 void Assembler::subq(Address dst, int32_t imm32) {
@@ -5361,7 +5367,7 @@
 void Assembler::subq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x29);
+  emit_int8(0x29);
   emit_operand(src, dst);
 }
 
@@ -5379,7 +5385,7 @@
 void Assembler::subq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x2B);
+  emit_int8(0x2B);
   emit_operand(dst, src);
 }
 
@@ -5395,11 +5401,11 @@
   int encode = dst->encoding();
   if (encode == 0) {
     prefix(REX_W);
-    emit_byte(0xA9);
+    emit_int8((unsigned char)0xA9);
   } else {
     encode = prefixq_and_encode(encode);
-    emit_byte(0xF7);
-    emit_byte(0xC0 | encode);
+    emit_int8((unsigned char)0xF7);
+    emit_int8((unsigned char)(0xC0 | encode));
   }
   emit_long(imm32);
 }
@@ -5412,22 +5418,22 @@
 void Assembler::xaddq(Address dst, Register src) {
   InstructionMark im(this);
   prefixq(dst, src);
-  emit_byte(0x0F);
-  emit_byte(0xC1);
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xC1);
   emit_operand(src, dst);
 }
 
 void Assembler::xchgq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x87);
+  emit_int8((unsigned char)0x87);
   emit_operand(dst, src);
 }
 
 void Assembler::xchgq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x87);
-  emit_byte(0xc0 | encode);
+  emit_int8((unsigned char)0x87);
+  emit_int8((unsigned char)(0xc0 | encode));
 }
 
 void Assembler::xorq(Register dst, Register src) {
@@ -5438,7 +5444,7 @@
 void Assembler::xorq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
-  emit_byte(0x33);
+  emit_int8(0x33);
   emit_operand(dst, src);
 }
 
--- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -313,10 +313,10 @@
 #endif
   } else {
     // make a copy the code which is going to be patched.
-    for ( int i = 0; i < _bytes_to_copy; i++) {
+    for (int i = 0; i < _bytes_to_copy; i++) {
       address ptr = (address)(_pc_start + i);
       int a_byte = (*ptr) & 0xFF;
-      __ a_byte (a_byte);
+      __ emit_int8(a_byte);
       *ptr = 0x90; // make the site look like a nop
     }
   }
@@ -363,11 +363,11 @@
   // emit the offsets needed to find the code to patch
   int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
 
-  __ a_byte(0xB8);
-  __ a_byte(0);
-  __ a_byte(being_initialized_entry_offset);
-  __ a_byte(bytes_to_skip);
-  __ a_byte(_bytes_to_copy);
+  __ emit_int8((unsigned char)0xB8);
+  __ emit_int8(0);
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
   address patch_info_pc = __ pc();
   assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
 
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -1023,7 +1023,7 @@
 
 void MacroAssembler::leave() {
   // %%% is this really better? Why not on 32bit too?
-  emit_byte(0xC9); // LEAVE
+  emit_int8((unsigned char)0xC9); // LEAVE
 }
 
 void MacroAssembler::lneg(Register hi, Register lo) {
@@ -2112,11 +2112,11 @@
   if (UseAddressNop) {
     addr_nop_5();
   } else {
-    emit_byte(0x26); // es:
-    emit_byte(0x2e); // cs:
-    emit_byte(0x64); // fs:
-    emit_byte(0x65); // gs:
-    emit_byte(0x90);
+    emit_int8(0x26); // es:
+    emit_int8(0x2e); // cs:
+    emit_int8(0x64); // fs:
+    emit_int8(0x65); // gs:
+    emit_int8((unsigned char)0x90);
   }
 }
 
@@ -2534,12 +2534,12 @@
     int offs = (intptr_t)dst.target() - ((intptr_t)pc());
     if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
       // 0111 tttn #8-bit disp
-      emit_byte(0x70 | cc);
-      emit_byte((offs - short_size) & 0xFF);
+      emit_int8(0x70 | cc);
+      emit_int8((offs - short_size) & 0xFF);
     } else {
       // 0000 1111 1000 tttn #32-bit disp
-      emit_byte(0x0F);
-      emit_byte(0x80 | cc);
+      emit_int8(0x0F);
+      emit_int8((unsigned char)(0x80 | cc));
       emit_long(offs - long_size);
     }
   } else {
@@ -3085,7 +3085,8 @@
 
 void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
   // Used in sign-bit flipping with aligned address.
-  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
+  assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes");
   if (reachable(src)) {
     Assembler::pshufb(dst, as_Address(src));
   } else {
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -126,25 +126,6 @@
     }
   }
 
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch) {
-    const char* s;
-    unsigned char op = branch[0];
-    if (op == 0xE8) {
-      s = "call";
-    } else if (op == 0xE9 || op == 0xEB) {
-      s = "jmp";
-    } else if ((op & 0xF0) == 0x70) {
-      s = "jcc";
-    } else if (op == 0x0F) {
-      s = "jcc";
-    } else {
-      s = "????";
-    }
-    tty->print("%s (unresolved)", s);
-  }
-#endif
-
   // The following 4 methods return the offset of the appropriate move instruction
 
   // Support for fast byte/short loading with zero extension (depending on particular CPU)
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -2174,13 +2174,13 @@
   //   c_rarg2   - K (key) in little endian int array
   //
   address generate_aescrypt_encryptBlock() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
     Label L_doLast;
     address start = __ pc();
 
-    const Register from        = rsi;      // source array address
+    const Register from        = rdx;      // source array address
     const Register to          = rdx;      // destination array address
     const Register key         = rcx;      // key array address
     const Register keylen      = rax;
@@ -2189,47 +2189,74 @@
     const Address  key_param (rbp, 8+8);
 
     const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    const XMMRegister xmm_key_shuf_mask = xmm2;
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ movptr(from , from_param);
-    __ movptr(to   , to_param);
-    __ movptr(key  , key_param);
-
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
+
+    __ enter();   // required for proper stackwalking of RuntimeStub frame
+    __ movptr(from, from_param);
+    __ movptr(key, key_param);
+
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    // keylen = # of 32-bit words, convert to 128-bit words
-    __ shrl(keylen, 2);
-    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
 
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
+    __ movptr(to, to_param);
 
     // For encryption, the java expanded key ordering is just what we need
 
-    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
-    __ pxor(xmm_result, xmm_temp);
-    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
-      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
-    }
-    load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
-    __ cmpl(keylen, 0);
-    __ jcc(Assembler::equal, L_doLast);
-    __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
-    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
-    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
-    __ subl(keylen, 2);
-    __ jcc(Assembler::equal, L_doLast);
-    __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
-    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
-    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp1);
+
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
 
     __ BIND(L_doLast);
-    __ aesenclast(xmm_result, xmm_temp);
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenclast(xmm_result, xmm_temp2);
     __ movdqu(Address(to, 0), xmm_result);        // store the result
     __ xorptr(rax, rax); // return 0
-    __ pop(rsi);
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
@@ -2245,13 +2272,13 @@
   //   c_rarg2   - K (key) in little endian int array
   //
   address generate_aescrypt_decryptBlock() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
     Label L_doLast;
     address start = __ pc();
 
-    const Register from        = rsi;      // source array address
+    const Register from        = rdx;      // source array address
     const Register to          = rdx;      // destination array address
     const Register key         = rcx;      // key array address
     const Register keylen      = rax;
@@ -2260,51 +2287,76 @@
     const Address  key_param (rbp, 8+8);
 
     const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    const XMMRegister xmm_key_shuf_mask = xmm2;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ movptr(from , from_param);
-    __ movptr(to   , to_param);
-    __ movptr(key  , key_param);
-
+    __ movptr(from, from_param);
+    __ movptr(key, key_param);
+
+    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    // keylen = # of 32-bit words, convert to 128-bit words
-    __ shrl(keylen, 2);
-    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
 
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     __ movdqu(xmm_result, Address(from, 0));
+    __ movptr(to, to_param);
 
     // for decryption java expanded key ordering is rotated one position from what we want
     // so we start from 0x10 here and hit 0x00 last
     // we don't know if the key is aligned, hence not using load-execute form
-    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
-    __ pxor  (xmm_result, xmm_temp);
-    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
-      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
-    }
-    __ cmpl(keylen, 0);
-    __ jcc(Assembler::equal, L_doLast);
-    // only in 192 and 256 bit keys
-    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
-    __ subl(keylen, 2);
-    __ jcc(Assembler::equal, L_doLast);
-    // only in 256 bit keys
-    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ pxor  (xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
 
     __ BIND(L_doLast);
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
     // for decryption the aesdeclast operation is always on key+0x00
-    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
-    __ aesdeclast(xmm_result, xmm_temp);
-
+    __ aesdeclast(xmm_result, xmm_temp3);
     __ movdqu(Address(to, 0), xmm_result);  // store the result
-
     __ xorptr(rax, rax); // return 0
-    __ pop(rsi);
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
@@ -2340,7 +2392,7 @@
   //   c_rarg4   - input length
   //
   address generate_cipherBlockChaining_encryptAESCrypt() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
     address start = __ pc();
@@ -2393,7 +2445,7 @@
     __ jcc(Assembler::notEqual, L_key_192_256);
 
     // 128 bit code follows here
-    __ movptr(pos, 0);
+    __ movl(pos, 0);
     __ align(OptoLoopAlignment);
     __ BIND(L_loopTop_128);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
@@ -2423,15 +2475,15 @@
     __ leave();                                  // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-  __ BIND(L_key_192_256);
-  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
     __ cmpl(rax, 52);
     __ jcc(Assembler::notEqual, L_key_256);
 
     // 192-bit code follows here (could be changed to use more xmm registers)
-    __ movptr(pos, 0);
-  __ align(OptoLoopAlignment);
-  __ BIND(L_loopTop_192);
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_192);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
 
@@ -2452,11 +2504,11 @@
     __ jcc(Assembler::notEqual, L_loopTop_192);
     __ jmp(L_exit);
 
-  __ BIND(L_key_256);
+    __ BIND(L_key_256);
     // 256-bit code follows here (could be changed to use more xmm registers)
-    __ movptr(pos, 0);
-  __ align(OptoLoopAlignment);
-  __ BIND(L_loopTop_256);
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_256);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
 
@@ -2495,7 +2547,7 @@
   //
 
   address generate_cipherBlockChaining_decryptAESCrypt() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
     address start = __ pc();
@@ -2556,9 +2608,9 @@
 
 
     // 128-bit code follows here, parallelized
-    __ movptr(pos, 0);
-  __ align(OptoLoopAlignment);
-  __ BIND(L_singleBlock_loopTop_128);
+    __ movl(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_128);
     __ cmpptr(len_reg, 0);           // any blocks left??
     __ jcc(Assembler::equal, L_exit);
     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
@@ -2597,7 +2649,7 @@
     __ jcc(Assembler::notEqual, L_key_256);
 
     // 192-bit code follows here (could be optimized to use parallelism)
-    __ movptr(pos, 0);
+    __ movl(pos, 0);
     __ align(OptoLoopAlignment);
     __ BIND(L_singleBlock_loopTop_192);
     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
@@ -2622,7 +2674,7 @@
 
     __ BIND(L_key_256);
     // 256-bit code follows here (could be optimized to use parallelism)
-    __ movptr(pos, 0);
+    __ movl(pos, 0);
     __ align(OptoLoopAlignment);
     __ BIND(L_singleBlock_loopTop_256);
     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -2953,21 +2953,6 @@
     }
   }
 
-  // aesenc using specified key+offset
-  // can optionally specify that the shuffle mask is already in an xmmregister
-  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
-    load_key(xmmtmp, key, offset, xmm_shuf_mask);
-    __ aesenc(xmmdst, xmmtmp);
-  }
-
-  // aesdec using specified key+offset
-  // can optionally specify that the shuffle mask is already in an xmmregister
-  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
-    load_key(xmmtmp, key, offset, xmm_shuf_mask);
-    __ aesdec(xmmdst, xmmtmp);
-  }
-
-
   // Arguments:
   //
   // Inputs:
@@ -2976,7 +2961,7 @@
   //   c_rarg2   - K (key) in little endian int array
   //
   address generate_aescrypt_encryptBlock() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
     Label L_doLast;
@@ -2988,15 +2973,17 @@
     const Register keylen      = rax;
 
     const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    const XMMRegister xmm_key_shuf_mask = xmm2;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    // On win64 xmm6-xmm15 must be preserved so don't use them.
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
+    // keylen (key array length in ints) can only be {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    // keylen = # of 32-bit words, convert to 128-bit words
-    __ shrl(keylen, 2);
-    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
 
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
@@ -3004,25 +2991,53 @@
     // For encryption, the java expanded key ordering is just what we need
     // we don't know if the key is aligned, hence not using load-execute form
 
-    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
-    __ pxor(xmm_result, xmm_temp);
-    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
-      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
-    }
-    load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
-    __ cmpl(keylen, 0);
-    __ jcc(Assembler::equal, L_doLast);
-    __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
-    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
-    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
-    __ subl(keylen, 2);
-    __ jcc(Assembler::equal, L_doLast);
-    __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
-    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
-    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp1);
+
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+    __ aesenc(xmm_result, xmm_temp3);
+    __ aesenc(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenc(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
 
     __ BIND(L_doLast);
-    __ aesenclast(xmm_result, xmm_temp);
+    __ aesenc(xmm_result, xmm_temp1);
+    __ aesenclast(xmm_result, xmm_temp2);
     __ movdqu(Address(to, 0), xmm_result);        // store the result
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -3040,7 +3055,7 @@
   //   c_rarg2   - K (key) in little endian int array
   //
   address generate_aescrypt_decryptBlock() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
     Label L_doLast;
@@ -3052,15 +3067,17 @@
     const Register keylen      = rax;
 
     const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    const XMMRegister xmm_key_shuf_mask = xmm2;
+    const XMMRegister xmm_key_shuf_mask = xmm1;
+    // On win64 xmm6-xmm15 must be preserved so don't use them.
+    const XMMRegister xmm_temp1  = xmm2;
+    const XMMRegister xmm_temp2  = xmm3;
+    const XMMRegister xmm_temp3  = xmm4;
+    const XMMRegister xmm_temp4  = xmm5;
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
+    // keylen (key array length in ints) can only be {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    // keylen = # of 32-bit words, convert to 128-bit words
-    __ shrl(keylen, 2);
-    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
 
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     __ movdqu(xmm_result, Address(from, 0));
@@ -3068,29 +3085,55 @@
     // for decryption java expanded key ordering is rotated one position from what we want
     // so we start from 0x10 here and hit 0x00 last
     // we don't know if the key is aligned, hence not using load-execute form
-    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
-    __ pxor  (xmm_result, xmm_temp);
-    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
-      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
-    }
-    __ cmpl(keylen, 0);
-    __ jcc(Assembler::equal, L_doLast);
-    // only in 192 and 256 bit keys
-    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
-    __ subl(keylen, 2);
-    __ jcc(Assembler::equal, L_doLast);
-    // only in 256 bit keys
-    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
+
+    __ pxor  (xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
+    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+    __ aesdec(xmm_result, xmm_temp3);
+    __ aesdec(xmm_result, xmm_temp4);
+
+    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
+    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 44);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
+
+    __ cmpl(keylen, 52);
+    __ jccb(Assembler::equal, L_doLast);
+
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
+    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
 
     __ BIND(L_doLast);
+    __ aesdec(xmm_result, xmm_temp1);
+    __ aesdec(xmm_result, xmm_temp2);
+
     // for decryption the aesdeclast operation is always on key+0x00
-    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
-    __ aesdeclast(xmm_result, xmm_temp);
-
+    __ aesdeclast(xmm_result, xmm_temp3);
     __ movdqu(Address(to, 0), xmm_result);  // store the result
-
     __ xorptr(rax, rax); // return 0
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
@@ -3109,7 +3152,7 @@
   //   c_rarg4   - input length
   //
   address generate_cipherBlockChaining_encryptAESCrypt() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
     address start = __ pc();
@@ -3133,16 +3176,19 @@
     const XMMRegister xmm_temp   = xmm1;
     // keys 0-10 preloaded into xmm2-xmm12
     const int XMM_REG_NUM_KEY_FIRST = 2;
-    const int XMM_REG_NUM_KEY_LAST  = 12;
+    const int XMM_REG_NUM_KEY_LAST  = 15;
     const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
-    const XMMRegister xmm_key10  = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+    const XMMRegister xmm_key10  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10);
+    const XMMRegister xmm_key11  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11);
+    const XMMRegister xmm_key12  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12);
+    const XMMRegister xmm_key13  = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13);
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
 #ifdef _WIN64
     // on win64, fill len_reg from stack position
     __ movl(len_reg, len_mem);
-    // save the xmm registers which must be preserved 6-12
+    // save the xmm registers which must be preserved 6-15
     __ subptr(rsp, -rsp_after_call_off * wordSize);
     for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
       __ movdqu(xmm_save(i), as_XMMRegister(i));
@@ -3151,12 +3197,11 @@
 
     const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
-    // load up xmm regs 2 thru 12 with key 0x00 - 0xa0
-    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+    // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
       load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
       offset += 0x10;
     }
-
     __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
 
     // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
@@ -3167,16 +3212,15 @@
     // 128 bit code follows here
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_loopTop_128);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
-
     __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) {
       __ aesenc(xmm_result, as_XMMRegister(rnum));
     }
     __ aesenclast(xmm_result, xmm_key10);
-
     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
     // no need to store r to memory until we exit
     __ addptr(pos, AESBlockSize);
@@ -3198,24 +3242,23 @@
 
     __ BIND(L_key_192_256);
     // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask);
     __ cmpl(rax, 52);
     __ jcc(Assembler::notEqual, L_key_256);
 
     // 192-bit code follows here (could be changed to use more xmm registers)
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_loopTop_192);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
-
     __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) {
       __ aesenc(xmm_result, as_XMMRegister(rnum));
     }
-    aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
-    load_key(xmm_temp, key, 0xc0);
-    __ aesenclast(xmm_result, xmm_temp);
-
+    __ aesenclast(xmm_result, xmm_key12);
     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
     // no need to store r to memory until we exit
     __ addptr(pos, AESBlockSize);
@@ -3225,22 +3268,19 @@
 
     __ BIND(L_key_256);
     // 256-bit code follows here (could be changed to use more xmm registers)
+    load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask);
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_loopTop_256);
     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
     __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
-
     __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
       __ aesenc(xmm_result, as_XMMRegister(rnum));
     }
-    aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
-    aes_enc_key(xmm_result, xmm_temp, key, 0xc0);
-    aes_enc_key(xmm_result, xmm_temp, key, 0xd0);
     load_key(xmm_temp, key, 0xe0);
     __ aesenclast(xmm_result, xmm_temp);
-
     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
     // no need to store r to memory until we exit
     __ addptr(pos, AESBlockSize);
@@ -3267,7 +3307,7 @@
   //
 
   address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
-    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    assert(UseAES, "need AES instructions and misaligned SSE support");
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
     address start = __ pc();
@@ -3288,12 +3328,10 @@
 #endif
     const Register pos         = rax;
 
-    // xmm register assignments for the loops below
-    const XMMRegister xmm_result = xmm0;
     // keys 0-10 preloaded into xmm2-xmm12
     const int XMM_REG_NUM_KEY_FIRST = 5;
     const int XMM_REG_NUM_KEY_LAST  = 15;
-    const XMMRegister xmm_key_first   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+    const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
     const XMMRegister xmm_key_last  = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
@@ -3312,13 +3350,14 @@
     const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
     // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
-    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
-      if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00;
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
       load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
       offset += 0x10;
     }
+    load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask);
 
     const XMMRegister xmm_prev_block_cipher = xmm1;  // holds cipher of previous block
+
     // registers holding the four results in the parallelized loop
     const XMMRegister xmm_result0 = xmm0;
     const XMMRegister xmm_result1 = xmm2;
@@ -3376,8 +3415,12 @@
     __ jmp(L_multiBlock_loopTop_128);
 
     // registers used in the non-parallelized loops
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
     const XMMRegister xmm_prev_block_cipher_save = xmm2;
-    const XMMRegister xmm_temp   = xmm3;
+    const XMMRegister xmm_key11 = xmm3;
+    const XMMRegister xmm_key12 = xmm4;
+    const XMMRegister xmm_temp  = xmm4;
 
     __ align(OptoLoopAlignment);
     __ BIND(L_singleBlock_loopTop_128);
@@ -3415,12 +3458,15 @@
 
     __ BIND(L_key_192_256);
     // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    load_key(xmm_key11, key, 0xb0);
     __ cmpl(rax, 52);
     __ jcc(Assembler::notEqual, L_key_256);
 
     // 192-bit code follows here (could be optimized to use parallelism)
+    load_key(xmm_key12, key, 0xc0);     // 192-bit key goes up to c0
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_singleBlock_loopTop_192);
     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
     __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
@@ -3428,14 +3474,13 @@
     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
       __ aesdec(xmm_result, as_XMMRegister(rnum));
     }
-    aes_dec_key(xmm_result, xmm_temp, key, 0xb0);     // 192-bit key goes up to c0
-    aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
+    __ aesdec(xmm_result, xmm_key11);
+    __ aesdec(xmm_result, xmm_key12);
     __ aesdeclast(xmm_result, xmm_key_last);                    // xmm15 always came from key+0
     __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
-    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
     // no need to store r to memory until we exit
-    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);              // set up next r vector with cipher input from this block
-
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
     __ addptr(pos, AESBlockSize);
     __ subptr(len_reg, AESBlockSize);
     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
@@ -3445,23 +3490,26 @@
     // 256-bit code follows here (could be optimized to use parallelism)
     __ movptr(pos, 0);
     __ align(OptoLoopAlignment);
+
     __ BIND(L_singleBlock_loopTop_256);
-    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
     __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
     __ pxor  (xmm_result, xmm_key_first);               // do the aes dec rounds
     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
       __ aesdec(xmm_result, as_XMMRegister(rnum));
     }
-    aes_dec_key(xmm_result, xmm_temp, key, 0xb0);     // 256-bit key goes up to e0
-    aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xd0);
-    aes_dec_key(xmm_result, xmm_temp, key, 0xe0);
-    __ aesdeclast(xmm_result, xmm_key_last);             // xmm15 came from key+0
+    __ aesdec(xmm_result, xmm_key11);
+    load_key(xmm_temp, key, 0xc0);
+    __ aesdec(xmm_result, xmm_temp);
+    load_key(xmm_temp, key, 0xd0);
+    __ aesdec(xmm_result, xmm_temp);
+    load_key(xmm_temp, key, 0xe0);     // 256-bit key goes up to e0
+    __ aesdec(xmm_result, xmm_temp);
+    __ aesdeclast(xmm_result, xmm_key_last);          // xmm15 came from key+0
     __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
-    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
     // no need to store r to memory until we exit
-    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);              // set up next r vector with cipher input from this block
-
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
     __ addptr(pos, AESBlockSize);
     __ subptr(len_reg, AESBlockSize);
     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -489,8 +489,8 @@
   }
 
   // The AES intrinsic stubs require AES instruction support (of course)
-  // but also require AVX and sse3 modes for instructions it use.
-  if (UseAES && (UseAVX > 0) && (UseSSE > 2)) {
+  // but also require SSE3 mode for the instructions they use.
+  if (UseAES && (UseSSE > 2)) {
     if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
       UseAESIntrinsics = true;
     }
--- a/hotspot/src/cpu/zero/vm/assembler_zero.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/zero/vm/assembler_zero.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -56,15 +56,9 @@
   ShouldNotCallThis();
 }
 
-#ifndef PRODUCT
-void Assembler::pd_print_patched_instruction(address branch) {
-  ShouldNotCallThis();
-}
-#endif // PRODUCT
-
 void MacroAssembler::align(int modulus) {
   while (offset() % modulus != 0)
-    emit_byte(AbstractAssembler::code_fill_byte());
+    emit_int8(AbstractAssembler::code_fill_byte());
 }
 
 void MacroAssembler::bang_stack_with_offset(int offset) {
@@ -72,8 +66,7 @@
 }
 
 void MacroAssembler::advance(int bytes) {
-  _code_pos += bytes;
-  sync();
+  code_section()->set_end(code_section()->end() + bytes);
 }
 
 RegisterOrConstant MacroAssembler::delayed_value_impl(
--- a/hotspot/src/cpu/zero/vm/assembler_zero.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/cpu/zero/vm/assembler_zero.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -37,9 +37,6 @@
 
  public:
   void pd_patch_instruction(address branch, address target);
-#ifndef PRODUCT
-  static void pd_print_patched_instruction(address branch);
-#endif // PRODUCT
 };
 
 class MacroAssembler : public Assembler {
--- a/hotspot/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -116,7 +116,7 @@
   ThreadLocalStorage::pd_tlsAccessMode tlsMode = ThreadLocalStorage::pd_getTlsAccessMode ();
   if (tlsMode == ThreadLocalStorage::pd_tlsAccessIndirect) {            // T1
      // Use thread as a temporary: mov r, gs:[0]; mov r, [r+tlsOffset]
-     emit_byte (segment);
+     emit_int8 (segment);
      // ExternalAddress doesn't work because it can't take NULL
      AddressLiteral null(0, relocInfo::none);
      movptr (thread, null);
@@ -125,7 +125,7 @@
   } else
   if (tlsMode == ThreadLocalStorage::pd_tlsAccessDirect) {              // T2
      // mov r, gs:[tlsOffset]
-     emit_byte (segment);
+     emit_int8 (segment);
      AddressLiteral tls_off((address)ThreadLocalStorage::pd_getTlsOffset(), relocInfo::none);
      movptr (thread, tls_off);
      return ;
--- a/hotspot/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -30,7 +30,7 @@
 
 
 void MacroAssembler::int3() {
-  emit_byte(0xCC);
+  emit_int8((unsigned char)0xCC);
 }
 
 #ifndef _LP64
--- a/hotspot/src/share/vm/asm/assembler.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/asm/assembler.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -109,37 +109,6 @@
   ICache::invalidate_range(addr_at(0), offset());
 }
 
-
-void AbstractAssembler::a_byte(int x) {
-  emit_byte(x);
-}
-
-
-void AbstractAssembler::a_long(jint x) {
-  emit_long(x);
-}
-
-// Labels refer to positions in the (to be) generated code.  There are bound
-// and unbound
-//
-// Bound labels refer to known positions in the already generated code.
-// offset() is the position the label refers to.
-//
-// Unbound labels refer to unknown positions in the code to be generated; it
-// may contain a list of unresolved displacements that refer to it
-#ifndef PRODUCT
-void AbstractAssembler::print(Label& L) {
-  if (L.is_bound()) {
-    tty->print_cr("bound label to %d|%d", L.loc_pos(), L.loc_sect());
-  } else if (L.is_unbound()) {
-    L.print_instructions((MacroAssembler*)this);
-  } else {
-    tty->print_cr("label in inconsistent state (loc = %d)", L.loc());
-  }
-}
-#endif // PRODUCT
-
-
 void AbstractAssembler::bind(Label& L) {
   if (L.is_bound()) {
     // Assembler can bind a label more than once to the same place.
@@ -342,28 +311,3 @@
 #endif
   return offset < 0 || os::vm_page_size() <= offset;
 }
-
-#ifndef PRODUCT
-void Label::print_instructions(MacroAssembler* masm) const {
-  CodeBuffer* cb = masm->code();
-  for (int i = 0; i < _patch_index; ++i) {
-    int branch_loc;
-    if (i >= PatchCacheSize) {
-      branch_loc = _patch_overflow->at(i - PatchCacheSize);
-    } else {
-      branch_loc = _patches[i];
-    }
-    int branch_pos  = CodeBuffer::locator_pos(branch_loc);
-    int branch_sect = CodeBuffer::locator_sect(branch_loc);
-    address branch = cb->locator_address(branch_loc);
-    tty->print_cr("unbound label");
-    tty->print("@ %d|%d ", branch_pos, branch_sect);
-    if (branch_sect == CodeBuffer::SECT_CONSTS) {
-      tty->print_cr(PTR_FORMAT, *(address*)branch);
-      continue;
-    }
-    masm->pd_print_patched_instruction(branch);
-    tty->cr();
-  }
-}
-#endif // ndef PRODUCT
--- a/hotspot/src/share/vm/asm/assembler.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/asm/assembler.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -216,17 +216,6 @@
   bool isByte(int x) const             { return 0 <= x && x < 0x100; }
   bool isShiftCount(int x) const       { return 0 <= x && x < 32; }
 
-  void emit_int8(   int8_t  x) { code_section()->emit_int8(   x); }
-  void emit_int16(  int16_t x) { code_section()->emit_int16(  x); }
-  void emit_int32(  int32_t x) { code_section()->emit_int32(  x); }
-  void emit_int64(  int64_t x) { code_section()->emit_int64(  x); }
-
-  void emit_float(  jfloat  x) { code_section()->emit_float(  x); }
-  void emit_double( jdouble x) { code_section()->emit_double( x); }
-  void emit_address(address x) { code_section()->emit_address(x); }
-
-  void emit_byte(int x)  { emit_int8 (x); }  // deprecated
-  void emit_word(int x)  { emit_int16(x); }  // deprecated
   void emit_long(jint x) { emit_int32(x); }  // deprecated
 
   // Instruction boundaries (required when emitting relocatable values).
@@ -277,9 +266,6 @@
   };
 #endif
 
-  // Label functions
-  void print(Label& L);
-
  public:
 
   // Creation
@@ -288,6 +274,15 @@
   // ensure buf contains all code (call this before using/copying the code)
   void flush();
 
+  void emit_int8(   int8_t  x) { code_section()->emit_int8(   x); }
+  void emit_int16(  int16_t x) { code_section()->emit_int16(  x); }
+  void emit_int32(  int32_t x) { code_section()->emit_int32(  x); }
+  void emit_int64(  int64_t x) { code_section()->emit_int64(  x); }
+
+  void emit_float(  jfloat  x) { code_section()->emit_float(  x); }
+  void emit_double( jdouble x) { code_section()->emit_double( x); }
+  void emit_address(address x) { code_section()->emit_address(x); }
+
   // min and max values for signed immediate ranges
   static int min_simm(int nbits) { return -(intptr_t(1) << (nbits - 1))    ; }
   static int max_simm(int nbits) { return  (intptr_t(1) << (nbits - 1)) - 1; }
@@ -327,8 +322,6 @@
   void    clear_inst_mark()       {        code_section()->clear_mark(); }
 
   // Constants in code
-  void a_byte(int x);
-  void a_long(jint x);
   void relocate(RelocationHolder const& rspec, int format = 0) {
     assert(!pd_check_instruction_mark()
         || inst_mark() == NULL || inst_mark() == code_section()->end(),
@@ -441,15 +434,6 @@
    */
   void pd_patch_instruction(address branch, address target);
 
-#ifndef PRODUCT
-  /**
-   * Platform-dependent method of printing an instruction that needs to be
-   * patched.
-   *
-   * @param branch the instruction to be patched in the buffer.
-   */
-  static void pd_print_patched_instruction(address branch);
-#endif // PRODUCT
 };
 
 #ifdef TARGET_ARCH_x86
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -3442,6 +3442,11 @@
       preserves_state = true;
       break;
 
+    case vmIntrinsics::_loadFence :
+    case vmIntrinsics::_storeFence:
+    case vmIntrinsics::_fullFence :
+      break;
+
     default                       : return false; // do not inline
   }
   // create intrinsic node
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -2977,6 +2977,16 @@
     do_CompareAndSwap(x, longType);
     break;
 
+  case vmIntrinsics::_loadFence :
+    if (os::is_MP()) __ membar_acquire();
+    break;
+  case vmIntrinsics::_storeFence:
+    if (os::is_MP()) __ membar_release();
+    break;
+  case vmIntrinsics::_fullFence :
+    if (os::is_MP()) __ membar();
+    break;
+
   case vmIntrinsics::_Reference_get:
     do_Reference_get(x);
     break;
--- a/hotspot/src/share/vm/ci/ciField.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/ci/ciField.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -366,10 +366,12 @@
 // ------------------------------------------------------------------
 // ciField::print
 void ciField::print() {
-  tty->print("<ciField ");
+  tty->print("<ciField name=");
   _holder->print_name();
   tty->print(".");
   _name->print_symbol();
+  tty->print(" signature=");
+  _signature->print_symbol();
   tty->print(" offset=%d type=", _offset);
   if (_type != NULL) _type->print_name();
   else               tty->print("(reference)");
--- a/hotspot/src/share/vm/classfile/classLoaderData.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/classfile/classLoaderData.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -169,16 +169,18 @@
     ok = (objArrayOop)ok->obj_at(1);
   }
 
+  // Must handle over GC points
+  assert (last != NULL, "dependencies should be initialized");
+  objArrayHandle last_handle(THREAD, last);
+
   // Create a new dependency node with fields for (class_loader or mirror, next)
   objArrayOop deps = oopFactory::new_objectArray(2, CHECK);
   deps->obj_at_put(0, dependency());
 
-  // Must handle over more GC points
+  // Must handle over GC points
   objArrayHandle new_dependency(THREAD, deps);
 
   // Add the dependency under lock
-  assert (last != NULL, "dependencies should be initialized");
-  objArrayHandle last_handle(THREAD, last);
   locked_add_dependency(last_handle, new_dependency);
 }
 
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -756,6 +756,15 @@
   do_intrinsic(_unpark,                   sun_misc_Unsafe,        unpark_name, unpark_signature,                 F_RN)  \
    do_name(     unpark_name,                                     "unpark")                                              \
    do_alias(    unpark_signature,                               /*(LObject;)V*/ object_void_signature)                  \
+  do_intrinsic(_loadFence,                sun_misc_Unsafe,        loadFence_name, loadFence_signature,           F_RN)  \
+   do_name(     loadFence_name,                                  "loadFence")                                           \
+   do_alias(    loadFence_signature,                              void_method_signature)                                \
+  do_intrinsic(_storeFence,               sun_misc_Unsafe,        storeFence_name, storeFence_signature,         F_RN)  \
+   do_name(     storeFence_name,                                 "storeFence")                                          \
+   do_alias(    storeFence_signature,                             void_method_signature)                                \
+  do_intrinsic(_fullFence,                sun_misc_Unsafe,        fullFence_name, fullFence_signature,           F_RN)  \
+   do_name(     fullFence_name,                                  "fullFence")                                           \
+   do_alias(    fullFence_signature,                              void_method_signature)                                \
                                                                                                                         \
   /* unsafe memory references (there are a lot of them...) */                                                           \
   do_signature(getObject_signature,       "(Ljava/lang/Object;J)Ljava/lang/Object;")                                    \
@@ -897,12 +906,14 @@
   do_intrinsic(_getAndAddLong,            sun_misc_Unsafe,        getAndAddLong_name, getAndAddLong_signature, F_R)     \
    do_name(     getAndAddLong_name,                               "getAndAddLong")                                      \
    do_signature(getAndAddLong_signature,                          "(Ljava/lang/Object;JJ)J" )                           \
-  do_intrinsic(_getAndSetInt,             sun_misc_Unsafe,        getAndSet_name, getAndSetInt_signature, F_R)          \
-   do_name(     getAndSet_name,                                   "getAndSet")                                          \
+  do_intrinsic(_getAndSetInt,             sun_misc_Unsafe,        getAndSetInt_name, getAndSetInt_signature, F_R)       \
+   do_name(     getAndSetInt_name,                                "getAndSetInt")                                       \
    do_alias(    getAndSetInt_signature,                         /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature)   \
-  do_intrinsic(_getAndSetLong,            sun_misc_Unsafe,        getAndSet_name, getAndSetLong_signature, F_R)         \
+  do_intrinsic(_getAndSetLong,            sun_misc_Unsafe,        getAndSetLong_name, getAndSetLong_signature, F_R)     \
+   do_name(     getAndSetLong_name,                               "getAndSetLong")                                      \
    do_alias(    getAndSetLong_signature,                        /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature)  \
-  do_intrinsic(_getAndSetObject,          sun_misc_Unsafe,        getAndSet_name, getAndSetObject_signature,  F_R)      \
+  do_intrinsic(_getAndSetObject,          sun_misc_Unsafe,        getAndSetObject_name, getAndSetObject_signature,  F_R)\
+   do_name(     getAndSetObject_name,                             "getAndSetObject")                                    \
    do_signature(getAndSetObject_signature,                        "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
                                                                                                                         \
   /* prefetch_signature is shared by all prefetch variants */                                                           \
--- a/hotspot/src/share/vm/compiler/compilerOracle.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/compiler/compilerOracle.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -538,6 +538,7 @@
 
   if (match != NULL) {
     if (!_quiet) {
+      ResourceMark rm;
       tty->print("CompilerOracle: %s ", command_names[command]);
       match->print();
     }
--- a/hotspot/src/share/vm/opto/addnode.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/addnode.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -189,6 +189,11 @@
       set_req(1, addx);
       set_req(2, a22);
       progress = this;
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      if (add2->outcnt() == 0 && igvn) {
+        // add disconnected.
+        igvn->_worklist.push(add2);
+      }
     }
   }
 
@@ -624,6 +629,11 @@
     if( t22->singleton() && (t22 != Type::TOP) ) {  // Right input is an add of a constant?
       set_req(Address, phase->transform(new (phase->C) AddPNode(in(Base),in(Address),add->in(1))));
       set_req(Offset, add->in(2));
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      if (add->outcnt() == 0 && igvn) {
+        // add disconnected.
+        igvn->_worklist.push((Node*)add);
+      }
       return this;              // Made progress
     }
   }
--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -403,7 +403,7 @@
 //------------------------------print_inlining---------------------------------
 // Really, the failure_msg can be a success message also.
 void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
-  CompileTask::print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
+  C->print_inlining(callee_method, inline_level(), caller_bci, failure_msg ? failure_msg : "inline");
   if (callee_method == NULL)  tty->print(" callee not monotonic or profiled");
   if (Verbose && callee_method) {
     const InlineTree *top = this;
--- a/hotspot/src/share/vm/opto/callGenerator.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/callGenerator.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -274,6 +274,9 @@
   virtual void do_late_inline();
 
   virtual JVMState* generate(JVMState* jvms) {
+    Compile *C = Compile::current();
+    C->print_inlining_skip(this);
+
     // Record that this call site should be revisited once the main
     // parse is finished.
     Compile::current()->add_late_inline(this);
@@ -284,7 +287,6 @@
     // as is done for allocations and macro expansion.
     return DirectCallGenerator::generate(jvms);
   }
-
 };
 
 
@@ -307,7 +309,9 @@
 
   // Make sure the state is a MergeMem for parsing.
   if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
-    map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
+    Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory));
+    C->initial_gvn()->set_type_bottom(mem);
+    map->set_req(TypeFunc::Memory, mem);
   }
 
   // Make enough space for the expression stack and transfer the incoming arguments
@@ -320,6 +324,8 @@
     }
   }
 
+  C->print_inlining_insert(this);
+
   CompileLog* log = C->log();
   if (log != NULL) {
     log->head("late_inline method='%d'", log->identify(method()));
@@ -608,7 +614,7 @@
         if (cg != NULL && cg->is_inline())
           return cg;
       } else {
-        if (PrintInlining)  CompileTask::print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
+        if (PrintInlining)  C->print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
       }
     }
     break;
--- a/hotspot/src/share/vm/opto/callGenerator.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/callGenerator.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -147,9 +147,9 @@
                                                 CallGenerator* cg);
   virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
 
-  static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
+  static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) {
     if (PrintInlining)
-      CompileTask::print_inlining(callee, inline_level, bci, msg);
+      C->print_inlining(callee, inline_level, bci, msg);  // now reported through the given Compile object rather than CompileTask directly
   }
 };
 
--- a/hotspot/src/share/vm/opto/callnode.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -751,7 +751,7 @@
         projs->fallthrough_ioproj = pn;
       for (DUIterator j = pn->outs(); pn->has_out(j); j++) {
         Node* e = pn->out(j);
-        if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) {
+        if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj() && e->outcnt() > 0) {
           assert(projs->exobj == NULL, "only one");
           projs->exobj = e;
         }
--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -1566,6 +1566,10 @@
     Node* n = in(j);            // Get the input
     if (rc == NULL || phase->type(rc) == Type::TOP) {
       if (n != top) {           // Not already top?
+        PhaseIterGVN *igvn = phase->is_IterGVN();
+        if (can_reshape && igvn != NULL) {
+          igvn->_worklist.push(r);
+        }
         set_req(j, top);        // Nuke it down
         progress = this;        // Record progress
       }
--- a/hotspot/src/share/vm/opto/compile.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/compile.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -610,7 +610,9 @@
                   _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
                   _printer(IdealGraphPrinter::printer()),
 #endif
-                  _congraph(NULL) {
+                  _congraph(NULL),
+                  _print_inlining_list(NULL),
+                  _print_inlining(0) {
   C = this;
 
   CompileWrapper cw(this);
@@ -666,6 +668,9 @@
   PhaseGVN gvn(node_arena(), estimated_size);
   set_initial_gvn(&gvn);
 
+  if (PrintInlining) {
+    _print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
+  }
   { // Scope for timing the parser
     TracePhase t3("parse", &_t_parser, true);
 
@@ -754,6 +759,7 @@
       }
     }
     assert(_late_inlines.length() == 0, "should have been processed");
+    dump_inlining();
 
     print_method("Before RemoveUseless", 3);
 
@@ -899,7 +905,9 @@
 #endif
     _dead_node_list(comp_arena()),
     _dead_node_count(0),
-    _congraph(NULL) {
+    _congraph(NULL),
+    _print_inlining_list(NULL),
+    _print_inlining(0) {
   C = this;
 
 #ifndef PRODUCT
@@ -3351,3 +3359,11 @@
     cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
   }
 }
+
+void Compile::dump_inlining() {  // Write every buffered PrintInlining piece to tty, in list order.
+  if (PrintInlining && _print_inlining_list != NULL) {  // the list is only allocated when PrintInlining is set
+    for (int i = 0; i < _print_inlining_list->length(); i++) {
+      tty->print_raw(_print_inlining_list->at(i).ss()->as_string());  // raw: buffered text is data, not a printf format
+    }
+  }
+}
--- a/hotspot/src/share/vm/opto/compile.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/compile.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -30,6 +30,7 @@
 #include "code/debugInfoRec.hpp"
 #include "code/exceptionHandlerTable.hpp"
 #include "compiler/compilerOracle.hpp"
+#include "compiler/compileBroker.hpp"
 #include "libadt/dict.hpp"
 #include "libadt/port.hpp"
 #include "libadt/vectset.hpp"
@@ -369,6 +370,61 @@
   GrowableArray<CallGenerator*> _late_inlines;  // List of CallGenerators to be revisited after
                                                 // main parsing has finished.
 
+  // Inlining may not happen in parse order which would make
+  // PrintInlining output confusing. Keep track of PrintInlining
+  // pieces in order.
+  class PrintInliningBuffer : public ResourceObj {
+   private:
+    CallGenerator* _cg;  // call generator tagged on this piece via set_cg(), or NULL
+    stringStream*  _ss;  // text collected for this piece; copies of the buffer share this pointer
+
+   public:
+    // A fresh buffer has no call generator and a newly allocated stream.
+    PrintInliningBuffer() : _cg(NULL), _ss(new stringStream()) {}
+
+    CallGenerator* cg() const { return _cg; }
+    void set_cg(CallGenerator* cg) { _cg = cg; }
+    stringStream* ss() const { return _ss; }
+  };
+
+  GrowableArray<PrintInliningBuffer>* _print_inlining_list;
+  int _print_inlining;
+
+ public:
+
+  outputStream* print_inlining_stream() const {  // stream of the current piece, or tty when no buffer list exists
+    return (_print_inlining_list != NULL) ? _print_inlining_list->at(_print_inlining).ss() : tty;  // list is NULL unless PrintInlining
+  }
+
+  void print_inlining_skip(CallGenerator* cg) {
+    // Tag the current piece with cg, then start a fresh piece directly after it.
+    if (!PrintInlining) return;
+    _print_inlining_list->at(_print_inlining).set_cg(cg);
+    ++_print_inlining;
+    _print_inlining_list->insert_before(_print_inlining, PrintInliningBuffer());
+  }
+
+  void print_inlining_insert(CallGenerator* cg) {
+    // Locate the piece tagged with cg, open a fresh piece right after it, make
+    // that one current and clear the tag; an untagged cg ends in ShouldNotReachHere().
+    if (!PrintInlining) return;
+    for (int idx = 0; idx < _print_inlining_list->length(); idx++) {
+      if (_print_inlining_list->at(idx).cg() != cg) continue;
+      _print_inlining = idx + 1;
+      _print_inlining_list->insert_before(_print_inlining, PrintInliningBuffer());
+      _print_inlining_list->at(idx).set_cg(NULL);
+      return;
+    }
+    ShouldNotReachHere();
+  }
+
+  void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) {
+    stringStream ss;  // format the record locally, then append it to the current piece
+    CompileTask::print_inlining(&ss, method, inline_level, bci, msg);
+    print_inlining_stream()->print_raw(ss.as_string());  // raw: formatted text is data, not a printf format
+  }
+
+ private:
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*             _cfg;                   // Results of CFG finding
   bool                  _select_24_bit_instr;   // We selected an instruction with a 24-bit result
@@ -591,7 +647,7 @@
   void         reset_dead_node_list()      { _dead_node_list.Reset();
                                              _dead_node_count = 0;
                                            }
-  uint          live_nodes()               {
+  uint          live_nodes() const         {
     int  val = _unique - _dead_node_count;
     assert (val >= 0, err_msg_res("number of tracked dead nodes %d more than created nodes %d", _unique, _dead_node_count));
             return (uint) val;
@@ -702,7 +758,7 @@
 
   void              identify_useful_nodes(Unique_Node_List &useful);
   void              update_dead_node_list(Unique_Node_List &useful);
-  void              remove_useless_nodes  (Unique_Node_List &useful);
+  void              remove_useless_nodes (Unique_Node_List &useful);
 
   WarmCallInfo*     warm_calls() const          { return _warm_calls; }
   void          set_warm_calls(WarmCallInfo* l) { _warm_calls = l; }
@@ -711,6 +767,8 @@
   // Record this CallGenerator for inlining at the end of parsing.
   void              add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); }
 
+  void dump_inlining();
+
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*         cfg()                       { return _cfg; }
   bool              select_24_bit_instr() const { return _select_24_bit_instr; }
--- a/hotspot/src/share/vm/opto/doCall.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/doCall.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -40,19 +40,24 @@
 #include "prims/nativeLookup.hpp"
 #include "runtime/sharedRuntime.hpp"
 
-void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
+void trace_type_profile(Compile* C, ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
   if (TraceTypeProfile || PrintInlining NOT_PRODUCT(|| PrintOptoInlining)) {
+    outputStream* out = tty;  // default sink; replaced by the compile's inlining stream under PrintInlining
     if (!PrintInlining) {
       if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) {
         method->print_short_name();
         tty->cr();
       }
       CompileTask::print_inlining(prof_method, depth, bci);
+    } else {
+      out = C->print_inlining_stream();
     }
-    CompileTask::print_inline_indent(depth);
-    tty->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
-    prof_klass->name()->print_symbol();
-    tty->cr();
+    CompileTask::print_inline_indent(depth, out);
+    out->print(" \\-> TypeProfile (%d/%d counts) = ", receiver_count, site_count);
+    stringStream ss;  // render the klass name separately, then append it to out
+    prof_klass->name()->print_symbol_on(&ss);
+    out->print_raw(ss.as_string());  // raw: the symbol text is data, not a printf format
+    out->cr();
   }
 }
 
@@ -233,13 +238,13 @@
           }
           if (miss_cg != NULL) {
             if (next_hit_cg != NULL) {
-              trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
+              trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1));
               // We don't need to record dependency on a receiver here and below.
               // Whenever we inline, the dependency is added by Parse::Parse().
               miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX);
             }
             if (miss_cg != NULL) {
-              trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
+              trace_type_profile(C, jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count);
               CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
               if (cg != NULL)  return cg;
             }
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -1771,11 +1771,21 @@
   CallProjections callprojs;
   call->extract_projections(&callprojs, true);
 
+  Node* init_mem = call->in(TypeFunc::Memory);
+  Node* final_mem = final_state->in(TypeFunc::Memory);
+  Node* final_ctl = final_state->in(TypeFunc::Control);
+  Node* final_io = final_state->in(TypeFunc::I_O);
+
   // Replace all the old call edges with the edges from the inlining result
-  C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control));
-  C->gvn_replace_by(callprojs.fallthrough_memproj,   final_state->in(TypeFunc::Memory));
-  C->gvn_replace_by(callprojs.fallthrough_ioproj,    final_state->in(TypeFunc::I_O));
-  Node* final_mem = final_state->in(TypeFunc::Memory);
+  if (callprojs.fallthrough_catchproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl);
+  }
+  if (callprojs.fallthrough_memproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_memproj,   final_mem);
+  }
+  if (callprojs.fallthrough_ioproj != NULL) {
+    C->gvn_replace_by(callprojs.fallthrough_ioproj,    final_io);
+  }
 
   // Replace the result with the new result if it exists and is used
   if (callprojs.resproj != NULL && result != NULL) {
@@ -2980,7 +2990,7 @@
   set_control( _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Control) ) );
   // create memory projection for i_o
   set_memory ( _gvn.transform( new (C) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx );
-  make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true);
+  make_slow_call_ex(allocx, env()->Throwable_klass(), true);
 
   // create a memory projection as for the normal control path
   Node* malloc = _gvn.transform(new (C) ProjNode(allocx, TypeFunc::Memory));
--- a/hotspot/src/share/vm/opto/library_call.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -282,6 +282,7 @@
   typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
   bool inline_unsafe_load_store(BasicType type,  LoadStoreKind kind);
   bool inline_unsafe_ordered_store(BasicType type);
+  bool inline_unsafe_fence(vmIntrinsics::ID id);
   bool inline_fp_conversions(vmIntrinsics::ID id);
   bool inline_number_methods(vmIntrinsics::ID id);
   bool inline_reference_get();
@@ -334,6 +335,9 @@
     case vmIntrinsics::_getAndSetInt:
     case vmIntrinsics::_getAndSetLong:
     case vmIntrinsics::_getAndSetObject:
+    case vmIntrinsics::_loadFence:
+    case vmIntrinsics::_storeFence:
+    case vmIntrinsics::_fullFence:
       break;  // InlineNatives does not control String.compareTo
     case vmIntrinsics::_Reference_get:
       break;  // InlineNatives does not control Reference.get
@@ -536,7 +540,7 @@
   // Try to inline the intrinsic.
   if (kit.try_to_inline()) {
     if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
-      CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
+      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
     }
     C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
     if (C->log()) {
@@ -555,7 +559,7 @@
     if (jvms->has_method()) {
       // Not a root compile.
       const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
-      CompileTask::print_inlining(callee, jvms->depth() - 1, bci, msg);
+      C->print_inlining(callee, jvms->depth() - 1, bci, msg);
     } else {
       // Root compile
       tty->print("Did not generate intrinsic %s%s at bci:%d in",
@@ -585,7 +589,7 @@
   Node* slow_ctl = kit.try_to_predicate();
   if (!kit.failing()) {
     if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
-      CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
+      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
     }
     C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
     if (C->log()) {
@@ -602,12 +606,12 @@
     if (jvms->has_method()) {
       // Not a root compile.
       const char* msg = "failed to generate predicate for intrinsic";
-      CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
+      C->print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
     } else {
       // Root compile
-      tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
-               vmIntrinsics::name_at(intrinsic_id()),
-               (is_virtual() ? " (virtual)" : ""), bci);
+      C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
+                                        vmIntrinsics::name_at(intrinsic_id()),
+                                        (is_virtual() ? " (virtual)" : ""), bci);
     }
   }
   C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
@@ -732,6 +736,10 @@
   case vmIntrinsics::_getAndSetLong:            return inline_unsafe_load_store(T_LONG,   LS_xchg);
   case vmIntrinsics::_getAndSetObject:          return inline_unsafe_load_store(T_OBJECT, LS_xchg);
 
+  case vmIntrinsics::_loadFence:
+  case vmIntrinsics::_storeFence:
+  case vmIntrinsics::_fullFence:                return inline_unsafe_fence(intrinsic_id());
+
   case vmIntrinsics::_currentThread:            return inline_native_currentThread();
   case vmIntrinsics::_isInterrupted:            return inline_native_isInterrupted();
 
@@ -2840,6 +2848,26 @@
   return true;
 }
 
+bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
+  // Expands the loadFence/storeFence/fullFence intrinsics. A
+  // MemBarCPUOrder always goes in first so that earlier loads/stores
+  // cannot move down past the fence; the barrier matching the
+  // requested fence flavor is inserted after it.
+  insert_mem_bar(Op_MemBarCPUOrder);
+  int fence_opcode;
+  switch (id) {
+    case vmIntrinsics::_loadFence:  fence_opcode = Op_MemBarAcquire;  break;
+    case vmIntrinsics::_storeFence: fence_opcode = Op_MemBarRelease;  break;
+    case vmIntrinsics::_fullFence:  fence_opcode = Op_MemBarVolatile; break;
+    default:
+      // Not one of the three fence intrinsics.
+      fatal_unexpected_iid(id);
+      return false;
+  }
+  insert_mem_bar(fence_opcode);
+  return true;
+}
+
 //----------------------------inline_unsafe_allocate---------------------------
 // public native Object sun.mics.Unsafe.allocateInstance(Class<?> cls);
 bool LibraryCallKit::inline_unsafe_allocate() {
@@ -2952,14 +2980,23 @@
 
   // We only go to the fast case code if we pass two guards.
   // Paths which do not pass are accumulated in the slow_region.
+
+  enum {
+    no_int_result_path   = 1, // t == Thread.current() && !TLS._osthread._interrupted
+    no_clear_result_path = 2, // t == Thread.current() &&  TLS._osthread._interrupted && !clear_int
+    slow_result_path     = 3, // slow path: t.isInterrupted(clear_int)
+    PATH_LIMIT
+  };
+
+  // Ensure that it's not possible to move the load of TLS._osthread._interrupted flag
+  // out of the function.
+  insert_mem_bar(Op_MemBarCPUOrder);
+
+  RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
+  PhiNode*    result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
+
   RegionNode* slow_region = new (C) RegionNode(1);
   record_for_igvn(slow_region);
-  RegionNode* result_rgn = new (C) RegionNode(1+3); // fast1, fast2, slow
-  PhiNode*    result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
-  enum { no_int_result_path   = 1,
-         no_clear_result_path = 2,
-         slow_result_path     = 3
-  };
 
   // (a) Receiving thread must be the current thread.
   Node* rec_thr = argument(0);
@@ -2968,14 +3005,13 @@
   Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) );
   Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) );
 
-  bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO);
-  if (!known_current_thread)
-    generate_slow_guard(bol_thr, slow_region);
+  generate_slow_guard(bol_thr, slow_region);
 
   // (b) Interrupt bit on TLS must be false.
   Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
   Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
   p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
+
   // Set the control input on the field _interrupted read to prevent it floating up.
   Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
   Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) );
@@ -3020,22 +3056,20 @@
     Node* slow_val = set_results_for_java_call(slow_call);
     // this->control() comes from set_results_for_java_call
 
-    // If we know that the result of the slow call will be true, tell the optimizer!
-    if (known_current_thread)  slow_val = intcon(1);
-
     Node* fast_io  = slow_call->in(TypeFunc::I_O);
     Node* fast_mem = slow_call->in(TypeFunc::Memory);
+
     // These two phis are pre-filled with copies of of the fast IO and Memory
-    Node* io_phi   = PhiNode::make(result_rgn, fast_io,  Type::ABIO);
-    Node* mem_phi  = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+    PhiNode* result_mem  = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+    PhiNode* result_io   = PhiNode::make(result_rgn, fast_io,  Type::ABIO);
 
     result_rgn->init_req(slow_result_path, control());
-    io_phi    ->init_req(slow_result_path, i_o());
-    mem_phi   ->init_req(slow_result_path, reset_memory());
+    result_io ->init_req(slow_result_path, i_o());
+    result_mem->init_req(slow_result_path, reset_memory());
     result_val->init_req(slow_result_path, slow_val);
 
-    set_all_memory( _gvn.transform(mem_phi) );
-    set_i_o(        _gvn.transform(io_phi) );
+    set_all_memory(_gvn.transform(result_mem));
+    set_i_o(       _gvn.transform(result_io));
   }
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
@@ -3319,7 +3353,7 @@
     Node* arg = args[which_arg];
     arg = null_check(arg);
     if (stopped())  break;
-    args[which_arg] = _gvn.transform(arg);
+    args[which_arg] = arg;
 
     Node* p = basic_plus_adr(arg, class_klass_offset);
     Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type);
--- a/hotspot/src/share/vm/opto/parse3.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/parse3.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -509,6 +509,7 @@
                           makecon(TypeKlassPtr::make(array_klass)),
                           dims);
   }
+  make_slow_call_ex(c, env()->Throwable_klass(), false);
 
   Node* res = _gvn.transform(new (C) ProjNode(c, TypeFunc::Parms));
 
--- a/hotspot/src/share/vm/opto/runtime.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -989,7 +989,7 @@
       // since we're notifying the VM on every catch.
       // Force deoptimization and the rest of the lookup
       // will be fine.
-      deoptimize_caller_frame(thread, true);
+      deoptimize_caller_frame(thread);
     }
 
     // Check the stack guard pages.  If enabled, look for handler in this frame;
@@ -1143,17 +1143,22 @@
 
 
 void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
-  // Deoptimize frame
-  if (doit) {
-    // Called from within the owner thread, so no need for safepoint
-    RegisterMap reg_map(thread);
-    frame stub_frame = thread->last_frame();
-    assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
-    frame caller_frame = stub_frame.sender(&reg_map);
+  // Deoptimize the caller before continuing, as the compiled exception handler
+  // table may not be valid. Skipped when doit is false or the stress flag is set.
+  if (!StressCompiledExceptionHandlers && doit) {
+    deoptimize_caller_frame(thread);  // single-argument overload: deoptimizes unconditionally
+  }
+}
 
-    // Deoptimize the caller frame.
-    Deoptimization::deoptimize_frame(thread, caller_frame.id());
-  }
+void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) {
+  // Called from within the owner thread, so no need for safepoint
+  RegisterMap reg_map(thread);
+  frame stub_frame = thread->last_frame();
+  assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
+  frame caller_frame = stub_frame.sender(&reg_map);  // sender of the thread's last (stub) frame
+
+  // Deoptimize the caller frame, identified by its frame id.
+  Deoptimization::deoptimize_frame(thread, caller_frame.id());
 }
 
 
--- a/hotspot/src/share/vm/opto/runtime.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/runtime.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -174,6 +174,7 @@
   static address handle_exception_C       (JavaThread* thread);
   static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
   static address rethrow_C                (oopDesc* exception, JavaThread *thread, address return_pc );
+  static void deoptimize_caller_frame     (JavaThread *thread);
   static void deoptimize_caller_frame     (JavaThread *thread, bool doit);
   static bool is_deoptimized_caller_frame (JavaThread *thread);
 
--- a/hotspot/src/share/vm/opto/stringopts.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/opto/stringopts.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -744,7 +744,9 @@
       ctrl_path.push(cn);
       ctrl_path.push(cn->proj_out(0));
       ctrl_path.push(cn->proj_out(0)->unique_out());
-      ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
+      if (cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0) != NULL) {
+        ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
+      }
     } else {
       ShouldNotReachHere();
     }
@@ -762,6 +764,12 @@
     } else if (ptr->is_IfTrue()) {
       IfNode* iff = ptr->in(0)->as_If();
       BoolNode* b = iff->in(1)->isa_Bool();
+
+      if (b == NULL) {
+        fail = true;
+        break;
+      }
+
       Node* cmp = b->in(1);
       Node* v1 = cmp->in(1);
       Node* v2 = cmp->in(2);
@@ -1408,71 +1416,76 @@
                       Deoptimization::Action_make_not_entrant);
   }
 
-  // length now contains the number of characters needed for the
-  // char[] so create a new AllocateArray for the char[]
-  Node* char_array = NULL;
-  {
-    PreserveReexecuteState preexecs(&kit);
-    // The original jvms is for an allocation of either a String or
-    // StringBuffer so no stack adjustment is necessary for proper
-    // reexecution.  If we deoptimize in the slow path the bytecode
-    // will be reexecuted and the char[] allocation will be thrown away.
-    kit.jvms()->set_should_reexecute(true);
-    char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
-                               length, 1);
-  }
+  Node* result;
+  if (!kit.stopped()) {
+
+    // length now contains the number of characters needed for the
+    // char[] so create a new AllocateArray for the char[]
+    Node* char_array = NULL;
+    {
+      PreserveReexecuteState preexecs(&kit);
+      // The original jvms is for an allocation of either a String or
+      // StringBuffer so no stack adjustment is necessary for proper
+      // reexecution.  If we deoptimize in the slow path the bytecode
+      // will be reexecuted and the char[] allocation will be thrown away.
+      kit.jvms()->set_should_reexecute(true);
+      char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
+                                 length, 1);
+    }
+
+    // Mark the allocation so that zeroing is skipped since the code
+    // below will overwrite the entire array
+    AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
+    char_alloc->maybe_set_complete(_gvn);
 
-  // Mark the allocation so that zeroing is skipped since the code
-  // below will overwrite the entire array
-  AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
-  char_alloc->maybe_set_complete(_gvn);
-
-  // Now copy the string representations into the final char[]
-  Node* start = __ intcon(0);
-  for (int argi = 0; argi < sc->num_arguments(); argi++) {
-    Node* arg = sc->argument(argi);
-    switch (sc->mode(argi)) {
-      case StringConcat::IntMode: {
-        Node* end = __ AddI(start, string_sizes->in(argi));
-        // getChars words backwards so pass the ending point as well as the start
-        int_getChars(kit, arg, char_array, start, end);
-        start = end;
-        break;
+    // Now copy the string representations into the final char[]
+    Node* start = __ intcon(0);
+    for (int argi = 0; argi < sc->num_arguments(); argi++) {
+      Node* arg = sc->argument(argi);
+      switch (sc->mode(argi)) {
+        case StringConcat::IntMode: {
+          Node* end = __ AddI(start, string_sizes->in(argi));
+          // getChars works backwards so pass the ending point as well as the start
+          int_getChars(kit, arg, char_array, start, end);
+          start = end;
+          break;
+        }
+        case StringConcat::StringNullCheckMode:
+        case StringConcat::StringMode: {
+          start = copy_string(kit, arg, char_array, start);
+          break;
+        }
+        case StringConcat::CharMode: {
+          __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
+                             arg, T_CHAR, char_adr_idx);
+          start = __ AddI(start, __ intcon(1));
+          break;
+        }
+        default:
+          ShouldNotReachHere();
       }
-      case StringConcat::StringNullCheckMode:
-      case StringConcat::StringMode: {
-        start = copy_string(kit, arg, char_array, start);
-        break;
-      }
-      case StringConcat::CharMode: {
-        __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
-                           arg, T_CHAR, char_adr_idx);
-        start = __ AddI(start, __ intcon(1));
-        break;
-      }
-      default:
-        ShouldNotReachHere();
     }
-  }
 
-  // If we're not reusing an existing String allocation then allocate one here.
-  Node* result = sc->string_alloc();
-  if (result == NULL) {
-    PreserveReexecuteState preexecs(&kit);
-    // The original jvms is for an allocation of either a String or
-    // StringBuffer so no stack adjustment is necessary for proper
-    // reexecution.
-    kit.jvms()->set_should_reexecute(true);
-    result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
+    // If we're not reusing an existing String allocation then allocate one here.
+    result = sc->string_alloc();
+    if (result == NULL) {
+      PreserveReexecuteState preexecs(&kit);
+      // The original jvms is for an allocation of either a String or
+      // StringBuffer so no stack adjustment is necessary for proper
+      // reexecution.
+      kit.jvms()->set_should_reexecute(true);
+      result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
+    }
+
+    // Initialize the string
+    if (java_lang_String::has_offset_field()) {
+      kit.store_String_offset(kit.control(), result, __ intcon(0));
+      kit.store_String_length(kit.control(), result, length);
+    }
+    kit.store_String_value(kit.control(), result, char_array);
+  } else {
+    result = C->top();
   }
-
-  // Intialize the string
-  if (java_lang_String::has_offset_field()) {
-    kit.store_String_offset(kit.control(), result, __ intcon(0));
-    kit.store_String_length(kit.control(), result, length);
-  }
-  kit.store_String_value(kit.control(), result, char_array);
-
   // hook up the outgoing control and result
   kit.replace_call(sc->end(), result);
 
--- a/hotspot/src/share/vm/prims/methodHandles.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/prims/methodHandles.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -1168,8 +1168,8 @@
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
     Universe::flush_dependents_on(call_site, target);
+    java_lang_invoke_CallSite::set_target(call_site(), target());
   }
-  java_lang_invoke_CallSite::set_target(call_site(), target());
 }
 JVM_END
 
@@ -1180,8 +1180,8 @@
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
     Universe::flush_dependents_on(call_site, target);
+    java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
   }
-  java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
 }
 JVM_END
 
--- a/hotspot/src/share/vm/prims/unsafe.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/prims/unsafe.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -468,6 +468,21 @@
 #endif
 UNSAFE_END
 
+UNSAFE_ENTRY(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_LoadFence");
+  OrderAccess::acquire();
+UNSAFE_END
+
+UNSAFE_ENTRY(void, Unsafe_StoreFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_StoreFence");
+  OrderAccess::release();
+UNSAFE_END
+
+UNSAFE_ENTRY(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe))
+  UnsafeWrapper("Unsafe_FullFence");
+  OrderAccess::fence();
+UNSAFE_END
+
 ////// Data in the C heap.
 
 // Note:  These do not throw NullPointerException for bad pointers.
@@ -1550,6 +1565,9 @@
     {CC"putOrderedObject",   CC"("OBJ"J"OBJ")V",         FN_PTR(Unsafe_SetOrderedObject)},
     {CC"putOrderedInt",      CC"("OBJ"JI)V",             FN_PTR(Unsafe_SetOrderedInt)},
     {CC"putOrderedLong",     CC"("OBJ"JJ)V",             FN_PTR(Unsafe_SetOrderedLong)},
+    {CC"loadFence",          CC"()V",                    FN_PTR(Unsafe_LoadFence)},
+    {CC"storeFence",         CC"()V",                    FN_PTR(Unsafe_StoreFence)},
+    {CC"fullFence",          CC"()V",                    FN_PTR(Unsafe_FullFence)},
     {CC"park",               CC"(ZJ)V",                  FN_PTR(Unsafe_Park)},
     {CC"unpark",             CC"("OBJ")V",               FN_PTR(Unsafe_Unpark)}
 
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Fri Dec 21 01:39:34 2012 -0800
@@ -922,6 +922,9 @@
   develop(bool, PrintExceptionHandlers, false,                              \
           "Print exception handler tables for all nmethods when generated") \
                                                                             \
+  develop(bool, StressCompiledExceptionHandlers, false,                     \
+         "Exercise compiled exception handlers")                            \
+                                                                            \
   develop(bool, InterceptOSException, false,                                \
           "Starts debugger when an implicit OS (e.g., NULL) "               \
           "exception happens")                                              \
--- a/hotspot/src/share/vm/runtime/thread.cpp	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Fri Dec 21 01:39:34 2012 -0800
@@ -2190,7 +2190,7 @@
           // BiasedLocking needs an updated RegisterMap for the revoke monitors pass
           RegisterMap reg_map(this, UseBiasedLocking);
           frame compiled_frame = f.sender(&reg_map);
-          if (compiled_frame.can_be_deoptimized()) {
+          if (!StressCompiledExceptionHandlers && compiled_frame.can_be_deoptimized()) {
             Deoptimization::deoptimize(this, compiled_frame, &reg_map);
           }
         }
--- a/hotspot/test/compiler/7184394/TestAESBase.java	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/test/compiler/7184394/TestAESBase.java	Fri Dec 21 01:39:34 2012 -0800
@@ -54,7 +54,6 @@
   String paddingStr = "PKCS5Padding";
   AlgorithmParameters algParams;
   SecretKey key;
-  int ivLen;
 
   static int numThreads = 0;
   int  threadId;
@@ -68,7 +67,7 @@
 
   public void prepare() {
     try {
-    System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput);
+    System.out.println("\nalgorithm=" + algorithm + ", mode=" + mode + ", msgSize=" + msgSize + ", keySize=" + keySize + ", noReinit=" + noReinit + ", checkOutput=" + checkOutput);
 
       int keyLenBytes = (keySize == 0 ? 16 : keySize/8);
       byte keyBytes[] = new byte[keyLenBytes];
@@ -90,10 +89,14 @@
       cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
       dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
 
-      ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
-      IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
-
-      cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
+      if (mode.equals("CBC")) {
+        int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
+        IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
+        cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
+      } else {
+        algParams = cipher.getParameters();
+        cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
+      }
       algParams = cipher.getParameters();
       dCipher.init(Cipher.DECRYPT_MODE, key, algParams);
       if (threadId == 0) {
--- a/hotspot/test/compiler/7184394/TestAESMain.java	Wed Dec 19 16:10:19 2012 -0800
+++ b/hotspot/test/compiler/7184394/TestAESMain.java	Fri Dec 21 01:39:34 2012 -0800
@@ -27,7 +27,8 @@
  * @bug 7184394
  * @summary add intrinsics to use AES instructions
  *
- * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CBC TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB TestAESMain
  *
  * @author Tom Deneau
  */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/8004741/Test8004741.java	Fri Dec 21 01:39:34 2012 -0800
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test Test8004741.java
+ * @bug 8004741
+ * @summary Missing compiled exception handle table entry for multidimensional array allocation
+ * @run main/othervm -Xmx64m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:+StressCompiledExceptionHandlers Test8004741
+ *
+ */
+
+import java.util.*;
+
+public class Test8004741 extends Thread {
+
+  static int[][] test(int a, int b) throws Exception {
+    int[][] ar = null;
+    try {
+      ar = new int[a][b];
+    } catch (Error e) {
+      System.out.println("test got Error");
+      passed = true;
+      throw(e);
+    } catch (Exception e) {
+      System.out.println("test got Exception");
+      throw(e);
+    }
+    return ar;
+  }
+
+  static boolean passed = false;
+
+  public void run() {
+      System.out.println("test started");
+      try {
+        while(true) {
+          test(2,20000);
+        }
+      } catch (ThreadDeath e) {
+        System.out.println("test got ThreadDeath");
+        passed = true;
+      } catch (Error e) {
+        e.printStackTrace();
+        System.out.println("test got Error");
+      } catch (Exception e) {
+        e.printStackTrace();
+        System.out.println("test got Exception");
+      }
+  }
+
+  public static void main(String[] args) throws Exception {
+    for (int n = 0; n < 11000; n++) {
+      test(2, 20);
+    }
+
+    // First test exception catch
+    Test8004741 t = new Test8004741();
+
+    passed = false;
+    t.start();
+    Thread.sleep(1000);
+    t.stop();
+
+    Thread.sleep(5000);
+    t.join();
+    if (passed) {
+      System.out.println("PASSED");
+    } else {
+      System.out.println("FAILED");
+      System.exit(97);
+    }
+  }
+
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/8005033/Test8005033.java	Fri Dec 21 01:39:34 2012 -0800
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2012 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8005033
+ * @summary On sparcv9, C2's intrinsic for Integer.bitCount(OV) returns wrong result if OV is the result of an operation with int overflow.
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test8005033::testBitCount Test8005033
+ * @author Richard Reingruber richard DOT reingruber AT sap DOT com
+ */
+
+public class Test8005033 {
+    public static int MINUS_ONE = -1;
+
+    public static void main(String[] args) {
+        System.out.println("EXECUTING test.");
+        Integer.bitCount(1);   // load class
+        int expectedBitCount = 0;
+        int calculatedBitCount = testBitCount();
+        if (expectedBitCount != calculatedBitCount) {
+            throw new InternalError("got " + calculatedBitCount + " but expected " + expectedBitCount);
+        }
+        System.out.println("SUCCESSFULLY passed test.");
+    }
+
+    // testBitCount will be compiled using the Integer.bitCount() intrinsic if possible
+    private static int testBitCount() {
+        return Integer.bitCount(MINUS_ONE+1);   // -1 + 1 => int overflow
+    }
+}