hotspot/src/cpu/x86/vm/templateTable_x86.cpp
changeset 32391 01e2f5e916c7
parent 30132 1f788eb36811
child 32400 ed1a43020a93
equal deleted inserted replaced
32371:8815f2d1447f 32391:01e2f5e916c7
   347 
   347 
   348 
   348 
   349 
   349 
   350 void TemplateTable::fconst(int value) {
   350 void TemplateTable::fconst(int value) {
   351   transition(vtos, ftos);
   351   transition(vtos, ftos);
       
   352   if (UseSSE >= 1) {
       
   353     static float one = 1.0f, two = 2.0f;
       
   354     switch (value) {
       
   355     case 0:
       
   356       __ xorps(xmm0, xmm0);
       
   357       break;
       
   358     case 1:
       
   359       __ movflt(xmm0, ExternalAddress((address) &one));
       
   360       break;
       
   361     case 2:
       
   362       __ movflt(xmm0, ExternalAddress((address) &two));
       
   363       break;
       
   364     default:
       
   365       ShouldNotReachHere();
       
   366       break;
       
   367     }
       
   368   } else {
   352 #ifdef _LP64
   369 #ifdef _LP64
   353   static float one = 1.0f, two = 2.0f;
       
   354   switch (value) {
       
   355   case 0:
       
   356     __ xorps(xmm0, xmm0);
       
   357     break;
       
   358   case 1:
       
   359     __ movflt(xmm0, ExternalAddress((address) &one));
       
   360     break;
       
   361   case 2:
       
   362     __ movflt(xmm0, ExternalAddress((address) &two));
       
   363     break;
       
   364   default:
       
   365     ShouldNotReachHere();
   370     ShouldNotReachHere();
   366     break;
       
   367   }
       
   368 #else
   371 #else
   369          if (value == 0) { __ fldz();
   372            if (value == 0) { __ fldz();
   370   } else if (value == 1) { __ fld1();
   373     } else if (value == 1) { __ fld1();
   371   } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
   374     } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
   372   } else                 { ShouldNotReachHere();
   375     } else                 { ShouldNotReachHere();
   373   }
   376     }
   374 #endif
   377 #endif // _LP64
       
   378   }
   375 }
   379 }
   376 
   380 
   377 void TemplateTable::dconst(int value) {
   381 void TemplateTable::dconst(int value) {
   378   transition(vtos, dtos);
   382   transition(vtos, dtos);
       
   383   if (UseSSE >= 2) {
       
   384     static double one = 1.0;
       
   385     switch (value) {
       
   386     case 0:
       
   387       __ xorpd(xmm0, xmm0);
       
   388       break;
       
   389     case 1:
       
   390       __ movdbl(xmm0, ExternalAddress((address) &one));
       
   391       break;
       
   392     default:
       
   393       ShouldNotReachHere();
       
   394       break;
       
   395     }
       
   396   } else {
   379 #ifdef _LP64
   397 #ifdef _LP64
   380   static double one = 1.0;
       
   381   switch (value) {
       
   382   case 0:
       
   383     __ xorpd(xmm0, xmm0);
       
   384     break;
       
   385   case 1:
       
   386     __ movdbl(xmm0, ExternalAddress((address) &one));
       
   387     break;
       
   388   default:
       
   389     ShouldNotReachHere();
   398     ShouldNotReachHere();
   390     break;
       
   391   }
       
   392 
       
   393 #else
   399 #else
   394          if (value == 0) { __ fldz();
   400            if (value == 0) { __ fldz();
   395   } else if (value == 1) { __ fld1();
   401     } else if (value == 1) { __ fld1();
   396   } else                 { ShouldNotReachHere();
   402     } else                 { ShouldNotReachHere();
   397   }
   403     }
   398 #endif
   404 #endif
       
   405   }
   399 }
   406 }
   400 
   407 
   401 void TemplateTable::bipush() {
   408 void TemplateTable::bipush() {
   402   transition(vtos, itos);
   409   transition(vtos, itos);
   403   __ load_signed_byte(rax, at_bcp(1));
   410   __ load_signed_byte(rax, at_bcp(1));
   452   __ bind(notClass);
   459   __ bind(notClass);
   453   __ cmpl(rdx, JVM_CONSTANT_Float);
   460   __ cmpl(rdx, JVM_CONSTANT_Float);
   454   __ jccb(Assembler::notEqual, notFloat);
   461   __ jccb(Assembler::notEqual, notFloat);
   455 
   462 
   456   // ftos
   463   // ftos
   457   LP64_ONLY(__ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset)));
   464   __ load_float(Address(rcx, rbx, Address::times_ptr, base_offset));
   458   NOT_LP64(__ fld_s(    Address(rcx, rbx, Address::times_ptr, base_offset)));
       
   459   __ push(ftos);
   465   __ push(ftos);
   460   __ jmp(Done);
   466   __ jmp(Done);
   461 
   467 
   462   __ bind(notFloat);
   468   __ bind(notFloat);
   463 #ifdef ASSERT
   469 #ifdef ASSERT
   520   __ cmpb(Address(rax, rbx, Address::times_1, tags_offset),
   526   __ cmpb(Address(rax, rbx, Address::times_1, tags_offset),
   521           JVM_CONSTANT_Double);
   527           JVM_CONSTANT_Double);
   522   __ jccb(Assembler::notEqual, Long);
   528   __ jccb(Assembler::notEqual, Long);
   523 
   529 
   524   // dtos
   530   // dtos
   525   LP64_ONLY(__ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset)));
   531   __ load_double(Address(rcx, rbx, Address::times_ptr, base_offset));
   526   NOT_LP64(__ fld_d(    Address(rcx, rbx, Address::times_ptr, base_offset)));
       
   527   __ push(dtos);
   532   __ push(dtos);
   528 
   533 
   529   __ jmpb(Done);
   534   __ jmpb(Done);
   530   __ bind(Long);
   535   __ bind(Long);
   531 
   536 
   615 }
   620 }
   616 
   621 
   617 void TemplateTable::fload() {
   622 void TemplateTable::fload() {
   618   transition(vtos, ftos);
   623   transition(vtos, ftos);
   619   locals_index(rbx);
   624   locals_index(rbx);
   620   LP64_ONLY(__ movflt(xmm0, faddress(rbx)));
   625   __ load_float(faddress(rbx));
   621   NOT_LP64(__ fld_s(faddress(rbx)));
       
   622 }
   626 }
   623 
   627 
   624 void TemplateTable::dload() {
   628 void TemplateTable::dload() {
   625   transition(vtos, dtos);
   629   transition(vtos, dtos);
   626   locals_index(rbx);
   630   locals_index(rbx);
   627   LP64_ONLY(__ movdbl(xmm0, daddress(rbx)));
   631   __ load_double(daddress(rbx));
   628   NOT_LP64(__ fld_d(daddress(rbx)));
       
   629 }
   632 }
   630 
   633 
   631 void TemplateTable::aload() {
   634 void TemplateTable::aload() {
   632   transition(vtos, atos);
   635   transition(vtos, atos);
   633   locals_index(rbx);
   636   locals_index(rbx);
   655 }
   658 }
   656 
   659 
   657 void TemplateTable::wide_fload() {
   660 void TemplateTable::wide_fload() {
   658   transition(vtos, ftos);
   661   transition(vtos, ftos);
   659   locals_index_wide(rbx);
   662   locals_index_wide(rbx);
   660   LP64_ONLY(__ movflt(xmm0, faddress(rbx)));
   663   __ load_float(faddress(rbx));
   661   NOT_LP64(__ fld_s(faddress(rbx)));
       
   662 }
   664 }
   663 
   665 
   664 void TemplateTable::wide_dload() {
   666 void TemplateTable::wide_dload() {
   665   transition(vtos, dtos);
   667   transition(vtos, dtos);
   666   locals_index_wide(rbx);
   668   locals_index_wide(rbx);
   667   LP64_ONLY(__ movdbl(xmm0, daddress(rbx)));
   669   __ load_double(daddress(rbx));
   668   NOT_LP64(__ fld_d(daddress(rbx)));
       
   669 }
   670 }
   670 
   671 
   671 void TemplateTable::wide_aload() {
   672 void TemplateTable::wide_aload() {
   672   transition(vtos, atos);
   673   transition(vtos, atos);
   673   locals_index_wide(rbx);
   674   locals_index_wide(rbx);
   724 void TemplateTable::faload() {
   725 void TemplateTable::faload() {
   725   transition(itos, ftos);
   726   transition(itos, ftos);
   726   // rax: index
   727   // rax: index
   727   // rdx: array
   728   // rdx: array
   728   index_check(rdx, rax); // kills rbx
   729   index_check(rdx, rax); // kills rbx
   729   LP64_ONLY(__ movflt(xmm0, Address(rdx, rax,
   730   __ load_float(Address(rdx, rax,
   730                          Address::times_4,
   731                         Address::times_4,
   731                          arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
   732                         arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
   732   NOT_LP64(__ fld_s(Address(rdx, rax, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
       
   733 }
   733 }
   734 
   734 
   735 void TemplateTable::daload() {
   735 void TemplateTable::daload() {
   736   transition(itos, dtos);
   736   transition(itos, dtos);
   737   // rax: index
   737   // rax: index
   738   // rdx: array
   738   // rdx: array
   739   index_check(rdx, rax); // kills rbx
   739   index_check(rdx, rax); // kills rbx
   740   LP64_ONLY(__ movdbl(xmm0, Address(rdx, rax,
   740   __ load_double(Address(rdx, rax,
   741                           Address::times_8,
   741                          Address::times_8,
   742                           arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
   742                          arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
   743   NOT_LP64(__ fld_d(Address(rdx, rax, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
       
   744 }
   743 }
   745 
   744 
   746 void TemplateTable::aaload() {
   745 void TemplateTable::aaload() {
   747   transition(itos, atos);
   746   transition(itos, atos);
   748   // rax: index
   747   // rax: index
   805   NOT_LP64(__ movptr(rdx, haddress(n)));
   804   NOT_LP64(__ movptr(rdx, haddress(n)));
   806 }
   805 }
   807 
   806 
   808 void TemplateTable::fload(int n) {
   807 void TemplateTable::fload(int n) {
   809   transition(vtos, ftos);
   808   transition(vtos, ftos);
   810   LP64_ONLY(__ movflt(xmm0, faddress(n)));
   809   __ load_float(faddress(n));
   811   NOT_LP64(__ fld_s(faddress(n)));
       
   812 }
   810 }
   813 
   811 
   814 void TemplateTable::dload(int n) {
   812 void TemplateTable::dload(int n) {
   815   transition(vtos, dtos);
   813   transition(vtos, dtos);
   816   LP64_ONLY(__ movdbl(xmm0, daddress(n)));
   814   __ load_double(daddress(n));
   817   NOT_LP64(__ fld_d(daddress(n)));
       
   818 }
   815 }
   819 
   816 
   820 void TemplateTable::aload(int n) {
   817 void TemplateTable::aload(int n) {
   821   transition(vtos, atos);
   818   transition(vtos, atos);
   822   __ movptr(rax, aaddress(n));
   819   __ movptr(rax, aaddress(n));
   917 }
   914 }
   918 
   915 
   919 void TemplateTable::fstore() {
   916 void TemplateTable::fstore() {
   920   transition(ftos, vtos);
   917   transition(ftos, vtos);
   921   locals_index(rbx);
   918   locals_index(rbx);
   922   LP64_ONLY(__ movflt(faddress(rbx), xmm0));
   919   __ store_float(faddress(rbx));
   923   NOT_LP64(__ fstp_s(faddress(rbx)));
       
   924 }
   920 }
   925 
   921 
   926 void TemplateTable::dstore() {
   922 void TemplateTable::dstore() {
   927   transition(dtos, vtos);
   923   transition(dtos, vtos);
   928   locals_index(rbx);
   924   locals_index(rbx);
   929   LP64_ONLY(__ movdbl(daddress(rbx), xmm0));
   925   __ store_double(daddress(rbx));
   930   NOT_LP64(__ fstp_d(daddress(rbx)));
       
   931 }
   926 }
   932 
   927 
   933 void TemplateTable::astore() {
   928 void TemplateTable::astore() {
   934   transition(vtos, vtos);
   929   transition(vtos, vtos);
   935   __ pop_ptr(rax);
   930   __ pop_ptr(rax);
   954 }
   949 }
   955 
   950 
   956 void TemplateTable::wide_fstore() {
   951 void TemplateTable::wide_fstore() {
   957 #ifdef _LP64
   952 #ifdef _LP64
   958   transition(vtos, vtos);
   953   transition(vtos, vtos);
   959   __ pop_f();
   954   __ pop_f(xmm0);
   960   locals_index_wide(rbx);
   955   locals_index_wide(rbx);
   961   __ movflt(faddress(rbx), xmm0);
   956   __ movflt(faddress(rbx), xmm0);
   962 #else
   957 #else
   963   wide_istore();
   958   wide_istore();
   964 #endif
   959 #endif
   965 }
   960 }
   966 
   961 
   967 void TemplateTable::wide_dstore() {
   962 void TemplateTable::wide_dstore() {
   968 #ifdef _LP64
   963 #ifdef _LP64
   969   transition(vtos, vtos);
   964   transition(vtos, vtos);
   970   __ pop_d();
   965   __ pop_d(xmm0);
   971   locals_index_wide(rbx);
   966   locals_index_wide(rbx);
   972   __ movdbl(daddress(rbx), xmm0);
   967   __ movdbl(daddress(rbx), xmm0);
   973 #else
   968 #else
   974   wide_lstore();
   969   wide_lstore();
   975 #endif
   970 #endif
  1009 
  1004 
  1010 
  1005 
  1011 void TemplateTable::fastore() {
  1006 void TemplateTable::fastore() {
  1012   transition(ftos, vtos);
  1007   transition(ftos, vtos);
  1013   __ pop_i(rbx);
  1008   __ pop_i(rbx);
  1014   // xmm0: value
  1009   // value is in UseSSE >= 1 ? xmm0 : ST(0)
  1015   // rbx:  index
  1010   // rbx:  index
  1016   // rdx:  array
  1011   // rdx:  array
  1017   index_check(rdx, rbx); // prefer index in rbx
  1012   index_check(rdx, rbx); // prefer index in rbx
  1018   LP64_ONLY(__ movflt(Address(rdx, rbx,
  1013   __ store_float(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
  1019                    Address::times_4,
       
  1020                    arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
       
  1021            xmm0));
       
  1022   NOT_LP64(__ fstp_s(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT))));
       
  1023 }
  1014 }
  1024 
  1015 
  1025 void TemplateTable::dastore() {
  1016 void TemplateTable::dastore() {
  1026   transition(dtos, vtos);
  1017   transition(dtos, vtos);
  1027   __ pop_i(rbx);
  1018   __ pop_i(rbx);
  1028   // xmm0: value
  1019   // value is in UseSSE >= 2 ? xmm0 : ST(0)
  1029   // rbx:  index
  1020   // rbx:  index
  1030   // rdx:  array
  1021   // rdx:  array
  1031   index_check(rdx, rbx); // prefer index in rbx
  1022   index_check(rdx, rbx); // prefer index in rbx
  1032   LP64_ONLY(__ movdbl(Address(rdx, rbx,
  1023   __ store_double(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
  1033                    Address::times_8,
       
  1034                    arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
       
  1035            xmm0));
       
  1036   NOT_LP64(__ fstp_d(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))));
       
  1037 }
  1024 }
  1038 
  1025 
  1039 void TemplateTable::aastore() {
  1026 void TemplateTable::aastore() {
  1040   Label is_null, ok_is_subtype, done;
  1027   Label is_null, ok_is_subtype, done;
  1041   transition(vtos, vtos);
  1028   transition(vtos, vtos);
  1132   NOT_LP64(__ movptr(haddress(n), rdx));
  1119   NOT_LP64(__ movptr(haddress(n), rdx));
  1133 }
  1120 }
  1134 
  1121 
  1135 void TemplateTable::fstore(int n) {
  1122 void TemplateTable::fstore(int n) {
  1136   transition(ftos, vtos);
  1123   transition(ftos, vtos);
  1137   LP64_ONLY(__ movflt(faddress(n), xmm0));
  1124   __ store_float(faddress(n));
  1138   NOT_LP64(__ fstp_s(faddress(n)));
       
  1139 }
  1125 }
  1140 
  1126 
  1141 void TemplateTable::dstore(int n) {
  1127 void TemplateTable::dstore(int n) {
  1142   transition(dtos, vtos);
  1128   transition(dtos, vtos);
  1143   LP64_ONLY(__ movdbl(daddress(n), xmm0));
  1129   __ store_double(daddress(n));
  1144   NOT_LP64(__ fstp_d(daddress(n)));
       
  1145 }
  1130 }
  1146 
  1131 
  1147 
  1132 
  1148 void TemplateTable::astore(int n) {
  1133 void TemplateTable::astore(int n) {
  1149   transition(vtos, vtos);
  1134   transition(vtos, vtos);
  1423 #endif
  1408 #endif
  1424 }
  1409 }
  1425 
  1410 
  1426 void TemplateTable::fop2(Operation op) {
  1411 void TemplateTable::fop2(Operation op) {
  1427   transition(ftos, ftos);
  1412   transition(ftos, ftos);
       
  1413 
       
  1414   if (UseSSE >= 1) {
       
  1415     switch (op) {
       
  1416     case add:
       
  1417       __ addss(xmm0, at_rsp());
       
  1418       __ addptr(rsp, Interpreter::stackElementSize);
       
  1419       break;
       
  1420     case sub:
       
  1421       __ movflt(xmm1, xmm0);
       
  1422       __ pop_f(xmm0);
       
  1423       __ subss(xmm0, xmm1);
       
  1424       break;
       
  1425     case mul:
       
  1426       __ mulss(xmm0, at_rsp());
       
  1427       __ addptr(rsp, Interpreter::stackElementSize);
       
  1428       break;
       
  1429     case div:
       
  1430       __ movflt(xmm1, xmm0);
       
  1431       __ pop_f(xmm0);
       
  1432       __ divss(xmm0, xmm1);
       
  1433       break;
       
  1434     case rem:
       
  1435       // On x86_64 platforms the SharedRuntime::frem method is called to perform the
       
  1436       // modulo operation. The frem method calls the function
       
  1437       // double fmod(double x, double y) in math.h. The documentation of fmod states:
       
  1438       // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN
       
  1439       // (signalling or quiet) is returned.
       
  1440       //
       
  1441       // On x86_32 platforms the FPU is used to perform the modulo operation. The
       
  1442       // reason is that on 32-bit Windows the sign of modulo operations diverges from
       
  1443       // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f (and not -0.0f)).
       
  1444       // The fprem instruction used on x86_32 is functionally equivalent to
       
  1445       // SharedRuntime::frem in that it returns a NaN.
  1428 #ifdef _LP64
  1446 #ifdef _LP64
  1429   switch (op) {
  1447       __ movflt(xmm1, xmm0);
  1430   case add:
  1448       __ pop_f(xmm0);
  1431     __ addss(xmm0, at_rsp());
  1449       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
  1432     __ addptr(rsp, Interpreter::stackElementSize);
  1450 #else
  1433     break;
  1451       __ push_f(xmm0);
  1434   case sub:
  1452       __ pop_f();
  1435     __ movflt(xmm1, xmm0);
  1453       __ fld_s(at_rsp());
  1436     __ pop_f(xmm0);
  1454       __ fremr(rax);
  1437     __ subss(xmm0, xmm1);
  1455       __ f2ieee();
  1438     break;
  1456       __ pop(rax);  // pop second operand off the stack
  1439   case mul:
  1457       __ push_f();
  1440     __ mulss(xmm0, at_rsp());
  1458       __ pop_f(xmm0);
  1441     __ addptr(rsp, Interpreter::stackElementSize);
  1459 #endif
  1442     break;
  1460       break;
  1443   case div:
  1461     default:
  1444     __ movflt(xmm1, xmm0);
  1462       ShouldNotReachHere();
  1445     __ pop_f(xmm0);
  1463       break;
  1446     __ divss(xmm0, xmm1);
  1464     }
  1447     break;
  1465   } else {
  1448   case rem:
  1466 #ifdef _LP64
  1449     __ movflt(xmm1, xmm0);
       
  1450     __ pop_f(xmm0);
       
  1451     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
       
  1452     break;
       
  1453   default:
       
  1454     ShouldNotReachHere();
  1467     ShouldNotReachHere();
  1455     break;
       
  1456   }
       
  1457 #else
  1468 #else
  1458   switch (op) {
  1469     switch (op) {
  1459     case add: __ fadd_s (at_rsp());                break;
  1470     case add: __ fadd_s (at_rsp());                break;
  1460     case sub: __ fsubr_s(at_rsp());                break;
  1471     case sub: __ fsubr_s(at_rsp());                break;
  1461     case mul: __ fmul_s (at_rsp());                break;
  1472     case mul: __ fmul_s (at_rsp());                break;
  1462     case div: __ fdivr_s(at_rsp());                break;
  1473     case div: __ fdivr_s(at_rsp());                break;
  1463     case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
  1474     case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
  1464     default : ShouldNotReachHere();
  1475     default : ShouldNotReachHere();
  1465   }
  1476     }
  1466   __ f2ieee();
  1477     __ f2ieee();
  1467   __ pop(rax);  // pop float thing off
  1478     __ pop(rax);  // pop second operand off the stack
  1468 #endif
  1479 #endif // _LP64
       
  1480   }
  1469 }
  1481 }
  1470 
  1482 
  1471 void TemplateTable::dop2(Operation op) {
  1483 void TemplateTable::dop2(Operation op) {
  1472   transition(dtos, dtos);
  1484   transition(dtos, dtos);
       
  1485   if (UseSSE >= 2) {
       
  1486     switch (op) {
       
  1487     case add:
       
  1488       __ addsd(xmm0, at_rsp());
       
  1489       __ addptr(rsp, 2 * Interpreter::stackElementSize);
       
  1490       break;
       
  1491     case sub:
       
  1492       __ movdbl(xmm1, xmm0);
       
  1493       __ pop_d(xmm0);
       
  1494       __ subsd(xmm0, xmm1);
       
  1495       break;
       
  1496     case mul:
       
  1497       __ mulsd(xmm0, at_rsp());
       
  1498       __ addptr(rsp, 2 * Interpreter::stackElementSize);
       
  1499       break;
       
  1500     case div:
       
  1501       __ movdbl(xmm1, xmm0);
       
  1502       __ pop_d(xmm0);
       
  1503       __ divsd(xmm0, xmm1);
       
  1504       break;
       
  1505     case rem:
       
  1506       // Similar to fop2(), the modulo operation is performed using the
       
  1507       // SharedRuntime::drem method (on x86_64 platforms) or using the
       
  1508       // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2().
  1473 #ifdef _LP64
  1509 #ifdef _LP64
  1474   switch (op) {
  1510       __ movdbl(xmm1, xmm0);
  1475   case add:
  1511       __ pop_d(xmm0);
  1476     __ addsd(xmm0, at_rsp());
  1512       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
  1477     __ addptr(rsp, 2 * Interpreter::stackElementSize);
  1513 #else
  1478     break;
  1514       __ push_d(xmm0);
  1479   case sub:
  1515       __ pop_d();
  1480     __ movdbl(xmm1, xmm0);
  1516       __ fld_d(at_rsp());
  1481     __ pop_d(xmm0);
  1517       __ fremr(rax);
  1482     __ subsd(xmm0, xmm1);
  1518       __ d2ieee();
  1483     break;
  1519       __ pop(rax);
  1484   case mul:
  1520       __ pop(rdx);
  1485     __ mulsd(xmm0, at_rsp());
  1521       __ push_d();
  1486     __ addptr(rsp, 2 * Interpreter::stackElementSize);
  1522       __ pop_d(xmm0);
  1487     break;
  1523 #endif
  1488   case div:
  1524       break;
  1489     __ movdbl(xmm1, xmm0);
  1525     default:
  1490     __ pop_d(xmm0);
  1526       ShouldNotReachHere();
  1491     __ divsd(xmm0, xmm1);
  1527       break;
  1492     break;
  1528     }
  1493   case rem:
  1529   } else {
  1494     __ movdbl(xmm1, xmm0);
  1530 #ifdef _LP64
  1495     __ pop_d(xmm0);
       
  1496     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
       
  1497     break;
       
  1498   default:
       
  1499     ShouldNotReachHere();
  1531     ShouldNotReachHere();
  1500     break;
       
  1501   }
       
  1502 #else
  1532 #else
  1503   switch (op) {
  1533     switch (op) {
  1504     case add: __ fadd_d (at_rsp());                break;
  1534     case add: __ fadd_d (at_rsp());                break;
  1505     case sub: __ fsubr_d(at_rsp());                break;
  1535     case sub: __ fsubr_d(at_rsp());                break;
  1506     case mul: {
  1536     case mul: {
  1507       Label L_strict;
  1537       Label L_strict;
  1508       Label L_join;
  1538       Label L_join;
  1541       __ bind(L_join);
  1571       __ bind(L_join);
  1542       break;
  1572       break;
  1543     }
  1573     }
  1544     case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
  1574     case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
  1545     default : ShouldNotReachHere();
  1575     default : ShouldNotReachHere();
  1546   }
  1576     }
  1547   __ d2ieee();
  1577     __ d2ieee();
  1548   // Pop double precision number from rsp.
  1578     // Pop double precision number from rsp.
  1549   __ pop(rax);
  1579     __ pop(rax);
  1550   __ pop(rdx);
  1580     __ pop(rdx);
  1551 #endif
  1581 #endif
       
  1582   }
  1552 }
  1583 }
  1553 
  1584 
  1554 void TemplateTable::ineg() {
  1585 void TemplateTable::ineg() {
  1555   transition(itos, itos);
  1586   transition(itos, itos);
  1556   __ negl(rax);
  1587   __ negl(rax);
  1560   transition(ltos, ltos);
  1591   transition(ltos, ltos);
  1561   LP64_ONLY(__ negq(rax));
  1592   LP64_ONLY(__ negq(rax));
  1562   NOT_LP64(__ lneg(rdx, rax));
  1593   NOT_LP64(__ lneg(rdx, rax));
  1563 }
  1594 }
  1564 
  1595 
  1565 #ifdef _LP64
       
  1566 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  1596 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  1567 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  1597 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  1568   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  1598   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  1569   // of 128-bits operands for SSE instructions.
  1599   // of 128-bits operands for SSE instructions.
  1570   jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
  1600   jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
  1575 }
  1605 }
  1576 
  1606 
  1577 // Buffer for 128-bits masks used by SSE instructions.
  1607 // Buffer for 128-bits masks used by SSE instructions.
  1578 static jlong float_signflip_pool[2*2];
  1608 static jlong float_signflip_pool[2*2];
  1579 static jlong double_signflip_pool[2*2];
  1609 static jlong double_signflip_pool[2*2];
  1580 #endif
       
  1581 
  1610 
  1582 void TemplateTable::fneg() {
  1611 void TemplateTable::fneg() {
  1583   transition(ftos, ftos);
  1612   transition(ftos, ftos);
  1584 #ifdef _LP64
  1613   if (UseSSE >= 1) {
  1585   static jlong *float_signflip  = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000);
  1614     static jlong *float_signflip  = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000);
  1586   __ xorps(xmm0, ExternalAddress((address) float_signflip));
  1615     __ xorps(xmm0, ExternalAddress((address) float_signflip));
  1587 #else
  1616   } else {
  1588   __ fchs();
  1617     LP64_ONLY(ShouldNotReachHere());
  1589 #endif
  1618     NOT_LP64(__ fchs());
       
  1619   }
  1590 }
  1620 }
  1591 
  1621 
  1592 void TemplateTable::dneg() {
  1622 void TemplateTable::dneg() {
  1593   transition(dtos, dtos);
  1623   transition(dtos, dtos);
       
  1624   if (UseSSE >= 2) {
       
  1625     static jlong *double_signflip  = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
       
  1626     __ xorpd(xmm0, ExternalAddress((address) double_signflip));
       
  1627   } else {
  1594 #ifdef _LP64
  1628 #ifdef _LP64
  1595   static jlong *double_signflip  = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
  1629     ShouldNotReachHere();
  1596   __ xorpd(xmm0, ExternalAddress((address) double_signflip));
       
  1597 #else
  1630 #else
  1598   __ fchs();
  1631     __ fchs();
  1599 #endif
  1632 #endif
       
  1633   }
  1600 }
  1634 }
  1601 
  1635 
  1602 void TemplateTable::iinc() {
  1636 void TemplateTable::iinc() {
  1603   transition(vtos, vtos);
  1637   transition(vtos, vtos);
  1604   __ load_signed_byte(rdx, at_bcp(2)); // get constant
  1638   __ load_signed_byte(rdx, at_bcp(2)); // get constant
  1796   switch (bytecode()) {
  1830   switch (bytecode()) {
  1797     case Bytecodes::_i2l:
  1831     case Bytecodes::_i2l:
  1798       __ extend_sign(rdx, rax);
  1832       __ extend_sign(rdx, rax);
  1799       break;
  1833       break;
  1800     case Bytecodes::_i2f:
  1834     case Bytecodes::_i2f:
  1801       __ push(rax);          // store int on tos
  1835       if (UseSSE >= 1) {
  1802       __ fild_s(at_rsp());   // load int to ST0
  1836         __ cvtsi2ssl(xmm0, rax);
  1803       __ f2ieee();           // truncate to float size
  1837       } else {
  1804       __ pop(rcx);           // adjust rsp
  1838         __ push(rax);          // store int on tos
       
  1839         __ fild_s(at_rsp());   // load int to ST0
       
  1840         __ f2ieee();           // truncate to float size
       
  1841         __ pop(rcx);           // adjust rsp
       
  1842       }
  1805       break;
  1843       break;
  1806     case Bytecodes::_i2d:
  1844     case Bytecodes::_i2d:
       
  1845       if (UseSSE >= 2) {
       
  1846         __ cvtsi2sdl(xmm0, rax);
       
  1847       } else {
  1807       __ push(rax);          // add one slot for d2ieee()
  1848       __ push(rax);          // add one slot for d2ieee()
  1808       __ push(rax);          // store int on tos
  1849       __ push(rax);          // store int on tos
  1809       __ fild_s(at_rsp());   // load int to ST0
  1850       __ fild_s(at_rsp());   // load int to ST0
  1810       __ d2ieee();           // truncate to double size
  1851       __ d2ieee();           // truncate to double size
  1811       __ pop(rcx);           // adjust rsp
  1852       __ pop(rcx);           // adjust rsp
  1812       __ pop(rcx);
  1853       __ pop(rcx);
       
  1854       }
  1813       break;
  1855       break;
  1814     case Bytecodes::_i2b:
  1856     case Bytecodes::_i2b:
  1815       __ shll(rax, 24);      // truncate upper 24 bits
  1857       __ shll(rax, 24);      // truncate upper 24 bits
  1816       __ sarl(rax, 24);      // and sign-extend byte
  1858       __ sarl(rax, 24);      // and sign-extend byte
  1817       LP64_ONLY(__ movsbl(rax, rax));
  1859       LP64_ONLY(__ movsbl(rax, rax));
  1827       break;
  1869       break;
  1828     case Bytecodes::_l2i:
  1870     case Bytecodes::_l2i:
  1829       /* nothing to do */
  1871       /* nothing to do */
  1830       break;
  1872       break;
  1831     case Bytecodes::_l2f:
  1873     case Bytecodes::_l2f:
       
  1874       // On 64-bit platforms, the cvtsi2ssq instruction is used to convert
       
  1875       // 64-bit long values to floats. On 32-bit platforms it is not possible
       
  1876       // to use that instruction with 64-bit operands, therefore the FPU is
       
  1877       // used to perform the conversion.
  1832       __ push(rdx);          // store long on tos
  1878       __ push(rdx);          // store long on tos
  1833       __ push(rax);
  1879       __ push(rax);
  1834       __ fild_d(at_rsp());   // load long to ST0
  1880       __ fild_d(at_rsp());   // load long to ST0
  1835       __ f2ieee();           // truncate to float size
  1881       __ f2ieee();           // truncate to float size
  1836       __ pop(rcx);           // adjust rsp
  1882       __ pop(rcx);           // adjust rsp
  1837       __ pop(rcx);
  1883       __ pop(rcx);
       
  1884       if (UseSSE >= 1) {
       
  1885         __ push_f();
       
  1886         __ pop_f(xmm0);
       
  1887       }
  1838       break;
  1888       break;
  1839     case Bytecodes::_l2d:
  1889     case Bytecodes::_l2d:
       
  1890       // On 32-bit platforms the FPU is used for conversion because on
       
  1891       // 32-bit platforms it is not possible to use the cvtsi2sdq
       
  1892       // instruction with 64-bit operands.
  1840       __ push(rdx);          // store long on tos
  1893       __ push(rdx);          // store long on tos
  1841       __ push(rax);
  1894       __ push(rax);
  1842       __ fild_d(at_rsp());   // load long to ST0
  1895       __ fild_d(at_rsp());   // load long to ST0
  1843       __ d2ieee();           // truncate to double size
  1896       __ d2ieee();           // truncate to double size
  1844       __ pop(rcx);           // adjust rsp
  1897       __ pop(rcx);           // adjust rsp
  1845       __ pop(rcx);
  1898       __ pop(rcx);
       
  1899       if (UseSSE >= 2) {
       
  1900         __ push_d();
       
  1901         __ pop_d(xmm0);
       
  1902       }
  1846       break;
  1903       break;
  1847     case Bytecodes::_f2i:
  1904     case Bytecodes::_f2i:
  1848       __ push(rcx);          // reserve space for argument
  1905       // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs
  1849       __ fstp_s(at_rsp());   // pass float argument on stack
  1906       // as it returns 0 for any NaN.
       
  1907       if (UseSSE >= 1) {
       
  1908         __ push_f(xmm0);
       
  1909       } else {
       
  1910         __ push(rcx);          // reserve space for argument
       
  1911         __ fstp_s(at_rsp());   // pass float argument on stack
       
  1912       }
  1850       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
  1913       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
  1851       break;
  1914       break;
  1852     case Bytecodes::_f2l:
  1915     case Bytecodes::_f2l:
  1853       __ push(rcx);          // reserve space for argument
  1916       // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs
  1854       __ fstp_s(at_rsp());   // pass float argument on stack
  1917       // as it returns 0 for any NaN.
       
  1918       if (UseSSE >= 1) {
       
  1919        __ push_f(xmm0);
       
  1920       } else {
       
  1921         __ push(rcx);          // reserve space for argument
       
  1922         __ fstp_s(at_rsp());   // pass float argument on stack
       
  1923       }
  1855       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
  1924       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
  1856       break;
  1925       break;
  1857     case Bytecodes::_f2d:
  1926     case Bytecodes::_f2d:
  1858       /* nothing to do */
  1927       if (UseSSE < 1) {
       
  1928         /* nothing to do */
       
  1929       } else if (UseSSE == 1) {
       
  1930         __ push_f(xmm0);
       
  1931         __ pop_f();
       
  1932       } else { // UseSSE >= 2
       
  1933         __ cvtss2sd(xmm0, xmm0);
       
  1934       }
  1859       break;
  1935       break;
  1860     case Bytecodes::_d2i:
  1936     case Bytecodes::_d2i:
  1861       __ push(rcx);          // reserve space for argument
  1937       if (UseSSE >= 2) {
  1862       __ push(rcx);
  1938         __ push_d(xmm0);
  1863       __ fstp_d(at_rsp());   // pass double argument on stack
  1939       } else {
       
  1940         __ push(rcx);          // reserve space for argument
       
  1941         __ push(rcx);
       
  1942         __ fstp_d(at_rsp());   // pass double argument on stack
       
  1943       }
  1864       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
  1944       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
  1865       break;
  1945       break;
  1866     case Bytecodes::_d2l:
  1946     case Bytecodes::_d2l:
  1867       __ push(rcx);          // reserve space for argument
  1947       if (UseSSE >= 2) {
  1868       __ push(rcx);
  1948         __ push_d(xmm0);
  1869       __ fstp_d(at_rsp());   // pass double argument on stack
  1949       } else {
       
  1950         __ push(rcx);          // reserve space for argument
       
  1951         __ push(rcx);
       
  1952         __ fstp_d(at_rsp());   // pass double argument on stack
       
  1953       }
  1870       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
  1954       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
  1871       break;
  1955       break;
  1872     case Bytecodes::_d2f:
  1956     case Bytecodes::_d2f:
  1873       __ push(rcx);          // reserve space for f2ieee()
  1957       if (UseSSE <= 1) {
  1874       __ f2ieee();           // truncate to float size
  1958         __ push(rcx);          // reserve space for f2ieee()
  1875       __ pop(rcx);           // adjust rsp
  1959         __ f2ieee();           // truncate to float size
       
  1960         __ pop(rcx);           // adjust rsp
       
  1961         if (UseSSE == 1) {
       
  1962           // The cvtsd2ss instruction is not available if UseSSE==1, therefore
       
  1963           // the conversion is performed using the FPU in this case.
       
  1964           __ push_f();
       
  1965           __ pop_f(xmm0);
       
  1966         }
       
  1967       } else { // UseSSE >= 2
       
  1968         __ cvtsd2ss(xmm0, xmm0);
       
  1969       }
  1876       break;
  1970       break;
  1877     default             :
  1971     default             :
  1878       ShouldNotReachHere();
  1972       ShouldNotReachHere();
  1879   }
  1973   }
  1880 #endif
  1974 #endif
  1899   __ mov(rax, rcx);
  1993   __ mov(rax, rcx);
  1900 #endif
  1994 #endif
  1901 }
  1995 }
  1902 
  1996 
  1903 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  1997 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
       
  1998   if ((is_float && UseSSE >= 1) ||
       
  1999       (!is_float && UseSSE >= 2)) {
       
  2000     Label done;
       
  2001     if (is_float) {
       
  2002       // XXX get rid of pop here, use ... reg, mem32
       
  2003       __ pop_f(xmm1);
       
  2004       __ ucomiss(xmm1, xmm0);
       
  2005     } else {
       
  2006       // XXX get rid of pop here, use ... reg, mem64
       
  2007       __ pop_d(xmm1);
       
  2008       __ ucomisd(xmm1, xmm0);
       
  2009     }
       
  2010     if (unordered_result < 0) {
       
  2011       __ movl(rax, -1);
       
  2012       __ jccb(Assembler::parity, done);
       
  2013       __ jccb(Assembler::below, done);
       
  2014       __ setb(Assembler::notEqual, rdx);
       
  2015       __ movzbl(rax, rdx);
       
  2016     } else {
       
  2017       __ movl(rax, 1);
       
  2018       __ jccb(Assembler::parity, done);
       
  2019       __ jccb(Assembler::above, done);
       
  2020       __ movl(rax, 0);
       
  2021       __ jccb(Assembler::equal, done);
       
  2022       __ decrementl(rax);
       
  2023     }
       
  2024     __ bind(done);
       
  2025   } else {
  1904 #ifdef _LP64
  2026 #ifdef _LP64
  1905   Label done;
  2027     ShouldNotReachHere();
  1906   if (is_float) {
       
  1907     // XXX get rid of pop here, use ... reg, mem32
       
  1908     __ pop_f(xmm1);
       
  1909     __ ucomiss(xmm1, xmm0);
       
  1910   } else {
       
  1911     // XXX get rid of pop here, use ... reg, mem64
       
  1912     __ pop_d(xmm1);
       
  1913     __ ucomisd(xmm1, xmm0);
       
  1914   }
       
  1915   if (unordered_result < 0) {
       
  1916     __ movl(rax, -1);
       
  1917     __ jccb(Assembler::parity, done);
       
  1918     __ jccb(Assembler::below, done);
       
  1919     __ setb(Assembler::notEqual, rdx);
       
  1920     __ movzbl(rax, rdx);
       
  1921   } else {
       
  1922     __ movl(rax, 1);
       
  1923     __ jccb(Assembler::parity, done);
       
  1924     __ jccb(Assembler::above, done);
       
  1925     __ movl(rax, 0);
       
  1926     __ jccb(Assembler::equal, done);
       
  1927     __ decrementl(rax);
       
  1928   }
       
  1929   __ bind(done);
       
  1930 #else
  2028 #else
  1931   if (is_float) {
  2029     if (is_float) {
  1932     __ fld_s(at_rsp());
  2030       __ fld_s(at_rsp());
  1933   } else {
  2031     } else {
  1934     __ fld_d(at_rsp());
  2032       __ fld_d(at_rsp());
  1935     __ pop(rdx);
  2033       __ pop(rdx);
  1936   }
  2034     }
  1937   __ pop(rcx);
  2035     __ pop(rcx);
  1938   __ fcmp2int(rax, unordered_result < 0);
  2036     __ fcmp2int(rax, unordered_result < 0);
  1939 #endif
  2037 #endif // _LP64
       
  2038   }
  1940 }
  2039 }
  1941 
  2040 
  1942 void TemplateTable::branch(bool is_jsr, bool is_wide) {
  2041 void TemplateTable::branch(bool is_jsr, bool is_wide) {
  1943   __ get_method(rcx); // rcx holds method
  2042   __ get_method(rcx); // rcx holds method
  1944   __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
  2043   __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
  2745   __ bind(notLong);
  2844   __ bind(notLong);
  2746   __ cmpl(flags, ftos);
  2845   __ cmpl(flags, ftos);
  2747   __ jcc(Assembler::notEqual, notFloat);
  2846   __ jcc(Assembler::notEqual, notFloat);
  2748   // ftos
  2847   // ftos
  2749 
  2848 
  2750   LP64_ONLY(__ movflt(xmm0, field));
  2849   __ load_float(field);
  2751   NOT_LP64(__ fld_s(field));
       
  2752   __ push(ftos);
  2850   __ push(ftos);
  2753   // Rewrite bytecode to be faster
  2851   // Rewrite bytecode to be faster
  2754   if (!is_static && rc == may_rewrite) {
  2852   if (!is_static && rc == may_rewrite) {
  2755     patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
  2853     patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
  2756   }
  2854   }
  2760 #ifdef ASSERT
  2858 #ifdef ASSERT
  2761   __ cmpl(flags, dtos);
  2859   __ cmpl(flags, dtos);
  2762   __ jcc(Assembler::notEqual, notDouble);
  2860   __ jcc(Assembler::notEqual, notDouble);
  2763 #endif
  2861 #endif
  2764   // dtos
  2862   // dtos
  2765   LP64_ONLY(__ movdbl(xmm0, field));
  2863   __ load_double(field);
  2766   NOT_LP64(__ fld_d(field));
       
  2767   __ push(dtos);
  2864   __ push(dtos);
  2768   // Rewrite bytecode to be faster
  2865   // Rewrite bytecode to be faster
  2769   if (!is_static && rc == may_rewrite) {
  2866   if (!is_static && rc == may_rewrite) {
  2770     patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
  2867     patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
  2771   }
  2868   }
  3043 
  3140 
  3044   // ftos
  3141   // ftos
  3045   {
  3142   {
  3046     __ pop(ftos);
  3143     __ pop(ftos);
  3047     if (!is_static) pop_and_check_object(obj);
  3144     if (!is_static) pop_and_check_object(obj);
  3048     NOT_LP64( __ fstp_s(field);)
  3145     __ store_float(field);
  3049     LP64_ONLY( __ movflt(field, xmm0);)
       
  3050     if (!is_static && rc == may_rewrite) {
  3146     if (!is_static && rc == may_rewrite) {
  3051       patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
  3147       patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
  3052     }
  3148     }
  3053     __ jmp(Done);
  3149     __ jmp(Done);
  3054   }
  3150   }
  3061 
  3157 
  3062   // dtos
  3158   // dtos
  3063   {
  3159   {
  3064     __ pop(dtos);
  3160     __ pop(dtos);
  3065     if (!is_static) pop_and_check_object(obj);
  3161     if (!is_static) pop_and_check_object(obj);
  3066     NOT_LP64( __ fstp_d(field);)
  3162     __ store_double(field);
  3067     LP64_ONLY( __ movdbl(field, xmm0);)
       
  3068     if (!is_static && rc == may_rewrite) {
  3163     if (!is_static && rc == may_rewrite) {
  3069       patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
  3164       patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
  3070     }
  3165     }
  3071   }
  3166   }
  3072 
  3167 
  3120     case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
  3215     case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
  3121     case Bytecodes::_fast_bputfield: // fall through
  3216     case Bytecodes::_fast_bputfield: // fall through
  3122     case Bytecodes::_fast_sputfield: // fall through
  3217     case Bytecodes::_fast_sputfield: // fall through
  3123     case Bytecodes::_fast_cputfield: // fall through
  3218     case Bytecodes::_fast_cputfield: // fall through
  3124     case Bytecodes::_fast_iputfield: __ push_i(rax); break;
  3219     case Bytecodes::_fast_iputfield: __ push_i(rax); break;
  3125     case Bytecodes::_fast_dputfield: __ push_d(); break;
  3220     case Bytecodes::_fast_dputfield: __ push(dtos); break;
  3126     case Bytecodes::_fast_fputfield: __ push_f(); break;
  3221     case Bytecodes::_fast_fputfield: __ push(ftos); break;
  3127     case Bytecodes::_fast_lputfield: __ push_l(rax); break;
  3222     case Bytecodes::_fast_lputfield: __ push_l(rax); break;
  3128 
  3223 
  3129     default:
  3224     default:
  3130       ShouldNotReachHere();
  3225       ShouldNotReachHere();
  3131     }
  3226     }
  3144     case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
  3239     case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
  3145     case Bytecodes::_fast_bputfield: // fall through
  3240     case Bytecodes::_fast_bputfield: // fall through
  3146     case Bytecodes::_fast_sputfield: // fall through
  3241     case Bytecodes::_fast_sputfield: // fall through
  3147     case Bytecodes::_fast_cputfield: // fall through
  3242     case Bytecodes::_fast_cputfield: // fall through
  3148     case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
  3243     case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
  3149     case Bytecodes::_fast_dputfield: __ pop_d(); break;
  3244     case Bytecodes::_fast_dputfield: __ pop(dtos); break;
  3150     case Bytecodes::_fast_fputfield: __ pop_f(); break;
  3245     case Bytecodes::_fast_fputfield: __ pop(ftos); break;
  3151     case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
  3246     case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
  3152     }
  3247     }
  3153     __ bind(L2);
  3248     __ bind(L2);
  3154   }
  3249   }
  3155 }
  3250 }
  3209     // fall through
  3304     // fall through
  3210   case Bytecodes::_fast_cputfield:
  3305   case Bytecodes::_fast_cputfield:
  3211     __ movw(field, rax);
  3306     __ movw(field, rax);
  3212     break;
  3307     break;
  3213   case Bytecodes::_fast_fputfield:
  3308   case Bytecodes::_fast_fputfield:
  3214     NOT_LP64( __ fstp_s(field); )
  3309     __ store_float(field);
  3215     LP64_ONLY( __ movflt(field, xmm0);)
       
  3216     break;
  3310     break;
  3217   case Bytecodes::_fast_dputfield:
  3311   case Bytecodes::_fast_dputfield:
  3218     NOT_LP64( __ fstp_d(field); )
  3312     __ store_double(field);
  3219     LP64_ONLY( __ movdbl(field, xmm0);)
       
  3220     break;
  3313     break;
  3221   default:
  3314   default:
  3222     ShouldNotReachHere();
  3315     ShouldNotReachHere();
  3223   }
  3316   }
  3224 
  3317 
  3299     break;
  3392     break;
  3300   case Bytecodes::_fast_cgetfield:
  3393   case Bytecodes::_fast_cgetfield:
  3301     __ load_unsigned_short(rax, field);
  3394     __ load_unsigned_short(rax, field);
  3302     break;
  3395     break;
  3303   case Bytecodes::_fast_fgetfield:
  3396   case Bytecodes::_fast_fgetfield:
  3304     LP64_ONLY(__ movflt(xmm0, field));
  3397     __ load_float(field);
  3305     NOT_LP64(__ fld_s(field));
       
  3306     break;
  3398     break;
  3307   case Bytecodes::_fast_dgetfield:
  3399   case Bytecodes::_fast_dgetfield:
  3308     LP64_ONLY(__ movdbl(xmm0, field));
  3400     __ load_double(field);
  3309     NOT_LP64(__ fld_d(field));
       
  3310     break;
  3401     break;
  3311   default:
  3402   default:
  3312     ShouldNotReachHere();
  3403     ShouldNotReachHere();
  3313   }
  3404   }
  3314   // [jk] not needed currently
  3405   // [jk] not needed currently
  3344   case atos:
  3435   case atos:
  3345     __ load_heap_oop(rax, field);
  3436     __ load_heap_oop(rax, field);
  3346     __ verify_oop(rax);
  3437     __ verify_oop(rax);
  3347     break;
  3438     break;
  3348   case ftos:
  3439   case ftos:
  3349     LP64_ONLY(__ movflt(xmm0, field));
  3440     __ load_float(field);
  3350     NOT_LP64(__ fld_s(field));
       
  3351     break;
  3441     break;
  3352   default:
  3442   default:
  3353     ShouldNotReachHere();
  3443     ShouldNotReachHere();
  3354   }
  3444   }
  3355 
  3445