hotspot/src/cpu/x86/vm/x86_32.ad
changeset 12739 09f26b73ae66
parent 12113 71f302d5c8ee
child 12952 a1f3ff3a89e1
equal deleted inserted replaced
12623:09fcb0dc71ad 12739:09f26b73ae66
  2532 
  2532 
  2533   enc_class push_xmm_to_fpr1(regD src) %{
  2533   enc_class push_xmm_to_fpr1(regD src) %{
  2534     MacroAssembler _masm(&cbuf);
  2534     MacroAssembler _masm(&cbuf);
  2535     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  2535     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  2536     __ fld_d(Address(rsp, 0));
  2536     __ fld_d(Address(rsp, 0));
  2537   %}
       
  2538 
       
  2539   // Compute X^Y using Intel's fast hardware instructions, if possible.
       
  2540   // Otherwise return a NaN.
       
  2541   enc_class pow_exp_core_encoding %{
       
  2542     // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
       
  2543     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
       
  2544     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
       
  2545     emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
       
  2546     emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
       
  2547     emit_opcode(cbuf,0x1C);
       
  2548     emit_d8(cbuf,0x24);
       
  2549     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
       
  2550     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
       
  2551     emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
       
  2552     emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
       
  2553     encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
       
  2554     emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
       
  2555     emit_rm(cbuf, 0x3, 0x0, ECX_enc);
       
  2556     emit_d32(cbuf,0xFFFFF800);
       
  2557     emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
       
  2558     emit_rm(cbuf, 0x3, 0x0, EAX_enc);
       
  2559     emit_d32(cbuf,1023);
       
  2560     emit_opcode(cbuf,0x8B);                          // mov rbx,eax
       
  2561     emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
       
  2562     emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
       
  2563     emit_rm(cbuf,0x3,0x4,EAX_enc);
       
  2564     emit_d8(cbuf,20);
       
  2565     emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
       
  2566     emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
       
  2567     emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
       
  2568     emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
       
  2569     emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
       
  2570     encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
       
  2571     emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
       
  2572     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
       
  2573     emit_d32(cbuf,0);
       
  2574     emit_opcode(cbuf,0xDC);                          // fmul qword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
       
  2575     encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
       
  2576   %}
  2537   %}
  2577 
  2538 
  2578   enc_class Push_Result_Mod_DPR( regDPR src) %{
  2539   enc_class Push_Result_Mod_DPR( regDPR src) %{
  2579     if ($src$$reg != FPR1L_enc) {
  2540     if ($src$$reg != FPR1L_enc) {
  2580       // fincstp
  2541       // fincstp
 10098   ins_encode( Push_Reg_DPR(src),
 10059   ins_encode( Push_Reg_DPR(src),
 10099               OpcS, OpcP, Pop_Reg_DPR(dst) );
 10060               OpcS, OpcP, Pop_Reg_DPR(dst) );
 10100   ins_pipe( pipe_slow );
 10061   ins_pipe( pipe_slow );
 10101 %}
 10062 %}
 10102 
 10063 
 10103 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
 10064 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
 10104   predicate (UseSSE<=1);
 10065   predicate (UseSSE<=1);
 10105   match(Set Y (PowD X Y));  // Raise X to the Yth power
 10066   match(Set Y (PowD X Y));  // Raise X to the Yth power
 10106   effect(KILL rax, KILL rbx, KILL rcx);
 10067   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
 10107   format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
 10068   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
 10108             "FLD_D  $X\n\t"
 10069   ins_encode %{
 10109             "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
 10070     __ subptr(rsp, 8);
 10110 
 10071     __ fld_s($X$$reg - 1);
 10111             "FDUP   \t\t\t# Q Q\n\t"
 10072     __ fast_pow();
 10112             "FRNDINT\t\t\t# int(Q) Q\n\t"
 10073     __ addptr(rsp, 8);
 10113             "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
 10074   %}
 10114             "FISTP  dword [ESP]\n\t"
 10075   ins_pipe( pipe_slow );
 10115             "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
 10076 %}
 10116             "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
 10077 
 10117             "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
 10078 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
 10118             "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
       
 10119             "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
       
 10120             "ADD    EAX,1023\t\t# Double exponent bias\n\t"
       
 10121             "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
       
 10122             "SHL    EAX,20\t\t# Shift exponent into place\n\t"
       
 10123             "TEST   EBX,ECX\t\t# Check for overflow\n\t"
       
 10124             "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
       
 10125             "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
       
 10126             "MOV    [ESP+0],0\n\t"
       
 10127             "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
       
 10128 
       
 10129             "ADD    ESP,8"
       
 10130              %}
       
 10131   ins_encode( push_stack_temp_qword,
       
 10132               Push_Reg_DPR(X),
       
 10133               Opcode(0xD9), Opcode(0xF1),   // fyl2x
       
 10134               pow_exp_core_encoding,
       
 10135               pop_stack_temp_qword);
       
 10136   ins_pipe( pipe_slow );
       
 10137 %}
       
 10138 
       
 10139 instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
       
 10140   predicate (UseSSE>=2);
 10079   predicate (UseSSE>=2);
 10141   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
 10080   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
 10142   effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
 10081   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
 10143   format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
 10082   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
 10144             "MOVSD  [ESP],$src1\n\t"
 10083   ins_encode %{
 10145             "FLD    FPR1,$src1\n\t"
 10084     __ subptr(rsp, 8);
 10146             "MOVSD  [ESP],$src0\n\t"
 10085     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 10147             "FLD    FPR1,$src0\n\t"
 10086     __ fld_d(Address(rsp, 0));
 10148             "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
 10087     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 10149 
 10088     __ fld_d(Address(rsp, 0));
 10150             "FDUP   \t\t\t# Q Q\n\t"
 10089     __ fast_pow();
 10151             "FRNDINT\t\t\t# int(Q) Q\n\t"
 10090     __ fstp_d(Address(rsp, 0));
 10152             "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
 10091     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 10153             "FISTP  dword [ESP]\n\t"
 10092     __ addptr(rsp, 8);
 10154             "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
 10093   %}
 10155             "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
 10094   ins_pipe( pipe_slow );
 10156             "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
 10095 %}
 10157             "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
 10096 
 10158             "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
 10097 
 10159             "ADD    EAX,1023\t\t# Double exponent bias\n\t"
 10098 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
 10160             "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
       
 10161             "SHL    EAX,20\t\t# Shift exponent into place\n\t"
       
 10162             "TEST   EBX,ECX\t\t# Check for overflow\n\t"
       
 10163             "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
       
 10164             "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
       
 10165             "MOV    [ESP+0],0\n\t"
       
 10166             "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
       
 10167 
       
 10168             "FST_D  [ESP]\n\t"
       
 10169             "MOVSD  $dst,[ESP]\n\t"
       
 10170             "ADD    ESP,8"
       
 10171              %}
       
 10172   ins_encode( push_stack_temp_qword,
       
 10173               push_xmm_to_fpr1(src1),
       
 10174               push_xmm_to_fpr1(src0),
       
 10175               Opcode(0xD9), Opcode(0xF1),   // fyl2x
       
 10176               pow_exp_core_encoding,
       
 10177               Push_ResultD(dst) );
       
 10178   ins_pipe( pipe_slow );
       
 10179 %}
       
 10180 
       
 10181 
       
 10182 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
       
 10183   predicate (UseSSE<=1);
 10099   predicate (UseSSE<=1);
 10184   match(Set dpr1 (ExpD dpr1));
 10100   match(Set dpr1 (ExpD dpr1));
 10185   effect(KILL rax, KILL rbx, KILL rcx);
 10101   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
 10186   format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
 10102   format %{ "fast_exp $dpr1 -> $dpr1  // KILL $rax, $rcx, $rdx" %}
 10187             "FLDL2E \t\t\t# Ld log2(e) X\n\t"
 10103   ins_encode %{
 10188             "FMULP  \t\t\t# Q=X*log2(e)\n\t"
 10104     __ fast_exp();
 10189 
 10105   %}
 10190             "FDUP   \t\t\t# Q Q\n\t"
 10106   ins_pipe( pipe_slow );
 10191             "FRNDINT\t\t\t# int(Q) Q\n\t"
 10107 %}
 10192             "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
 10108 
 10193             "FISTP  dword [ESP]\n\t"
 10109 instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
 10194             "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
       
 10195             "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
       
 10196             "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
       
 10197             "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
       
 10198             "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
       
 10199             "ADD    EAX,1023\t\t# Double exponent bias\n\t"
       
 10200             "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
       
 10201             "SHL    EAX,20\t\t# Shift exponent into place\n\t"
       
 10202             "TEST   EBX,ECX\t\t# Check for overflow\n\t"
       
 10203             "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
       
 10204             "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
       
 10205             "MOV    [ESP+0],0\n\t"
       
 10206             "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
       
 10207 
       
 10208             "ADD    ESP,8"
       
 10209              %}
       
 10210   ins_encode( push_stack_temp_qword,
       
 10211               Opcode(0xD9), Opcode(0xEA),   // fldl2e
       
 10212               Opcode(0xDE), Opcode(0xC9),   // fmulp
       
 10213               pow_exp_core_encoding,
       
 10214               pop_stack_temp_qword);
       
 10215   ins_pipe( pipe_slow );
       
 10216 %}
       
 10217 
       
 10218 instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
       
 10219   predicate (UseSSE>=2);
 10110   predicate (UseSSE>=2);
 10220   match(Set dst (ExpD src));
 10111   match(Set dst (ExpD src));
 10221   effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
 10112   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
 10222   format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
 10113   format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
 10223             "MOVSD  [ESP],$src\n\t"
 10114   ins_encode %{
 10224             "FLDL2E \t\t\t# Ld log2(e) X\n\t"
 10115     __ subptr(rsp, 8);
 10225             "FMULP  \t\t\t# Q=X*log2(e) X\n\t"
 10116     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 10226 
 10117     __ fld_d(Address(rsp, 0));
 10227             "FDUP   \t\t\t# Q Q\n\t"
 10118     __ fast_exp();
 10228             "FRNDINT\t\t\t# int(Q) Q\n\t"
 10119     __ fstp_d(Address(rsp, 0));
 10229             "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
 10120     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 10230             "FISTP  dword [ESP]\n\t"
 10121     __ addptr(rsp, 8);
 10231             "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
 10122   %}
 10232             "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
 10123   ins_pipe( pipe_slow );
 10233             "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
 10124 %}
 10234             "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
       
 10235             "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
       
 10236             "ADD    EAX,1023\t\t# Double exponent bias\n\t"
       
 10237             "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
       
 10238             "SHL    EAX,20\t\t# Shift exponent into place\n\t"
       
 10239             "TEST   EBX,ECX\t\t# Check for overflow\n\t"
       
 10240             "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
       
 10241             "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
       
 10242             "MOV    [ESP+0],0\n\t"
       
 10243             "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
       
 10244 
       
 10245             "FST_D  [ESP]\n\t"
       
 10246             "MOVSD  $dst,[ESP]\n\t"
       
 10247             "ADD    ESP,8"
       
 10248              %}
       
 10249   ins_encode( Push_SrcD(src),
       
 10250               Opcode(0xD9), Opcode(0xEA),   // fldl2e
       
 10251               Opcode(0xDE), Opcode(0xC9),   // fmulp
       
 10252               pow_exp_core_encoding,
       
 10253               Push_ResultD(dst) );
       
 10254   ins_pipe( pipe_slow );
       
 10255 %}
       
 10256 
       
 10257 
       
 10258 
 10125 
 10259 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
 10126 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
 10260   predicate (UseSSE<=1);
 10127   predicate (UseSSE<=1);
 10261   // The source Double operand on FPU stack
 10128   // The source Double operand on FPU stack
 10262   match(Set dst (Log10D src));
 10129   match(Set dst (Log10D src));