hotspot/src/cpu/x86/vm/x86.ad
changeset 30211 442fbbb31f75
parent 25715 d5a8dbdc5150
child 30305 b92a97e1e9cb
equal deleted inserted replaced
30210:507826ef56fd 30211:442fbbb31f75
   488 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
   488 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
   489 
   489 
   490 class NativeJump;
   490 class NativeJump;
   491 
   491 
   492 class CallStubImpl {
   492 class CallStubImpl {
   493  
   493 
   494   //--------------------------------------------------------------
   494   //--------------------------------------------------------------
   495   //---<  Used for optimization in Compile::shorten_branches  >---
   495   //---<  Used for optimization in Compile::shorten_branches  >---
   496   //--------------------------------------------------------------
   496   //--------------------------------------------------------------
   497 
   497 
   498  public:
   498  public:
   499   // Size of call trampoline stub.
   499   // Size of call trampoline stub.
   500   static uint size_call_trampoline() {
   500   static uint size_call_trampoline() {
   501     return 0; // no call trampolines on this platform
   501     return 0; // no call trampolines on this platform
   502   }
   502   }
   503   
   503 
   504   // number of relocations needed by a call trampoline stub
   504   // number of relocations needed by a call trampoline stub
   505   static uint reloc_call_trampoline() { 
   505   static uint reloc_call_trampoline() {
   506     return 0; // no call trampolines on this platform
   506     return 0; // no call trampolines on this platform
   507   }
   507   }
   508 };
   508 };
   509 
   509 
   510 class HandlerImpl {
   510 class HandlerImpl {
   619       if (!UsePopCountInstruction)
   619       if (!UsePopCountInstruction)
   620         return false;
   620         return false;
   621     break;
   621     break;
   622     case Op_MulVI:
   622     case Op_MulVI:
   623       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
   623       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
       
   624         return false;
       
   625     break;
       
   626     case Op_AddReductionVL:
       
   627       if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
       
   628         return false;
       
   629     case Op_AddReductionVI:
       
   630       if (UseSSE < 3) // requires at least SSE3
       
   631         return false;
       
   632     case Op_MulReductionVI:
       
   633       if (UseSSE < 4) // requires at least SSE4
       
   634         return false;
       
   635     case Op_AddReductionVF:
       
   636     case Op_AddReductionVD:
       
   637     case Op_MulReductionVF:
       
   638     case Op_MulReductionVD:
       
   639       if (UseSSE < 1) // requires at least SSE
   624         return false;
   640         return false;
   625     break;
   641     break;
   626     case Op_CompareAndSwapL:
   642     case Op_CompareAndSwapL:
   627 #ifdef _LP64
   643 #ifdef _LP64
   628     case Op_CompareAndSwapP:
   644     case Op_CompareAndSwapP:
  2530     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2546     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  2531   %}
  2547   %}
  2532   ins_pipe( fpu_reg_reg );
  2548   ins_pipe( fpu_reg_reg );
  2533 %}
  2549 %}
  2534 
  2550 
       
  2551 // ====================REDUCTION ARITHMETIC=======================================
       
  2552 
       
  2553 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
       
  2554   predicate(UseSSE > 2 && UseAVX == 0);
       
  2555   match(Set dst (AddReductionVI src1 src2));
       
  2556   effect(TEMP tmp2, TEMP tmp);
       
  2557   format %{ "movdqu  $tmp2,$src2\n\t"
       
  2558             "phaddd  $tmp2,$tmp2\n\t"
       
  2559             "movd    $tmp,$src1\n\t"
       
  2560             "paddd   $tmp,$tmp2\n\t"
       
  2561             "movd    $dst,$tmp\t! add reduction2I" %}
       
  2562   ins_encode %{
       
  2563     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
       
  2564     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
       
  2565     __ movdl($tmp$$XMMRegister, $src1$$Register);
       
  2566     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2567     __ movdl($dst$$Register, $tmp$$XMMRegister);
       
  2568   %}
       
  2569   ins_pipe( pipe_slow );
       
  2570 %}
       
  2571 
       
  2572 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
       
  2573   predicate(UseAVX > 0);
       
  2574   match(Set dst (AddReductionVI src1 src2));
       
  2575   effect(TEMP tmp, TEMP tmp2);
       
  2576   format %{ "vphaddd $tmp,$src2,$src2\n\t"
       
  2577             "movd    $tmp2,$src1\n\t"
       
  2578             "vpaddd  $tmp2,$tmp2,$tmp\n\t"
       
  2579             "movd    $dst,$tmp2\t! add reduction2I" %}
       
  2580   ins_encode %{
       
  2581     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
       
  2582     __ movdl($tmp2$$XMMRegister, $src1$$Register);
       
  2583     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
       
  2584     __ movdl($dst$$Register, $tmp2$$XMMRegister);
       
  2585   %}
       
  2586   ins_pipe( pipe_slow );
       
  2587 %}
       
  2588 
       
  2589 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
       
  2590   predicate(UseSSE > 2 && UseAVX == 0);
       
  2591   match(Set dst (AddReductionVI src1 src2));
       
  2592   effect(TEMP tmp2, TEMP tmp);
       
  2593   format %{ "movdqu  $tmp2,$src2\n\t"
       
  2594             "phaddd  $tmp2,$tmp2\n\t"
       
  2595             "phaddd  $tmp2,$tmp2\n\t"
       
  2596             "movd    $tmp,$src1\n\t"
       
  2597             "paddd   $tmp,$tmp2\n\t"
       
  2598             "movd    $dst,$tmp\t! add reduction4I" %}
       
  2599   ins_encode %{
       
  2600     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
       
  2601     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
       
  2602     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
       
  2603     __ movdl($tmp$$XMMRegister, $src1$$Register);
       
  2604     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2605     __ movdl($dst$$Register, $tmp$$XMMRegister);
       
  2606   %}
       
  2607   ins_pipe( pipe_slow );
       
  2608 %}
       
  2609 
       
  2610 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
       
  2611   predicate(UseAVX > 0);
       
  2612   match(Set dst (AddReductionVI src1 src2));
       
  2613   effect(TEMP tmp, TEMP tmp2);
       
  2614   format %{ "vphaddd $tmp,$src2,$src2\n\t"
       
  2615             "vphaddd $tmp,$tmp,$tmp2\n\t"
       
  2616             "movd    $tmp2,$src1\n\t"
       
  2617             "vpaddd  $tmp2,$tmp2,$tmp\n\t"
       
  2618             "movd    $dst,$tmp2\t! add reduction4I" %}
       
  2619   ins_encode %{
       
  2620     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
       
  2621     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2622     __ movdl($tmp2$$XMMRegister, $src1$$Register);
       
  2623     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
       
  2624     __ movdl($dst$$Register, $tmp2$$XMMRegister);
       
  2625   %}
       
  2626   ins_pipe( pipe_slow );
       
  2627 %}
       
  2628 
       
  2629 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
       
  2630   predicate(UseAVX > 0);
       
  2631   match(Set dst (AddReductionVI src1 src2));
       
  2632   effect(TEMP tmp, TEMP tmp2);
       
  2633   format %{ "vphaddd $tmp,$src2,$src2\n\t"
       
  2634             "vphaddd $tmp,$tmp,$tmp2\n\t"
       
  2635             "vextractf128  $tmp2,$tmp\n\t"
       
  2636             "vpaddd  $tmp,$tmp,$tmp2\n\t"
       
  2637             "movd    $tmp2,$src1\n\t"
       
  2638             "vpaddd  $tmp2,$tmp2,$tmp\n\t"
       
  2639             "movd    $dst,$tmp2\t! add reduction8I" %}
       
  2640   ins_encode %{
       
  2641     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true);
       
  2642     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true);
       
  2643     __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2644     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2645     __ movdl($tmp2$$XMMRegister, $src1$$Register);
       
  2646     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
       
  2647     __ movdl($dst$$Register, $tmp2$$XMMRegister);
       
  2648   %}
       
  2649   ins_pipe( pipe_slow );
       
  2650 %}
       
  2651 
       
  2652 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
       
  2653   predicate(UseSSE >= 1 && UseAVX == 0);
       
  2654   match(Set dst (AddReductionVF src1 src2));
       
  2655   effect(TEMP tmp, TEMP tmp2);
       
  2656   format %{ "movdqu  $tmp,$src1\n\t"
       
  2657             "addss   $tmp,$src2\n\t"
       
  2658             "pshufd  $tmp2,$src2,0x01\n\t"
       
  2659             "addss   $tmp,$tmp2\n\t"
       
  2660             "movdqu  $dst,$tmp\t! add reduction2F" %}
       
  2661   ins_encode %{
       
  2662     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
       
  2663     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
       
  2664     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  2665     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2666     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
       
  2667   %}
       
  2668   ins_pipe( pipe_slow );
       
  2669 %}
       
  2670 
       
  2671 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
       
  2672   predicate(UseAVX > 0);
       
  2673   match(Set dst (AddReductionVF src1 src2));
       
  2674   effect(TEMP tmp2, TEMP tmp);
       
  2675   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
       
  2676             "pshufd  $tmp,$src2,0x01\n\t"
       
  2677             "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
       
  2678   ins_encode %{
       
  2679     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
       
  2680     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  2681     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2682   %}
       
  2683   ins_pipe( pipe_slow );
       
  2684 %}
       
  2685 
       
  2686 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
       
  2687   predicate(UseSSE >= 1 && UseAVX == 0);
       
  2688   match(Set dst (AddReductionVF src1 src2));
       
  2689   effect(TEMP tmp, TEMP tmp2);
       
  2690   format %{ "movdqu  $tmp,$src1\n\t"
       
  2691             "addss   $tmp,$src2\n\t"
       
  2692             "pshufd  $tmp2,$src2,0x01\n\t"
       
  2693             "addss   $tmp,$tmp2\n\t"
       
  2694             "pshufd  $tmp2,$src2,0x02\n\t"
       
  2695             "addss   $tmp,$tmp2\n\t"
       
  2696             "pshufd  $tmp2,$src2,0x03\n\t"
       
  2697             "addss   $tmp,$tmp2\n\t"
       
  2698             "movdqu  $dst,$tmp\t! add reduction4F" %}
       
  2699   ins_encode %{
       
  2700     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
       
  2701     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
       
  2702     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  2703     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2704     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
       
  2705     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2706     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
       
  2707     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2708     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
       
  2709   %}
       
  2710   ins_pipe( pipe_slow );
       
  2711 %}
       
  2712 
       
  2713 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
       
  2714   predicate(UseAVX > 0);
       
  2715   match(Set dst (AddReductionVF src1 src2));
       
  2716   effect(TEMP tmp, TEMP tmp2);
       
  2717   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
       
  2718             "pshufd  $tmp,$src2,0x01\n\t"
       
  2719             "vaddss  $tmp2,$tmp2,$tmp\n\t"
       
  2720             "pshufd  $tmp,$src2,0x02\n\t"
       
  2721             "vaddss  $tmp2,$tmp2,$tmp\n\t"
       
  2722             "pshufd  $tmp,$src2,0x03\n\t"
       
  2723             "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
       
  2724   ins_encode %{
       
  2725     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
       
  2726     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  2727     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2728     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
       
  2729     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2730     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
       
  2731     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2732   %}
       
  2733   ins_pipe( pipe_slow );
       
  2734 %}
       
  2735 
       
  2736 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
       
  2737   predicate(UseAVX > 0);
       
  2738   match(Set dst (AddReductionVF src1 src2));
       
  2739   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
       
  2740   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
       
  2741             "pshufd  $tmp,$src2,0x01\n\t"
       
  2742             "vaddss  $tmp2,$tmp2,$tmp\n\t"
       
  2743             "pshufd  $tmp,$src2,0x02\n\t"
       
  2744             "vaddss  $tmp2,$tmp2,$tmp\n\t"
       
  2745             "pshufd  $tmp,$src2,0x03\n\t"
       
  2746             "vaddss  $tmp2,$tmp2,$tmp\n\t"
       
  2747             "vextractf128  $tmp3,$src2\n\t"
       
  2748             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
       
  2749             "pshufd  $tmp,$tmp3,0x01\n\t"
       
  2750             "vaddss  $tmp2,$tmp2,$tmp\n\t"
       
  2751             "pshufd  $tmp,$tmp3,0x02\n\t"
       
  2752             "vaddss  $tmp2,$tmp2,$tmp\n\t"
       
  2753             "pshufd  $tmp,$tmp3,0x03\n\t"
       
  2754             "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
       
  2755   ins_encode %{
       
  2756     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
       
  2757     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  2758     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2759     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
       
  2760     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2761     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
       
  2762     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2763     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
       
  2764     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
       
  2765     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
       
  2766     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2767     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
       
  2768     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2769     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
       
  2770     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2771   %}
       
  2772   ins_pipe( pipe_slow );
       
  2773 %}
       
  2774 
       
  2775 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
       
  2776   predicate(UseSSE >= 1 && UseAVX == 0);
       
  2777   match(Set dst (AddReductionVD src1 src2));
       
  2778   effect(TEMP tmp, TEMP dst);
       
  2779   format %{ "movdqu  $tmp,$src1\n\t"
       
  2780             "addsd   $tmp,$src2\n\t"
       
  2781             "pshufd  $dst,$src2,0xE\n\t"
       
  2782             "addsd   $dst,$tmp\t! add reduction2D" %}
       
  2783   ins_encode %{
       
  2784     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
       
  2785     __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
       
  2786     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  2787     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
       
  2788   %}
       
  2789   ins_pipe( pipe_slow );
       
  2790 %}
       
  2791 
       
  2792 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
       
  2793   predicate(UseAVX > 0);
       
  2794   match(Set dst (AddReductionVD src1 src2));
       
  2795   effect(TEMP tmp, TEMP tmp2);
       
  2796   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
       
  2797             "pshufd  $tmp,$src2,0xE\n\t"
       
  2798             "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
       
  2799   ins_encode %{
       
  2800     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
       
  2801     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  2802     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2803   %}
       
  2804   ins_pipe( pipe_slow );
       
  2805 %}
       
  2806 
       
  2807 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
       
  2808   predicate(UseAVX > 0);
       
  2809   match(Set dst (AddReductionVD src1 src2));
       
  2810   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
       
  2811   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
       
  2812             "pshufd  $tmp,$src2,0xE\n\t"
       
  2813             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
       
  2814             "vextractf128  $tmp3,$src2\n\t"
       
  2815             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
       
  2816             "pshufd  $tmp,$tmp3,0xE\n\t"
       
  2817             "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
       
  2818   ins_encode %{
       
  2819     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
       
  2820     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  2821     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2822     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
       
  2823     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
       
  2824     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
       
  2825     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2826   %}
       
  2827   ins_pipe( pipe_slow );
       
  2828 %}
       
  2829 
       
  2830 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
       
  2831   predicate(UseSSE > 3 && UseAVX == 0);
       
  2832   match(Set dst (MulReductionVI src1 src2));
       
  2833   effect(TEMP tmp, TEMP tmp2);
       
  2834   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
       
  2835             "pmulld  $tmp2,$src2\n\t"
       
  2836             "movd    $tmp,$src1\n\t"
       
  2837             "pmulld  $tmp2,$tmp\n\t"
       
  2838             "movd    $dst,$tmp2\t! mul reduction2I" %}
       
  2839   ins_encode %{
       
  2840     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
       
  2841     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
       
  2842     __ movdl($tmp$$XMMRegister, $src1$$Register);
       
  2843     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2844     __ movdl($dst$$Register, $tmp2$$XMMRegister);
       
  2845   %}
       
  2846   ins_pipe( pipe_slow );
       
  2847 %}
       
  2848 
       
  2849 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
       
  2850   predicate(UseAVX > 0);
       
  2851   match(Set dst (MulReductionVI src1 src2));
       
  2852   effect(TEMP tmp, TEMP tmp2);
       
  2853   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
       
  2854             "vpmulld $tmp,$src2,$tmp2\n\t"
       
  2855             "movd    $tmp2,$src1\n\t"
       
  2856             "vpmulld $tmp2,$tmp,$tmp2\n\t"
       
  2857             "movd    $dst,$tmp2\t! mul reduction2I" %}
       
  2858   ins_encode %{
       
  2859     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
       
  2860     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2861     __ movdl($tmp2$$XMMRegister, $src1$$Register);
       
  2862     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2863     __ movdl($dst$$Register, $tmp2$$XMMRegister);
       
  2864   %}
       
  2865   ins_pipe( pipe_slow );
       
  2866 %}
       
  2867 
       
  2868 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
       
  2869   predicate(UseSSE > 3 && UseAVX == 0);
       
  2870   match(Set dst (MulReductionVI src1 src2));
       
  2871   effect(TEMP tmp, TEMP tmp2);
       
  2872   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
       
  2873             "pmulld  $tmp2,$src2\n\t"
       
  2874             "pshufd  $tmp,$tmp2,0x1\n\t"
       
  2875             "pmulld  $tmp2,$tmp\n\t"
       
  2876             "movd    $tmp,$src1\n\t"
       
  2877             "pmulld  $tmp2,$tmp\n\t"
       
  2878             "movd    $dst,$tmp2\t! mul reduction4I" %}
       
  2879   ins_encode %{
       
  2880     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  2881     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
       
  2882     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
       
  2883     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2884     __ movdl($tmp$$XMMRegister, $src1$$Register);
       
  2885     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2886     __ movdl($dst$$Register, $tmp2$$XMMRegister);
       
  2887   %}
       
  2888   ins_pipe( pipe_slow );
       
  2889 %}
       
  2890 
       
  2891 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
       
  2892   predicate(UseAVX > 0);
       
  2893   match(Set dst (MulReductionVI src1 src2));
       
  2894   effect(TEMP tmp, TEMP tmp2);
       
  2895   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
       
  2896             "vpmulld $tmp,$src2,$tmp2\n\t"
       
  2897             "pshufd  $tmp2,$tmp,0x1\n\t"
       
  2898             "vpmulld $tmp,$tmp,$tmp2\n\t"
       
  2899             "movd    $tmp2,$src1\n\t"
       
  2900             "vpmulld $tmp2,$tmp,$tmp2\n\t"
       
  2901             "movd    $dst,$tmp2\t! mul reduction4I" %}
       
  2902   ins_encode %{
       
  2903     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  2904     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2905     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
       
  2906     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2907     __ movdl($tmp2$$XMMRegister, $src1$$Register);
       
  2908     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2909     __ movdl($dst$$Register, $tmp2$$XMMRegister);
       
  2910   %}
       
  2911   ins_pipe( pipe_slow );
       
  2912 %}
       
  2913 
       
  2914 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
       
  2915   predicate(UseAVX > 0);
       
  2916   match(Set dst (MulReductionVI src1 src2));
       
  2917   effect(TEMP tmp, TEMP tmp2);
       
  2918   format %{ "vextractf128  $tmp,$src2\n\t"
       
  2919             "vpmulld $tmp,$tmp,$src2\n\t"
       
  2920             "pshufd  $tmp2,$tmp,0xE\n\t"
       
  2921             "vpmulld $tmp,$tmp,$tmp2\n\t"
       
  2922             "pshufd  $tmp2,$tmp,0x1\n\t"
       
  2923             "vpmulld $tmp,$tmp,$tmp2\n\t"
       
  2924             "movd    $tmp2,$src1\n\t"
       
  2925             "vpmulld $tmp2,$tmp,$tmp2\n\t"
       
  2926             "movd    $dst,$tmp2\t! mul reduction8I" %}
       
  2927   ins_encode %{
       
  2928     __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister);
       
  2929     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false);
       
  2930     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
       
  2931     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2932     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
       
  2933     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2934     __ movdl($tmp2$$XMMRegister, $src1$$Register);
       
  2935     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
       
  2936     __ movdl($dst$$Register, $tmp2$$XMMRegister);
       
  2937   %}
       
  2938   ins_pipe( pipe_slow );
       
  2939 %}
       
  2940 
       
  2941 instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
       
  2942   predicate(UseSSE >= 1 && UseAVX == 0);
       
  2943   match(Set dst (MulReductionVF src1 src2));
       
  2944   effect(TEMP tmp, TEMP tmp2);
       
  2945   format %{ "movdqu  $tmp,$src1\n\t"
       
  2946             "mulss   $tmp,$src2\n\t"
       
  2947             "pshufd  $tmp2,$src2,0x01\n\t"
       
  2948             "mulss   $tmp,$tmp2\n\t"
       
  2949             "movdqu  $dst,$tmp\t! add reduction2F" %}
       
  2950   ins_encode %{
       
  2951     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
       
  2952     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
       
  2953     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  2954     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2955     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
       
  2956   %}
       
  2957   ins_pipe( pipe_slow );
       
  2958 %}
       
  2959 
       
  2960 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
       
  2961   predicate(UseAVX > 0);
       
  2962   match(Set dst (MulReductionVF src1 src2));
       
  2963   effect(TEMP tmp, TEMP tmp2);
       
  2964   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
       
  2965             "pshufd  $tmp,$src2,0x01\n\t"
       
  2966             "vmulss  $dst,$tmp2,$tmp\t! add reduction2F" %}
       
  2967   ins_encode %{
       
  2968     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
       
  2969     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  2970     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  2971   %}
       
  2972   ins_pipe( pipe_slow );
       
  2973 %}
       
  2974 
       
  2975 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
       
  2976   predicate(UseSSE >= 1 && UseAVX == 0);
       
  2977   match(Set dst (MulReductionVF src1 src2));
       
  2978   effect(TEMP tmp, TEMP tmp2);
       
  2979   format %{ "movdqu  $tmp,$src1\n\t"
       
  2980             "mulss   $tmp,$src2\n\t"
       
  2981             "pshufd  $tmp2,$src2,0x01\n\t"
       
  2982             "mulss   $tmp,$tmp2\n\t"
       
  2983             "pshufd  $tmp2,$src2,0x02\n\t"
       
  2984             "mulss   $tmp,$tmp2\n\t"
       
  2985             "pshufd  $tmp2,$src2,0x03\n\t"
       
  2986             "mulss   $tmp,$tmp2\n\t"
       
  2987             "movdqu  $dst,$tmp\t! add reduction4F" %}
       
  2988   ins_encode %{
       
  2989     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
       
  2990     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
       
  2991     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  2992     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2993     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
       
  2994     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2995     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
       
  2996     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
       
  2997     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
       
  2998   %}
       
  2999   ins_pipe( pipe_slow );
       
  3000 %}
       
  3001 
       
  3002 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
       
  3003   predicate(UseAVX > 0);
       
  3004   match(Set dst (MulReductionVF src1 src2));
       
  3005   effect(TEMP tmp, TEMP tmp2);
       
  3006   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
       
  3007             "pshufd  $tmp,$src2,0x01\n\t"
       
  3008             "vmulss  $tmp2,$tmp2,$tmp\n\t"
       
  3009             "pshufd  $tmp,$src2,0x02\n\t"
       
  3010             "vmulss  $tmp2,$tmp2,$tmp\n\t"
       
  3011             "pshufd  $tmp,$src2,0x03\n\t"
       
  3012             "vmulss  $dst,$tmp2,$tmp\t! add reduction4F" %}
       
  3013   ins_encode %{
       
  3014     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
       
  3015     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
       
  3016     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  3017     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
       
  3018     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  3019     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
       
  3020     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
       
  3021   %}
       
  3022   ins_pipe( pipe_slow );
       
  3023 %}
       
  3024 
       
// Product reduction over an 8-float (256-bit) vector, AVX only:
// dst = src1 * src2[0] * src2[1] * ... * src2[7]  (matches MulReductionVF).
// Strategy: fold the low four lanes into scalar tmp2 by rotating each lane
// into position 0 with pshufd (imm 0x01/0x02/0x03) and multiplying with
// vmulss, then pull the upper 128 bits down with vextractf128h and fold the
// remaining four lanes the same way.  tmp holds the shuffled lane, tmp2 the
// running product, tmp3 the extracted upper half.
  3025 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{

  3026   predicate(UseAVX > 0);

  3027   match(Set dst (MulReductionVF src1 src2));

  3028   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);

  3029   format %{ "vmulss  $tmp2,$src1,$src2\n\t"

  3030             "pshufd  $tmp,$src2,0x01\n\t"

  3031             "vmulss  $tmp2,$tmp2,$tmp\n\t"

  3032             "pshufd  $tmp,$src2,0x02\n\t"

  3033             "vmulss  $tmp2,$tmp2,$tmp\n\t"

  3034             "pshufd  $tmp,$src2,0x03\n\t"

  3035             "vmulss  $tmp2,$tmp2,$tmp\n\t"

  3036             "vextractf128  $tmp3,$src2\n\t"

  3037             "vmulss  $tmp2,$tmp2,$tmp3\n\t"

  3038             "pshufd  $tmp,$tmp3,0x01\n\t"

  3039             "vmulss  $tmp2,$tmp2,$tmp\n\t"

  3040             "pshufd  $tmp,$tmp3,0x02\n\t"

  3041             "vmulss  $tmp2,$tmp2,$tmp\n\t"

  3042             "pshufd  $tmp,$tmp3,0x03\n\t"

  3043             "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}

  3044   ins_encode %{

  3045     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);

  3046     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);

  3047     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

  3048     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);

  3049     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

  3050     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);

  3051     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

// Bring lanes 4..7 (upper 128 bits of the YMM register) into tmp3, then
// repeat the same rotate-and-multiply folding on them.
  3052     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);

  3053     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);

  3054     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);

  3055     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

  3056     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);

  3057     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

  3058     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);

  3059     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

  3060   %}

  3061   ins_pipe( pipe_slow );

  3062 %}
       
  3063 
       
  3064 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
       
  3065   predicate(UseSSE >= 1 && UseAVX == 0);
       
  3066   match(Set dst (MulReductionVD src1 src2));
       
  3067   effect(TEMP tmp, TEMP dst);
       
  3068   format %{ "movdqu  $tmp,$src1\n\t"
       
  3069             "mulsd   $tmp,$src2\n\t"
       
  3070             "pshufd  $dst,$src2,0xE\n\t"
       
  3071             "mulsd   $dst,$tmp\t! add reduction2D" %}
       
  3072   ins_encode %{
       
  3073     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
       
  3074     __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
       
  3075     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
       
  3076     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
       
  3077   %}
       
  3078   ins_pipe( pipe_slow );
       
  3079 %}
       
  3080 
       
// Product reduction over a 2-double (128-bit) vector, AVX path:
// dst = src1 * src2[0] * src2[1]  (matches MulReductionVD).
// Three-operand vmulsd avoids the destructive-destination copy needed by
// the SSE variant; pshufd 0xE rotates src2's high double into lane 0.
  3081 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{

  3082   predicate(UseAVX > 0);

  3083   match(Set dst (MulReductionVD src1 src2));

  3084   effect(TEMP tmp, TEMP tmp2);

  3085   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"

  3086             "pshufd  $tmp,$src2,0xE\n\t"

  3087             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}

  3088   ins_encode %{

  3089     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);

  3090     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);

  3091     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

  3092   %}

  3093   ins_pipe( pipe_slow );

  3094 %}
       
  3095 
       
// Product reduction over a 4-double (256-bit) vector, AVX only:
// dst = src1 * src2[0] * src2[1] * src2[2] * src2[3]
// (matches MulReductionVD).  Folds the low two doubles into scalar tmp2
// (pshufd 0xE rotates the high double into lane 0), then extracts the
// upper 128 bits into tmp3 with vextractf128h and folds those two doubles
// the same way.
  3096 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{

  3097   predicate(UseAVX > 0);

  3098   match(Set dst (MulReductionVD src1 src2));

  3099   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);

  3100   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"

  3101             "pshufd  $tmp,$src2,0xE\n\t"

  3102             "vmulsd  $tmp2,$tmp2,$tmp\n\t"

  3103             "vextractf128  $tmp3,$src2\n\t"

  3104             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"

  3105             "pshufd  $tmp,$tmp3,0xE\n\t"

  3106             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}

  3107   ins_encode %{

  3108     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);

  3109     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);

  3110     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

// Upper half (doubles 2 and 3) of the YMM register.
  3111     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);

  3112     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);

  3113     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);

  3114     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);

  3115   %}

  3116   ins_pipe( pipe_slow );

  3117 %}
       
  3118 
  2535 // ====================VECTOR ARITHMETIC=======================================
  3119 // ====================VECTOR ARITHMETIC=======================================
  2536 
  3120 
  2537 // --------------------------------- ADD --------------------------------------
  3121 // --------------------------------- ADD --------------------------------------
  2538 
  3122 
  2539 // Bytes vector add
  3123 // Bytes vector add