hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
changeset 33198 b37ad9fbf681
parent 33096 d38227d62ef4
parent 33176 54393049bf1e
child 34664 41c821224dd7
child 35119 7af8d9f08a25
equal deleted inserted replaced
33155:73bf16b22e89 33198:b37ad9fbf681
  2396     __ ret(lr);
  2396     __ ret(lr);
  2397 
  2397 
  2398     return start;
  2398     return start;
  2399   }
  2399   }
  2400 
  2400 
       
  2401   /**
       
          *  Generates the "updateBytesAdler32" stub: folds len bytes starting at
          *  buff into the running checksum passed in adler and hands back the
          *  updated checksum.  The function itself returns the entry address of
          *  the generated code.
          *
  2402    *  Arguments:
       
  2403    *
       
  2404    *  Inputs:
       
  2405    *   c_rarg0   - int   adler
       
  2406    *   c_rarg1   - byte* buff
       
  2407    *   c_rarg2   - int   len
       
  2408    *
       
  2409    *  Output:
       
  2410    *   c_rarg0   - int adler result
       
  2411    */
       
  2412   address generate_updateBytesAdler32() {
       
  2413     __ align(CodeEntryAlignment);
       
  2414     StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
       
  2415     address start = __ pc();
       
  2416 
       
  2417     Label L_simple_by1_loop, L_nmax, L_nmax_loop, L_by16, L_by16_loop, L_by1_loop, L_do_mod, L_combine, L_by1;
       
  2418 
       
  2419     // Register aliases
       
  2420     Register adler  = c_rarg0;
       
  2421     Register s1     = c_rarg0;  // shares c_rarg0 with adler: low 16 bits of the checksum
       
  2422     Register s2     = c_rarg3;  // high 16 bits of the checksum
       
  2423     Register buff   = c_rarg1;
       
  2424     Register len    = c_rarg2;
       
  2425     Register nmax  = r4;        // holds the constant NMAX
       
  2426     Register base = r5;         // holds the constant BASE
       
  2427     Register count = r6;        // byte counter inside one NMAX-sized chunk
       
  2428     Register temp0 = rscratch1;
       
  2429     Register temp1 = rscratch2;
       
  2430     Register temp2 = r7;
       
  2431 
       
  2432     // Max number of bytes we can process before having to take the mod
       
  2433     // 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
       
  2434     unsigned long BASE = 0xfff1;  // 65521, the modulus applied to s1 and s2
       
  2435     unsigned long NMAX = 0x15B0;
       
  2436 
       
  2437     __ mov(base, BASE);
       
  2438     __ mov(nmax, NMAX);
       
  2439 
       
  2440     // s1 is initialized to the lower 16 bits of adler
       
  2441     // s2 is initialized to the upper 16 bits of adler
       
  2442     __ ubfx(s2, adler, 16, 16);  // s2 = ((adler >> 16) & 0xffff)
       
  2443     __ uxth(s1, adler);          // s1 = (adler & 0xffff)
       
  2444 
       
  2445     // The unrolled loops consume 16 bytes per iteration.  They would cope with
       
  2446     // shorter inputs, but for len < 16 it is cheaper to go straight to a byte loop.
       
  2447     __ cmp(len, 16);
       
  2448     __ br(Assembler::HS, L_nmax);   // len >= 16 (unsigned): take the unrolled path
       
  2449     __ cbz(len, L_combine);         // len == 0: no bytes to add, just repack s1/s2
       
  2450 
       
  2451     __ bind(L_simple_by1_loop);
       
  2452     __ ldrb(temp0, Address(__ post(buff, 1)));  // temp0 = *buff++
       
  2453     __ add(s1, s1, temp0);
       
  2454     __ add(s2, s2, s1);
       
  2455     __ subs(len, len, 1);
       
  2456     __ br(Assembler::HI, L_simple_by1_loop);    // loop while the decremented len is still > 0
       
  2457 
       
  2458     // s1 = s1 % BASE
       
  2459     // At most 15 bytes were added, so s1 <= 0xffff + 15*255 < 2*BASE and a
       
          // single conditional subtraction is enough.
       
  2459     __ subs(temp0, s1, base);
       
  2460     __ csel(s1, temp0, s1, Assembler::HS);      // keep s1 - BASE only if it did not borrow
       
  2461 
       
  2462     // s2 = s2 % BASE
       
          // 2^16 mod BASE == 15, so s2 is congruent to 15 * (s2 >> 16) + (s2 & 0xffff).
       
  2463     __ lsr(temp0, s2, 16);                      // temp0 = s2 >> 16
       
  2464     __ lsl(temp1, temp0, 4);                    // temp1 = 16 * temp0
       
  2465     __ sub(temp1, temp1, temp0);                // temp1 = 15 * temp0
       
  2466     __ add(s2, temp1, s2, ext::uxth);           // s2 = 15 * (s2 >> 16) + (s2 & 0xffff)
       
  2467 
       
  2468     __ subs(temp0, s2, base);
       
  2469     __ csel(s2, temp0, s2, Assembler::HS);      // final conditional subtraction of BASE
       
  2470 
       
  2471     __ b(L_combine);
       
  2472 
       
  2473     __ bind(L_nmax);
       
  2474     __ subs(len, len, nmax);        // len -= NMAX; a borrow (LO) means fewer than NMAX bytes remain
       
  2475     __ sub(count, nmax, 16);        // count = NMAX - 16; sub does not touch the flags set by subs
       
  2476     __ br(Assembler::LO, L_by16);
       
  2477 
       
  2478     __ bind(L_nmax_loop);
       
  2479 
       
  2480     __ ldp(temp0, temp1, Address(__ post(buff, 16)));  // load 16 bytes into temp0/temp1, buff += 16
       
  2481 
       
          // Add the eight bytes of temp0, from the least- to the most-significant
       
          // bits of the register, updating s2 after every byte.
       
  2482     __ add(s1, s1, temp0, ext::uxtb);           // s1 += bits 0..7
       
  2483     __ ubfx(temp2, temp0, 8, 8);                // temp2 = bits 8..15
       
  2484     __ add(s2, s2, s1);
       
  2485     __ add(s1, s1, temp2);
       
  2486     __ ubfx(temp2, temp0, 16, 8);
       
  2487     __ add(s2, s2, s1);
       
  2488     __ add(s1, s1, temp2);
       
  2489     __ ubfx(temp2, temp0, 24, 8);
       
  2490     __ add(s2, s2, s1);
       
  2491     __ add(s1, s1, temp2);
       
  2492     __ ubfx(temp2, temp0, 32, 8);
       
  2493     __ add(s2, s2, s1);
       
  2494     __ add(s1, s1, temp2);
       
  2495     __ ubfx(temp2, temp0, 40, 8);
       
  2496     __ add(s2, s2, s1);
       
  2497     __ add(s1, s1, temp2);
       
  2498     __ ubfx(temp2, temp0, 48, 8);
       
  2499     __ add(s2, s2, s1);
       
  2500     __ add(s1, s1, temp2);
       
  2501     __ add(s2, s2, s1);
       
  2502     __ add(s1, s1, temp0, Assembler::LSR, 56);  // s1 += bits 56..63
       
  2503     __ add(s2, s2, s1);
       
  2504 
       
          // Same sequence for the eight bytes held in temp1.
       
  2505     __ add(s1, s1, temp1, ext::uxtb);
       
  2506     __ ubfx(temp2, temp1, 8, 8);
       
  2507     __ add(s2, s2, s1);
       
  2508     __ add(s1, s1, temp2);
       
  2509     __ ubfx(temp2, temp1, 16, 8);
       
  2510     __ add(s2, s2, s1);
       
  2511     __ add(s1, s1, temp2);
       
  2512     __ ubfx(temp2, temp1, 24, 8);
       
  2513     __ add(s2, s2, s1);
       
  2514     __ add(s1, s1, temp2);
       
  2515     __ ubfx(temp2, temp1, 32, 8);
       
  2516     __ add(s2, s2, s1);
       
  2517     __ add(s1, s1, temp2);
       
  2518     __ ubfx(temp2, temp1, 40, 8);
       
  2519     __ add(s2, s2, s1);
       
  2520     __ add(s1, s1, temp2);
       
  2521     __ ubfx(temp2, temp1, 48, 8);
       
  2522     __ add(s2, s2, s1);
       
  2523     __ add(s1, s1, temp2);
       
  2524     __ add(s2, s2, s1);
       
  2525     __ add(s1, s1, temp1, Assembler::LSR, 56);
       
  2526     __ add(s2, s2, s1);
       
  2527 
       
  2528     __ subs(count, count, 16);
       
  2529     __ br(Assembler::HS, L_nmax_loop);          // count starts at NMAX - 16, so this runs NMAX / 16 times
       
  2530 
       
  2531     // s1 = s1 % BASE
       
          // Two folding rounds (x -> 15 * (x >> 16) + (x & 0xffff)) followed by one
       
          // conditional subtraction of BASE.
       
  2532     __ lsr(temp0, s1, 16);
       
  2533     __ lsl(temp1, temp0, 4);
       
  2534     __ sub(temp1, temp1, temp0);
       
  2535     __ add(temp1, temp1, s1, ext::uxth);
       
  2536 
       
  2537     __ lsr(temp0, temp1, 16);
       
  2538     __ lsl(s1, temp0, 4);
       
  2539     __ sub(s1, s1, temp0);
       
  2540     __ add(s1, s1, temp1, ext:: uxth);
       
  2541 
       
  2542     __ subs(temp0, s1, base);
       
  2543     __ csel(s1, temp0, s1, Assembler::HS);
       
  2544 
       
  2545     // s2 = s2 % BASE (same two folds plus conditional subtraction)
       
  2546     __ lsr(temp0, s2, 16);
       
  2547     __ lsl(temp1, temp0, 4);
       
  2548     __ sub(temp1, temp1, temp0);
       
  2549     __ add(temp1, temp1, s2, ext::uxth);
       
  2550 
       
  2551     __ lsr(temp0, temp1, 16);
       
  2552     __ lsl(s2, temp0, 4);
       
  2553     __ sub(s2, s2, temp0);
       
  2554     __ add(s2, s2, temp1, ext:: uxth);
       
  2555 
       
  2556     __ subs(temp0, s2, base);
       
  2557     __ csel(s2, temp0, s2, Assembler::HS);
       
  2558 
       
  2559     __ subs(len, len, nmax);                    // try to reserve another full NMAX chunk
       
  2560     __ sub(count, nmax, 16);                    // reset the chunk counter (flags untouched)
       
  2561     __ br(Assembler::HS, L_nmax_loop);          // no borrow: at least NMAX bytes were left
       
  2562 
       
  2563     __ bind(L_by16);
       
          // On both paths into L_by16, len == remaining - NMAX and count == NMAX - 16.
       
  2564     __ adds(len, len, count);                   // len = remaining - 16
       
  2565     __ br(Assembler::LO, L_by1);                // no carry: fewer than 16 bytes remain
       
  2566 
       
  2567     __ bind(L_by16_loop);
       
  2568 
       
          // Same 16-byte step as in L_nmax_loop.
       
  2569     __ ldp(temp0, temp1, Address(__ post(buff, 16)));
       
  2570 
       
  2571     __ add(s1, s1, temp0, ext::uxtb);
       
  2572     __ ubfx(temp2, temp0, 8, 8);
       
  2573     __ add(s2, s2, s1);
       
  2574     __ add(s1, s1, temp2);
       
  2575     __ ubfx(temp2, temp0, 16, 8);
       
  2576     __ add(s2, s2, s1);
       
  2577     __ add(s1, s1, temp2);
       
  2578     __ ubfx(temp2, temp0, 24, 8);
       
  2579     __ add(s2, s2, s1);
       
  2580     __ add(s1, s1, temp2);
       
  2581     __ ubfx(temp2, temp0, 32, 8);
       
  2582     __ add(s2, s2, s1);
       
  2583     __ add(s1, s1, temp2);
       
  2584     __ ubfx(temp2, temp0, 40, 8);
       
  2585     __ add(s2, s2, s1);
       
  2586     __ add(s1, s1, temp2);
       
  2587     __ ubfx(temp2, temp0, 48, 8);
       
  2588     __ add(s2, s2, s1);
       
  2589     __ add(s1, s1, temp2);
       
  2590     __ add(s2, s2, s1);
       
  2591     __ add(s1, s1, temp0, Assembler::LSR, 56);
       
  2592     __ add(s2, s2, s1);
       
  2593 
       
  2594     __ add(s1, s1, temp1, ext::uxtb);
       
  2595     __ ubfx(temp2, temp1, 8, 8);
       
  2596     __ add(s2, s2, s1);
       
  2597     __ add(s1, s1, temp2);
       
  2598     __ ubfx(temp2, temp1, 16, 8);
       
  2599     __ add(s2, s2, s1);
       
  2600     __ add(s1, s1, temp2);
       
  2601     __ ubfx(temp2, temp1, 24, 8);
       
  2602     __ add(s2, s2, s1);
       
  2603     __ add(s1, s1, temp2);
       
  2604     __ ubfx(temp2, temp1, 32, 8);
       
  2605     __ add(s2, s2, s1);
       
  2606     __ add(s1, s1, temp2);
       
  2607     __ ubfx(temp2, temp1, 40, 8);
       
  2608     __ add(s2, s2, s1);
       
  2609     __ add(s1, s1, temp2);
       
  2610     __ ubfx(temp2, temp1, 48, 8);
       
  2611     __ add(s2, s2, s1);
       
  2612     __ add(s1, s1, temp2);
       
  2613     __ add(s2, s2, s1);
       
  2614     __ add(s1, s1, temp1, Assembler::LSR, 56);
       
  2615     __ add(s2, s2, s1);
       
  2616 
       
  2617     __ subs(len, len, 16);
       
  2618     __ br(Assembler::HS, L_by16_loop);          // no borrow: another 16 bytes are available
       
  2619 
       
  2620     __ bind(L_by1);
       
          // Here len == remaining - 16 with 0 <= remaining <= 15.
       
  2621     __ adds(len, len, 15);                      // len = remaining - 1
       
  2622     __ br(Assembler::LO, L_do_mod);             // no carry: no bytes remain
       
  2623 
       
  2624     __ bind(L_by1_loop);
       
  2625     __ ldrb(temp0, Address(__ post(buff, 1)));  // temp0 = *buff++
       
  2626     __ add(s1, temp0, s1);
       
  2627     __ add(s2, s2, s1);
       
  2628     __ subs(len, len, 1);
       
  2629     __ br(Assembler::HS, L_by1_loop);           // loop until len underflows
       
  2630 
       
  2631     __ bind(L_do_mod);
       
  2632     // s1 = s1 % BASE (two folds plus conditional subtraction, as after L_nmax_loop)
       
  2633     __ lsr(temp0, s1, 16);
       
  2634     __ lsl(temp1, temp0, 4);
       
  2635     __ sub(temp1, temp1, temp0);
       
  2636     __ add(temp1, temp1, s1, ext::uxth);
       
  2637 
       
  2638     __ lsr(temp0, temp1, 16);
       
  2639     __ lsl(s1, temp0, 4);
       
  2640     __ sub(s1, s1, temp0);
       
  2641     __ add(s1, s1, temp1, ext:: uxth);
       
  2642 
       
  2643     __ subs(temp0, s1, base);
       
  2644     __ csel(s1, temp0, s1, Assembler::HS);
       
  2645 
       
  2646     // s2 = s2 % BASE
       
  2647     __ lsr(temp0, s2, 16);
       
  2648     __ lsl(temp1, temp0, 4);
       
  2649     __ sub(temp1, temp1, temp0);
       
  2650     __ add(temp1, temp1, s2, ext::uxth);
       
  2651 
       
  2652     __ lsr(temp0, temp1, 16);
       
  2653     __ lsl(s2, temp0, 4);
       
  2654     __ sub(s2, s2, temp0);
       
  2655     __ add(s2, s2, temp1, ext:: uxth);
       
  2656 
       
  2657     __ subs(temp0, s2, base);
       
  2658     __ csel(s2, temp0, s2, Assembler::HS);
       
  2659 
       
  2660     // Combine the low half (s1) and the high half (s2) into the result register
       
  2661     __ bind(L_combine);
       
  2662     __ orr(s1, s1, s2, Assembler::LSL, 16); // adler = s1 | (s2 << 16)
       
  2663 
       
  2664     __ ret(lr);
       
  2665 
       
  2666     return start;
       
  2667   }
       
  2668 
  2401   /**
  2669   /**
  2402    *  Arguments:
  2670    *  Arguments:
  2403    *
  2671    *
  2404    *  Input:
  2672    *  Input:
  2405    *    c_rarg0   - x address
  2673    *    c_rarg0   - x address
  3614 
  3882 
  3615     if (UseCRC32CIntrinsics) {
  3883     if (UseCRC32CIntrinsics) {
  3616       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
  3884       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
  3617     }
  3885     }
  3618 
  3886 
       
  3887     // generate Adler32 intrinsics code
       
  3888     if (UseAdler32Intrinsics) {
       
  3889       StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
       
  3890     }
       
  3891 
  3619     // Safefetch stubs.
  3892     // Safefetch stubs.
  3620     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
  3893     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
  3621                                                        &StubRoutines::_safefetch32_fault_pc,
  3894                                                        &StubRoutines::_safefetch32_fault_pc,
  3622                                                        &StubRoutines::_safefetch32_continuation_pc);
  3895                                                        &StubRoutines::_safefetch32_continuation_pc);
  3623     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
  3896     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,