2396 __ ret(lr); |
2396 __ ret(lr); |
2397 |
2397 |
2398 return start; |
2398 return start; |
2399 } |
2399 } |
2400 |
2400 |
|
2401 /*** |
|
2402 * Arguments: |
|
2403 * |
|
2404 * Inputs: |
|
2405 * c_rarg0 - int adler |
|
2406 * c_rarg1 - byte* buff |
|
2407 * c_rarg2 - int len |
|
2408 * |
|
2409 * Output: |
|
2410 * c_rarg0 - int adler result |
|
2411 */ |
|
2412 address generate_updateBytesAdler32() { |
|
2413 __ align(CodeEntryAlignment); |
|
2414 StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32"); |
|
2415 address start = __ pc(); |
|
2416 |
|
2417 Label L_simple_by1_loop, L_nmax, L_nmax_loop, L_by16, L_by16_loop, L_by1_loop, L_do_mod, L_combine, L_by1; |
|
2418 |
|
2419 // Aliases |
|
2420 Register adler = c_rarg0; |
|
2421 Register s1 = c_rarg0; |
|
2422 Register s2 = c_rarg3; |
|
2423 Register buff = c_rarg1; |
|
2424 Register len = c_rarg2; |
|
2425 Register nmax = r4; |
|
2426 Register base = r5; |
|
2427 Register count = r6; |
|
2428 Register temp0 = rscratch1; |
|
2429 Register temp1 = rscratch2; |
|
2430 Register temp2 = r7; |
|
2431 |
|
2432 // Max number of bytes we can process before having to take the mod |
|
2433 // 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 |
|
2434 unsigned long BASE = 0xfff1; |
|
2435 unsigned long NMAX = 0x15B0; |
|
2436 |
|
2437 __ mov(base, BASE); |
|
2438 __ mov(nmax, NMAX); |
|
2439 |
|
2440 // s1 is initialized to the lower 16 bits of adler |
|
2441 // s2 is initialized to the upper 16 bits of adler |
|
2442 __ ubfx(s2, adler, 16, 16); // s2 = ((adler >> 16) & 0xffff) |
|
2443 __ uxth(s1, adler); // s1 = (adler & 0xffff) |
|
2444 |
|
2445 // The pipelined loop needs at least 16 elements for 1 iteration |
|
2446 // It does check this, but it is more effective to skip to the cleanup loop |
|
2447 __ cmp(len, 16); |
|
2448 __ br(Assembler::HS, L_nmax); |
|
2449 __ cbz(len, L_combine); |
|
2450 |
|
2451 __ bind(L_simple_by1_loop); |
|
2452 __ ldrb(temp0, Address(__ post(buff, 1))); |
|
2453 __ add(s1, s1, temp0); |
|
2454 __ add(s2, s2, s1); |
|
2455 __ subs(len, len, 1); |
|
2456 __ br(Assembler::HI, L_simple_by1_loop); |
|
2457 |
|
2458 // s1 = s1 % BASE |
|
2459 __ subs(temp0, s1, base); |
|
2460 __ csel(s1, temp0, s1, Assembler::HS); |
|
2461 |
|
2462 // s2 = s2 % BASE |
|
2463 __ lsr(temp0, s2, 16); |
|
2464 __ lsl(temp1, temp0, 4); |
|
2465 __ sub(temp1, temp1, temp0); |
|
2466 __ add(s2, temp1, s2, ext::uxth); |
|
2467 |
|
2468 __ subs(temp0, s2, base); |
|
2469 __ csel(s2, temp0, s2, Assembler::HS); |
|
2470 |
|
2471 __ b(L_combine); |
|
2472 |
|
2473 __ bind(L_nmax); |
|
2474 __ subs(len, len, nmax); |
|
2475 __ sub(count, nmax, 16); |
|
2476 __ br(Assembler::LO, L_by16); |
|
2477 |
|
2478 __ bind(L_nmax_loop); |
|
2479 |
|
2480 __ ldp(temp0, temp1, Address(__ post(buff, 16))); |
|
2481 |
|
2482 __ add(s1, s1, temp0, ext::uxtb); |
|
2483 __ ubfx(temp2, temp0, 8, 8); |
|
2484 __ add(s2, s2, s1); |
|
2485 __ add(s1, s1, temp2); |
|
2486 __ ubfx(temp2, temp0, 16, 8); |
|
2487 __ add(s2, s2, s1); |
|
2488 __ add(s1, s1, temp2); |
|
2489 __ ubfx(temp2, temp0, 24, 8); |
|
2490 __ add(s2, s2, s1); |
|
2491 __ add(s1, s1, temp2); |
|
2492 __ ubfx(temp2, temp0, 32, 8); |
|
2493 __ add(s2, s2, s1); |
|
2494 __ add(s1, s1, temp2); |
|
2495 __ ubfx(temp2, temp0, 40, 8); |
|
2496 __ add(s2, s2, s1); |
|
2497 __ add(s1, s1, temp2); |
|
2498 __ ubfx(temp2, temp0, 48, 8); |
|
2499 __ add(s2, s2, s1); |
|
2500 __ add(s1, s1, temp2); |
|
2501 __ add(s2, s2, s1); |
|
2502 __ add(s1, s1, temp0, Assembler::LSR, 56); |
|
2503 __ add(s2, s2, s1); |
|
2504 |
|
2505 __ add(s1, s1, temp1, ext::uxtb); |
|
2506 __ ubfx(temp2, temp1, 8, 8); |
|
2507 __ add(s2, s2, s1); |
|
2508 __ add(s1, s1, temp2); |
|
2509 __ ubfx(temp2, temp1, 16, 8); |
|
2510 __ add(s2, s2, s1); |
|
2511 __ add(s1, s1, temp2); |
|
2512 __ ubfx(temp2, temp1, 24, 8); |
|
2513 __ add(s2, s2, s1); |
|
2514 __ add(s1, s1, temp2); |
|
2515 __ ubfx(temp2, temp1, 32, 8); |
|
2516 __ add(s2, s2, s1); |
|
2517 __ add(s1, s1, temp2); |
|
2518 __ ubfx(temp2, temp1, 40, 8); |
|
2519 __ add(s2, s2, s1); |
|
2520 __ add(s1, s1, temp2); |
|
2521 __ ubfx(temp2, temp1, 48, 8); |
|
2522 __ add(s2, s2, s1); |
|
2523 __ add(s1, s1, temp2); |
|
2524 __ add(s2, s2, s1); |
|
2525 __ add(s1, s1, temp1, Assembler::LSR, 56); |
|
2526 __ add(s2, s2, s1); |
|
2527 |
|
2528 __ subs(count, count, 16); |
|
2529 __ br(Assembler::HS, L_nmax_loop); |
|
2530 |
|
2531 // s1 = s1 % BASE |
|
2532 __ lsr(temp0, s1, 16); |
|
2533 __ lsl(temp1, temp0, 4); |
|
2534 __ sub(temp1, temp1, temp0); |
|
2535 __ add(temp1, temp1, s1, ext::uxth); |
|
2536 |
|
2537 __ lsr(temp0, temp1, 16); |
|
2538 __ lsl(s1, temp0, 4); |
|
2539 __ sub(s1, s1, temp0); |
|
2540 __ add(s1, s1, temp1, ext:: uxth); |
|
2541 |
|
2542 __ subs(temp0, s1, base); |
|
2543 __ csel(s1, temp0, s1, Assembler::HS); |
|
2544 |
|
2545 // s2 = s2 % BASE |
|
2546 __ lsr(temp0, s2, 16); |
|
2547 __ lsl(temp1, temp0, 4); |
|
2548 __ sub(temp1, temp1, temp0); |
|
2549 __ add(temp1, temp1, s2, ext::uxth); |
|
2550 |
|
2551 __ lsr(temp0, temp1, 16); |
|
2552 __ lsl(s2, temp0, 4); |
|
2553 __ sub(s2, s2, temp0); |
|
2554 __ add(s2, s2, temp1, ext:: uxth); |
|
2555 |
|
2556 __ subs(temp0, s2, base); |
|
2557 __ csel(s2, temp0, s2, Assembler::HS); |
|
2558 |
|
2559 __ subs(len, len, nmax); |
|
2560 __ sub(count, nmax, 16); |
|
2561 __ br(Assembler::HS, L_nmax_loop); |
|
2562 |
|
2563 __ bind(L_by16); |
|
2564 __ adds(len, len, count); |
|
2565 __ br(Assembler::LO, L_by1); |
|
2566 |
|
2567 __ bind(L_by16_loop); |
|
2568 |
|
2569 __ ldp(temp0, temp1, Address(__ post(buff, 16))); |
|
2570 |
|
2571 __ add(s1, s1, temp0, ext::uxtb); |
|
2572 __ ubfx(temp2, temp0, 8, 8); |
|
2573 __ add(s2, s2, s1); |
|
2574 __ add(s1, s1, temp2); |
|
2575 __ ubfx(temp2, temp0, 16, 8); |
|
2576 __ add(s2, s2, s1); |
|
2577 __ add(s1, s1, temp2); |
|
2578 __ ubfx(temp2, temp0, 24, 8); |
|
2579 __ add(s2, s2, s1); |
|
2580 __ add(s1, s1, temp2); |
|
2581 __ ubfx(temp2, temp0, 32, 8); |
|
2582 __ add(s2, s2, s1); |
|
2583 __ add(s1, s1, temp2); |
|
2584 __ ubfx(temp2, temp0, 40, 8); |
|
2585 __ add(s2, s2, s1); |
|
2586 __ add(s1, s1, temp2); |
|
2587 __ ubfx(temp2, temp0, 48, 8); |
|
2588 __ add(s2, s2, s1); |
|
2589 __ add(s1, s1, temp2); |
|
2590 __ add(s2, s2, s1); |
|
2591 __ add(s1, s1, temp0, Assembler::LSR, 56); |
|
2592 __ add(s2, s2, s1); |
|
2593 |
|
2594 __ add(s1, s1, temp1, ext::uxtb); |
|
2595 __ ubfx(temp2, temp1, 8, 8); |
|
2596 __ add(s2, s2, s1); |
|
2597 __ add(s1, s1, temp2); |
|
2598 __ ubfx(temp2, temp1, 16, 8); |
|
2599 __ add(s2, s2, s1); |
|
2600 __ add(s1, s1, temp2); |
|
2601 __ ubfx(temp2, temp1, 24, 8); |
|
2602 __ add(s2, s2, s1); |
|
2603 __ add(s1, s1, temp2); |
|
2604 __ ubfx(temp2, temp1, 32, 8); |
|
2605 __ add(s2, s2, s1); |
|
2606 __ add(s1, s1, temp2); |
|
2607 __ ubfx(temp2, temp1, 40, 8); |
|
2608 __ add(s2, s2, s1); |
|
2609 __ add(s1, s1, temp2); |
|
2610 __ ubfx(temp2, temp1, 48, 8); |
|
2611 __ add(s2, s2, s1); |
|
2612 __ add(s1, s1, temp2); |
|
2613 __ add(s2, s2, s1); |
|
2614 __ add(s1, s1, temp1, Assembler::LSR, 56); |
|
2615 __ add(s2, s2, s1); |
|
2616 |
|
2617 __ subs(len, len, 16); |
|
2618 __ br(Assembler::HS, L_by16_loop); |
|
2619 |
|
2620 __ bind(L_by1); |
|
2621 __ adds(len, len, 15); |
|
2622 __ br(Assembler::LO, L_do_mod); |
|
2623 |
|
2624 __ bind(L_by1_loop); |
|
2625 __ ldrb(temp0, Address(__ post(buff, 1))); |
|
2626 __ add(s1, temp0, s1); |
|
2627 __ add(s2, s2, s1); |
|
2628 __ subs(len, len, 1); |
|
2629 __ br(Assembler::HS, L_by1_loop); |
|
2630 |
|
2631 __ bind(L_do_mod); |
|
2632 // s1 = s1 % BASE |
|
2633 __ lsr(temp0, s1, 16); |
|
2634 __ lsl(temp1, temp0, 4); |
|
2635 __ sub(temp1, temp1, temp0); |
|
2636 __ add(temp1, temp1, s1, ext::uxth); |
|
2637 |
|
2638 __ lsr(temp0, temp1, 16); |
|
2639 __ lsl(s1, temp0, 4); |
|
2640 __ sub(s1, s1, temp0); |
|
2641 __ add(s1, s1, temp1, ext:: uxth); |
|
2642 |
|
2643 __ subs(temp0, s1, base); |
|
2644 __ csel(s1, temp0, s1, Assembler::HS); |
|
2645 |
|
2646 // s2 = s2 % BASE |
|
2647 __ lsr(temp0, s2, 16); |
|
2648 __ lsl(temp1, temp0, 4); |
|
2649 __ sub(temp1, temp1, temp0); |
|
2650 __ add(temp1, temp1, s2, ext::uxth); |
|
2651 |
|
2652 __ lsr(temp0, temp1, 16); |
|
2653 __ lsl(s2, temp0, 4); |
|
2654 __ sub(s2, s2, temp0); |
|
2655 __ add(s2, s2, temp1, ext:: uxth); |
|
2656 |
|
2657 __ subs(temp0, s2, base); |
|
2658 __ csel(s2, temp0, s2, Assembler::HS); |
|
2659 |
|
2660 // Combine lower bits and higher bits |
|
2661 __ bind(L_combine); |
|
2662 __ orr(s1, s1, s2, Assembler::LSL, 16); // adler = s1 | (s2 << 16) |
|
2663 |
|
2664 __ ret(lr); |
|
2665 |
|
2666 return start; |
|
2667 } |
|
2668 |
2401 /** |
2669 /** |
2402 * Arguments: |
2670 * Arguments: |
2403 * |
2671 * |
2404 * Input: |
2672 * Input: |
2405 * c_rarg0 - x address |
2673 * c_rarg0 - x address |