hotspot/src/os_cpu/linux_arm/vm/linux_arm_32.s
changeset 42664 29142a56c193
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_arm/vm/linux_arm_32.s	Mon Dec 19 12:39:01 2016 -0500
@@ -0,0 +1,513 @@
+# 
+# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+# 
+
+	
+        # NOTE WELL!  The _Copy functions are called directly
+	# from server-compiler-generated code via CallLeafNoFP,
+	# which means that they *must* either not use floating
+	# point or use it in the same manner as does the server
+	# compiler.
+	
+        # Declare every copy entry point as a global symbol of type
+        # "function".  The macro expands to exactly the .globl/.type pair
+        # that would otherwise be spelled out by hand for each symbol.
+        .macro  export_function sym
+        .globl  \sym
+        .type   \sym, %function
+        .endm
+
+        export_function _Copy_conjoint_bytes
+        export_function _Copy_arrayof_conjoint_bytes
+        export_function _Copy_disjoint_words
+        export_function _Copy_conjoint_words
+        export_function _Copy_conjoint_jshorts_atomic
+        export_function _Copy_arrayof_conjoint_jshorts
+        export_function _Copy_conjoint_jints_atomic
+        export_function _Copy_arrayof_conjoint_jints
+        export_function _Copy_conjoint_jlongs_atomic
+        export_function _Copy_arrayof_conjoint_jlongs
+
+	.text
+        # SpinPause: returns to the caller immediately.  The body is a
+        # single branch to the link register; no other register and no
+        # memory is touched, so r0 still holds whatever it held on entry.
+        .globl  SpinPause
+        .type SpinPause, %function
+SpinPause:
+        bx      LR
+
+        # Support for void Copy::conjoint_bytes(void* from,
+        #                                       void* to,
+        #                                       size_t count)
+        #
+        # No copy loop is provided for this entry point: its body is a
+        # single software interrupt.
+        # NOTE(review): 0x9f0001 looks like the OABI encoding of the ARM
+        # private "breakpoint" syscall, i.e. a deliberate trap for an entry
+        # that is not expected to be called -- confirm against the kernel
+        # headers.  Nothing follows the swi, so if it ever returned,
+        # execution would continue into _Copy_arrayof_conjoint_bytes.
+_Copy_conjoint_bytes:
+        swi     0x9f0001
+
+        # Support for void Copy::arrayof_conjoint_bytes(void* from,
+        #                                               void* to,
+        #                                               size_t count)
+        #
+        # No copy loop is provided for this entry point: its body is a
+        # single software interrupt.
+        # NOTE(review): 0x9f0001 looks like the OABI encoding of the ARM
+        # private "breakpoint" syscall, i.e. a deliberate trap for an entry
+        # that is not expected to be called -- confirm against the kernel
+        # headers.  Nothing follows the swi, so if it ever returned,
+        # execution would continue into _Copy_disjoint_words.
+_Copy_arrayof_conjoint_bytes:
+        swi     0x9f0001
+
+
+        # Support for void Copy::disjoint_words(void* from,
+        #                                       void* to,
+        #                                       size_t count)
+        #
+        # Register use, as established by the code below:
+        #   r0       - address written to (advanced as data is stored)
+        #   r1       - address read from (advanced as data is loaded)
+        #   r2       - number of bytes still to copy; the tail only
+        #              distinguishes 4, 8 and 12, so a multiple of 4 is
+        #              expected
+        #   r3-r9,ip - scratch; pushed on entry and popped on exit
+        # NOTE(review): the loads go through r1 and the stores through r0,
+        # so the first register argument is the destination even though the
+        # C++ signature quoted above lists "from" first -- presumably the
+        # callers pass (to, from, byte_count); confirm at the call sites.
+        #
+        # The copy always proceeds towards higher addresses; no overlap
+        # check is made.
+_Copy_disjoint_words:
+        stmdb    sp!, {r3 - r9, ip}
+ 
+        # Nothing to copy for a zero count.
+        cmp     r2, #0
+        beq     disjoint_words_finish
+
+        # Prefetch hint for the first data to be read.
+        pld     [r1, #0]
+        # Up to 12 bytes (three words) go straight to the word-by-word tail.
+        cmp     r2, #12
+        ble disjoint_words_small
+
+        # Main loop: 32 bytes per iteration through r3-r9 and ip.  The
+        # flags set by "subs" are not modified by the ldm/pld/stm, so the
+        # closing "bgt" still tests the decremented count.
+        .align 3
+dw_f2b_loop_32:
+        subs    r2, #32
+	blt	dw_f2b_loop_32_finish
+        ldmia r1!, {r3 - r9, ip}
+        nop
+	pld     [r1]
+        stmia r0!, {r3 - r9, ip}
+        bgt     dw_f2b_loop_32
+dw_f2b_loop_32_finish:
+        # Reached either with r2 == 0 (flags EQ from the subs) or with
+        # r2 < 0 (LT).  Only in the LT case is the 32 added back, which
+        # yields the number of bytes that are still outstanding.
+        addlts  r2, #32
+        beq     disjoint_words_finish
+        cmp     r2, #16
+	blt	disjoint_words_small
+        # At least 16 bytes left: move four words.  "subge" does not update
+        # the flags, so the "beq" below still reflects "cmp r2, #16" and
+        # leaves when exactly 16 bytes remained.
+        ldmia r1!, {r3 - r6}
+        subge   r2, r2, #16
+        stmia r0!, {r3 - r6}
+        beq     disjoint_words_finish
+disjoint_words_small:
+        # r2 is expected to be 4, 8 or 12 here: one word is always copied,
+        # a second when r2 >= 8 and a third when r2 > 8.  All loads are
+        # issued before the first store.
+        cmp     r2, #8
+        ldr     r7, [r1], #4
+        ldrge   r8, [r1], #4
+        ldrgt   r9, [r1], #4
+        str     r7, [r0], #4
+        strge   r8, [r0], #4
+        strgt   r9, [r0], #4
+
+disjoint_words_finish:
+        # Restore the scratch registers and return.
+        ldmia   sp!, {r3 - r9, ip}
+        bx      lr
+
+
+        # Support for void Copy::conjoint_words(void* from,
+        #                                       void* to,
+        #                                       size_t count)
+        #
+        # Register use, as established by the code below:
+        #   r0       - address written to
+        #   r1       - address read from
+        #   r2       - number of bytes still to copy; the tails only
+        #              distinguish 4, 8 and 12, so a multiple of 4 is
+        #              expected
+        #   r3-r9,ip - scratch; pushed on entry and popped on exit
+        # NOTE(review): the loads go through r1 and the stores through r0,
+        # so the first register argument is the destination even though the
+        # C++ signature quoted above lists "from" first -- presumably the
+        # callers pass (to, from, byte_count); confirm at the call sites.
+        #
+        # The regions may overlap: the copy runs towards higher addresses
+        # unless that would overwrite source data that has not been read
+        # yet, in which case it runs towards lower addresses.
+_Copy_conjoint_words:
+        stmdb    sp!, {r3 - r9, ip}
+
+	# Nothing to copy for a zero count.
+	cmp	r2, #0
+	beq	conjoint_words_finish
+
+        pld     [r1, #0]
+        # Up to 12 bytes go straight to the ascending tail.  That tail loads
+        # every word before storing any, so it is correct for either
+        # overlap direction and no direction check is needed first.
+        cmp     r2, #12
+        ble conjoint_words_small
+
+        # Choose the direction.  r3 = r0 - r1; if r0 is above r1 (unsigned
+        # HI) the byte count is compared with that distance, and if the
+        # count is larger the copy must run descending.  When r0 <= r1 the
+        # cmphi is skipped and the bhi fails on the flags from the subs.
+        subs    r3, r0, r1
+        cmphi   r2, r3
+        bhi     cw_b2f_copy
+        # Ascending main loop: 32 bytes per iteration.  The flags from
+        # "subs" are still intact at the closing "bgt".
+        .align 3
+cw_f2b_loop_32:
+        subs    r2, #32
+	blt	cw_f2b_loop_32_finish
+        ldmia r1!, {r3 - r9, ip}
+        nop
+	pld     [r1]
+        stmia r0!, {r3 - r9, ip}
+        bgt     cw_f2b_loop_32
+cw_f2b_loop_32_finish:
+        # EQ: everything copied.  LT: add the 32 back to obtain the
+        # outstanding byte count.
+        addlts  r2, #32
+        beq     conjoint_words_finish
+        cmp     r2, #16
+	blt	conjoint_words_small
+        # At least 16 bytes left: move four words.  "subge" leaves the
+        # flags alone, so "beq" exits when exactly 16 bytes remained.
+        ldmia r1!, {r3 - r6}
+        subge   r2, r2, #16
+        stmia r0!, {r3 - r6}
+        beq     conjoint_words_finish
+conjoint_words_small:
+        # r2 is expected to be 4, 8 or 12: one word always, a second when
+        # r2 >= 8, a third when r2 > 8; loads precede all stores.
+        cmp     r2, #8
+        ldr     r7, [r1], #4
+        ldrge   r8, [r1], #4
+        ldrgt   r9, [r1], #4
+        str     r7, [r0], #4
+        strge   r8, [r0], #4
+        strgt   r9, [r0], #4
+        b       conjoint_words_finish
+
+	# Src and dest overlap, copy in a descending order.
+	# Both pointers are first moved to one byte past the end of their
+	# regions and then walked downwards with pre-decrement accesses.
+cw_b2f_copy:
+        add     r1, r2
+        pld     [r1, #-32]
+        add     r0, r2
+        .align 3
+cw_b2f_loop_32:
+        subs    r2, #32
+	blt	cw_b2f_loop_32_finish
+        ldmdb r1!, {r3-r9,ip}
+        nop
+	pld     [r1, #-32]
+        stmdb r0!, {r3-r9,ip}
+        bgt     cw_b2f_loop_32
+cw_b2f_loop_32_finish:
+        # Same bookkeeping as in the ascending case.
+        addlts  r2, #32
+        beq     conjoint_words_finish
+        cmp     r2, #16
+	blt	cw_b2f_copy_small
+        ldmdb r1!, {r3 - r6}
+        subge   r2, r2, #16
+        stmdb r0!, {r3 - r6}
+        beq     conjoint_words_finish
+cw_b2f_copy_small:
+        # Descending tail for an expected 4, 8 or 12 bytes; loads precede
+        # all stores.
+        cmp     r2, #8
+        ldr     r7, [r1, #-4]!
+        ldrge   r8, [r1, #-4]!
+        ldrgt   r9, [r1, #-4]!
+        str     r7, [r0, #-4]!
+        strge   r8, [r0, #-4]!
+        strgt   r9, [r0, #-4]!
+
+conjoint_words_finish:
+        # Restore the scratch registers and return.
+        ldmia   sp!, {r3 - r9, ip}
+        bx      lr
+
+        # Support for void Copy::conjoint_jshorts_atomic(void* from,
+        #                                                void* to,
+        #                                                size_t count)
+        #
+        # Register use, as established by the code below:
+        #   r0       - address written to
+        #   r1       - address read from
+        #   r2       - number of bytes still to copy; every path moves data
+        #              in units of two bytes or more, so an even count is
+        #              expected
+        #   r3-r9,ip - scratch; pushed on entry and popped on exit
+        # NOTE(review): the loads go through r1 and the stores through r0,
+        # so the first register argument is the destination even though the
+        # C++ signature quoted above lists "from" first -- presumably the
+        # callers pass (to, from, byte_count); confirm at the call sites.
+        # NOTE(review): only bits 0-1 of the addresses are examined and one
+        # halfword copy is used to reach word alignment, which presumes that
+        # both addresses are at least 2-byte aligned -- confirm.
+        #
+        # Memory is accessed with halfword, word or multi-word transfers
+        # only.  When the source is not word aligned, whole words are read
+        # from aligned addresses and recombined with 16-bit shifts; the
+        # shift directions correspond to little-endian byte order.
+_Copy_conjoint_jshorts_atomic:
+        stmdb   sp!, {r3 - r9, ip}
+
+	# Nothing to copy for a zero count.
+	cmp	r2, #0
+	beq	conjoint_shorts_finish	
+
+        # Choose the direction.  r3 = r0 - r1; if r0 is above r1 (unsigned
+        # HI) and the byte count exceeds that distance, copy descending.
+        # When r0 <= r1 the cmphi is skipped and the bhi fails on the flags
+        # from the subs.
+        subs    r3, r0, r1
+        cmphi   r2, r3
+        bhi     cs_b2f_copy
+
+        pld     [r1]
+
+        # Dispatch on word alignment: destination first, then source.
+        ands    r3, r0, #3
+        bne     cs_f2b_dest_u
+        ands    r3, r1, #3
+        bne     cs_f2b_src_u
+
+	# Aligned source address (and aligned destination): 32 bytes per
+	# iteration.  The flags from "subs" are intact at the closing "bgt".
+        .align 3
+cs_f2b_loop_32:
+        subs    r2, #32
+	blt	cs_f2b_loop_32_finish
+        ldmia r1!, {r3 - r9, ip}
+        nop
+        pld     [r1]
+        stmia r0!, {r3 - r9, ip}
+        bgt     cs_f2b_loop_32
+cs_f2b_loop_32_finish:
+        # EQ: everything copied.  LT: add the 32 back to obtain the
+        # outstanding byte count.
+        addlts  r2, #32
+        beq     conjoint_shorts_finish
+        # r6 = number of whole 8-byte groups left; Z is set when there are
+        # none, which is what the first pass through the "beq" below tests.
+        movs    r6, r2, lsr #3
+        .align 3
+cs_f2b_8_loop:
+        # On later passes this point is reached only through the "bgt",
+        # so the "beq" is not taken then.
+        beq     cs_f2b_4
+        ldmia   r1!, {r4-r5}
+        subs    r6, #1
+        stmia   r0!, {r4-r5}
+        bgt     cs_f2b_8_loop
+
+cs_f2b_4:
+        # Remaining bytes are r2 & 7 (expected 0, 2, 4 or 6).  One halfword
+        # is always copied, a second when r2 >= 4, a third when r2 > 4.
+        ands    r2, #7
+        beq     conjoint_shorts_finish
+        cmp     r2, #4
+        ldrh    r3, [r1], #2
+        ldrgeh  r4, [r1], #2
+        ldrgth  r5, [r1], #2
+        strh    r3, [r0], #2
+        strgeh  r4, [r0], #2
+        strgth  r5, [r0], #2
+        b       conjoint_shorts_finish
+
+	# Destination not aligned: copy a single halfword so that r0 moves
+	# on by two bytes, and finish if that was the last one.
+cs_f2b_dest_u:
+        ldrh    r3, [r1], #2
+        subs    r2, #2
+        strh    r3, [r0], #2
+        beq     conjoint_shorts_finish
+
+	# Check to see if source is not aligned either
+        ands    r3, r1, #3
+        beq     cs_f2b_loop_32
+
+cs_f2b_src_u:
+        # Source not word aligned.  Fewer than 16 bytes: skip the block
+        # loop.
+        cmp     r2, #16
+        blt     cs_f2b_8_u
+
+	# Load 2 first bytes to r7 and make src ptr word aligned: r1 is
+	# rounded down to a word boundary and that word is preloaded, so the
+	# upper halfword of r7 holds the first two bytes still to be copied.
+        bic     r1, #3
+        ldr     r7, [r1], #4
+
+	# Destination aligned, source not.  r8 = number of 16-byte blocks.
+	# Each iteration stores four words, each built from the upper
+	# halfword of one source word and the lower halfword of the next;
+	# r7 is both the last word loaded and the carry into the next pass.
+        mov     r8, r2, lsr #4
+        .align 3
+cs_f2b_16_u_loop:
+        mov     r3, r7, lsr #16
+        ldmia   r1!, {r4 - r7}
+        orr     r3, r3, r4, lsl #16
+        mov     r4, r4, lsr #16
+        pld     [r1]
+        orr     r4, r4, r5, lsl #16
+        mov     r5, r5, lsr #16
+        orr     r5, r5, r6, lsl #16
+        mov     r6, r6, lsr #16
+        orr     r6, r6, r7, lsl #16
+        stmia   r0!, {r3 - r6}
+        subs    r8, #1
+        bgt     cs_f2b_16_u_loop
+        ands    r2, #0xf
+        beq     conjoint_shorts_finish
+        # r1 is one word past r7, whose upper halfword has not been stored
+        # yet; step back two bytes so r1 is the true read position again.
+        sub     r1, #2
+
+cs_f2b_8_u:
+        # At least 8 bytes left with r1 two bytes past a word boundary:
+        # halfword + word + halfword loads fetch 8 bytes, which are
+        # recombined into two words for the aligned store.
+        cmp     r2, #8
+        blt     cs_f2b_4_u
+        ldrh    r4, [r1], #2
+        ldr     r5, [r1], #4
+        ldrh    r6, [r1], #2
+        orr     r4, r4, r5, lsl #16
+        mov     r5, r5, lsr #16
+        orr     r5, r5, r6, lsl #16
+        subs    r2, #8
+        stmia	r0!, {r4 - r5}
+cs_f2b_4_u:
+        # On fall-through the "beq" tests the result of "subs r2, #8"; when
+        # entered through the "blt" above the flags are LT and it is not
+        # taken.  Then up to three halfwords, as in cs_f2b_4.
+        beq     conjoint_shorts_finish
+        cmp     r2, #4
+        ldrh    r3, [r1], #2
+        ldrgeh  r4, [r1], #2
+        ldrgth  r5, [r1], #2
+        strh    r3, [r0], #2
+        strgeh  r4, [r0], #2
+        strgth  r5, [r0], #2
+        b       conjoint_shorts_finish
+
+	# Src and dest overlap, copy in a descending order.
+	# Both pointers are first moved to one byte past the end of their
+	# regions and then walked downwards.
+cs_b2f_copy:
+        add     r1, r2
+        pld     [r1, #-32]
+        add     r0, r2
+
+        # Dispatch on word alignment of the end addresses.
+        ands    r3, r0, #3
+        bne     cs_b2f_dest_u
+        ands    r3, r1, #3
+        bne     cs_b2f_src_u
+        # Both word aligned: 32 bytes per iteration, descending.
+        .align 3
+cs_b2f_loop_32:
+        subs    r2, #32
+	blt	cs_b2f_loop_32_finish
+        ldmdb r1!, {r3-r9,ip}
+        nop
+        pld     [r1, #-32]
+        stmdb r0!, {r3-r9,ip}
+        bgt     cs_b2f_loop_32
+cs_b2f_loop_32_finish:
+        addlts  r2, #32
+        beq     conjoint_shorts_finish
+        # Remainder in steps of 24, 16 and 8 bytes.  The plain "sub" in
+        # each step leaves the flags from the preceding "cmp" untouched,
+        # so each "beq" exits when exactly that many bytes remained.
+        cmp     r2, #24
+        blt     cs_b2f_16
+        ldmdb   r1!, {r3-r8}
+        sub     r2, #24
+        stmdb   r0!, {r3-r8}
+        beq     conjoint_shorts_finish
+cs_b2f_16:
+        cmp     r2, #16
+        blt     cs_b2f_8
+        ldmdb   r1!, {r3-r6}
+        sub     r2, #16
+        stmdb   r0!, {r3-r6}
+        beq     conjoint_shorts_finish
+cs_b2f_8:
+        cmp     r2, #8
+        blt     cs_b2f_all_copy
+        ldmdb   r1!, {r3-r4}
+        sub     r2, #8
+        stmdb   r0!, {r3-r4}
+        beq     conjoint_shorts_finish
+
+cs_b2f_all_copy:
+        # r2 is expected to be 2, 4 or 6: one halfword always, a second
+        # when r2 >= 4, a third when r2 > 4.
+        cmp     r2, #4
+        ldrh    r3, [r1, #-2]!
+        ldrgeh  r4, [r1, #-2]!
+        ldrgth  r5, [r1, #-2]!
+        strh    r3, [r0, #-2]!
+        strgeh  r4, [r0, #-2]!
+        strgth  r5, [r0, #-2]!
+        b       conjoint_shorts_finish
+
+	# Destination not aligned: copy a single halfword so that r0 moves
+	# down by two bytes.  r2 is not re-tested for zero here; assuming
+	# 2-byte aligned addresses, the descending path is only entered with
+	# r2 > r0 - r1 >= 2, so at least two bytes are still outstanding.
+cs_b2f_dest_u:
+        ldrh    r3, [r1, #-2]!
+        strh    r3, [r0, #-2]!
+        sub     r2, #2
+	# Check source alignment as well
+        ands    r3, r1, #3
+        beq     cs_b2f_loop_32
+
+	# Source not aligned.  r1 is rounded down to a word boundary; from
+	# here on the lower halfword of the word at [r1] is the last source
+	# halfword not yet copied (the true position is r1 + 2).
+cs_b2f_src_u:
+        bic     r1, #3
+        .align 3
+cs_b2f_16_loop_u:
+        # 16 bytes per iteration.  r3 = word at r1, which supplies the top
+        # halfword of the highest output word; ldmdb then loads the four
+        # words below it and adjacent halves are merged pairwise.
+        subs    r2, #16
+        blt     cs_b2f_16_loop_u_finished
+        ldr     r7, [r1]
+        mov     r3, r7
+        ldmdb   r1!, {r4 - r7}
+        mov     r4, r4, lsr #16
+        orr     r4, r4, r5, lsl #16
+        pld     [r1, #-32]
+        mov     r5, r5, lsr #16
+        orr     r5, r5, r6, lsl #16
+        mov     r6, r6, lsr #16
+        orr     r6, r6, r7, lsl #16
+        mov     r7, r7, lsr #16
+        orr     r7, r7, r3, lsl #16
+        stmdb   r0!, {r4 - r7}
+        bgt     cs_b2f_16_loop_u
+        beq     conjoint_shorts_finish
+cs_b2f_16_loop_u_finished:
+        # Only reached through the "blt", so the flags are LT and the 16 is
+        # always added back.  With 10 or more bytes left, 8 of them are
+        # moved as two merged words first.
+        addlts  r2, #16
+        ldr     r3, [r1]
+	cmp     r2, #10
+        blt     cs_b2f_2_u_loop
+        ldmdb   r1!, {r4 - r5}
+        mov     r6, r4, lsr #16
+        orr     r6, r6, r5, lsl #16
+        mov     r7, r5, lsr #16
+        orr     r7, r7, r3, lsl #16
+        stmdb   r0!, {r6-r7}
+        sub     r2, #8
+	.align 3
+cs_b2f_2_u_loop:
+        # One halfword per iteration.  r1 trails the true read position by
+        # two bytes, hence the post-indexed load, while r0 uses a
+        # pre-indexed store.
+        subs    r2, #2
+        ldrh    r3, [r1], #-2
+        strh    r3, [r0, #-2]!
+        bgt     cs_b2f_2_u_loop
+
+conjoint_shorts_finish:
+        # Restore the scratch registers and return.
+        ldmia   sp!, {r3 - r9, ip}
+        bx      lr
+
+
+        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
+        #                                                 void* to,
+        #                                                 size_t count)
+        #
+        # No copy loop is provided for this entry point: its body is a
+        # single software interrupt.
+        # NOTE(review): 0x9f0001 looks like the OABI encoding of the ARM
+        # private "breakpoint" syscall, i.e. a deliberate trap for an entry
+        # that is not expected to be called -- confirm against the kernel
+        # headers.  Nothing follows the swi, so if it ever returned,
+        # execution would continue into _Copy_conjoint_jints_atomic.
+_Copy_arrayof_conjoint_jshorts:
+        swi     0x9f0001
+
+        # Support for void Copy::conjoint_jints_atomic(void* from,
+        #                                              void* to,
+        #                                              size_t count)
+        #
+        # _Copy_conjoint_jints_atomic and _Copy_arrayof_conjoint_jints are
+        # two labels for the same address.  No copy loop is provided: the
+        # shared body is a single software interrupt.
+        # NOTE(review): 0x9f0001 looks like the OABI encoding of the ARM
+        # private "breakpoint" syscall, i.e. a deliberate trap for entries
+        # that are not expected to be called -- confirm against the kernel
+        # headers.  Nothing follows the swi, so if it ever returned,
+        # execution would continue into _Copy_conjoint_jlongs_atomic.
+_Copy_conjoint_jints_atomic:
+_Copy_arrayof_conjoint_jints:
+        swi     0x9f0001
+	
+        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
+        #                                               jlong* to,
+        #                                               size_t count)
+        #
+        # _Copy_conjoint_jlongs_atomic and _Copy_arrayof_conjoint_jlongs are
+        # two labels for the same code.
+        #
+        # Register use, as established by the code below:
+        #   r0       - address written to
+        #   r1       - address read from
+        #   r2       - number of bytes still to copy; the tails only
+        #              distinguish 8, 16 and 24, so a multiple of 8 is
+        #              expected
+        #   r3-r9,ip - scratch; pushed on entry and popped on exit
+        # NOTE(review): the loads go through r1 and the stores through r0,
+        # so the first register argument is the destination even though the
+        # C++ signature quoted above lists "from" first -- presumably the
+        # callers pass (to, from, byte_count); confirm at the call sites.
+        #
+        # All data is moved with load-multiple / store-multiple of whole
+        # words.  The regions may overlap; the direction is chosen below.
+_Copy_conjoint_jlongs_atomic:
+_Copy_arrayof_conjoint_jlongs:
+        stmdb    sp!, {r3 - r9, ip}
+
+	# Nothing to copy for a zero count.
+	cmp	r2, #0
+	beq	conjoint_longs_finish
+
+        pld     [r1, #0]
+        # Up to 24 bytes are handled by one load-multiple followed by one
+        # store-multiple.  Everything is read before anything is written,
+        # so no direction check is needed for this case.
+        cmp     r2, #24
+        ble conjoint_longs_small
+
+        # Choose the direction.  r3 = r0 - r1; if r0 is above r1 (unsigned
+        # HI) and the byte count exceeds that distance, copy descending.
+        # When r0 <= r1 the cmphi is skipped and the bhi fails on the flags
+        # from the subs.
+        subs    r3, r0, r1
+        cmphi   r2, r3
+        bhi     cl_b2f_copy
+        # Ascending main loop: 32 bytes per iteration.  The flags from
+        # "subs" are intact at the closing "bgt".
+        .align 3
+cl_f2b_loop_32:
+        subs    r2, #32
+	blt	cl_f2b_loop_32_finish
+        ldmia r1!, {r3 - r9, ip}
+        nop
+	pld     [r1]
+        stmia r0!, {r3 - r9, ip}
+        bgt     cl_f2b_loop_32
+cl_f2b_loop_32_finish:
+        # EQ: everything copied.  LT: add the 32 back to obtain the
+        # outstanding byte count.
+        addlts  r2, #32
+        beq     conjoint_longs_finish
+conjoint_longs_small:
+        # Three-way split on the remaining count: below 16 copies two
+        # words, above 16 copies six words, exactly 16 copies four words.
+        cmp     r2, #16
+	blt	cl_f2b_copy_8
+	bgt	cl_f2b_copy_24
+        ldmia 	r1!, {r3 - r6}
+        stmia 	r0!, {r3 - r6}
+	b	conjoint_longs_finish
+cl_f2b_copy_8:
+        ldmia   r1!, {r3 - r4}
+        stmia   r0!, {r3 - r4}
+        b       conjoint_longs_finish
+cl_f2b_copy_24:
+	ldmia   r1!, {r3 - r8}
+        stmia   r0!, {r3 - r8}
+        b       conjoint_longs_finish
+
+	# Src and dest overlap, copy in a descending order.
+	# Both pointers are first moved to one byte past the end of their
+	# regions and then walked downwards.
+cl_b2f_copy:
+        add     r1, r2
+        pld     [r1, #-32]
+        add     r0, r2
+        .align 3
+cl_b2f_loop_32:
+        subs    r2, #32
+	blt	cl_b2f_loop_32_finish
+        ldmdb 	r1!, {r3 - r9, ip}
+        nop
+	# NOTE(review): the other descending loops in this file prefetch
+	# [r1, #-32]; here the hint targets [r1], the lowest address that
+	# was just loaded.  pld is only a hint, so the copied data is
+	# unaffected, but confirm whether #-32 was intended.
+	pld     [r1]
+        stmdb 	r0!, {r3 - r9, ip}
+        bgt     cl_b2f_loop_32
+cl_b2f_loop_32_finish:
+        addlts  r2, #32
+        beq     conjoint_longs_finish
+        # Same three-way split as in the ascending case, descending.
+        cmp     r2, #16
+	blt	cl_b2f_copy_8
+	bgt	cl_b2f_copy_24
+        ldmdb   r1!, {r3 - r6}
+        stmdb   r0!, {r3 - r6}
+        b       conjoint_longs_finish
+cl_b2f_copy_8:
+	ldmdb   r1!, {r3 - r4}
+        stmdb   r0!, {r3 - r4}
+        b       conjoint_longs_finish
+cl_b2f_copy_24:
+	ldmdb   r1!, {r3 - r8}
+        stmdb   r0!, {r3 - r8}
+
+conjoint_longs_finish:
+        # Restore the scratch registers and return.
+        ldmia   sp!, {r3 - r9, ip}
+        bx      lr
+
+