src/hotspot/os_cpu/linux_arm/linux_arm_32.s
changeset 49450 5d2adef239d6
parent 47216 71c04702a3d5
child 52351 0ecb4e520110
equal deleted inserted replaced
49449:ef5d5d343e2a 49450:5d2adef239d6
    47 	.globl _Copy_conjoint_jlongs_atomic
    47 	.globl _Copy_conjoint_jlongs_atomic
    48 	.type _Copy_conjoint_jlongs_atomic, %function
    48 	.type _Copy_conjoint_jlongs_atomic, %function
    49 	.globl _Copy_arrayof_conjoint_jlongs
    49 	.globl _Copy_arrayof_conjoint_jlongs
    50 	.type _Copy_arrayof_conjoint_jlongs, %function
    50 	.type _Copy_arrayof_conjoint_jlongs, %function
    51 
    51 
       
    52 from	.req	r0
       
    53 to	.req	r1
       
    54 
    52 	.text
    55 	.text
    53         .globl  SpinPause
    56         .globl  SpinPause
    54         .type SpinPause, %function
    57         .type SpinPause, %function
    55 SpinPause:
    58 SpinPause:
    56         bx      LR
    59         bx      LR
    75         stmdb    sp!, {r3 - r9, ip}
    78         stmdb    sp!, {r3 - r9, ip}
    76  
    79  
    77         cmp     r2, #0
    80         cmp     r2, #0
    78         beq     disjoint_words_finish
    81         beq     disjoint_words_finish
    79 
    82 
    80         pld     [r1, #0]
    83         pld     [from, #0]
    81         cmp     r2, #12
    84         cmp     r2, #12
    82         ble disjoint_words_small
    85         ble disjoint_words_small
    83 
    86 
    84         .align 3
    87         .align 3
    85 dw_f2b_loop_32:
    88 dw_f2b_loop_32:
    86         subs    r2, #32
    89         subs    r2, #32
    87 	blt	dw_f2b_loop_32_finish
    90 	blt	dw_f2b_loop_32_finish
    88         ldmia r1!, {r3 - r9, ip}
    91         ldmia from!, {r3 - r9, ip}
    89         nop
    92         nop
    90 	pld     [r1]
    93 	pld     [from]
    91         stmia r0!, {r3 - r9, ip}
    94         stmia to!, {r3 - r9, ip}
    92         bgt     dw_f2b_loop_32
    95         bgt     dw_f2b_loop_32
    93 dw_f2b_loop_32_finish:
    96 dw_f2b_loop_32_finish:
    94         addlts  r2, #32
    97         addlts  r2, #32
    95         beq     disjoint_words_finish
    98         beq     disjoint_words_finish
    96         cmp     r2, #16
    99         cmp     r2, #16
    97 	blt	disjoint_words_small
   100 	blt	disjoint_words_small
    98         ldmia r1!, {r3 - r6}
   101         ldmia from!, {r3 - r6}
    99         subge   r2, r2, #16
   102         subge   r2, r2, #16
   100         stmia r0!, {r3 - r6}
   103         stmia to!, {r3 - r6}
   101         beq     disjoint_words_finish
   104         beq     disjoint_words_finish
   102 disjoint_words_small:
   105 disjoint_words_small:
   103         cmp     r2, #8
   106         cmp     r2, #8
   104         ldr     r7, [r1], #4
   107         ldr     r7, [from], #4
   105         ldrge   r8, [r1], #4
   108         ldrge   r8, [from], #4
   106         ldrgt   r9, [r1], #4
   109         ldrgt   r9, [from], #4
   107         str     r7, [r0], #4
   110         str     r7, [to], #4
   108         strge   r8, [r0], #4
   111         strge   r8, [to], #4
   109         strgt   r9, [r0], #4
   112         strgt   r9, [to], #4
   110 
   113 
   111 disjoint_words_finish:
   114 disjoint_words_finish:
   112         ldmia   sp!, {r3 - r9, ip}
   115         ldmia   sp!, {r3 - r9, ip}
   113         bx      lr
   116         bx      lr
   114 
   117 
   120         stmdb    sp!, {r3 - r9, ip}
   123         stmdb    sp!, {r3 - r9, ip}
   121 
   124 
   122 	cmp	r2, #0
   125 	cmp	r2, #0
   123 	beq	conjoint_words_finish
   126 	beq	conjoint_words_finish
   124 
   127 
   125         pld     [r1, #0]
   128         pld     [from, #0]
   126         cmp     r2, #12
   129         cmp     r2, #12
   127         ble conjoint_words_small
   130         ble conjoint_words_small
   128 
   131 
   129         subs    r3, r0, r1
   132         subs    r3, to, from
   130         cmphi   r2, r3
   133         cmphi   r2, r3
   131         bhi     cw_b2f_copy
   134         bhi     cw_b2f_copy
   132         .align 3
   135         .align 3
   133 cw_f2b_loop_32:
   136 cw_f2b_loop_32:
   134         subs    r2, #32
   137         subs    r2, #32
   135 	blt	cw_f2b_loop_32_finish
   138 	blt	cw_f2b_loop_32_finish
   136         ldmia r1!, {r3 - r9, ip}
   139         ldmia from!, {r3 - r9, ip}
   137         nop
   140         nop
   138 	pld     [r1]
   141 	pld     [from]
   139         stmia r0!, {r3 - r9, ip}
   142         stmia to!, {r3 - r9, ip}
   140         bgt     cw_f2b_loop_32
   143         bgt     cw_f2b_loop_32
   141 cw_f2b_loop_32_finish:
   144 cw_f2b_loop_32_finish:
   142         addlts  r2, #32
   145         addlts  r2, #32
   143         beq     conjoint_words_finish
   146         beq     conjoint_words_finish
   144         cmp     r2, #16
   147         cmp     r2, #16
   145 	blt	conjoint_words_small
   148 	blt	conjoint_words_small
   146         ldmia r1!, {r3 - r6}
   149         ldmia from!, {r3 - r6}
   147         subge   r2, r2, #16
   150         subge   r2, r2, #16
   148         stmia r0!, {r3 - r6}
   151         stmia to!, {r3 - r6}
   149         beq     conjoint_words_finish
   152         beq     conjoint_words_finish
   150 conjoint_words_small:
   153 conjoint_words_small:
   151         cmp     r2, #8
   154         cmp     r2, #8
   152         ldr     r7, [r1], #4
   155         ldr     r7, [from], #4
   153         ldrge   r8, [r1], #4
   156         ldrge   r8, [from], #4
   154         ldrgt   r9, [r1], #4
   157         ldrgt   r9, [from], #4
   155         str     r7, [r0], #4
   158         str     r7, [to], #4
   156         strge   r8, [r0], #4
   159         strge   r8, [to], #4
   157         strgt   r9, [r0], #4
   160         strgt   r9, [to], #4
   158         b       conjoint_words_finish
   161         b       conjoint_words_finish
   159 
   162 
   160 	# Src and dest overlap, copy in a descending order
   163 	# Src and dest overlap, copy in a descending order
   161 cw_b2f_copy:
   164 cw_b2f_copy:
   162         add     r1, r2
   165         add     from, r2
   163         pld     [r1, #-32]
   166         pld     [from, #-32]
   164         add     r0, r2
   167         add     to, r2
   165         .align 3
   168         .align 3
   166 cw_b2f_loop_32:
   169 cw_b2f_loop_32:
   167         subs    r2, #32
   170         subs    r2, #32
   168 	blt	cw_b2f_loop_32_finish
   171 	blt	cw_b2f_loop_32_finish
   169         ldmdb r1!, {r3-r9,ip}
   172         ldmdb from!, {r3-r9,ip}
   170         nop
   173         nop
   171 	pld     [r1, #-32]
   174 	pld     [from, #-32]
   172         stmdb r0!, {r3-r9,ip}
   175         stmdb to!, {r3-r9,ip}
   173         bgt     cw_b2f_loop_32
   176         bgt     cw_b2f_loop_32
   174 cw_b2f_loop_32_finish:
   177 cw_b2f_loop_32_finish:
   175         addlts  r2, #32
   178         addlts  r2, #32
   176         beq     conjoint_words_finish
   179         beq     conjoint_words_finish
   177         cmp     r2, #16
   180         cmp     r2, #16
   178 	blt	cw_b2f_copy_small
   181 	blt	cw_b2f_copy_small
   179         ldmdb r1!, {r3 - r6}
   182         ldmdb from!, {r3 - r6}
   180         subge   r2, r2, #16
   183         subge   r2, r2, #16
   181         stmdb r0!, {r3 - r6}
   184         stmdb to!, {r3 - r6}
   182         beq     conjoint_words_finish
   185         beq     conjoint_words_finish
   183 cw_b2f_copy_small:
   186 cw_b2f_copy_small:
   184         cmp     r2, #8
   187         cmp     r2, #8
   185         ldr     r7, [r1, #-4]!
   188         ldr     r7, [from, #-4]!
   186         ldrge   r8, [r1, #-4]!
   189         ldrge   r8, [from, #-4]!
   187         ldrgt   r9, [r1, #-4]!
   190         ldrgt   r9, [from, #-4]!
   188         str     r7, [r0, #-4]!
   191         str     r7, [to, #-4]!
   189         strge   r8, [r0, #-4]!
   192         strge   r8, [to, #-4]!
   190         strgt   r9, [r0, #-4]!
   193         strgt   r9, [to, #-4]!
   191 
   194 
   192 conjoint_words_finish:
   195 conjoint_words_finish:
   193         ldmia   sp!, {r3 - r9, ip}
   196         ldmia   sp!, {r3 - r9, ip}
   194         bx      lr
   197         bx      lr
   195 
   198 
   200         stmdb   sp!, {r3 - r9, ip}
   203         stmdb   sp!, {r3 - r9, ip}
   201 
   204 
   202 	cmp	r2, #0
   205 	cmp	r2, #0
   203 	beq	conjoint_shorts_finish	
   206 	beq	conjoint_shorts_finish	
   204 
   207 
   205         subs    r3, r0, r1
   208         subs    r3, to, from
   206         cmphi   r2, r3
   209         cmphi   r2, r3
   207         bhi     cs_b2f_copy
   210         bhi     cs_b2f_copy
   208 
   211 
   209         pld     [r1]
   212         pld     [from]
   210 
   213 
   211         ands    r3, r0, #3
   214         ands    r3, to, #3
   212         bne     cs_f2b_dest_u
   215         bne     cs_f2b_dest_u
   213         ands    r3, r1, #3
   216         ands    r3, from, #3
   214         bne     cs_f2b_src_u
   217         bne     cs_f2b_src_u
   215 
   218 
   216 	# Aligned source address
   219 	# Aligned source address
   217         .align 3
   220         .align 3
   218 cs_f2b_loop_32:
   221 cs_f2b_loop_32:
   219         subs    r2, #32
   222         subs    r2, #32
   220 	blt	cs_f2b_loop_32_finish
   223 	blt	cs_f2b_loop_32_finish
   221         ldmia r1!, {r3 - r9, ip}
   224         ldmia from!, {r3 - r9, ip}
   222         nop
   225         nop
   223         pld     [r1]
   226         pld     [from]
   224         stmia r0!, {r3 - r9, ip}
   227         stmia to!, {r3 - r9, ip}
   225         bgt     cs_f2b_loop_32
   228         bgt     cs_f2b_loop_32
   226 cs_f2b_loop_32_finish:
   229 cs_f2b_loop_32_finish:
   227         addlts  r2, #32
   230         addlts  r2, #32
   228         beq     conjoint_shorts_finish
   231         beq     conjoint_shorts_finish
   229         movs    r6, r2, lsr #3
   232         movs    r6, r2, lsr #3
   230         .align 3
   233         .align 3
   231 cs_f2b_8_loop:
   234 cs_f2b_8_loop:
   232         beq     cs_f2b_4
   235         beq     cs_f2b_4
   233         ldmia   r1!, {r4-r5}
   236         ldmia   from!, {r4-r5}
   234         subs    r6, #1
   237         subs    r6, #1
   235         stmia   r0!, {r4-r5}
   238         stmia   to!, {r4-r5}
   236         bgt     cs_f2b_8_loop
   239         bgt     cs_f2b_8_loop
   237 
   240 
   238 cs_f2b_4:
   241 cs_f2b_4:
   239         ands    r2, #7
   242         ands    r2, #7
   240         beq     conjoint_shorts_finish
   243         beq     conjoint_shorts_finish
   241         cmp     r2, #4
   244         cmp     r2, #4
   242         ldrh    r3, [r1], #2
   245         ldrh    r3, [from], #2
   243         ldrgeh  r4, [r1], #2
   246         ldrgeh  r4, [from], #2
   244         ldrgth  r5, [r1], #2
   247         ldrgth  r5, [from], #2
   245         strh    r3, [r0], #2
   248         strh    r3, [to], #2
   246         strgeh  r4, [r0], #2
   249         strgeh  r4, [to], #2
   247         strgth  r5, [r0], #2
   250         strgth  r5, [to], #2
   248         b       conjoint_shorts_finish
   251         b       conjoint_shorts_finish
   249 
   252 
   250 	# Destination not aligned
   253 	# Destination not aligned
   251 cs_f2b_dest_u:
   254 cs_f2b_dest_u:
   252         ldrh    r3, [r1], #2
   255         ldrh    r3, [from], #2
   253         subs    r2, #2
   256         subs    r2, #2
   254         strh    r3, [r0], #2
   257         strh    r3, [to], #2
   255         beq     conjoint_shorts_finish
   258         beq     conjoint_shorts_finish
   256 
   259 
    257 	# Check to see if source is not aligned either
    260 	# Check to see if source is not aligned either
   258         ands    r3, r1, #3
   261         ands    r3, from, #3
   259         beq     cs_f2b_loop_32
   262         beq     cs_f2b_loop_32
   260 
   263 
   261 cs_f2b_src_u:
   264 cs_f2b_src_u:
   262         cmp     r2, #16
   265         cmp     r2, #16
   263         blt     cs_f2b_8_u
   266         blt     cs_f2b_8_u
   264 
   267 
    265 	# Load the first 2 bytes into r7 and make the src ptr word-aligned
    268 	# Load the first 2 bytes into r7 and make the src ptr word-aligned
   266         bic     r1, #3
   269         bic     from, #3
   267         ldr     r7, [r1], #4
   270         ldr     r7, [from], #4
   268 
   271 
   269 	# Destination aligned, source not
   272 	# Destination aligned, source not
   270         mov     r8, r2, lsr #4
   273         mov     r8, r2, lsr #4
   271         .align 3
   274         .align 3
   272 cs_f2b_16_u_loop:
   275 cs_f2b_16_u_loop:
   273         mov     r3, r7, lsr #16
   276         mov     r3, r7, lsr #16
   274         ldmia   r1!, {r4 - r7}
   277         ldmia   from!, {r4 - r7}
   275         orr     r3, r3, r4, lsl #16
   278         orr     r3, r3, r4, lsl #16
   276         mov     r4, r4, lsr #16
   279         mov     r4, r4, lsr #16
   277         pld     [r1]
   280         pld     [from]
   278         orr     r4, r4, r5, lsl #16
   281         orr     r4, r4, r5, lsl #16
   279         mov     r5, r5, lsr #16
   282         mov     r5, r5, lsr #16
   280         orr     r5, r5, r6, lsl #16
   283         orr     r5, r5, r6, lsl #16
   281         mov     r6, r6, lsr #16
   284         mov     r6, r6, lsr #16
   282         orr     r6, r6, r7, lsl #16
   285         orr     r6, r6, r7, lsl #16
   283         stmia   r0!, {r3 - r6}
   286         stmia   to!, {r3 - r6}
   284         subs    r8, #1
   287         subs    r8, #1
   285         bgt     cs_f2b_16_u_loop
   288         bgt     cs_f2b_16_u_loop
   286         ands    r2, #0xf
   289         ands    r2, #0xf
   287         beq     conjoint_shorts_finish
   290         beq     conjoint_shorts_finish
   288         sub     r1, #2
   291         sub     from, #2
   289 
   292 
   290 cs_f2b_8_u:
   293 cs_f2b_8_u:
   291         cmp     r2, #8
   294         cmp     r2, #8
   292         blt     cs_f2b_4_u
   295         blt     cs_f2b_4_u
   293         ldrh    r4, [r1], #2
   296         ldrh    r4, [from], #2
   294         ldr     r5, [r1], #4
   297         ldr     r5, [from], #4
   295         ldrh    r6, [r1], #2
   298         ldrh    r6, [from], #2
   296         orr     r4, r4, r5, lsl #16
   299         orr     r4, r4, r5, lsl #16
   297         mov     r5, r5, lsr #16
   300         mov     r5, r5, lsr #16
   298         orr     r5, r5, r6, lsl #16
   301         orr     r5, r5, r6, lsl #16
   299         subs    r2, #8
   302         subs    r2, #8
   300         stmia	r0!, {r4 - r5}
   303         stmia	to!, {r4 - r5}
   301 cs_f2b_4_u:
   304 cs_f2b_4_u:
   302         beq     conjoint_shorts_finish
   305         beq     conjoint_shorts_finish
   303         cmp     r2, #4
   306         cmp     r2, #4
   304         ldrh    r3, [r1], #2
   307         ldrh    r3, [from], #2
   305         ldrgeh  r4, [r1], #2
   308         ldrgeh  r4, [from], #2
   306         ldrgth  r5, [r1], #2
   309         ldrgth  r5, [from], #2
   307         strh    r3, [r0], #2
   310         strh    r3, [to], #2
   308         strgeh  r4, [r0], #2
   311         strgeh  r4, [to], #2
   309         strgth  r5, [r0], #2
   312         strgth  r5, [to], #2
   310         b       conjoint_shorts_finish
   313         b       conjoint_shorts_finish
   311 
   314 
   312 	# Src and dest overlap, copy in a descending order
   315 	# Src and dest overlap, copy in a descending order
   313 cs_b2f_copy:
   316 cs_b2f_copy:
   314         add     r1, r2
   317         add     from, r2
   315         pld     [r1, #-32]
   318         pld     [from, #-32]
   316         add     r0, r2
   319         add     to, r2
   317 
   320 
   318         ands    r3, r0, #3
   321         ands    r3, to, #3
   319         bne     cs_b2f_dest_u
   322         bne     cs_b2f_dest_u
   320         ands    r3, r1, #3
   323         ands    r3, from, #3
   321         bne     cs_b2f_src_u
   324         bne     cs_b2f_src_u
   322         .align 3
   325         .align 3
   323 cs_b2f_loop_32:
   326 cs_b2f_loop_32:
   324         subs    r2, #32
   327         subs    r2, #32
   325 	blt	cs_b2f_loop_32_finish
   328 	blt	cs_b2f_loop_32_finish
   326         ldmdb r1!, {r3-r9,ip}
   329         ldmdb from!, {r3-r9,ip}
   327         nop
   330         nop
   328         pld     [r1, #-32]
   331         pld     [from, #-32]
   329         stmdb r0!, {r3-r9,ip}
   332         stmdb to!, {r3-r9,ip}
   330         bgt     cs_b2f_loop_32
   333         bgt     cs_b2f_loop_32
   331 cs_b2f_loop_32_finish:
   334 cs_b2f_loop_32_finish:
   332         addlts  r2, #32
   335         addlts  r2, #32
   333         beq     conjoint_shorts_finish
   336         beq     conjoint_shorts_finish
   334         cmp     r2, #24
   337         cmp     r2, #24
   335         blt     cs_b2f_16
   338         blt     cs_b2f_16
   336         ldmdb   r1!, {r3-r8}
   339         ldmdb   from!, {r3-r8}
   337         sub     r2, #24
   340         sub     r2, #24
   338         stmdb   r0!, {r3-r8}
   341         stmdb   to!, {r3-r8}
   339         beq     conjoint_shorts_finish
   342         beq     conjoint_shorts_finish
   340 cs_b2f_16:
   343 cs_b2f_16:
   341         cmp     r2, #16
   344         cmp     r2, #16
   342         blt     cs_b2f_8
   345         blt     cs_b2f_8
   343         ldmdb   r1!, {r3-r6}
   346         ldmdb   from!, {r3-r6}
   344         sub     r2, #16
   347         sub     r2, #16
   345         stmdb   r0!, {r3-r6}
   348         stmdb   to!, {r3-r6}
   346         beq     conjoint_shorts_finish
   349         beq     conjoint_shorts_finish
   347 cs_b2f_8:
   350 cs_b2f_8:
   348         cmp     r2, #8
   351         cmp     r2, #8
   349         blt     cs_b2f_all_copy
   352         blt     cs_b2f_all_copy
   350         ldmdb   r1!, {r3-r4}
   353         ldmdb   from!, {r3-r4}
   351         sub     r2, #8
   354         sub     r2, #8
   352         stmdb   r0!, {r3-r4}
   355         stmdb   to!, {r3-r4}
   353         beq     conjoint_shorts_finish
   356         beq     conjoint_shorts_finish
   354 
   357 
   355 cs_b2f_all_copy:
   358 cs_b2f_all_copy:
   356         cmp     r2, #4
   359         cmp     r2, #4
   357         ldrh    r3, [r1, #-2]!
   360         ldrh    r3, [from, #-2]!
   358         ldrgeh  r4, [r1, #-2]!
   361         ldrgeh  r4, [from, #-2]!
   359         ldrgth  r5, [r1, #-2]!
   362         ldrgth  r5, [from, #-2]!
   360         strh    r3, [r0, #-2]!
   363         strh    r3, [to, #-2]!
   361         strgeh  r4, [r0, #-2]!
   364         strgeh  r4, [to, #-2]!
   362         strgth  r5, [r0, #-2]!
   365         strgth  r5, [to, #-2]!
   363         b       conjoint_shorts_finish
   366         b       conjoint_shorts_finish
   364 
   367 
   365 	# Destination not aligned
   368 	# Destination not aligned
   366 cs_b2f_dest_u:
   369 cs_b2f_dest_u:
   367         ldrh    r3, [r1, #-2]!
   370         ldrh    r3, [from, #-2]!
   368         strh    r3, [r0, #-2]!
   371         strh    r3, [to, #-2]!
   369         sub     r2, #2
   372         sub     r2, #2
   370 	# Check source alignment as well
   373 	# Check source alignment as well
   371         ands    r3, r1, #3
   374         ands    r3, from, #3
   372         beq     cs_b2f_loop_32
   375         beq     cs_b2f_loop_32
   373 
   376 
   374 	# Source not aligned
   377 	# Source not aligned
   375 cs_b2f_src_u:
   378 cs_b2f_src_u:
   376         bic     r1, #3
   379         bic     from, #3
   377         .align 3
   380         .align 3
   378 cs_b2f_16_loop_u:
   381 cs_b2f_16_loop_u:
   379         subs    r2, #16
   382         subs    r2, #16
   380         blt     cs_b2f_16_loop_u_finished
   383         blt     cs_b2f_16_loop_u_finished
   381         ldr     r7, [r1]
   384         ldr     r7, [from]
   382         mov     r3, r7
   385         mov     r3, r7
   383         ldmdb   r1!, {r4 - r7}
   386         ldmdb   from!, {r4 - r7}
   384         mov     r4, r4, lsr #16
   387         mov     r4, r4, lsr #16
   385         orr     r4, r4, r5, lsl #16
   388         orr     r4, r4, r5, lsl #16
   386         pld     [r1, #-32]
   389         pld     [from, #-32]
   387         mov     r5, r5, lsr #16
   390         mov     r5, r5, lsr #16
   388         orr     r5, r5, r6, lsl #16
   391         orr     r5, r5, r6, lsl #16
   389         mov     r6, r6, lsr #16
   392         mov     r6, r6, lsr #16
   390         orr     r6, r6, r7, lsl #16
   393         orr     r6, r6, r7, lsl #16
   391         mov     r7, r7, lsr #16
   394         mov     r7, r7, lsr #16
   392         orr     r7, r7, r3, lsl #16
   395         orr     r7, r7, r3, lsl #16
   393         stmdb   r0!, {r4 - r7}
   396         stmdb   to!, {r4 - r7}
   394         bgt     cs_b2f_16_loop_u
   397         bgt     cs_b2f_16_loop_u
   395         beq     conjoint_shorts_finish
   398         beq     conjoint_shorts_finish
   396 cs_b2f_16_loop_u_finished:
   399 cs_b2f_16_loop_u_finished:
   397         addlts  r2, #16
   400         addlts  r2, #16
   398         ldr     r3, [r1]
   401         ldr     r3, [from]
   399 	cmp     r2, #10
   402 	cmp     r2, #10
   400         blt     cs_b2f_2_u_loop
   403         blt     cs_b2f_2_u_loop
   401         ldmdb   r1!, {r4 - r5}
   404         ldmdb   from!, {r4 - r5}
   402         mov     r6, r4, lsr #16
   405         mov     r6, r4, lsr #16
   403         orr     r6, r6, r5, lsl #16
   406         orr     r6, r6, r5, lsl #16
   404         mov     r7, r5, lsr #16
   407         mov     r7, r5, lsr #16
   405         orr     r7, r7, r3, lsl #16
   408         orr     r7, r7, r3, lsl #16
   406         stmdb   r0!, {r6-r7}
   409         stmdb   to!, {r6-r7}
   407         sub     r2, #8
   410         sub     r2, #8
   408 	.align 3
   411 	.align 3
   409 cs_b2f_2_u_loop:
   412 cs_b2f_2_u_loop:
   410         subs    r2, #2
   413         subs    r2, #2
   411         ldrh    r3, [r1], #-2
   414         ldrh    r3, [from], #-2
   412         strh    r3, [r0, #-2]!
   415         strh    r3, [to, #-2]!
   413         bgt     cs_b2f_2_u_loop
   416         bgt     cs_b2f_2_u_loop
   414 
   417 
   415 conjoint_shorts_finish:
   418 conjoint_shorts_finish:
   416         ldmia   sp!, {r3 - r9, ip}
   419         ldmia   sp!, {r3 - r9, ip}
   417         bx      lr
   420         bx      lr
   438         stmdb    sp!, {r3 - r9, ip}
   441         stmdb    sp!, {r3 - r9, ip}
   439 
   442 
   440 	cmp	r2, #0
   443 	cmp	r2, #0
   441 	beq	conjoint_longs_finish
   444 	beq	conjoint_longs_finish
   442 
   445 
   443         pld     [r1, #0]
   446         pld     [from, #0]
   444         cmp     r2, #24
   447         cmp     r2, #24
   445         ble conjoint_longs_small
   448         ble conjoint_longs_small
   446 
   449 
   447         subs    r3, r0, r1
   450         subs    r3, to, from
   448         cmphi   r2, r3
   451         cmphi   r2, r3
   449         bhi     cl_b2f_copy
   452         bhi     cl_b2f_copy
   450         .align 3
   453         .align 3
   451 cl_f2b_loop_32:
   454 cl_f2b_loop_32:
   452         subs    r2, #32
   455         subs    r2, #32
   453 	blt	cl_f2b_loop_32_finish
   456 	blt	cl_f2b_loop_32_finish
   454         ldmia r1!, {r3 - r9, ip}
   457         ldmia from!, {r3 - r9, ip}
   455         nop
   458         nop
   456 	pld     [r1]
   459 	pld     [from]
   457         stmia r0!, {r3 - r9, ip}
   460         stmia to!, {r3 - r9, ip}
   458         bgt     cl_f2b_loop_32
   461         bgt     cl_f2b_loop_32
   459 cl_f2b_loop_32_finish:
   462 cl_f2b_loop_32_finish:
   460         addlts  r2, #32
   463         addlts  r2, #32
   461         beq     conjoint_longs_finish
   464         beq     conjoint_longs_finish
   462 conjoint_longs_small:
   465 conjoint_longs_small:
   463         cmp     r2, #16
   466         cmp     r2, #16
   464 	blt	cl_f2b_copy_8
   467 	blt	cl_f2b_copy_8
   465 	bgt	cl_f2b_copy_24
   468 	bgt	cl_f2b_copy_24
   466         ldmia 	r1!, {r3 - r6}
   469         ldmia 	from!, {r3 - r6}
   467         stmia 	r0!, {r3 - r6}
   470         stmia 	to!, {r3 - r6}
   468 	b	conjoint_longs_finish
   471 	b	conjoint_longs_finish
   469 cl_f2b_copy_8:
   472 cl_f2b_copy_8:
   470         ldmia   r1!, {r3 - r4}
   473         ldmia   from!, {r3 - r4}
   471         stmia   r0!, {r3 - r4}
   474         stmia   to!, {r3 - r4}
   472         b       conjoint_longs_finish
   475         b       conjoint_longs_finish
   473 cl_f2b_copy_24:
   476 cl_f2b_copy_24:
   474 	ldmia   r1!, {r3 - r8}
   477 	ldmia   from!, {r3 - r8}
   475         stmia   r0!, {r3 - r8}
   478         stmia   to!, {r3 - r8}
   476         b       conjoint_longs_finish
   479         b       conjoint_longs_finish
   477 
   480 
   478 	# Src and dest overlap, copy in a descending order
   481 	# Src and dest overlap, copy in a descending order
   479 cl_b2f_copy:
   482 cl_b2f_copy:
   480         add     r1, r2
   483         add     from, r2
   481         pld     [r1, #-32]
   484         pld     [from, #-32]
   482         add     r0, r2
   485         add     to, r2
   483         .align 3
   486         .align 3
   484 cl_b2f_loop_32:
   487 cl_b2f_loop_32:
   485         subs    r2, #32
   488         subs    r2, #32
   486 	blt	cl_b2f_loop_32_finish
   489 	blt	cl_b2f_loop_32_finish
   487         ldmdb 	r1!, {r3 - r9, ip}
   490         ldmdb 	from!, {r3 - r9, ip}
   488         nop
   491         nop
   489 	pld     [r1]
   492 	pld     [from]
   490         stmdb 	r0!, {r3 - r9, ip}
   493         stmdb 	to!, {r3 - r9, ip}
   491         bgt     cl_b2f_loop_32
   494         bgt     cl_b2f_loop_32
   492 cl_b2f_loop_32_finish:
   495 cl_b2f_loop_32_finish:
   493         addlts  r2, #32
   496         addlts  r2, #32
   494         beq     conjoint_longs_finish
   497         beq     conjoint_longs_finish
   495         cmp     r2, #16
   498         cmp     r2, #16
   496 	blt	cl_b2f_copy_8
   499 	blt	cl_b2f_copy_8
   497 	bgt	cl_b2f_copy_24
   500 	bgt	cl_b2f_copy_24
   498         ldmdb   r1!, {r3 - r6}
   501         ldmdb   from!, {r3 - r6}
   499         stmdb   r0!, {r3 - r6}
   502         stmdb   to!, {r3 - r6}
   500         b       conjoint_longs_finish
   503         b       conjoint_longs_finish
   501 cl_b2f_copy_8:
   504 cl_b2f_copy_8:
   502 	ldmdb   r1!, {r3 - r4}
   505 	ldmdb   from!, {r3 - r4}
   503         stmdb   r0!, {r3 - r4}
   506         stmdb   to!, {r3 - r4}
   504         b       conjoint_longs_finish
   507         b       conjoint_longs_finish
   505 cl_b2f_copy_24:
   508 cl_b2f_copy_24:
   506 	ldmdb   r1!, {r3 - r8}
   509 	ldmdb   from!, {r3 - r8}
   507         stmdb   r0!, {r3 - r8}
   510         stmdb   to!, {r3 - r8}
   508 
   511 
   509 conjoint_longs_finish:
   512 conjoint_longs_finish:
   510         ldmia   sp!, {r3 - r9, ip}
   513         ldmia   sp!, {r3 - r9, ip}
   511         bx      lr
   514         bx      lr
   512 
   515