src/hotspot/os_cpu/linux_x86/linux_x86_32.s
changeset 47216 71c04702a3d5
parent 46523 cbcc0ebaa044
child 59252 623722a6aeb9
equal deleted inserted replaced
47215:4ebc2e2fb97c 47216:71c04702a3d5
       
     1 # 
       
     2 # Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
       
     3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4 #
       
     5 # This code is free software; you can redistribute it and/or modify it
       
     6 # under the terms of the GNU General Public License version 2 only, as
       
     7 # published by the Free Software Foundation.
       
     8 #
       
     9 # This code is distributed in the hope that it will be useful, but WITHOUT
       
    10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12 # version 2 for more details (a copy is included in the LICENSE file that
       
    13 # accompanied this code).
       
    14 #
       
    15 # You should have received a copy of the GNU General Public License version
       
    16 # 2 along with this work; if not, write to the Free Software Foundation,
       
    17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18 #
       
    19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20 # or visit www.oracle.com if you need additional information or have any
       
    21 # questions.
       
    22 # 
       
    23 
       
    24 	
       
     25         # NOTE WELL!  The _Copy functions are called directly
       
     26 	# from server-compiler-generated code via CallLeafNoFP,
       
     27 	# which means that they *must* either not use floating
       
     28 	# point or use it in the same manner as does the server
       
     29 	# compiler.
       
     30 	
       
     31         .globl _Copy_conjoint_bytes                 # exported: byte copy
       
     32         .globl _Copy_arrayof_conjoint_bytes         # exported: byte copy without source alignment step
       
     33         .globl _Copy_conjoint_jshorts_atomic        # exported: 16-bit element copy
       
     34 	.globl _Copy_arrayof_conjoint_jshorts
       
     35         .globl _Copy_conjoint_jints_atomic          # exported: 32-bit element copy (two names, one body)
       
     36         .globl _Copy_arrayof_conjoint_jints
       
     37 	.globl _Copy_conjoint_jlongs_atomic
       
     38 	.globl _mmx_Copy_arrayof_conjoint_jshorts
       
     39 
       
     40         .globl _Atomic_cmpxchg_long                 # exported: 64-bit compare-and-exchange
       
     41         .globl _Atomic_move_long                    # exported: 64-bit load followed by 64-bit store
       
     42 
       
     43 	.text
       
    44 
       
     45         .globl  SpinPause             # exported spin-wait helper; takes no arguments
       
     46 	.type   SpinPause,@function
       
     47         .p2align 4,,15                # align entry to 16 bytes, padding by at most 15 bytes
       
     48 SpinPause:
       
     49         rep                           # rep prefix + nop is the encoding of the PAUSE spin-loop hint
       
     50         nop
       
     51         movl    $1, %eax              # always return 1 in %eax
       
     52         ret
       
    53 
       
     54         # Support for void Copy::conjoint_bytes(void* from,
       
     55         #                                       void* to,
       
     56         #                                       size_t count)
       
     57         .p2align 4,,15                # 16-byte aligned entry (at most 15 bytes of padding)
       
     58 	.type    _Copy_conjoint_bytes,@function
       
     59 _Copy_conjoint_bytes:                 # arguments are read from the stack; copies forward or backward depending on overlap
       
     60         pushl    %esi                 # saved here, restored before every ret
       
     61         movl     4+12(%esp),%ecx      # count
       
     62         pushl    %edi                 # saved here, restored before every ret
       
     63         movl     8+ 4(%esp),%esi      # from
       
     64         movl     8+ 8(%esp),%edi      # to
       
     65         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
       
     66         leal     -1(%esi,%ecx),%eax   # from + count - 1
       
     67         jbe      cb_CopyRight         # to <= from: ascending copy
       
     68         cmpl     %eax,%edi
       
     69         jbe      cb_CopyLeft          # from < to <= from + count - 1: ranges overlap, descending copy
       
     70         # copy from low to high
       
     71 cb_CopyRight:
       
     72         cmpl     $3,%ecx
       
     73         jbe      5f                   # <= 3 bytes
       
     74         # align source address at dword address boundary
       
     75         movl     %ecx,%eax            # original count
       
     76         movl     $4,%ecx
       
     77         subl     %esi,%ecx            # 4 - from
       
     78         andl     $3,%ecx              # prefix byte count
       
     79         jz       1f                   # no prefix
       
     80         subl     %ecx,%eax            # byte count less prefix
       
     81         # copy prefix
       
     82         subl     %esi,%edi            # %edi = to - from, so (%edi,%esi,1) addresses the destination
       
     83 0:      movb     (%esi),%dl
       
     84         movb     %dl,(%edi,%esi,1)
       
     85         addl     $1,%esi
       
     86         subl     $1,%ecx
       
     87         jnz      0b
       
     88         addl     %esi,%edi            # turn %edi back into an absolute destination pointer
       
     89 1:      movl     %eax,%ecx            # byte count less prefix
       
     90         shrl     $2,%ecx              # dword count
       
     91         jz       4f                   # no dwords to move
       
     92         cmpl     $32,%ecx
       
     93         jbe      2f                   # <= 32 dwords
       
     94         # > 32 dwords: copy aligned dwords with a repeated string move
       
     95         rep;     smovl
       
     96         jmp      4f
       
     97         # <= 32 dwords: copy aligned dwords with an explicit loop
       
     98 2:      subl     %esi,%edi            # %edi = to - from for the indexed store below
       
     99         .p2align 4,,15
       
    100 3:      movl     (%esi),%edx
       
    101         movl     %edx,(%edi,%esi,1)
       
    102         addl     $4,%esi
       
    103         subl     $1,%ecx
       
    104         jnz      3b
       
    105         addl     %esi,%edi            # absolute destination pointer again
       
    106 4:      movl     %eax,%ecx            # byte count less prefix
       
    107 5:      andl     $3,%ecx              # suffix byte count
       
    108         jz       7f                   # no suffix
       
    109         # copy suffix
       
    110         xorl     %eax,%eax            # %eax = byte index into the suffix
       
    111 6:      movb     (%esi,%eax,1),%dl
       
    112         movb     %dl,(%edi,%eax,1)
       
    113         addl     $1,%eax
       
    114         subl     $1,%ecx
       
    115         jnz      6b
       
    116 7:      popl     %edi
       
    117         popl     %esi
       
    118         ret
       
    119         # copy from high to low
       
    120 cb_CopyLeft:
       
    121         std                           # set direction flag: string moves step downward
       
    122         leal     -4(%edi,%ecx),%edi   # to + count - 4
       
    123         movl     %eax,%esi            # from + count - 1
       
    124         movl     %ecx,%eax            # keep the byte count for the suffix step
       
    125         subl     $3,%esi              # from + count - 4
       
    126         cmpl     $3,%ecx
       
    127         jbe      5f                   # <= 3 bytes
       
    128 1:      shrl     $2,%ecx              # dword count
       
    129         jz       4f                   # no dwords to move
       
    130         cmpl     $32,%ecx
       
    131         ja       3f                   # > 32 dwords
       
    132         # <= 32 dwords: copy dwords, aligned or not, with an explicit loop
       
    133         subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    134         .p2align 4,,15
       
    135 2:      movl     (%esi),%edx
       
    136         movl     %edx,(%edi,%esi,1)
       
    137         subl     $4,%esi
       
    138         subl     $1,%ecx
       
    139         jnz      2b
       
    140         addl     %esi,%edi            # absolute destination pointer again
       
    141         jmp      4f
       
    142         # > 32 dwords: copy dwords, aligned or not, with a repeated string move
       
    143 3:      rep;     smovl
       
    144 4:      movl     %eax,%ecx            # byte count
       
    145 5:      andl     $3,%ecx              # suffix byte count
       
    146         jz       7f                   # no suffix
       
    147         # copy suffix
       
    148         subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    149         addl     $3,%esi              # %esi = highest source byte not yet copied
       
    150 6:      movb     (%esi),%dl
       
    151         movb     %dl,(%edi,%esi,1)
       
    152 	subl     $1,%esi
       
    153         subl     $1,%ecx
       
    154         jnz      6b
       
    155 7:      cld                           # clear the direction flag set on entry to cb_CopyLeft
       
    156         popl     %edi
       
    157         popl     %esi
       
    158         ret
       
   159 
       
    160         # Support for void Copy::arrayof_conjoint_bytes(void* from,
       
    161         #                                               void* to,
       
    162         #                                               size_t count)
       
    163         #
       
    164         # Same as _Copy_conjoint_bytes, except no source alignment check.
       
    165         .p2align 4,,15                # 16-byte aligned entry (at most 15 bytes of padding)
       
    166 	.type    _Copy_arrayof_conjoint_bytes,@function
       
    167 _Copy_arrayof_conjoint_bytes:
       
    168         pushl    %esi                 # saved here, restored before every ret
       
    169         movl     4+12(%esp),%ecx      # count
       
    170         pushl    %edi                 # saved here, restored before every ret
       
    171         movl     8+ 4(%esp),%esi      # from
       
    172         movl     8+ 8(%esp),%edi      # to
       
    173         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
       
    174         leal     -1(%esi,%ecx),%eax   # from + count - 1
       
    175         jbe      acb_CopyRight        # to <= from: ascending copy
       
    176         cmpl     %eax,%edi
       
    177         jbe      acb_CopyLeft         # from < to <= from + count - 1: ranges overlap, descending copy
       
    178         # copy from low to high
       
    179 acb_CopyRight:
       
    180         cmpl     $3,%ecx
       
    181         jbe      5f                   # <= 3 bytes: only the byte loop is needed
       
    182 1:      movl     %ecx,%eax            # keep the byte count for the suffix step
       
    183         shrl     $2,%ecx              # dword count
       
    184         jz       4f                   # no dwords to move
       
    185         cmpl     $32,%ecx
       
    186         ja       3f                   # > 32 dwords
       
    187         # <= 32 dwords: copy aligned dwords with an explicit loop
       
    188         subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    189         .p2align 4,,15
       
    190 2:      movl     (%esi),%edx
       
    191         movl     %edx,(%edi,%esi,1)
       
    192         addl     $4,%esi
       
    193         subl     $1,%ecx
       
    194         jnz      2b
       
    195         addl     %esi,%edi            # absolute destination pointer again
       
    196         jmp      4f
       
    197         # > 32 dwords: copy aligned dwords with a repeated string move
       
    198 3:      rep;     smovl
       
    199 4:      movl     %eax,%ecx            # byte count
       
    200 5:      andl     $3,%ecx              # suffix byte count
       
    201         jz       7f                   # no suffix
       
    202         # copy suffix
       
    203         xorl     %eax,%eax            # %eax = byte index into the suffix
       
    204 6:      movb     (%esi,%eax,1),%dl
       
    205         movb     %dl,(%edi,%eax,1)
       
    206         addl     $1,%eax
       
    207         subl     $1,%ecx
       
    208         jnz      6b
       
    209 7:      popl     %edi
       
    210         popl     %esi
       
    211         ret
       
    212 acb_CopyLeft:                         # copy from high to low
       
    213         std                           # set direction flag: string moves step downward
       
    214         leal     -4(%edi,%ecx),%edi   # to + count - 4
       
    215         movl     %eax,%esi            # from + count - 1
       
    216         movl     %ecx,%eax            # keep the byte count for the suffix step
       
    217         subl     $3,%esi              # from + count - 4
       
    218         cmpl     $3,%ecx
       
    219         jbe      5f                   # <= 3 bytes
       
    220 1:      shrl     $2,%ecx              # dword count
       
    221         jz       4f                   # no dwords to move
       
    222         cmpl     $32,%ecx
       
    223         jbe      2f                   # <= 32 dwords
       
    224         rep;     smovl                # > 32 dwords: repeated string move, descending
       
    225         jmp      4f
       
    226 	.space 8                      # 8 bytes of padding after the unconditional jmp; not on any execution path
       
    227 2:      subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    228         .p2align 4,,15
       
    229 3:      movl     (%esi),%edx
       
    230         movl     %edx,(%edi,%esi,1)
       
    231         subl     $4,%esi
       
    232         subl     $1,%ecx
       
    233         jnz      3b
       
    234         addl     %esi,%edi            # absolute destination pointer again
       
    235 4:      movl     %eax,%ecx            # byte count
       
    236 5:      andl     $3,%ecx              # suffix byte count
       
    237         jz       7f                   # no suffix
       
    238         subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    239         addl     $3,%esi              # %esi = highest source byte not yet copied
       
    240 6:      movb     (%esi),%dl
       
    241         movb     %dl,(%edi,%esi,1)
       
    242 	subl     $1,%esi
       
    243         subl     $1,%ecx
       
    244         jnz      6b
       
    245 7:      cld                           # clear the direction flag set on entry to acb_CopyLeft
       
    246         popl     %edi
       
    247         popl     %esi
       
    248         ret
       
   249 
       
    250         # Support for void Copy::conjoint_jshorts_atomic(void* from,
       
    251         #                                                void* to,
       
    252         #                                                size_t count)
       
    253         .p2align 4,,15                # 16-byte aligned entry (at most 15 bytes of padding)
       
    254 	.type    _Copy_conjoint_jshorts_atomic,@function
       
    255 _Copy_conjoint_jshorts_atomic:        # count is a number of 16-bit elements
       
    256         pushl    %esi                 # saved here, restored before every ret
       
    257         movl     4+12(%esp),%ecx      # count
       
    258         pushl    %edi                 # saved here, restored before every ret
       
    259         movl     8+ 4(%esp),%esi      # from
       
    260         movl     8+ 8(%esp),%edi      # to
       
    261         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
       
    262         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
       
    263         jbe      cs_CopyRight         # to <= from: ascending copy
       
    264         cmpl     %eax,%edi
       
    265         jbe      cs_CopyLeft          # from < to <= last source element: overlap, descending copy
       
    266         # copy from low to high
       
    267 cs_CopyRight:
       
    268         # align source address at dword address boundary
       
    269         movl     %esi,%eax            # original from
       
    270         andl     $3,%eax              # either 0 or 2
       
    271         jz       1f                   # no prefix
       
    272         # copy prefix
       
    273         subl     $1,%ecx
       
    274         jl       5f                   # zero count
       
    275         movw     (%esi),%dx
       
    276         movw     %dx,(%edi)
       
    277         addl     %eax,%esi            # %eax == 2
       
    278         addl     %eax,%edi
       
    279 1:      movl     %ecx,%eax            # word count less prefix
       
    280         sarl     %ecx                 # dword count
       
    281         jz       4f                   # no dwords to move
       
    282         cmpl     $32,%ecx
       
    283         jbe      2f                   # <= 32 dwords
       
    284         # > 32 dwords: copy aligned dwords with a repeated string move
       
    285         rep;     smovl
       
    286         jmp      4f 
       
    287         # <= 32 dwords: copy aligned dwords with an explicit loop
       
    288 2:      subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    289         .p2align 4,,15
       
    290 3:      movl     (%esi),%edx
       
    291         movl     %edx,(%edi,%esi,1)
       
    292         addl     $4,%esi
       
    293         subl     $1,%ecx
       
    294         jnz      3b
       
    295         addl     %esi,%edi            # absolute destination pointer again
       
    296 4:      andl     $1,%eax              # suffix count
       
    297         jz       5f                   # no suffix
       
    298         # copy suffix
       
    299         movw     (%esi),%dx
       
    300         movw     %dx,(%edi)
       
    301 5:      popl     %edi
       
    302         popl     %esi
       
    303         ret
       
    304         # copy from high to low
       
    305 cs_CopyLeft:
       
    306         std                           # set direction flag: string moves step downward
       
    307         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
       
    308         movl     %eax,%esi            # from + count*2 - 2
       
    309         movl     %ecx,%eax            # keep the element count for the suffix test
       
    310         subl     $2,%esi              # from + count*2 - 4
       
    311 1:      sarl     %ecx                 # dword count
       
    312         jz       4f                   # no dwords to move
       
    313         cmpl     $32,%ecx
       
    314         ja       3f                   # > 32 dwords
       
    315         subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    316         .p2align 4,,15
       
    317 2:      movl     (%esi),%edx
       
    318         movl     %edx,(%edi,%esi,1)
       
    319         subl     $4,%esi
       
    320         subl     $1,%ecx
       
    321         jnz      2b
       
    322         addl     %esi,%edi            # absolute destination pointer again
       
    323         jmp      4f
       
    324 3:      rep;     smovl                # > 32 dwords: repeated string move, descending
       
    325 4:      andl     $1,%eax              # suffix count
       
    326         jz       5f                   # no suffix
       
    327         # copy suffix
       
    328         addl     $2,%esi              # both pointers sit one dword below the copied region;
       
    329         addl     $2,%edi              # +2 selects the single remaining 16-bit element
       
    330         movw     (%esi),%dx
       
    331         movw     %dx,(%edi)
       
    332 5:      cld                           # clear the direction flag set on entry to cs_CopyLeft
       
    333         popl     %edi
       
    334         popl     %esi
       
    335         ret
       
   336 
       
    337         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
       
    338         #                                                 void* to,
       
    339         #                                                 size_t count)
       
    340         .p2align 4,,15                # 16-byte aligned entry (at most 15 bytes of padding)
       
    341 	.type    _Copy_arrayof_conjoint_jshorts,@function
       
    342 _Copy_arrayof_conjoint_jshorts:       # like _Copy_conjoint_jshorts_atomic but without the source-alignment prefix step
       
    343         pushl    %esi                 # saved here, restored before every ret
       
    344         movl     4+12(%esp),%ecx      # count
       
    345         pushl    %edi                 # saved here, restored before every ret
       
    346         movl     8+ 4(%esp),%esi      # from
       
    347         movl     8+ 8(%esp),%edi      # to
       
    348         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
       
    349         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
       
    350         jbe      acs_CopyRight        # to <= from: ascending copy
       
    351         cmpl     %eax,%edi
       
    352         jbe      acs_CopyLeft         # from < to <= last source element: overlap, descending copy
       
    353 acs_CopyRight:                        # copy from low to high
       
    354         movl     %ecx,%eax            # word count
       
    355         sarl     %ecx                 # dword count
       
    356         jz       4f                   # no dwords to move
       
    357         cmpl     $32,%ecx
       
    358         jbe      2f                   # <= 32 dwords
       
    359         # > 32 dwords: copy aligned dwords with a repeated string move
       
    360         rep;     smovl
       
    361         jmp      4f 
       
    362         # <= 32 dwords: copy aligned dwords with an explicit loop
       
    363         .space 5                      # 5 bytes of padding after the unconditional jmp; not on any execution path
       
    364 2:      subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    365         .p2align 4,,15
       
    366 3:      movl     (%esi),%edx
       
    367         movl     %edx,(%edi,%esi,1)
       
    368         addl     $4,%esi
       
    369         subl     $1,%ecx
       
    370         jnz      3b
       
    371         addl     %esi,%edi            # absolute destination pointer again
       
    372 4:      andl     $1,%eax              # suffix count
       
    373         jz       5f                   # no suffix
       
    374         # copy suffix
       
    375         movw     (%esi),%dx
       
    376         movw     %dx,(%edi)
       
    377 5:      popl     %edi
       
    378         popl     %esi
       
    379         ret
       
    380 acs_CopyLeft:                         # copy from high to low
       
    381         std                           # set direction flag: string moves step downward
       
    382         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
       
    383         movl     %eax,%esi            # from + count*2 - 2
       
    384         movl     %ecx,%eax            # keep the element count for the suffix test
       
    385         subl     $2,%esi              # from + count*2 - 4
       
    386         sarl     %ecx                 # dword count
       
    387         jz       4f                   # no dwords to move
       
    388         cmpl     $32,%ecx
       
    389         ja       3f                   # > 32 dwords
       
    390         subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    391         .p2align 4,,15
       
    392 2:      movl     (%esi),%edx
       
    393         movl     %edx,(%edi,%esi,1)
       
    394         subl     $4,%esi
       
    395         subl     $1,%ecx
       
    396         jnz      2b
       
    397         addl     %esi,%edi            # absolute destination pointer again
       
    398         jmp      4f
       
    399 3:      rep;     smovl                # > 32 dwords: repeated string move, descending
       
    400 4:      andl     $1,%eax              # suffix count
       
    401         jz       5f                   # no suffix
       
    402         # copy suffix
       
    403         addl     $2,%esi              # both pointers sit one dword below the copied region;
       
    404         addl     $2,%edi              # +2 selects the single remaining 16-bit element
       
    405         movw     (%esi),%dx
       
    406         movw     %dx,(%edi)
       
    407 5:      cld                           # clear the direction flag set on entry to acs_CopyLeft
       
    408         popl     %edi
       
    409         popl     %esi
       
    410         ret
       
   411 
       
    412         # Support for void Copy::conjoint_jints_atomic(void* from,
       
    413         #                                              void* to,
       
    414         #                                              size_t count)
       
    415         # Equivalent to
       
    416         #   arrayof_conjoint_jints
       
    417         .p2align 4,,15                # 16-byte aligned entry (at most 15 bytes of padding)
       
    418 	.type    _Copy_conjoint_jints_atomic,@function
       
    419 	.type    _Copy_arrayof_conjoint_jints,@function
       
    420 _Copy_conjoint_jints_atomic:          # both labels name the same entry point
       
    421 _Copy_arrayof_conjoint_jints:
       
    422         pushl    %esi                 # saved here, restored before every ret
       
    423         movl     4+12(%esp),%ecx      # count
       
    424         pushl    %edi                 # saved here, restored before every ret
       
    425         movl     8+ 4(%esp),%esi      # from
       
    426         movl     8+ 8(%esp),%edi      # to
       
    427         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
       
    428         leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
       
    429         jbe      ci_CopyRight         # to <= from: ascending copy
       
    430         cmpl     %eax,%edi
       
    431         jbe      ci_CopyLeft          # from < to <= last source element: overlap, descending copy
       
    432 ci_CopyRight:                         # copy from low to high
       
    433         cmpl     $32,%ecx
       
    434         jbe      2f                   # <= 32 dwords
       
    435         rep;     smovl                # > 32 dwords: repeated string move, ascending
       
    436         popl     %edi
       
    437         popl     %esi
       
    438         ret
       
    439         .space 10                     # 10 bytes of padding after ret; not on any execution path
       
    440 2:      subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    441         jmp      4f                   # enter the loop at its test so a zero count copies nothing
       
    442         .p2align 4,,15
       
    443 3:      movl     (%esi),%edx
       
    444         movl     %edx,(%edi,%esi,1)
       
    445         addl     $4,%esi
       
    446 4:      subl     $1,%ecx
       
    447         jge      3b                   # signed test: loop while the decremented count is >= 0
       
    448         popl     %edi
       
    449         popl     %esi
       
    450         ret
       
    451 ci_CopyLeft:                          # copy from high to low
       
    452         std                           # set direction flag: string moves step downward
       
    453         leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
       
    454         cmpl     $32,%ecx
       
    455         ja       4f                   # > 32 dwords
       
    456         subl     %eax,%edi            # eax == from + count*4 - 4
       
    457         jmp      3f                   # enter the loop at its test
       
    458         .p2align 4,,15
       
    459 2:      movl     (%eax),%edx          # %eax is the descending source pointer in this loop
       
    460         movl     %edx,(%edi,%eax,1)   # %edi holds to - from here, so this addresses the destination
       
    461         subl     $4,%eax
       
    462 3:      subl     $1,%ecx
       
    463         jge      2b                   # signed test: loop while the decremented count is >= 0
       
    464         cld                           # clear the direction flag set on entry to ci_CopyLeft
       
    465         popl     %edi
       
    466         popl     %esi
       
    467         ret
       
    468 4:      movl     %eax,%esi            # from + count*4 - 4
       
    469         rep;     smovl                # > 32 dwords: repeated string move, descending
       
    470         cld                           # clear the direction flag set on entry to ci_CopyLeft
       
    471         popl     %edi
       
    472         popl     %esi
       
    473         ret
       
   474 	
       
    475         # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
       
    476         #                                               jlong* to,
       
    477         #                                               size_t count)
       
    478         #
       
    479         # 32-bit
       
    480         #
       
    481         # count treated as signed
       
    482         #
       
    483         # if (from > to) {
       
    484         #   while (--count >= 0) {
       
    485         #     *to++ = *from++;
       
    486         #   }
       
    487         # } else {
       
    488         #   while (--count >= 0) {
       
    489         #     to[count] = from[count];
       
    490         #   }
       
    491         # }
       
    492         .p2align 4,,15                # 16-byte aligned entry (at most 15 bytes of padding)
       
    493 	.type    _Copy_conjoint_jlongs_atomic,@function
       
    494 _Copy_conjoint_jlongs_atomic:         # no registers are pushed; only %eax, %ecx, %edx and the x87 stack are used
       
    495         movl     4+8(%esp),%ecx       # count
       
    496         movl     4+0(%esp),%eax       # from
       
    497         movl     4+4(%esp),%edx       # to
       
    498         cmpl     %eax,%edx
       
    499         jae      cla_CopyLeft         # to >= from: copy by descending index
       
    500 cla_CopyRight:                        # from > to: copy by ascending address
       
    501         subl     %eax,%edx            # %edx = to - from, so (%edx,%eax,1) addresses the destination
       
    502         jmp      2f                   # enter the loop at its test
       
    503         .p2align 4,,15
       
    504 1:      fildll   (%eax)               # load one 64-bit integer onto the x87 stack
       
    505         fistpll  (%edx,%eax,1)        # store it as a 64-bit integer and pop the x87 stack
       
    506         addl     $8,%eax
       
    507 2:      subl     $1,%ecx
       
    508         jge      1b                   # signed test: loop while the decremented count is >= 0
       
    509         ret
       
    510         .p2align 4,,15
       
    511 3:      fildll   (%eax,%ecx,8)        # load from[count] onto the x87 stack
       
    512         fistpll  (%edx,%ecx,8)        # store to to[count] and pop the x87 stack
       
    513 cla_CopyLeft:                         # entry is at the loop test, below the loop body
       
    514         subl     $1,%ecx
       
    515         jge      3b                   # signed test: loop while the decremented count is >= 0
       
    516         ret
       
   517 
       
    518         # MMX variant of the support for
       
    519         # void Copy::arrayof_conjoint_jshorts(void* from, void* to,
       
    520         #                                     size_t count)
       
    521         .p2align 4,,15                # 16-byte aligned entry (at most 15 bytes of padding)
       
    522 	.type    _mmx_Copy_arrayof_conjoint_jshorts,@function
       
    523 _mmx_Copy_arrayof_conjoint_jshorts:
       
    524         pushl    %esi                 # saved here, restored before every ret
       
    525         movl     4+12(%esp),%ecx      # third stack argument (count of 16-bit elements)
       
    526         pushl    %edi                 # saved here, restored before every ret
       
    527         movl     8+ 4(%esp),%esi      # first stack argument (from)
       
    528         movl     8+ 8(%esp),%edi      # second stack argument (to)
       
    529         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
       
    530         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
       
    531         jbe      mmx_acs_CopyRight    # to <= from: ascending copy
       
    532         cmpl     %eax,%edi
       
    533         jbe      mmx_acs_CopyLeft     # from < to <= last source element: overlap, descending copy
       
    534 mmx_acs_CopyRight:                    # copy from low to high
       
    535         movl     %ecx,%eax            # keep the element count for the suffix test
       
    536         sarl     %ecx                 # dword count
       
    537         je       5f                   # no dwords to move
       
    538         cmpl     $33,%ecx
       
    539         jae      3f                   # >= 33 dwords: use the MMX path
       
    540 1:      subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    541         .p2align 4,,15
       
    542 2:      movl     (%esi),%edx
       
    543         movl     %edx,(%edi,%esi,1)
       
    544         addl     $4,%esi
       
    545         subl     $1,%ecx
       
    546         jnz      2b
       
    547         addl     %esi,%edi            # absolute destination pointer again
       
    548         jmp      5f 
       
    549 3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
       
    550         subl     $1,%ecx              # account for the dword moved by the smovl above
       
    551 4:      .p2align 4,,15
       
    552         movq     0(%esi),%mm0         # each pass of this loop moves 64 bytes through %mm0-%mm2
       
    553         addl     $64,%edi             # destination advanced early; stores below use negative offsets
       
    554         movq     8(%esi),%mm1
       
    555         subl     $16,%ecx             # 16 dwords per pass
       
    556         movq     16(%esi),%mm2
       
    557         movq     %mm0,-64(%edi)
       
    558         movq     24(%esi),%mm0
       
    559         movq     %mm1,-56(%edi)
       
    560         movq     32(%esi),%mm1
       
    561         movq     %mm2,-48(%edi)
       
    562         movq     40(%esi),%mm2
       
    563         movq     %mm0,-40(%edi)
       
    564         movq     48(%esi),%mm0
       
    565         movq     %mm1,-32(%edi)
       
    566         movq     56(%esi),%mm1
       
    567         movq     %mm2,-24(%edi)
       
    568         movq     %mm0,-16(%edi)
       
    569         addl     $64,%esi
       
    570         movq     %mm1,-8(%edi)
       
    571         cmpl     $16,%ecx
       
    572         jge      4b                   # another full 16-dword pass is available
       
    573         emms                          # empty the MMX state before any further x87 use
       
    574 	testl    %ecx,%ecx
       
    575 	ja       1b                   # leftover dwords (fewer than 16) go through the dword loop
       
    576 5:      andl     $1,%eax              # suffix count
       
    577         je       7f                   # no suffix
       
    578 6:      movw     (%esi),%dx           # copy the single trailing 16-bit element
       
    579         movw     %dx,(%edi)
       
    580 7:	popl     %edi
       
    581         popl     %esi
       
    582         ret
       
    583 mmx_acs_CopyLeft:                     # copy from high to low; this path does not use MMX registers
       
    584         std                           # set direction flag: string moves step downward
       
    585         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
       
    586         movl     %eax,%esi            # from + count*2 - 2
       
    587         movl     %ecx,%eax            # keep the element count for the suffix test
       
    588         subl     $2,%esi              # from + count*2 - 4
       
    589         sarl     %ecx                 # dword count
       
    590         je       4f                   # no dwords to move
       
    591         cmpl     $32,%ecx
       
    592         ja       3f                   # > 32 dwords
       
    593         subl     %esi,%edi            # %edi = to - from for the indexed store below
       
    594         .p2align 4,,15
       
    595 2:      movl     (%esi),%edx
       
    596         movl     %edx,(%edi,%esi,1)
       
    597         subl     $4,%esi
       
    598         subl     $1,%ecx
       
    599         jnz      2b
       
    600         addl     %esi,%edi            # absolute destination pointer again
       
    601         jmp      4f
       
    602 3:      rep;     smovl                # > 32 dwords: repeated string move, descending
       
    603 4:      andl     $1,%eax              # suffix count
       
    604         je       6f                   # no suffix
       
    605         addl     $2,%esi              # both pointers sit one dword below the copied region;
       
    606         addl     $2,%edi              # +2 selects the single remaining 16-bit element
       
    607 5:      movw     (%esi),%dx
       
    608         movw     %dx,(%edi)
       
    609 6:      cld                           # clear the direction flag set on entry to mmx_acs_CopyLeft
       
    610         popl     %edi
       
    611         popl     %esi
       
    612         ret
       
   613 
       
   614 
       
    615         # Support for jlong Atomic::cmpxchg(jlong exchange_value,
       
    616         #                                   volatile jlong* dest,
       
    617         #                                   jlong compare_value)
       
    618         #
       
    619         .p2align 4,,15             # 16-byte aligned entry (at most 15 bytes of padding)
       
    620 	.type    _Atomic_cmpxchg_long,@function
       
    621 _Atomic_cmpxchg_long:              # offsets below describe the stack after both pushes
       
    622                                    #  8(%esp) : return PC
       
    623         pushl    %ebx              #  4(%esp) : old %ebx
       
    624         pushl    %edi              #  0(%esp) : old %edi
       
    625         movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
       
    626         movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
       
    627         movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
       
    628         movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
       
    629         movl     20(%esp), %edi    # 20(%esp) : dest
       
    630         lock cmpxchg8b (%edi)      # if *dest == %edx:%eax then *dest = %ecx:%ebx, else %edx:%eax = *dest
       
    631         popl     %edi              # restore the two saved registers; %edx:%eax is left as set above
       
    632         popl     %ebx
       
    633         ret
       
   634 
       
   635 
       
    636         # Support for jlong Atomic::load and Atomic::store.
       
    637         # void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
       
    638         .p2align 4,,15           # 16-byte aligned entry (at most 15 bytes of padding)
       
    639 	.type    _Atomic_move_long,@function
       
    640 _Atomic_move_long:               # uses only %eax and the x87 stack; nothing is pushed
       
    641         movl     4(%esp), %eax   # src
       
    642         fildll    (%eax)         # load the 64-bit integer at src onto the x87 stack
       
    643         movl     8(%esp), %eax   # dest
       
    644         fistpll   (%eax)         # store it to dest as a 64-bit integer and pop the x87 stack
       
    645         ret
       
   646