hotspot/src/os_cpu/linux_x86/vm/linux_x86_32.s
changeset 1 489c9b5090e2
child 5547 f4b087cbb361
equal deleted inserted replaced
0:fd16c54261b3 1:489c9b5090e2
       
     1 # 
       
     2 # Copyright 2004-2007 Sun Microsystems, Inc.  All Rights Reserved.
       
     3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4 #
       
     5 # This code is free software; you can redistribute it and/or modify it
       
     6 # under the terms of the GNU General Public License version 2 only, as
       
     7 # published by the Free Software Foundation.
       
     8 #
       
     9 # This code is distributed in the hope that it will be useful, but WITHOUT
       
    10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12 # version 2 for more details (a copy is included in the LICENSE file that
       
    13 # accompanied this code).
       
    14 #
       
    15 # You should have received a copy of the GNU General Public License version
       
    16 # 2 along with this work; if not, write to the Free Software Foundation,
       
    17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18 #
       
    19 # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    20 # CA 95054 USA or visit www.sun.com if you need additional information or
       
    21 # have any questions.
       
    22 # 
       
    23 
       
    24 	
       
    25         # NOTE WELL!  The _Copy functions are called directly
       
    26 	# from server-compiler-generated code via CallLeafNoFP,
       
    27 	# which means that they *must* either not use floating
       
    28 	# point or use it in the same manner as does the server
       
    29 	# compiler.
       
    30 	
       
    31         .globl _Copy_conjoint_bytes
       
    32         .globl _Copy_arrayof_conjoint_bytes
       
    33         .globl _Copy_conjoint_jshorts_atomic
       
    34 	.globl _Copy_arrayof_conjoint_jshorts
       
    35         .globl _Copy_conjoint_jints_atomic
       
    36         .globl _Copy_arrayof_conjoint_jints
       
    37 	.globl _Copy_conjoint_jlongs_atomic
       
    38 	.globl _mmx_Copy_arrayof_conjoint_jshorts
       
    39 
       
    40         .globl _Atomic_cmpxchg_long
       
    41 
       
    42 	.text
       
    43 
       
    44         .globl  SafeFetch32, Fetch32PFI, Fetch32Resume
       
    45         .globl  SafeFetchN
       
    46         ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
       
    47         ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
       
    48         ## routine to vet the address.  If the address is the faulting LD then
       
    49         ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
       
    50 	.type    SafeFetch32,@function
       
    51         .p2align 4,,15
       
    52 SafeFetch32:
       
    53 SafeFetchN:
       
    54          movl    0x8(%esp), %eax
       
    55          movl    0x4(%esp), %ecx
       
    56 Fetch32PFI:
       
    57          movl    (%ecx), %eax
       
    58 Fetch32Resume:
       
    59          ret
       
    60 
       
    61 
       
    62         .globl  SpinPause
       
    63 	.type   SpinPause,@function
       
    64         .p2align 4,,15
       
    65 SpinPause:
       
    66         rep
       
    67         nop
       
    68         movl    $1, %eax
       
    69         ret
       
    70 
       
    71         # Support for void Copy::conjoint_bytes(void* from,
       
    72         #                                       void* to,
       
    73         #                                       size_t count)
       
    74         .p2align 4,,15
       
    75 	.type    _Copy_conjoint_bytes,@function
       
    76 _Copy_conjoint_bytes:
       
    77         pushl    %esi
       
    78         movl     4+12(%esp),%ecx      # count
       
    79         pushl    %edi
       
    80         movl     8+ 4(%esp),%esi      # from
       
    81         movl     8+ 8(%esp),%edi      # to
       
    82         cmpl     %esi,%edi
       
    83         leal     -1(%esi,%ecx),%eax   # from + count - 1
       
    84         jbe      cb_CopyRight
       
    85         cmpl     %eax,%edi
       
    86         jbe      cb_CopyLeft
       
    87         # copy from low to high
       
    88 cb_CopyRight:
       
    89         cmpl     $3,%ecx
       
    90         jbe      5f                   # <= 3 bytes
       
    91         # align source address at dword address boundary
       
    92         movl     %ecx,%eax            # original count
       
    93         movl     $4,%ecx
       
    94         subl     %esi,%ecx
       
    95         andl     $3,%ecx              # prefix byte count
       
    96         jz       1f                   # no prefix
       
    97         subl     %ecx,%eax            # byte count less prefix
       
    98         # copy prefix
       
    99         subl     %esi,%edi
       
   100 0:      movb     (%esi),%dl
       
   101         movb     %dl,(%edi,%esi,1)
       
   102         addl     $1,%esi
       
   103         subl     $1,%ecx
       
   104         jnz      0b
       
   105         addl     %esi,%edi
       
   106 1:      movl     %eax,%ecx            # byte count less prefix
       
   107         shrl     $2,%ecx              # dword count
       
   108         jz       4f                   # no dwords to move
       
   109         cmpl     $32,%ecx
       
   110         jbe      2f                   # <= 32 dwords
       
   111         # copy aligned dwords
       
   112         rep;     smovl
       
   113         jmp      4f
       
   114         # copy aligned dwords
       
   115 2:      subl     %esi,%edi
       
   116         .p2align 4,,15
       
   117 3:      movl     (%esi),%edx
       
   118         movl     %edx,(%edi,%esi,1)
       
   119         addl     $4,%esi
       
   120         subl     $1,%ecx
       
   121         jnz      3b
       
   122         addl     %esi,%edi
       
   123 4:      movl     %eax,%ecx            # byte count less prefix
       
   124         andl     $3,%ecx              # suffix byte count
       
   125         jz       7f                   # no suffix
       
   126         # copy suffix
       
   127 5:      xorl     %eax,%eax
       
   128 6:      movb     (%esi,%eax,1),%dl
       
   129         movb     %dl,(%edi,%eax,1)
       
   130         addl     $1,%eax
       
   131         subl     $1,%ecx
       
   132         jnz      6b
       
   133 7:      popl     %edi
       
   134         popl     %esi
       
   135         ret
       
   136         # copy from high to low
       
   137 cb_CopyLeft:
       
   138         std
       
   139         leal     -4(%edi,%ecx),%edi   # to + count - 4
       
   140         movl     %eax,%esi            # from + count - 1
       
   141         movl     %ecx,%eax
       
   142         subl     $3,%esi              # from + count - 4
       
   143         cmpl     $3,%ecx
       
   144         jbe      5f                   # <= 3 bytes
       
   145 1:      shrl     $2,%ecx              # dword count
       
   146         jz       4f                   # no dwords to move
       
   147         cmpl     $32,%ecx
       
   148         ja       3f                   # > 32 dwords
       
   149         # copy dwords, aligned or not
       
   150         subl     %esi,%edi
       
   151         .p2align 4,,15
       
   152 2:      movl     (%esi),%edx
       
   153         movl     %edx,(%edi,%esi,1)
       
   154         subl     $4,%esi
       
   155         subl     $1,%ecx
       
   156         jnz      2b
       
   157         addl     %esi,%edi
       
   158         jmp      4f
       
   159         # copy dwords, aligned or not
       
   160 3:      rep;     smovl
       
   161 4:      movl     %eax,%ecx            # byte count
       
   162         andl     $3,%ecx              # suffix byte count
       
   163         jz       7f                   # no suffix
       
   164         # copy suffix
       
   165 5:      subl     %esi,%edi
       
   166         addl     $3,%esi
       
   167 6:      movb     (%esi),%dl
       
   168         movb     %dl,(%edi,%esi,1)
       
   169 	subl     $1,%esi
       
   170         subl     $1,%ecx
       
   171         jnz      6b
       
   172 7:      cld
       
   173         popl     %edi
       
   174         popl     %esi
       
   175         ret
       
   176 
       
   177         # Support for void Copy::arrayof_conjoint_bytes(void* from,
       
   178         #                                               void* to,
       
   179         #                                               size_t count)
       
   180         #
       
   181         # Same as _Copy_conjoint_bytes, except no source alignment check.
       
   182         .p2align 4,,15
       
   183 	.type    _Copy_arrayof_conjoint_bytes,@function
       
   184 _Copy_arrayof_conjoint_bytes:
       
   185         pushl    %esi
       
   186         movl     4+12(%esp),%ecx      # count
       
   187         pushl    %edi
       
   188         movl     8+ 4(%esp),%esi      # from
       
   189         movl     8+ 8(%esp),%edi      # to
       
   190         cmpl     %esi,%edi
       
   191         leal     -1(%esi,%ecx),%eax   # from + count - 1
       
   192         jbe      acb_CopyRight
       
   193         cmpl     %eax,%edi
       
   194         jbe      acb_CopyLeft 
       
   195         # copy from low to high
       
   196 acb_CopyRight:
       
   197         cmpl     $3,%ecx
       
   198         jbe      5f
       
   199 1:      movl     %ecx,%eax
       
   200         shrl     $2,%ecx
       
   201         jz       4f
       
   202         cmpl     $32,%ecx
       
   203         ja       3f
       
   204         # copy aligned dwords
       
   205         subl     %esi,%edi
       
   206         .p2align 4,,15
       
   207 2:      movl     (%esi),%edx
       
   208         movl     %edx,(%edi,%esi,1)
       
   209         addl     $4,%esi
       
   210         subl     $1,%ecx
       
   211         jnz      2b
       
   212         addl     %esi,%edi
       
   213         jmp      4f
       
   214         # copy aligned dwords
       
   215 3:      rep;     smovl
       
   216 4:      movl     %eax,%ecx
       
   217         andl     $3,%ecx
       
   218         jz       7f
       
   219         # copy suffix
       
   220 5:      xorl     %eax,%eax
       
   221 6:      movb     (%esi,%eax,1),%dl
       
   222         movb     %dl,(%edi,%eax,1)
       
   223         addl     $1,%eax
       
   224         subl     $1,%ecx
       
   225         jnz      6b
       
   226 7:      popl     %edi
       
   227         popl     %esi
       
   228         ret
       
   229 acb_CopyLeft:
       
   230         std
       
   231         leal     -4(%edi,%ecx),%edi   # to + count - 4
       
   232         movl     %eax,%esi            # from + count - 1
       
   233         movl     %ecx,%eax
       
   234         subl     $3,%esi              # from + count - 4
       
   235         cmpl     $3,%ecx
       
   236         jbe      5f
       
   237 1:      shrl     $2,%ecx
       
   238         jz       4f
       
   239         cmpl     $32,%ecx
       
   240         jbe      2f                   # <= 32 dwords
       
   241         rep;     smovl
       
   242         jmp      4f
       
   243 	.=.+8
       
   244 2:      subl     %esi,%edi
       
   245         .p2align 4,,15
       
   246 3:      movl     (%esi),%edx
       
   247         movl     %edx,(%edi,%esi,1)
       
   248         subl     $4,%esi
       
   249         subl     $1,%ecx
       
   250         jnz      3b
       
   251         addl     %esi,%edi
       
   252 4:      movl     %eax,%ecx
       
   253         andl     $3,%ecx
       
   254         jz       7f
       
   255 5:      subl     %esi,%edi
       
   256         addl     $3,%esi
       
   257 6:      movb     (%esi),%dl
       
   258         movb     %dl,(%edi,%esi,1)
       
   259 	subl     $1,%esi
       
   260         subl     $1,%ecx
       
   261         jnz      6b
       
   262 7:      cld
       
   263         popl     %edi
       
   264         popl     %esi
       
   265         ret
       
   266 
       
   267         # Support for void Copy::conjoint_jshorts_atomic(void* from,
       
   268         #                                                void* to,
       
   269         #                                                size_t count)
       
   270         .p2align 4,,15
       
   271 	.type    _Copy_conjoint_jshorts_atomic,@function
       
   272 _Copy_conjoint_jshorts_atomic:
       
   273         pushl    %esi
       
   274         movl     4+12(%esp),%ecx      # count
       
   275         pushl    %edi
       
   276         movl     8+ 4(%esp),%esi      # from
       
   277         movl     8+ 8(%esp),%edi      # to
       
   278         cmpl     %esi,%edi
       
   279         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
       
   280         jbe      cs_CopyRight
       
   281         cmpl     %eax,%edi
       
   282         jbe      cs_CopyLeft 
       
   283         # copy from low to high
       
   284 cs_CopyRight:
       
   285         # align source address at dword address boundary
       
   286         movl     %esi,%eax            # original from
       
   287         andl     $3,%eax              # either 0 or 2
       
   288         jz       1f                   # no prefix
       
   289         # copy prefix
       
   290         movw     (%esi),%dx
       
   291         movw     %dx,(%edi)
       
   292         addl     %eax,%esi            # %eax == 2
       
   293         addl     %eax,%edi
       
   294         subl     $1,%ecx
       
   295 1:      movl     %ecx,%eax            # word count less prefix
       
   296         sarl     %ecx                 # dword count
       
   297         jz       4f                   # no dwords to move
       
   298         cmpl     $32,%ecx
       
   299         jbe      2f                   # <= 32 dwords
       
   300         # copy aligned dwords
       
   301         rep;     smovl
       
   302         jmp      4f 
       
   303         # copy aligned dwords
       
   304 2:      subl     %esi,%edi
       
   305         .p2align 4,,15
       
   306 3:      movl     (%esi),%edx
       
   307         movl     %edx,(%edi,%esi,1)
       
   308         addl     $4,%esi
       
   309         subl     $1,%ecx
       
   310         jnz      3b
       
   311         addl     %esi,%edi
       
   312 4:      andl     $1,%eax              # suffix count
       
   313         jz       5f                   # no suffix
       
   314         # copy suffix
       
   315         movw     (%esi),%dx
       
   316         movw     %dx,(%edi)
       
   317 5:      popl     %edi
       
   318         popl     %esi
       
   319         ret
       
   320         # copy from high to low
       
   321 cs_CopyLeft:
       
   322         std
       
   323         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
       
   324         movl     %eax,%esi            # from + count*2 - 2
       
   325         movl     %ecx,%eax
       
   326         subl     $2,%esi              # from + count*2 - 4
       
   327 1:      sarl     %ecx                 # dword count
       
   328         jz       4f                   # no dwords to move
       
   329         cmpl     $32,%ecx
       
   330         ja       3f                   # > 32 dwords
       
   331         subl     %esi,%edi
       
   332         .p2align 4,,15
       
   333 2:      movl     (%esi),%edx
       
   334         movl     %edx,(%edi,%esi,1)
       
   335         subl     $4,%esi
       
   336         subl     $1,%ecx
       
   337         jnz      2b
       
   338         addl     %esi,%edi
       
   339         jmp      4f
       
   340 3:      rep;     smovl
       
   341 4:      andl     $1,%eax              # suffix count
       
   342         jz       5f                   # no suffix
       
   343         # copy suffix
       
   344         addl     $2,%esi
       
   345         addl     $2,%edi
       
   346         movw     (%esi),%dx
       
   347         movw     %dx,(%edi)
       
   348 5:      cld
       
   349         popl     %edi
       
   350         popl     %esi
       
   351         ret
       
   352 
       
   353         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
       
   354         #                                                 void* to,
       
   355         #                                                 size_t count)
       
   356         .p2align 4,,15
       
   357 	.type    _Copy_arrayof_conjoint_jshorts,@function
       
   358 _Copy_arrayof_conjoint_jshorts:
       
   359         pushl    %esi
       
   360         movl     4+12(%esp),%ecx      # count
       
   361         pushl    %edi
       
   362         movl     8+ 4(%esp),%esi      # from
       
   363         movl     8+ 8(%esp),%edi      # to
       
   364         cmpl     %esi,%edi
       
   365         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
       
   366         jbe      acs_CopyRight
       
   367         cmpl     %eax,%edi
       
   368         jbe      acs_CopyLeft 
       
   369 acs_CopyRight:
       
   370         movl     %ecx,%eax            # word count
       
   371         sarl     %ecx                 # dword count
       
   372         jz       4f                   # no dwords to move
       
   373         cmpl     $32,%ecx
       
   374         jbe      2f                   # <= 32 dwords
       
   375         # copy aligned dwords
       
   376         rep;     smovl
       
   377         jmp      4f 
       
   378         # copy aligned dwords
       
   379         .=.+5
       
   380 2:      subl     %esi,%edi 
       
   381         .p2align 4,,15
       
   382 3:      movl     (%esi),%edx
       
   383         movl     %edx,(%edi,%esi,1)
       
   384         addl     $4,%esi
       
   385         subl     $1,%ecx
       
   386         jnz      3b
       
   387         addl     %esi,%edi
       
   388 4:      andl     $1,%eax              # suffix count
       
   389         jz       5f                   # no suffix
       
   390         # copy suffix
       
   391         movw     (%esi),%dx
       
   392         movw     %dx,(%edi)
       
   393 5:      popl     %edi
       
   394         popl     %esi
       
   395         ret
       
   396 acs_CopyLeft:
       
   397         std
       
   398         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
       
   399         movl     %eax,%esi            # from + count*2 - 2
       
   400         movl     %ecx,%eax
       
   401         subl     $2,%esi              # from + count*2 - 4
       
   402         sarl     %ecx                 # dword count
       
   403         jz       4f                   # no dwords to move
       
   404         cmpl     $32,%ecx
       
   405         ja       3f                   # > 32 dwords
       
   406         subl     %esi,%edi
       
   407         .p2align 4,,15
       
   408 2:      movl     (%esi),%edx
       
   409         movl     %edx,(%edi,%esi,1)
       
   410         subl     $4,%esi
       
   411         subl     $1,%ecx
       
   412         jnz      2b
       
   413         addl     %esi,%edi
       
   414         jmp      4f
       
   415 3:      rep;     smovl
       
   416 4:      andl     $1,%eax              # suffix count
       
   417         jz       5f                   # no suffix
       
   418         # copy suffix
       
   419         addl     $2,%esi
       
   420         addl     $2,%edi
       
   421         movw     (%esi),%dx
       
   422         movw     %dx,(%edi)
       
   423 5:      cld
       
   424         popl     %edi
       
   425         popl     %esi
       
   426         ret
       
   427 
       
   428         # Support for void Copy::conjoint_jints_atomic(void* from,
       
   429         #                                              void* to,
       
   430         #                                              size_t count)
       
   431         # Equivalent to
       
   432         #   arrayof_conjoint_jints
       
   433         .p2align 4,,15
       
   434 	.type    _Copy_conjoint_jints_atomic,@function
       
   435 	.type    _Copy_arrayof_conjoint_jints,@function
       
   436 _Copy_conjoint_jints_atomic:
       
   437 _Copy_arrayof_conjoint_jints:
       
   438         pushl    %esi
       
   439         movl     4+12(%esp),%ecx      # count
       
   440         pushl    %edi
       
   441         movl     8+ 4(%esp),%esi      # from
       
   442         movl     8+ 8(%esp),%edi      # to
       
   443         cmpl     %esi,%edi
       
   444         leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
       
   445         jbe      ci_CopyRight
       
   446         cmpl     %eax,%edi
       
   447         jbe      ci_CopyLeft 
       
   448 ci_CopyRight:
       
   449         cmpl     $32,%ecx
       
   450         jbe      2f                   # <= 32 dwords
       
   451         rep;     smovl
       
   452         popl     %edi
       
   453         popl     %esi
       
   454         ret
       
   455         .=.+10
       
   456 2:      subl     %esi,%edi
       
   457         .p2align 4,,15
       
   458 3:      movl     (%esi),%edx
       
   459         movl     %edx,(%edi,%esi,1)
       
   460         addl     $4,%esi
       
   461         subl     $1,%ecx
       
   462         jnz      3b
       
   463         popl     %edi
       
   464         popl     %esi
       
   465         ret
       
   466 ci_CopyLeft:
       
   467         std
       
   468         leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
       
   469         cmpl     $32,%ecx
       
   470         ja       3f                   # > 32 dwords
       
   471         subl     %eax,%edi            # eax == from + count*4 - 4
       
   472         .p2align 4,,15
       
   473 2:      movl     (%eax),%edx
       
   474         movl     %edx,(%edi,%eax,1)
       
   475         subl     $4,%eax
       
   476         subl     $1,%ecx
       
   477         jnz      2b
       
   478         cld
       
   479         popl     %edi
       
   480         popl     %esi
       
   481         ret
       
   482 3:      movl     %eax,%esi            # from + count*4 - 4
       
   483         rep;     smovl
       
   484         cld
       
   485         popl     %edi
       
   486         popl     %esi
       
   487         ret
       
   488 	
       
   489         # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
       
   490         #                                               jlong* to,
       
   491         #                                               size_t count)
       
   492         #
       
   493         # 32-bit
       
   494         #
       
   495         # count treated as signed
       
   496         #
       
   497         # if (from > to) {
       
   498         #   while (--count >= 0) {
       
   499         #     *to++ = *from++;
       
   500         #   }
       
   501         # } else {
       
   502         #   while (--count >= 0) {
       
   503         #     to[count] = from[count];
       
   504         #   }
       
   505         # }
       
   506         .p2align 4,,15
       
   507 	.type    _Copy_conjoint_jlongs_atomic,@function
       
   508 _Copy_conjoint_jlongs_atomic:
       
   509         movl     4+8(%esp),%ecx       # count
       
   510         movl     4+0(%esp),%eax       # from
       
   511         movl     4+4(%esp),%edx       # to
       
   512         cmpl     %eax,%edx
       
   513         jae      cla_CopyLeft
       
   514 cla_CopyRight:
       
   515         subl     %eax,%edx
       
   516         jmp      2f
       
   517         .p2align 4,,15
       
   518 1:      fildll   (%eax)
       
   519         fistpll  (%edx,%eax,1)
       
   520         addl     $8,%eax
       
   521 2:      subl     $1,%ecx
       
   522         jge      1b
       
   523         ret
       
   524         .p2align 4,,15
       
   525 3:      fildll   (%eax,%ecx,8)
       
   526         fistpll  (%edx,%ecx,8)
       
   527 cla_CopyLeft:
       
   528         subl     $1,%ecx
       
   529         jge      3b
       
   530         ret
       
   531 
       
   532         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
       
   533         #                                                 void* to,
       
   534         #                                                 size_t count)
       
   535         .p2align 4,,15
       
   536 	.type    _mmx_Copy_arrayof_conjoint_jshorts,@function
       
   537 _mmx_Copy_arrayof_conjoint_jshorts:
       
   538         pushl    %esi
       
   539         movl     4+12(%esp),%ecx
       
   540         pushl    %edi
       
   541         movl     8+ 4(%esp),%esi
       
   542         movl     8+ 8(%esp),%edi
       
   543         cmpl     %esi,%edi
       
   544         leal     -2(%esi,%ecx,2),%eax
       
   545         jbe      mmx_acs_CopyRight
       
   546         cmpl     %eax,%edi
       
   547         jbe      mmx_acs_CopyLeft
       
   548 mmx_acs_CopyRight:
       
   549         movl     %ecx,%eax
       
   550         sarl     %ecx
       
   551         je       5f
       
   552         cmpl     $33,%ecx
       
   553         jae      3f
       
   554 1:      subl     %esi,%edi 
       
   555         .p2align 4,,15
       
   556 2:      movl     (%esi),%edx
       
   557         movl     %edx,(%edi,%esi,1)
       
   558         addl     $4,%esi
       
   559         subl     $1,%ecx
       
   560         jnz      2b
       
   561         addl     %esi,%edi
       
   562         jmp      5f 
       
   563 3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
       
   564         subl     $1,%ecx
       
   565 4:      .p2align 4,,15
       
   566         movq     0(%esi),%mm0
       
   567         addl     $64,%edi
       
   568         movq     8(%esi),%mm1
       
   569         subl     $16,%ecx
       
   570         movq     16(%esi),%mm2
       
   571         movq     %mm0,-64(%edi)
       
   572         movq     24(%esi),%mm0
       
   573         movq     %mm1,-56(%edi)
       
   574         movq     32(%esi),%mm1
       
   575         movq     %mm2,-48(%edi)
       
   576         movq     40(%esi),%mm2
       
   577         movq     %mm0,-40(%edi)
       
   578         movq     48(%esi),%mm0
       
   579         movq     %mm1,-32(%edi)
       
   580         movq     56(%esi),%mm1
       
   581         movq     %mm2,-24(%edi)
       
   582         movq     %mm0,-16(%edi)
       
   583         addl     $64,%esi
       
   584         movq     %mm1,-8(%edi)
       
   585         cmpl     $16,%ecx
       
   586         jge      4b
       
   587         emms
       
   588 	testl    %ecx,%ecx
       
   589 	ja       1b
       
   590 5:      andl     $1,%eax
       
   591         je       7f
       
   592 6:      movw     (%esi),%dx
       
   593         movw     %dx,(%edi)
       
   594 7:	popl     %edi
       
   595         popl     %esi
       
   596         ret
       
   597 mmx_acs_CopyLeft:
       
   598         std
       
   599         leal     -4(%edi,%ecx,2),%edi
       
   600         movl     %eax,%esi
       
   601         movl     %ecx,%eax
       
   602         subl     $2,%esi
       
   603         sarl     %ecx
       
   604         je       4f
       
   605         cmpl     $32,%ecx
       
   606         ja       3f
       
   607         subl     %esi,%edi
       
   608         .p2align 4,,15
       
   609 2:      movl     (%esi),%edx
       
   610         movl     %edx,(%edi,%esi,1)
       
   611         subl     $4,%esi
       
   612         subl     $1,%ecx
       
   613         jnz      2b
       
   614         addl     %esi,%edi
       
   615         jmp      4f
       
   616 3:      rep;     smovl
       
   617 4:      andl     $1,%eax
       
   618         je       6f
       
   619         addl     $2,%esi
       
   620         addl     $2,%edi
       
   621 5:      movw     (%esi),%dx
       
   622         movw     %dx,(%edi)
       
   623 6:      cld
       
   624         popl     %edi
       
   625         popl     %esi
       
   626         ret
       
   627 
       
   628 
       
   629         # Support for jlong Atomic::cmpxchg(jlong exchange_value,
       
   630         #                                   volatile jlong* dest,
       
   631         #                                   jlong compare_value,
       
   632         #                                   bool is_MP)
       
   633         #
       
   634         .p2align 4,,15
       
   635 	.type    _Atomic_cmpxchg_long,@function
       
   636 _Atomic_cmpxchg_long:
       
   637                                    #  8(%esp) : return PC
       
   638         pushl    %ebx              #  4(%esp) : old %ebx
       
   639         pushl    %edi              #  0(%esp) : old %edi
       
   640         movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
       
   641         movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
       
   642         movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
       
   643         movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
       
   644         movl     20(%esp), %edi    # 20(%esp) : dest
       
   645         cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
       
   646         je       1f
       
   647         lock
       
   648 1:      cmpxchg8b (%edi)
       
   649         popl     %edi
       
   650         popl     %ebx
       
   651         ret
       
   652