src/hotspot/os_cpu/bsd_x86/bsd_x86_64.s
changeset 47216 71c04702a3d5
parent 22234 da823d78ad65
equal deleted inserted replaced
47215:4ebc2e2fb97c 47216:71c04702a3d5
       
     1 # 
       
     2 # Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
       
     3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4 #
       
     5 # This code is free software; you can redistribute it and/or modify it
       
     6 # under the terms of the GNU General Public License version 2 only, as
       
     7 # published by the Free Software Foundation.
       
     8 #
       
     9 # This code is distributed in the hope that it will be useful, but WITHOUT
       
    10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12 # version 2 for more details (a copy is included in the LICENSE file that
       
    13 # accompanied this code).
       
    14 #
       
    15 # You should have received a copy of the GNU General Public License version
       
    16 # 2 along with this work; if not, write to the Free Software Foundation,
       
    17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18 #
       
    19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20 # or visit www.oracle.com if you need additional information or have any
       
    21 # questions.
       
    22 #
       
    23 
       
     24 #ifdef __APPLE__
       
     25 # Darwin: global symbols carry a leading underscore, and ELF_TYPE expands to nothing
       
     26 #define SYMBOL(s) _ ## s
       
     27 #define ELF_TYPE(name, description)
       
     28 #else
       
     29 #define SYMBOL(s) s
       
     30 #define ELF_TYPE(name, description) .type name,description
       
     31 #endif
       
     32 
       
     33         # NOTE WELL!  The _Copy functions are called directly
       
     34 	# from server-compiler-generated code via CallLeafNoFP,
       
     35 	# which means that they *must* either not use floating
       
     36 	# point or use it in the same manner as does the server
       
     37 	# compiler.
       
     38 	# None of the copy routines below uses a floating-point or vector register.
       
     39         .globl SYMBOL(_Copy_arrayof_conjoint_bytes)     # exported copy entry points; each label is defined further down
       
     40 	.globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
       
     41         .globl SYMBOL(_Copy_conjoint_jshorts_atomic)    # shares its code with _Copy_arrayof_conjoint_jshorts
       
     42         .globl SYMBOL(_Copy_arrayof_conjoint_jints)
       
     43         .globl SYMBOL(_Copy_conjoint_jints_atomic)      # shares its code with _Copy_arrayof_conjoint_jints
       
     44         .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
       
     45         .globl SYMBOL(_Copy_conjoint_jlongs_atomic)     # shares its code with _Copy_arrayof_conjoint_jlongs
       
     46 
       
     47 	.text                                           # everything that follows is emitted into the text section
       
    48 
       
     49         .globl SYMBOL(SpinPause)      # SpinPause: issue one spin-wait hint, then return 1
       
     50         .p2align 4,,15                # align the entry to 16 bytes, skipping at most 15
       
     51         ELF_TYPE(SpinPause,@function)
       
     52 SYMBOL(SpinPause):                    # reads no argument registers and uses no stack
       
     53         rep                           # rep prefix followed by nop is the encoding of PAUSE
       
     54         nop
       
     55         movq   $1, %rax               # return value 1 in rax; NOTE(review): meaning of 1 is defined by the callers - confirm
       
     56         ret
       
    57 
       
     58         # Support for void Copy::arrayof_conjoint_bytes(void* from,
       
     59         #                                               void* to,
       
     60         #                                               size_t count)
       
     61         # rdi - from
       
     62         # rsi - to
       
     63         # rdx - count, treated as ssize_t
       
     64         # Overlap-safe byte copy.  Writes rax, rcx, rdx, rsi, r8 and flags; rdi is never written.
       
     65         .p2align 4,,15
       
     66 	ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
       
     67 SYMBOL(_Copy_arrayof_conjoint_bytes):
       
     68         movq     %rdx,%r8             # byte count
       
     69         shrq     $3,%rdx              # qword count
       
     70         cmpq     %rdi,%rsi            # flags <- to - from; lea below leaves them intact
       
     71         leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
       
     72         jbe      acb_CopyRight        # to <= from (unsigned): ascending copy is safe
       
     73         cmpq     %rax,%rsi
       
     74         jbe      acb_CopyLeft         # to <= last source byte: ranges overlap, copy descending
       
     75 acb_CopyRight:                        # ascending copy: qwords first, then dword/word/byte tail
       
     76         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
       
     77         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
       
     78         negq     %rdx                 # rdx = -qcount, counts up toward zero
       
     79         jmp      7f                   # enter at the loop test
       
     80         .p2align 4,,15
       
     81 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per iteration (one to three left over)
       
     82         movq     %rsi,8(%rcx,%rdx,8)
       
     83         addq     $1,%rdx
       
     84         jnz      1b
       
     85 2:      testq    $4,%r8               # check for trailing dword
       
     86         jz       3f
       
     87         movl     8(%rax),%esi         # copy trailing dword
       
     88         movl     %esi,8(%rcx)
       
     89         addq     $4,%rax              # step both bases past the dword
       
     90         addq     $4,%rcx              # original %rsi is trashed, so we
       
     91                                       #  can't use it as a base register
       
     92 3:      testq    $2,%r8               # check for trailing word
       
     93         jz       4f
       
     94         movw     8(%rax),%si          # copy trailing word
       
     95         movw     %si,8(%rcx)
       
     96         addq     $2,%rcx              # only the destination base moves; the byte is read via rdi/r8
       
     97 4:      testq    $1,%r8               # check for trailing byte
       
     98         jz       5f
       
     99         movb     -1(%rdi,%r8,1),%al   # copy trailing byte
       
    100         movb     %al,8(%rcx)
       
    101 5:      ret
       
    102         .p2align 4,,15
       
    103 6:      movq     -24(%rax,%rdx,8),%rsi          # unrolled: four qwords per iteration, ascending addresses
       
    104         movq     %rsi,-24(%rcx,%rdx,8)
       
    105         movq     -16(%rax,%rdx,8),%rsi
       
    106         movq     %rsi,-16(%rcx,%rdx,8)
       
    107         movq     -8(%rax,%rdx,8),%rsi
       
    108         movq     %rsi,-8(%rcx,%rdx,8)
       
    109         movq     (%rax,%rdx,8),%rsi
       
    110         movq     %rsi,(%rcx,%rdx,8)
       
    111 7:      addq     $4,%rdx              # advance the index by four qwords
       
    112         jle      6b                   # at least four qwords were left
       
    113         subq     $4,%rdx              # undo the advance
       
    114         jl       1b                   # one to three qwords left
       
    115         jmp      2b                   # no qwords left: copy the tail
       
    116 acb_CopyLeft:                         # descending copy: byte/word/dword tail first, then qwords from the top
       
    117         testq    $1,%r8               # check for trailing byte
       
    118         jz       1f
       
    119         movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
       
    120         movb     %cl,-1(%rsi,%r8,1)
       
    121         subq     $1,%r8               # adjust for possible trailing word
       
    122 1:      testq    $2,%r8               # check for trailing word
       
    123         jz       2f
       
    124         movw     -2(%rdi,%r8,1),%cx   # copy trailing word
       
    125         movw     %cx,-2(%rsi,%r8,1)
       
    126 2:      testq    $4,%r8               # check for trailing dword
       
    127         jz       5f
       
    128         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
       
    129         movl     %ecx,(%rsi,%rdx,8)
       
    130         jmp      5f                   # enter the qword loop at its test
       
    131         .p2align 4,,15
       
    132 3:      movq     -8(%rdi,%rdx,8),%rcx # one qword per iteration, highest remaining first
       
    133         movq     %rcx,-8(%rsi,%rdx,8)
       
    134         subq     $1,%rdx
       
    135         jnz      3b
       
    136         ret
       
    137         .p2align 4,,15
       
    138 4:      movq     24(%rdi,%rdx,8),%rcx # unrolled: four qwords per iteration, descending addresses
       
    139         movq     %rcx,24(%rsi,%rdx,8)
       
    140         movq     16(%rdi,%rdx,8),%rcx
       
    141         movq     %rcx,16(%rsi,%rdx,8)
       
    142         movq     8(%rdi,%rdx,8),%rcx
       
    143         movq     %rcx,8(%rsi,%rdx,8)
       
    144         movq     (%rdi,%rdx,8),%rcx
       
    145         movq     %rcx,(%rsi,%rdx,8)
       
    146 5:      subq     $4,%rdx              # step the index down by four qwords
       
    147         jge      4b                   # at least four qwords were left
       
    148         addq     $4,%rdx              # undo the step
       
    149         jg       3b                   # one to three qwords left
       
    150         ret
       
   151 
       
    152         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
       
    153         #                                                 void* to,
       
    154         #                                                 size_t count)
       
    155         # Equivalent to
       
    156         #   conjoint_jshorts_atomic
       
    157         #
       
    158         # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
       
    159         # let the hardware handle it.  The two or four words within dwords
       
    160         # or qwords that span cache line boundaries will still be loaded
       
    161         # and stored atomically.
       
    162         #
       
    163         # rdi - from
       
    164         # rsi - to
       
    165         # rdx - count (in 2-byte words), treated as ssize_t
       
    166         # Overlap-safe copy.  Writes rax, rcx, rdx, rsi, r8 and flags; rdi is never written.
       
    167         .p2align 4,,15
       
    168 	ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
       
    169 	ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
       
    170 SYMBOL(_Copy_arrayof_conjoint_jshorts):
       
    171 SYMBOL(_Copy_conjoint_jshorts_atomic):
       
    172         movq     %rdx,%r8             # word count
       
    173         shrq     $2,%rdx              # qword count
       
    174         cmpq     %rdi,%rsi            # flags <- to - from; lea below leaves them intact
       
    175         leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
       
    176         jbe      acs_CopyRight        # to <= from (unsigned): ascending copy is safe
       
    177         cmpq     %rax,%rsi
       
    178         jbe      acs_CopyLeft         # to <= last source word: ranges overlap, copy descending
       
    179 acs_CopyRight:                        # ascending copy: qwords first, then dword/word tail
       
    180         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
       
    181         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
       
    182         negq     %rdx                 # rdx = -qcount, counts up toward zero
       
    183         jmp      6f                   # enter at the loop test
       
    184 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per iteration (one to three left over)
       
    185         movq     %rsi,8(%rcx,%rdx,8)
       
    186         addq     $1,%rdx
       
    187         jnz      1b
       
    188 2:      testq    $2,%r8               # check for trailing dword
       
    189         jz       3f
       
    190         movl     8(%rax),%esi         # copy trailing dword
       
    191         movl     %esi,8(%rcx)
       
    192         addq     $4,%rcx              # original %rsi is trashed, so we
       
    193                                       #  can't use it as a base register
       
    194 3:      testq    $1,%r8               # check for trailing word
       
    195         jz       4f
       
    196         movw     -2(%rdi,%r8,2),%si   # copy trailing word
       
    197         movw     %si,8(%rcx)
       
    198 4:      ret
       
    199         .p2align 4,,15
       
    200 5:      movq     -24(%rax,%rdx,8),%rsi          # unrolled: four qwords per iteration, ascending addresses
       
    201         movq     %rsi,-24(%rcx,%rdx,8)
       
    202         movq     -16(%rax,%rdx,8),%rsi
       
    203         movq     %rsi,-16(%rcx,%rdx,8)
       
    204         movq     -8(%rax,%rdx,8),%rsi
       
    205         movq     %rsi,-8(%rcx,%rdx,8)
       
    206         movq     (%rax,%rdx,8),%rsi
       
    207         movq     %rsi,(%rcx,%rdx,8)
       
    208 6:      addq     $4,%rdx              # advance the index by four qwords
       
    209         jle      5b                   # at least four qwords were left
       
    210         subq     $4,%rdx              # undo the advance
       
    211         jl       1b                   # one to three qwords left
       
    212         jmp      2b                   # no qwords left: copy the tail
       
    213 acs_CopyLeft:                         # descending copy: word/dword tail first, then qwords from the top
       
    214         testq    $1,%r8               # check for trailing word
       
    215         jz       1f
       
    216         movw     -2(%rdi,%r8,2),%cx   # copy trailing word
       
    217         movw     %cx,-2(%rsi,%r8,2)
       
    218 1:      testq    $2,%r8               # check for trailing dword
       
    219         jz       4f
       
    220         movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
       
    221         movl     %ecx,(%rsi,%rdx,8)
       
    222         jmp      4f                   # enter the qword loop at its test
       
    223 2:      movq     -8(%rdi,%rdx,8),%rcx # one qword per iteration, highest remaining first
       
    224         movq     %rcx,-8(%rsi,%rdx,8)
       
    225         subq     $1,%rdx
       
    226         jnz      2b
       
    227         ret
       
    228         .p2align 4,,15
       
    229 3:      movq     24(%rdi,%rdx,8),%rcx # unrolled: four qwords per iteration, descending addresses
       
    230         movq     %rcx,24(%rsi,%rdx,8)
       
    231         movq     16(%rdi,%rdx,8),%rcx
       
    232         movq     %rcx,16(%rsi,%rdx,8)
       
    233         movq     8(%rdi,%rdx,8),%rcx
       
    234         movq     %rcx,8(%rsi,%rdx,8)
       
    235         movq     (%rdi,%rdx,8),%rcx
       
    236         movq     %rcx,(%rsi,%rdx,8)
       
    237 4:      subq     $4,%rdx              # step the index down by four qwords
       
    238         jge      3b                   # at least four qwords were left
       
    239         addq     $4,%rdx              # undo the step
       
    240         jg       2b                   # one to three qwords left
       
    241         ret
       
   242 
       
    243         # Support for void Copy::arrayof_conjoint_jints(jint* from,
       
    244         #                                               jint* to,
       
    245         #                                               size_t count)
       
    246         # Equivalent to
       
    247         #   conjoint_jints_atomic
       
    248         #
       
    249         # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
       
    250         # the hardware handle it.  The two dwords within qwords that span
       
    251         # cache line boundaries will still be loaded and stored atomically.
       
    252         #
       
    253         # rdi - from
       
    254         # rsi - to
       
    255         # rdx - count (in 4-byte dwords), treated as ssize_t
       
    256         # Overlap-safe copy.  Writes rax, rcx, rdx, rsi, r8 and flags; rdi is never written.
       
    257         .p2align 4,,15
       
    258 	ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
       
    259 	ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
       
    260 SYMBOL(_Copy_arrayof_conjoint_jints):
       
    261 SYMBOL(_Copy_conjoint_jints_atomic):
       
    262         movq     %rdx,%r8             # dword count
       
    263         shrq     %rdx                 # qword count
       
    264         cmpq     %rdi,%rsi            # flags <- to - from; lea below leaves them intact
       
    265         leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
       
    266         jbe      aci_CopyRight        # to <= from (unsigned): ascending copy is safe
       
    267         cmpq     %rax,%rsi
       
    268         jbe      aci_CopyLeft         # to <= last source dword: ranges overlap, copy descending
       
    269 aci_CopyRight:                        # ascending copy: qwords first, then the odd dword
       
    270         leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
       
    271         leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
       
    272         negq     %rdx                 # rdx = -qcount, counts up toward zero
       
    273         jmp      5f                   # enter at the loop test
       
    274         .p2align 4,,15
       
    275 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per iteration (one to three left over)
       
    276         movq     %rsi,8(%rcx,%rdx,8)
       
    277         addq     $1,%rdx
       
    278         jnz       1b
       
    279 2:      testq    $1,%r8               # check for trailing dword
       
    280         jz       3f
       
    281         movl     8(%rax),%esi         # copy trailing dword
       
    282         movl     %esi,8(%rcx)
       
    283 3:      ret
       
    284         .p2align 4,,15
       
    285 4:      movq     -24(%rax,%rdx,8),%rsi          # unrolled: four qwords per iteration, ascending addresses
       
    286         movq     %rsi,-24(%rcx,%rdx,8)
       
    287         movq     -16(%rax,%rdx,8),%rsi
       
    288         movq     %rsi,-16(%rcx,%rdx,8)
       
    289         movq     -8(%rax,%rdx,8),%rsi
       
    290         movq     %rsi,-8(%rcx,%rdx,8)
       
    291         movq     (%rax,%rdx,8),%rsi
       
    292         movq     %rsi,(%rcx,%rdx,8)
       
    293 5:      addq     $4,%rdx              # advance the index by four qwords
       
    294         jle      4b                   # at least four qwords were left
       
    295         subq     $4,%rdx              # undo the advance
       
    296         jl       1b                   # one to three qwords left
       
    297         jmp      2b                   # no qwords left: copy the tail
       
    298 aci_CopyLeft:                         # descending copy: odd dword first, then qwords from the top
       
    299         testq    $1,%r8               # check for trailing dword
       
    300         jz       3f
       
    301         movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
       
    302         movl     %ecx,-4(%rsi,%r8,4)
       
    303         jmp      3f                   # enter the qword loop at its test
       
    304 1:      movq     -8(%rdi,%rdx,8),%rcx # one qword per iteration, highest remaining first
       
    305         movq     %rcx,-8(%rsi,%rdx,8)
       
    306         subq     $1,%rdx
       
    307         jnz      1b
       
    308         ret
       
    309         .p2align 4,,15
       
    310 2:      movq     24(%rdi,%rdx,8),%rcx # unrolled: four qwords per iteration, descending addresses
       
    311         movq     %rcx,24(%rsi,%rdx,8)
       
    312         movq     16(%rdi,%rdx,8),%rcx
       
    313         movq     %rcx,16(%rsi,%rdx,8)
       
    314         movq     8(%rdi,%rdx,8),%rcx
       
    315         movq     %rcx,8(%rsi,%rdx,8)
       
    316         movq     (%rdi,%rdx,8),%rcx
       
    317         movq     %rcx,(%rsi,%rdx,8)
       
    318 3:      subq     $4,%rdx              # step the index down by four qwords
       
    319         jge      2b                   # at least four qwords were left
       
    320         addq     $4,%rdx              # undo the step
       
    321         jg       1b                   # one to three qwords left
       
    322         ret
       
   323 
       
    324         # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
       
    325         #                                                jlong* to,
       
    326         #                                                size_t count)
       
    327         # Equivalent to
       
    328         #   conjoint_jlongs_atomic
       
    329         #   arrayof_conjoint_oops
       
    330         #   conjoint_oops_atomic
       
    331         #
       
    332         # rdi - from
       
    333         # rsi - to
       
    334         # rdx - count (in 8-byte qwords), treated as ssize_t
       
    335         # Overlap-safe copy.  Writes rax, rcx, rdx, rsi and flags; rdi is never written.
       
    336         .p2align 4,,15
       
    337 	ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
       
    338 	ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
       
    339 SYMBOL(_Copy_arrayof_conjoint_jlongs):
       
    340 SYMBOL(_Copy_conjoint_jlongs_atomic):
       
    341         cmpq     %rdi,%rsi            # flags <- to - from; lea below leaves them intact
       
    342         leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
       
    343         jbe      acl_CopyRight        # to <= from (unsigned): ascending copy is safe
       
    344         cmpq     %rax,%rsi
       
    345         jbe      acl_CopyLeft         # to <= last source qword: ranges overlap, copy descending
       
    346 acl_CopyRight:                        # ascending copy, whole qwords only
       
    347         leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
       
    348         negq     %rdx                 # rdx = -count, counts up toward zero
       
    349         jmp      3f                   # enter at the loop test
       
    350 1:      movq     8(%rax,%rdx,8),%rsi  # one qword per iteration (one to three left over)
       
    351         movq     %rsi,8(%rcx,%rdx,8)
       
    352         addq     $1,%rdx
       
    353         jnz      1b
       
    354         ret
       
    355         .p2align 4,,15
       
    356 2:      movq     -24(%rax,%rdx,8),%rsi          # unrolled: four qwords per iteration, ascending addresses
       
    357         movq     %rsi,-24(%rcx,%rdx,8)
       
    358         movq     -16(%rax,%rdx,8),%rsi
       
    359         movq     %rsi,-16(%rcx,%rdx,8)
       
    360         movq     -8(%rax,%rdx,8),%rsi
       
    361         movq     %rsi,-8(%rcx,%rdx,8)
       
    362         movq     (%rax,%rdx,8),%rsi
       
    363         movq     %rsi,(%rcx,%rdx,8)
       
    364 3:      addq     $4,%rdx              # advance the index by four qwords
       
    365         jle      2b                   # at least four qwords were left
       
    366         subq     $4,%rdx              # undo the advance
       
    367         jl       1b                   # one to three qwords left
       
    368         ret
       
    369 4:      movq     -8(%rdi,%rdx,8),%rcx # descending: one qword per iteration, highest remaining first
       
    370         movq     %rcx,-8(%rsi,%rdx,8)
       
    371         subq     $1,%rdx
       
    372         jnz      4b
       
    373         ret
       
    374         .p2align 4,,15
       
    375 5:      movq     24(%rdi,%rdx,8),%rcx # unrolled: four qwords per iteration, descending addresses
       
    376         movq     %rcx,24(%rsi,%rdx,8)
       
    377         movq     16(%rdi,%rdx,8),%rcx
       
    378         movq     %rcx,16(%rsi,%rdx,8)
       
    379         movq     8(%rdi,%rdx,8),%rcx
       
    380         movq     %rcx,8(%rsi,%rdx,8)
       
    381         movq     (%rdi,%rdx,8),%rcx
       
    382         movq     %rcx,(%rsi,%rdx,8)
       
    383 acl_CopyLeft:                         # descending copy enters here, at the loop test; loops 4 and 5 sit above
       
    384         subq     $4,%rdx              # step the index down by four qwords
       
    385         jge      5b                   # at least four qwords were left
       
    386         addq     $4,%rdx              # undo the step
       
    387         jg       4b                   # one to three qwords left
       
    388         ret