hotspot/src/os_cpu/linux_arm/vm/linux_arm_64.s
author vdeshpande
Wed, 07 Jun 2017 13:09:46 -0700
changeset 46528 cf0da758e7b5
parent 42664 29142a56c193
permissions -rw-r--r--
8181616: FMA Vectorization on x86 Reviewed-by: kvn
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
42664
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     1
# 
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     2
# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     3
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     4
#
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     5
# This code is free software; you can redistribute it and/or modify it
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     6
# under the terms of the GNU General Public License version 2 only, as
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     7
# published by the Free Software Foundation.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     8
#
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
     9
# This code is distributed in the hope that it will be useful, but WITHOUT
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    10
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    11
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    12
# version 2 for more details (a copy is included in the LICENSE file that
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    13
# accompanied this code).
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    14
#
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    15
# You should have received a copy of the GNU General Public License version
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    16
# 2 along with this work; if not, write to the Free Software Foundation,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    17
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    18
#
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    19
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    20
# or visit www.oracle.com if you need additional information or have any
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    21
# questions.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    22
# 
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    23
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    24
        # TODO-AARCH64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    25
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    26
        # NOTE WELL!  The _Copy functions are called directly
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    27
        # from server-compiler-generated code via CallLeafNoFP,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    28
        # which means that they *must* either not use floating
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    29
        # point or use it in the same manner as does the server
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    30
        # compiler.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    31
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    32
        .globl _Copy_conjoint_bytes
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    33
        .type _Copy_conjoint_bytes, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    34
        .globl _Copy_arrayof_conjoint_bytes
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    35
        .type _Copy_arrayof_conjoint_bytes, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    36
        .globl _Copy_disjoint_words
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    37
        .type _Copy_disjoint_words, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    38
        .globl _Copy_conjoint_words
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    39
        .type _Copy_conjoint_words, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    40
        .globl _Copy_conjoint_jshorts_atomic
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    41
        .type _Copy_conjoint_jshorts_atomic, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    42
        .globl _Copy_arrayof_conjoint_jshorts
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    43
        .type _Copy_arrayof_conjoint_jshorts, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    44
        .globl _Copy_conjoint_jints_atomic
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    45
        .type _Copy_conjoint_jints_atomic, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    46
        .globl _Copy_arrayof_conjoint_jints
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    47
        .type _Copy_arrayof_conjoint_jints, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    48
        .globl _Copy_conjoint_jlongs_atomic
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    49
        .type _Copy_conjoint_jlongs_atomic, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    50
        .globl _Copy_arrayof_conjoint_jlongs
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    51
        .type _Copy_arrayof_conjoint_jlongs, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    52
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    53
        .text
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    54
        .globl  SpinPause
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    55
        .type SpinPause, %function
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    56
SpinPause:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    57
        yield
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    58
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    59
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    60
        # Support for void Copy::conjoint_bytes(void* from,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    61
        #                                       void* to,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    62
        #                                       size_t count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    63
_Copy_conjoint_bytes:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    64
        hlt 1002
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    65
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    66
        # Support for void Copy::arrayof_conjoint_bytes(void* from,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    67
        #                                               void* to,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    68
        #                                               size_t count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    69
_Copy_arrayof_conjoint_bytes:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    70
        hlt 1003
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    71
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    72
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    73
        # Support for void Copy::disjoint_words(void* from,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    74
        #                                       void* to,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    75
        #                                       size_t count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    76
_Copy_disjoint_words:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    77
        # These and further memory prefetches may hit out of array ranges.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    78
        # Experiments showed that prefetching of inaccessible memory doesn't result in exceptions.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    79
        prfm    pldl1keep,  [x0, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    80
        prfm    pstl1keep,  [x1, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    81
        prfm    pldl1keep,  [x0, #64]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    82
        prfm    pstl1keep,  [x1, #64]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    83
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    84
        subs    x18, x2,  #128
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    85
        b.ge    dw_large
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    86
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    87
dw_lt_128:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    88
        # Copy [x0, x0 + x2) to [x1, x1 + x2)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    89
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    90
        adr     x15,  dw_tail_table_base
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    91
        and     x16,  x2,  #~8
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    92
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    93
        # Calculate address to jump and store it to x15:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    94
        #   Each pair of instructions before dw_tail_table_base copies 16 bytes.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    95
        #   x16 is count of bytes to copy aligned down by 16.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    96
        #   So x16/16 pairs of instructions should be executed. 
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    97
        #   Each pair takes 8 bytes, so x15 = dw_tail_table_base - (x16/16)*8 = x15 - x16/2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    98
        sub     x15,  x15, x16, lsr #1
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
    99
        prfm    plil1keep, [x15]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   100
    
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   101
        add     x17,  x0,  x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   102
        add     x18,  x1,  x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   103
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   104
        # If x2 = x16 + 8, then copy 8 bytes and x16 bytes after that.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   105
        # Otherwise x2 = x16, so proceed to copy x16 bytes.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   106
        tbz     x2, #3, dw_lt_128_even
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   107
        ldr     x3, [x0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   108
        str     x3, [x1]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   109
dw_lt_128_even:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   110
        # Copy [x17 - x16, x17) to [x18 - x16, x18)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   111
        # x16 is aligned by 16 and less than 128
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   112
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   113
        # Execute (x16/16) ldp-stp pairs; each pair copies 16 bytes
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   114
        br      x15
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   115
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   116
        ldp     x3,  x4,  [x17, #-112]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   117
        stp     x3,  x4,  [x18, #-112]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   118
        ldp     x5,  x6,  [x17, #-96]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   119
        stp     x5,  x6,  [x18, #-96]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   120
        ldp     x7,  x8,  [x17, #-80]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   121
        stp     x7,  x8,  [x18, #-80]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   122
        ldp     x9,  x10, [x17, #-64]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   123
        stp     x9,  x10, [x18, #-64]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   124
        ldp     x11, x12, [x17, #-48]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   125
        stp     x11, x12, [x18, #-48]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   126
        ldp     x13, x14, [x17, #-32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   127
        stp     x13, x14, [x18, #-32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   128
        ldp     x15, x16, [x17, #-16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   129
        stp     x15, x16, [x18, #-16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   130
dw_tail_table_base:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   131
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   132
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   133
.p2align  6
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   134
.rept   12
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   135
        nop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   136
.endr
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   137
dw_large:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   138
        # x18 >= 0;
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   139
        # Copy [x0, x0 + x18 + 128) to [x1, x1 + x18 + 128)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   140
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   141
        ldp     x3,  x4,  [x0], #64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   142
        ldp     x5,  x6,  [x0, #-48]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   143
        ldp     x7,  x8,  [x0, #-32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   144
        ldp     x9,  x10, [x0, #-16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   145
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   146
        # Before and after each iteration of loop registers x3-x10 contain [x0 - 64, x0),
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   147
        # and x1 is a place to copy this data;
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   148
        # x18 contains number of bytes to be stored minus 128
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   149
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   150
        # Exactly 16 instructions from p2align, so dw_loop starts from cache line boundary
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   151
        # Checking it explictly by aligning with "hlt 1000" instructions 
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   152
.p2alignl  6, 0xd4407d00
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   153
dw_loop:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   154
        prfm    pldl1keep,  [x0, #64]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   155
        # Next line actually hurted memory copy performance (for interpreter) - JDK-8078120
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   156
        # prfm    pstl1keep,  [x1, #64]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   157
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   158
        subs    x18, x18, #64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   159
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   160
        stp     x3,  x4,  [x1, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   161
        ldp     x3,  x4,  [x0, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   162
        stp     x5,  x6,  [x1, #16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   163
        ldp     x5,  x6,  [x0, #16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   164
        stp     x7,  x8,  [x1, #32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   165
        ldp     x7,  x8,  [x0, #32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   166
        stp     x9,  x10, [x1, #48]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   167
        ldp     x9,  x10, [x0, #48]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   168
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   169
        add     x1,  x1,  #64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   170
        add     x0,  x0,  #64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   171
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   172
        b.ge    dw_loop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   173
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   174
        # 13 instructions from dw_loop, so the loop body hits into one cache line
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   175
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   176
dw_loop_end:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   177
        adds    x2,  x18, #64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   178
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   179
        stp     x3,  x4,  [x1], #64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   180
        stp     x5,  x6,  [x1, #-48]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   181
        stp     x7,  x8,  [x1, #-32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   182
        stp     x9,  x10, [x1, #-16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   183
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   184
        # Increased x18 by 64, but stored 64 bytes, so x2 contains exact number of bytes to be stored
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   185
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   186
        # If this number is not zero, also copy remaining bytes
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   187
        b.ne    dw_lt_128
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   188
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   189
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   190
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   191
        # Support for void Copy::conjoint_words(void* from,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   192
        #                                       void* to,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   193
        #                                       size_t count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   194
_Copy_conjoint_words:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   195
        subs    x3, x1, x0
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   196
        # hi condition is met <=> from < to
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   197
        ccmp    x2, x3, #0, hi
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   198
        # hi condition is met <=> (from < to) and (to - from < count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   199
        # otherwise _Copy_disjoint_words may be used, because it performs forward copying,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   200
        # so it also works when ranges overlap but to <= from
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   201
        b.ls    _Copy_disjoint_words
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   202
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   203
        # Overlapping case should be the rare one, it does not worth optimizing
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   204
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   205
        ands    x3,  x2,  #~8
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   206
        # x3 is count aligned down by 2*wordSize
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   207
        add     x0,  x0,  x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   208
        add     x1,  x1,  x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   209
        sub     x3,  x3,  #16
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   210
        # Skip loop if 0 or 1 words
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   211
        b.eq    cw_backward_loop_end
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   212
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   213
        # x3 >= 0
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   214
        # Copy [x0 - x3 - 16, x0) to [x1 - x3 - 16, x1) backward
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   215
cw_backward_loop:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   216
        subs    x3,  x3,  #16
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   217
        ldp     x4,  x5,  [x0, #-16]!
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   218
        stp     x4,  x5,  [x1, #-16]!
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   219
        b.ge    cw_backward_loop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   220
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   221
cw_backward_loop_end:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   222
        # Copy remaining 0 or 1 words
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   223
        tbz     x2,  #3,  cw_finish
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   224
        ldr     x3, [x0, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   225
        str     x3, [x1, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   226
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   227
cw_finish:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   228
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   229
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   230
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   231
        # Support for void Copy::conjoint_jshorts_atomic(void* from,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   232
        #                                                void* to,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   233
        #                                                size_t count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   234
_Copy_conjoint_jshorts_atomic:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   235
        add     x17, x0, x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   236
        add     x18, x1, x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   237
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   238
        subs    x3, x1, x0
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   239
        # hi is met <=> (from < to) and (to - from < count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   240
        ccmp    x2, x3, #0, hi
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   241
        b.hi    cs_backward
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   242
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   243
        subs    x3, x2, #14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   244
        b.ge    cs_forward_loop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   245
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   246
        # Copy x2 < 14 bytes from x0 to x1
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   247
cs_forward_lt14:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   248
        ands    x7, x2, #7
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   249
        tbz     x2, #3, cs_forward_lt8
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   250
        ldrh    w3, [x0, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   251
        ldrh    w4, [x0, #2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   252
        ldrh    w5, [x0, #4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   253
        ldrh    w6, [x0, #6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   254
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   255
        strh    w3, [x1, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   256
        strh    w4, [x1, #2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   257
        strh    w5, [x1, #4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   258
        strh    w6, [x1, #6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   259
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   260
        # Copy x7 < 8 bytes from x17 - x7 to x18 - x7
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   261
cs_forward_lt8:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   262
        b.eq    cs_forward_0
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   263
        cmp     x7, #4
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   264
        b.lt    cs_forward_2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   265
        b.eq    cs_forward_4
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   266
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   267
cs_forward_6:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   268
        ldrh    w3, [x17, #-6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   269
        strh    w3, [x18, #-6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   270
cs_forward_4:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   271
        ldrh    w4, [x17, #-4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   272
        strh    w4, [x18, #-4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   273
cs_forward_2:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   274
        ldrh    w5, [x17, #-2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   275
        strh    w5, [x18, #-2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   276
cs_forward_0:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   277
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   278
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   279
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   280
        # Copy [x0, x0 + x3 + 14) to [x1, x1 + x3 + 14)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   281
        # x3 >= 0
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   282
.p2align 6
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   283
cs_forward_loop:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   284
        subs    x3, x3, #14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   285
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   286
        ldrh    w4, [x0], #14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   287
        ldrh    w5, [x0, #-12]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   288
        ldrh    w6, [x0, #-10]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   289
        ldrh    w7, [x0, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   290
        ldrh    w8, [x0, #-6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   291
        ldrh    w9, [x0, #-4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   292
        ldrh    w10, [x0, #-2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   293
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   294
        strh    w4, [x1], #14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   295
        strh    w5, [x1, #-12]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   296
        strh    w6, [x1, #-10]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   297
        strh    w7, [x1, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   298
        strh    w8, [x1, #-6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   299
        strh    w9, [x1, #-4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   300
        strh    w10, [x1, #-2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   301
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   302
        b.ge    cs_forward_loop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   303
        # Exactly 16 instruction from cs_forward_loop, so loop fits into one cache line
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   304
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   305
        adds    x2, x3, #14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   306
        # x2 bytes should be copied from x0 to x1
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   307
        b.ne    cs_forward_lt14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   308
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   309
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   310
        # Very similar to forward copying
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   311
cs_backward:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   312
        subs    x3, x2, #14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   313
        b.ge    cs_backward_loop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   314
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   315
cs_backward_lt14:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   316
        ands    x7, x2, #7
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   317
        tbz     x2, #3, cs_backward_lt8
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   318
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   319
        ldrh    w3, [x17, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   320
        ldrh    w4, [x17, #-6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   321
        ldrh    w5, [x17, #-4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   322
        ldrh    w6, [x17, #-2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   323
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   324
        strh    w3, [x18, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   325
        strh    w4, [x18, #-6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   326
        strh    w5, [x18, #-4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   327
        strh    w6, [x18, #-2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   328
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   329
cs_backward_lt8:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   330
        b.eq    cs_backward_0
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   331
        cmp     x7, #4
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   332
        b.lt    cs_backward_2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   333
        b.eq    cs_backward_4
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   334
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   335
cs_backward_6:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   336
        ldrh    w3, [x0, #4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   337
        strh    w3, [x1, #4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   338
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   339
cs_backward_4:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   340
        ldrh    w4, [x0, #2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   341
        strh    w4, [x1, #2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   342
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   343
cs_backward_2:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   344
        ldrh    w5, [x0, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   345
        strh    w5, [x1, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   346
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   347
cs_backward_0:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   348
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   349
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   350
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   351
.p2align 6
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   352
cs_backward_loop:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   353
        subs    x3, x3, #14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   354
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   355
        ldrh    w4, [x17, #-14]!
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   356
        ldrh    w5, [x17, #2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   357
        ldrh    w6, [x17, #4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   358
        ldrh    w7, [x17, #6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   359
        ldrh    w8, [x17, #8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   360
        ldrh    w9, [x17, #10]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   361
        ldrh    w10, [x17, #12]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   362
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   363
        strh    w4, [x18, #-14]!
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   364
        strh    w5, [x18, #2]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   365
        strh    w6, [x18, #4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   366
        strh    w7, [x18, #6]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   367
        strh    w8, [x18, #8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   368
        strh    w9, [x18, #10]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   369
        strh    w10, [x18, #12]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   370
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   371
        b.ge    cs_backward_loop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   372
        adds    x2, x3, #14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   373
        b.ne    cs_backward_lt14
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   374
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   375
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   376
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   377
        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   378
        #                                                 void* to,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   379
        #                                                 size_t count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   380
_Copy_arrayof_conjoint_jshorts:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   381
        hlt 1007
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   382
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   383
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   384
        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   385
        #                                               jlong* to,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   386
        #                                               size_t count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   387
_Copy_conjoint_jlongs_atomic:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   388
_Copy_arrayof_conjoint_jlongs:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   389
        hlt 1009
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   390
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   391
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   392
        # Support for void Copy::conjoint_jints_atomic(void* from,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   393
        #                                              void* to,
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   394
        #                                              size_t count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   395
_Copy_conjoint_jints_atomic:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   396
_Copy_arrayof_conjoint_jints:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   397
        # These and further memory prefetches may hit out of array ranges.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   398
        # Experiments showed that prefetching of inaccessible memory doesn't result in exceptions.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   399
        prfm    pldl1keep,  [x0, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   400
        prfm    pstl1keep,  [x1, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   401
        prfm    pldl1keep,  [x0, #32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   402
        prfm    pstl1keep,  [x1, #32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   403
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   404
        subs    x3, x1, x0
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   405
        # hi condition is met <=> from < to
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   406
        ccmp    x2, x3, #0, hi
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   407
        # hi condition is met <=> (from < to) and (to - from < count)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   408
        b.hi    ci_backward
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   409
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   410
        subs    x18, x2,  #64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   411
        b.ge    ci_forward_large
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   412
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   413
ci_forward_lt_64:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   414
        # Copy [x0, x0 + x2) to [x1, x1 + x2)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   415
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   416
        adr     x15,  ci_forward_tail_table_base
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   417
        and     x16,  x2,  #~4
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   418
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   419
        # Calculate address to jump and store it to x15:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   420
        #   Each pair of instructions before ci_forward_tail_table_base copies 8 bytes.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   421
        #   x16 is count of bytes to copy aligned down by 8.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   422
        #   So x16/8 pairs of instructions should be executed. 
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   423
        #   Each pair takes 8 bytes, so x15 = ci_forward_tail_table_base - (x16/8)*8 = x15 - x16
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   424
        sub     x15,  x15, x16
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   425
        prfm    plil1keep, [x15]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   426
    
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   427
        add     x17,  x0,  x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   428
        add     x18,  x1,  x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   429
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   430
        # If x2 = x16 + 4, then copy 4 bytes and x16 bytes after that.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   431
        # Otherwise x2 = x16, so proceed to copy x16 bytes.
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   432
        tbz     x2, #2, ci_forward_lt_64_even
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   433
        ldr     w3, [x0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   434
        str     w3, [x1]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   435
ci_forward_lt_64_even:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   436
        # Copy [x17 - x16, x17) to [x18 - x16, x18)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   437
        # x16 is aligned by 8 and less than 64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   438
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   439
        # Execute (x16/8) ldp-stp pairs; each pair copies 8 bytes
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   440
        br      x15
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   441
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   442
        ldp     w3,  w4,  [x17, #-56]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   443
        stp     w3,  w4,  [x18, #-56]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   444
        ldp     w5,  w6,  [x17, #-48]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   445
        stp     w5,  w6,  [x18, #-48]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   446
        ldp     w7,  w8,  [x17, #-40]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   447
        stp     w7,  w8,  [x18, #-40]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   448
        ldp     w9,  w10, [x17, #-32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   449
        stp     w9,  w10, [x18, #-32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   450
        ldp     w11, w12, [x17, #-24]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   451
        stp     w11, w12, [x18, #-24]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   452
        ldp     w13, w14, [x17, #-16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   453
        stp     w13, w14, [x18, #-16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   454
        ldp     w15, w16, [x17, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   455
        stp     w15, w16, [x18, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   456
ci_forward_tail_table_base:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   457
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   458
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   459
.p2align  6
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   460
.rept   12
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   461
        nop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   462
.endr
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   463
ci_forward_large:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   464
        # x18 >= 0;
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   465
        # Copy [x0, x0 + x18 + 64) to [x1, x1 + x18 + 64)
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   466
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   467
        ldp     w3,  w4,  [x0], #32
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   468
        ldp     w5,  w6,  [x0, #-24]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   469
        ldp     w7,  w8,  [x0, #-16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   470
        ldp     w9,  w10, [x0, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   471
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   472
        # Before and after each iteration of loop registers w3-w10 contain [x0 - 32, x0),
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   473
        # and x1 is a place to copy this data;
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   474
        # x18 contains number of bytes to be stored minus 64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   475
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   476
        # Exactly 16 instructions from p2align, so ci_forward_loop starts from cache line boundary
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   477
        # Checking it explictly by aligning with "hlt 1000" instructions 
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   478
.p2alignl  6, 0xd4407d00
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   479
ci_forward_loop:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   480
        prfm    pldl1keep,  [x0, #32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   481
        prfm    pstl1keep,  [x1, #32]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   482
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   483
        subs    x18, x18, #32
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   484
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   485
        stp     w3,  w4,  [x1, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   486
        ldp     w3,  w4,  [x0, #0]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   487
        stp     w5,  w6,  [x1, #8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   488
        ldp     w5,  w6,  [x0, #8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   489
        stp     w7,  w8,  [x1, #16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   490
        ldp     w7,  w8,  [x0, #16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   491
        stp     w9,  w10, [x1, #24]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   492
        ldp     w9,  w10, [x0, #24]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   493
        
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   494
        add     x1,  x1,  #32
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   495
        add     x0,  x0,  #32
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   496
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   497
        b.ge    ci_forward_loop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   498
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   499
        # 14 instructions from ci_forward_loop, so the loop body hits into one cache line
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   500
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   501
ci_forward_loop_end:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   502
        adds    x2,  x18, #32
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   503
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   504
        stp     w3,  w4,  [x1], #32
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   505
        stp     w5,  w6,  [x1, #-24]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   506
        stp     w7,  w8,  [x1, #-16]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   507
        stp     w9,  w10, [x1, #-8]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   508
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   509
        # Increased x18 by 32, but stored 32 bytes, so x2 contains exact number of bytes to be stored
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   510
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   511
        # If this number is not zero, also copy remaining bytes
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   512
        b.ne    ci_forward_lt_64
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   513
        ret
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   514
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   515
ci_backward:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   516
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   517
        # Overlapping case should be the rare one, it does not worth optimizing
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   518
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   519
        ands    x3,  x2,  #~4
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   520
        # x3 is count aligned down by 2*jintSize
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   521
        add     x0,  x0,  x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   522
        add     x1,  x1,  x2
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   523
        sub     x3,  x3,  #8
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   524
        # Skip loop if 0 or 1 jints
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   525
        b.eq    ci_backward_loop_end
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   526
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   527
        # x3 >= 0
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   528
        # Copy [x0 - x3 - 8, x0) to [x1 - x3 - 8, x1) backward
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   529
ci_backward_loop:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   530
        subs    x3,  x3,  #8
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   531
        ldp     w4,  w5,  [x0, #-8]!
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   532
        stp     w4,  w5,  [x1, #-8]!
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   533
        b.ge    ci_backward_loop
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   534
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   535
ci_backward_loop_end:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   536
        # Copy remaining 0 or 1 jints
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   537
        tbz     x2,  #2,  ci_backward_finish
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   538
        ldr     w3, [x0, #-4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   539
        str     w3, [x1, #-4]
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   540
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   541
ci_backward_finish:
29142a56c193 8168503: JEP 297: Unified arm32/arm64 Port
bobv
parents:
diff changeset
   542
        ret