src/hotspot/os_cpu/linux_aarch64/copy_linux_aarch64.s
author jwilhelm
Thu, 12 Sep 2019 03:21:11 +0200
changeset 58094 0f6c749acd15
parent 55398 e53ec3b362f4
permissions -rw-r--r--
Added tag jdk-14+14 for changeset cddef3bde924
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
/*
 * Copyright (c) 2016, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
// Entry points exported from this file.
        .global _Copy_conjoint_words
        .global _Copy_disjoint_words

// Register aliases shared by both copy routines.
//   s     (x0)      source address
//   d     (x1)      destination address
//   count (x2)      number of 8-byte words still to be copied
//   t0-t7 (x3-x10)  scratch registers holding the words in flight
s       .req    x0
d       .req    x1
count   .req    x2
t0      .req    x3
t1      .req    x4
t2      .req    x5
t3      .req    x6
t4      .req    x7
t5      .req    x8
t6      .req    x9
t7      .req    x10

// -----------------------------------------------------------------------
// _Copy_disjoint_words
//
// Forward (ascending address) copy of `count` 8-byte words from `s` to
// `d`.  s, d, count and t0-t7 are the register aliases declared with
// .req near the top of this file.
//
//   In:     s     = source address
//           d     = destination address
//           count = number of words to copy
//   Out:    no result is produced in a register.
//   Clobb:  s, d, count, t0-t7 and the condition flags.
//
// Eight words are loaded before count is first tested, so every call
// reads and stores at least 64 bytes.
// NOTE(review): callers presumably only come here when at least eight
// words remain after the optional alignment word -- confirm against the
// C++ wrapper that calls this routine.
// -----------------------------------------------------------------------
        .align  6
_Copy_disjoint_words:
        // Ensure 2 word aligned: when bit 3 of s is set, copy one word
        // first so that s is a multiple of 16 for the ldp loads below
        // (assumes s is already 8-byte aligned -- TODO confirm).
        tbz     s, #3, fwd_copy_aligned
        ldr     t0, [s], #8
        str     t0, [d], #8
        sub     count, count, #1

fwd_copy_aligned:
        // Bias s & d so we only pre index on the last copy: with both
        // pointers 16 bytes low, a group of four pairs uses offsets
        // #16/#32/#48 and a single writeback of #64 on the last pair.
        sub     s, s, #16
        sub     d, d, #16

        // Prime the pipeline: eight words loaded, none stored yet.
        ldp     t0, t1, [s, #16]
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        ldp     t6, t7, [s, #64]!

        // From here on count = (words not yet loaded) - 8.  A negative
        // value means fewer than eight further words exist, so go and
        // store what is already in registers.
        subs    count, count, #16
        blo     fwd_copy_drain

fwd_copy_again:
        // Each iteration stores the eight words loaded previously while
        // loading the next eight; the prefetch targets 256 bytes past s.
        prfm    pldl1keep, [s, #256]
        stp     t0, t1, [d, #16]
        ldp     t0, t1, [s, #16]
        stp     t2, t3, [d, #32]
        ldp     t2, t3, [s, #32]
        stp     t4, t5, [d, #48]
        ldp     t4, t5, [s, #48]
        stp     t6, t7, [d, #64]!
        ldp     t6, t7, [s, #64]!
        subs    count, count, #8
        bhs     fwd_copy_again

fwd_copy_drain:
        // Store the eight words that are still held in t0-t7.
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        stp     t6, t7, [d, #64]!

        // count is now -8..-1 for 0..7 words to copy.
        // Computed branch into the table below: every entry occupies one
        // 32-byte slot (.align 5), label 0 marks the end of the table,
        // and count * 32 is the negative offset back to the entry that
        // copies count + 8 words.
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #16]            // -7 == 1 word
        str     t0, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -6 = 2 words
        stp     t0, t1, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -5 = 3 words
        ldr     t2, [s, #32]
        stp     t0, t1, [d, #16]
        str     t2, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -4 = 4 words
        ldp     t2, t3, [s, #32]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -3 = 5 words
        ldp     t2, t3, [s, #32]
        ldr     t4, [s, #48]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        str     t4, [d, #48]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -2 = 6 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -1 = 7 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        ldr     t6, [s, #64]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        str     t6, [d, #64]
        // Is always aligned here: the 7 word case needs eight
        // instructions, which fills its 32-byte slot and leaves no room
        // for a ret of its own, so it falls through to the ret at 0.
        .align  5
0:
        ret

// -----------------------------------------------------------------------
// _Copy_conjoint_words
//
// Copy `count` 8-byte words from `s` to `d` when the two regions may
// overlap.  s, d, count and t0-t7 are the register aliases declared with
// .req near the top of this file.
//
//   In:     s     = source address
//           d     = destination address
//           count = number of words to copy
//   Out:    no result is produced in a register.
//   Clobb:  s, d, count, t0-t7 and the condition flags.
//
// When (d - s), taken as an unsigned value, is at least count * 8 bytes,
// an ascending copy cannot overwrite source words that are still to be
// read, so control is handed to _Copy_disjoint_words.  Otherwise the
// words are copied from the highest address downwards.
//
// As in the forward routine, eight words are loaded before count is
// first tested.
// NOTE(review): callers presumably only come here when at least eight
// words remain after the optional alignment word -- confirm against the
// C++ wrapper that calls this routine.
// -----------------------------------------------------------------------
        .align  6
_Copy_conjoint_words:
        // Unsigned compare of the pointer distance with the byte length;
        // a destination below the source wraps to a large value and so
        // also takes the forward path.
        sub     t0, d, s
        cmp     t0, count, lsl #3
        bhs     _Copy_disjoint_words

        // Backward copy: point s and d just past the end of each region.
        add     s, s, count, lsl #3
        add     d, d, count, lsl #3

        // Ensure 2 word aligned: when bit 3 of the end address in s is
        // set, copy one word first so that s is a multiple of 16 for the
        // ldp loads below (assumes s is 8-byte aligned -- TODO confirm).
        tbz     s, #3, bwd_copy_aligned
        ldr     t0, [s, #-8]!
        str     t0, [d, #-8]!
        sub     count, count, #1

bwd_copy_aligned:
        // Prime the pipeline: eight words loaded, none stored yet.  Only
        // the last pair of each group writes the pointer back.
        ldp     t0, t1, [s, #-16]
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        ldp     t6, t7, [s, #-64]!

        // From here on count = (words not yet loaded) - 8.  A negative
        // value means fewer than eight further words exist, so go and
        // store what is already in registers.
        subs    count, count, #16
        blo     bwd_copy_drain

bwd_copy_again:
        // Each iteration stores the eight words loaded previously while
        // loading the next eight.  The prefetch targets 256 bytes below
        // s; prfum is the unscaled-offset form of the prefetch
        // instruction, which accepts this negative offset.
        prfum   pldl1keep, [s, #-256]
        stp     t0, t1, [d, #-16]
        ldp     t0, t1, [s, #-16]
        stp     t2, t3, [d, #-32]
        ldp     t2, t3, [s, #-32]
        stp     t4, t5, [d, #-48]
        ldp     t4, t5, [s, #-48]
        stp     t6, t7, [d, #-64]!
        ldp     t6, t7, [s, #-64]!
        subs    count, count, #8
        bhs     bwd_copy_again

bwd_copy_drain:
        // Store the eight words that are still held in t0-t7.
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        stp     t6, t7, [d, #-64]!

        // count is now -8..-1 for 0..7 words to copy.
        // Computed branch into the table below: every entry occupies one
        // 32-byte slot (.align 5), label 0 marks the end of the table,
        // and count * 32 is the negative offset back to the entry that
        // copies count + 8 words.
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #-8]            // -7 == 1 word
        str     t0, [d, #-8]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -6 = 2 words
        stp     t0, t1, [d, #-16]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -5 = 3 words
        ldr     t2, [s, #-24]
        stp     t0, t1, [d, #-16]
        str     t2, [d, #-24]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -4 = 4 words
        ldp     t2, t3, [s, #-32]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -3 = 5 words
        ldp     t2, t3, [s, #-32]
        ldr     t4, [s, #-40]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        str     t4, [d, #-40]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -2 = 6 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -1 = 7 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        ldr     t6, [s, #-56]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        str     t6, [d, #-56]
        // Is always aligned here: the 7 word case needs eight
        // instructions, which fills its 32-byte slot and leaves no room
        // for a ret of its own, so it falls through to the ret at 0.
        .align  5
0:
        ret