hotspot/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.s
changeset 36595 3322a76f3a00
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.s	Thu Mar 10 14:53:09 2016 +0000
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2016, Linaro Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+        .global _Copy_conjoint_words
+        .global _Copy_disjoint_words
+
+s       .req    x0
+d       .req    x1
+count   .req    x2
+t0      .req    x3
+t1      .req    x4
+t2      .req    x5
+t3      .req    x6
+t4      .req    x7
+t5      .req    x8
+t6      .req    x9
+t7      .req    x10
+
+        .align  6
+_Copy_disjoint_words:
+        // Ensure 2 word aligned
+        tbz     s, #3, fwd_copy_aligned
+        ldr     t0, [s], #8
+        str     t0, [d], #8
+        sub     count, count, #1
+
+fwd_copy_aligned:
+        // Bias s & d so we only pre index on the last copy
+        sub     s, s, #16
+        sub     d, d, #16
+
+        ldp     t0, t1, [s, #16]
+        ldp     t2, t3, [s, #32]
+        ldp     t4, t5, [s, #48]
+        ldp     t6, t7, [s, #64]!
+
+        subs    count, count, #16
+        blo     fwd_copy_drain
+
+fwd_copy_again:
+        prfm    pldl1keep, [s, #256]
+        stp     t0, t1, [d, #16]
+        ldp     t0, t1, [s, #16]
+        stp     t2, t3, [d, #32]
+        ldp     t2, t3, [s, #32]
+        stp     t4, t5, [d, #48]
+        ldp     t4, t5, [s, #48]
+        stp     t6, t7, [d, #64]!
+        ldp     t6, t7, [s, #64]!
+        subs    count, count, #8
+        bhs     fwd_copy_again
+
+fwd_copy_drain:
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        stp     t4, t5, [d, #48]
+        stp     t6, t7, [d, #64]!
+
+        // count is now -8..-1 for 0..7 words to copy
+        adr     t0, 0f
+        add     t0, t0, count, lsl #5
+        br      t0
+
+        .align  5
+        ret                             // -8 == 0 words
+        .align  5
+        ldr     t0, [s, #16]            // -7 == 1 word
+        str     t0, [d, #16]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -6 = 2 words
+        stp     t0, t1, [d, #16]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -5 = 3 words
+        ldr     t2, [s, #32]
+        stp     t0, t1, [d, #16]
+        str     t2, [d, #32]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -4 = 4 words
+        ldp     t2, t3, [s, #32]
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -3 = 5 words
+        ldp     t2, t3, [s, #32]
+        ldr     t4, [s, #48]
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        str     t4, [d, #48]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -2 = 6 words
+        ldp     t2, t3, [s, #32]
+        ldp     t4, t5, [s, #48]
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        stp     t4, t5, [d, #48]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -1 = 7 words
+        ldp     t2, t3, [s, #32]
+        ldp     t4, t5, [s, #48]
+        ldr     t6, [s, #64]
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        stp     t4, t5, [d, #48]
+        str     t6, [d, #64]
+        // Is always aligned here, code for 7 words is one instruction
+        // too large so it just falls through.
+        .align  5
+0:
+        ret
+
+        .align  6
+_Copy_conjoint_words:
+        sub     t0, d, s
+        cmp     t0, count, lsl #3
+        bhs     _Copy_disjoint_words
+
+        add     s, s, count, lsl #3
+        add     d, d, count, lsl #3
+
+        // Ensure 2 word aligned
+        tbz     s, #3, bwd_copy_aligned
+        ldr     t0, [s, #-8]!
+        str     t0, [d, #-8]!
+        sub     count, count, #1
+
+bwd_copy_aligned:
+        ldp     t0, t1, [s, #-16]
+        ldp     t2, t3, [s, #-32]
+        ldp     t4, t5, [s, #-48]
+        ldp     t6, t7, [s, #-64]!
+
+        subs    count, count, #16
+        blo     bwd_copy_drain
+
+bwd_copy_again:
+        prfm    pldl1keep, [s, #-256]
+        stp     t0, t1, [d, #-16]
+        ldp     t0, t1, [s, #-16]
+        stp     t2, t3, [d, #-32]
+        ldp     t2, t3, [s, #-32]
+        stp     t4, t5, [d, #-48]
+        ldp     t4, t5, [s, #-48]
+        stp     t6, t7, [d, #-64]!
+        ldp     t6, t7, [s, #-64]!
+        subs    count, count, #8
+        bhs     bwd_copy_again
+
+bwd_copy_drain:
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        stp     t4, t5, [d, #-48]
+        stp     t6, t7, [d, #-64]!
+
+        // count is now -8..-1 for 0..7 words to copy
+        adr     t0, 0f
+        add     t0, t0, count, lsl #5
+        br      t0
+
+        .align  5
+        ret                             // -8 == 0 words
+        .align  5
+        ldr     t0, [s, #-8]            // -7 == 1 word
+        str     t0, [d, #-8]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -6 = 2 words
+        stp     t0, t1, [d, #-16]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -5 = 3 words
+        ldr     t2, [s, #-24]
+        stp     t0, t1, [d, #-16]
+        str     t2, [d, #-24]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -4 = 4 words
+        ldp     t2, t3, [s, #-32]
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -3 = 5 words
+        ldp     t2, t3, [s, #-32]
+        ldr     t4, [s, #-40]
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        str     t4, [d, #-40]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -2 = 6 words
+        ldp     t2, t3, [s, #-32]
+        ldp     t4, t5, [s, #-48]
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        stp     t4, t5, [d, #-48]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -1 = 7 words
+        ldp     t2, t3, [s, #-32]
+        ldp     t4, t5, [s, #-48]
+        ldr     t6, [s, #-56]
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        stp     t4, t5, [d, #-48]
+        str     t6, [d, #-56]
+        // Is always aligned here, code for 7 words is one instruction
+        // too large so it just falls through.
+        .align  5
+0:
+        ret