7143664: Clean up OrderAccess implementations and usage
Summary: Clarify and correct the abstract model for memory barriers provided by the orderAccess class. Refactor the implementations using template specialization to allow the bulk of the code to be shared, with platform specific customizations applied as needed.
Reviewed-by: acorn, dcubed, dholmes, dlong, goetz, kbarrett, sgehwolf
Contributed-by: Erik Osterlund <erik.osterlund@lnu.se>
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP
#define OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP
static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
(void)memmove(to, from, count * HeapWordSize);
#else
// Includes a zero-count check.
intx temp;
__asm__ volatile(" testl %6,%6 ;"
" jz 7f ;"
" cmpl %4,%5 ;"
" leal -4(%4,%6,4),%3;"
" jbe 1f ;"
" cmpl %7,%5 ;"
" jbe 4f ;"
"1: cmpl $32,%6 ;"
" ja 3f ;"
" subl %4,%1 ;"
"2: movl (%4),%3 ;"
" movl %7,(%5,%4,1) ;"
" addl $4,%0 ;"
" subl $1,%2 ;"
" jnz 2b ;"
" jmp 7f ;"
"3: rep; smovl ;"
" jmp 7f ;"
"4: cmpl $32,%2 ;"
" movl %7,%0 ;"
" leal -4(%5,%6,4),%1;"
" ja 6f ;"
" subl %4,%1 ;"
"5: movl (%4),%3 ;"
" movl %7,(%5,%4,1) ;"
" subl $4,%0 ;"
" subl $1,%2 ;"
" jnz 5b ;"
" jmp 7f ;"
"6: std ;"
" rep; smovl ;"
" cld ;"
"7: nop "
: "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
: "0" (from), "1" (to), "2" (count), "3" (temp)
: "memory", "flags");
#endif // AMD64
}
static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
switch (count) {
case 8: to[7] = from[7];
case 7: to[6] = from[6];
case 6: to[5] = from[5];
case 5: to[4] = from[4];
case 4: to[3] = from[3];
case 3: to[2] = from[2];
case 2: to[1] = from[1];
case 1: to[0] = from[0];
case 0: break;
default:
(void)memcpy(to, from, count * HeapWordSize);
break;
}
#else
// Includes a zero-count check.
intx temp;
__asm__ volatile(" testl %6,%6 ;"
" jz 3f ;"
" cmpl $32,%6 ;"
" ja 2f ;"
" subl %4,%1 ;"
"1: movl (%4),%3 ;"
" movl %7,(%5,%4,1);"
" addl $4,%0 ;"
" subl $1,%2 ;"
" jnz 1b ;"
" jmp 3f ;"
"2: rep; smovl ;"
"3: nop "
: "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
: "0" (from), "1" (to), "2" (count), "3" (temp)
: "memory", "cc");
#endif // AMD64
}
static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
switch (count) {
case 8: to[7] = from[7];
case 7: to[6] = from[6];
case 6: to[5] = from[5];
case 5: to[4] = from[4];
case 4: to[3] = from[3];
case 3: to[2] = from[2];
case 2: to[1] = from[1];
case 1: to[0] = from[0];
case 0: break;
default:
while (count-- > 0) {
*to++ = *from++;
}
break;
}
#else
// pd_disjoint_words is word-atomic in this implementation.
pd_disjoint_words(from, to, count);
#endif // AMD64
}
static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
pd_conjoint_words(from, to, count);
}
static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
pd_disjoint_words(from, to, count);
}
static void pd_conjoint_bytes(void* from, void* to, size_t count) {
#ifdef AMD64
(void)memmove(to, from, count);
#else
// Includes a zero-count check.
intx temp;
__asm__ volatile(" testl %6,%6 ;"
" jz 13f ;"
" cmpl %4,%5 ;"
" leal -1(%4,%6),%3 ;"
" jbe 1f ;"
" cmpl %7,%5 ;"
" jbe 8f ;"
"1: cmpl $3,%6 ;"
" jbe 6f ;"
" movl %6,%3 ;"
" movl $4,%2 ;"
" subl %4,%2 ;"
" andl $3,%2 ;"
" jz 2f ;"
" subl %6,%3 ;"
" rep; smovb ;"
"2: movl %7,%2 ;"
" shrl $2,%2 ;"
" jz 5f ;"
" cmpl $32,%2 ;"
" ja 4f ;"
" subl %4,%1 ;"
"3: movl (%4),%%edx ;"
" movl %%edx,(%5,%4,1);"
" addl $4,%0 ;"
" subl $1,%2 ;"
" jnz 3b ;"
" addl %4,%1 ;"
" jmp 5f ;"
"4: rep; smovl ;"
"5: movl %7,%2 ;"
" andl $3,%2 ;"
" jz 13f ;"
"6: xorl %7,%3 ;"
"7: movb (%4,%7,1),%%dl ;"
" movb %%dl,(%5,%7,1) ;"
" addl $1,%3 ;"
" subl $1,%2 ;"
" jnz 7b ;"
" jmp 13f ;"
"8: std ;"
" cmpl $12,%2 ;"
" ja 9f ;"
" movl %7,%0 ;"
" leal -1(%6,%5),%1 ;"
" jmp 11f ;"
"9: xchgl %3,%2 ;"
" movl %6,%0 ;"
" addl $1,%2 ;"
" leal -1(%7,%5),%1 ;"
" andl $3,%2 ;"
" jz 10f ;"
" subl %6,%3 ;"
" rep; smovb ;"
"10: movl %7,%2 ;"
" subl $3,%0 ;"
" shrl $2,%2 ;"
" subl $3,%1 ;"
" rep; smovl ;"
" andl $3,%3 ;"
" jz 12f ;"
" movl %7,%2 ;"
" addl $3,%0 ;"
" addl $3,%1 ;"
"11: rep; smovb ;"
"12: cld ;"
"13: nop ;"
: "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
: "0" (from), "1" (to), "2" (count), "3" (temp)
: "memory", "flags", "%edx");
#endif // AMD64
}
static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
pd_conjoint_bytes(from, to, count);
}
static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
_Copy_conjoint_jshorts_atomic(from, to, count);
}
static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
#ifdef AMD64
_Copy_conjoint_jints_atomic(from, to, count);
#else
assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
// pd_conjoint_words is word-atomic in this implementation.
pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
#endif // AMD64
}
static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
#ifdef AMD64
_Copy_conjoint_jlongs_atomic(from, to, count);
#else
// Guarantee use of fild/fistp or xmm regs via some asm code, because compilers won't.
if (from > to) {
while (count-- > 0) {
__asm__ volatile("fildll (%0); fistpll (%1)"
:
: "r" (from), "r" (to)
: "memory" );
++from;
++to;
}
} else {
while (count-- > 0) {
__asm__ volatile("fildll (%0,%2,8); fistpll (%1,%2,8)"
:
: "r" (from), "r" (to), "r" (count)
: "memory" );
}
}
#endif // AMD64
}
static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
#ifdef AMD64
assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
_Copy_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
#else
assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
// pd_conjoint_words is word-atomic in this implementation.
pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
#endif // AMD64
}
static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
_Copy_arrayof_conjoint_bytes(from, to, count);
}
static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
_Copy_arrayof_conjoint_jshorts(from, to, count);
}
static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
_Copy_arrayof_conjoint_jints(from, to, count);
#else
pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
#endif // AMD64
}
static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
_Copy_arrayof_conjoint_jlongs(from, to, count);
#else
pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
#endif // AMD64
}
static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
_Copy_arrayof_conjoint_jlongs(from, to, count);
#else
pd_conjoint_oops_atomic((oop*)from, (oop*)to, count);
#endif // AMD64
}
#endif // OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP