--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_32.s Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,676 @@
+//
+// Copyright 2004-2007 Sun Microsystems, Inc. All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+
+ .globl fixcw
+ .globl sse_check
+ .globl sse_unavailable
+ .globl gs_load
+ .globl gs_thread
+ .globl _Atomic_cmpxchg_long_gcc
+
+ // NOTE WELL! The _Copy functions are called directly
+ // from server-compiler-generated code via CallLeafNoFP,
+ // which means that they *must* either not use floating
+ // point or use it in the same manner as does the server
+ // compiler.
+
+ .globl _Copy_conjoint_bytes
+ .globl _Copy_arrayof_conjoint_bytes
+ .globl _Copy_conjoint_jshorts_atomic
+ .globl _Copy_arrayof_conjoint_jshorts
+ .globl _Copy_conjoint_jints_atomic
+ .globl _Copy_arrayof_conjoint_jints
+ .globl _Copy_conjoint_jlongs_atomic
+ .globl _mmx_Copy_arrayof_conjoint_jshorts
+
+ .section .text,"ax"
+
+/ Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
+/ Set the FPU to 53-bit precision. This happens too early to use a stub.
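+/ 0x27f = FPU control word: all exceptions masked, 53-bit (double) precision,
+/ round to nearest.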
+ .align 16
+fixcw:
+ pushl $0x27f
+ fldcw 0(%esp)
+ popl %eax
+ ret
+
+ .align 16
+ .globl SafeFetch32
+ .globl SafeFetchN
+ .globl Fetch32PFI, Fetch32Resume
+SafeFetch32:
+SafeFetchN:
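+        / return the word at the address in arg 1, or the default in arg 2 if
+        / the load at Fetch32PFI faults; the signal handler then resumes at
+        / Fetch32Resume with the default value already in %eax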
+ movl 0x8(%esp), %eax
+ movl 0x4(%esp), %ecx
+Fetch32PFI:
+ movl (%ecx), %eax
+Fetch32Resume:
+ ret
+
+
+ .align 16
+ .globl SpinPause
+SpinPause:
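+        / rep; nop encodes the PAUSE hint (F3 90), relaxing the CPU in spin loops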
+ rep
+ nop
+ movl $1, %eax
+ ret
+
+
+/ Test SSE availability, used by os_solaris_i486.cpp
+ .align 16
+sse_check:
+ / Fault if SSE not available
+ xorps %xmm0,%xmm0
+ / No fault
+ movl $1,%eax
+ ret
+ / Signal handler continues here if SSE is not available
+sse_unavailable:
+ xorl %eax,%eax
+ ret
+
+/ Fast thread accessors, used by threadLS_solaris_i486.cpp
+ .align 16
+gs_load:
+ movl 4(%esp),%ecx
+ movl %gs:(%ecx),%eax
+ ret
+
+ .align 16
+gs_thread:
+ movl %gs:0x0,%eax
+ ret
+
+ / Support for void Copy::conjoint_bytes(void* from,
+ / void* to,
+ / size_t count)
+ .align 16
+_Copy_conjoint_bytes:
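+        / load the arguments, then pick a direction: copy forward when
+        / to <= from or when the ranges do not overlap, otherwise copy
+        / backward so overlapping bytes are not clobbered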
+ pushl %esi
+ movl 4+12(%esp),%ecx / count
+ pushl %edi
+ movl 8+ 4(%esp),%esi / from
+ movl 8+ 8(%esp),%edi / to
+ cmpl %esi,%edi
+ leal -1(%esi,%ecx),%eax / from + count - 1
+ jbe cb_CopyRight
+ cmpl %eax,%edi
+ jbe cb_CopyLeft
+ / copy from low to high
+cb_CopyRight:
+ cmpl $3,%ecx
+ jbe 5f / <= 3 bytes
+ / align source address at dword address boundary
+ movl %ecx,%eax / original count
+ movl $4,%ecx
+ subl %esi,%ecx
+ andl $3,%ecx / prefix byte count
+ jz 1f / no prefix
+ subl %ecx,%eax / byte count less prefix
+ / copy prefix
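+        / %edi = to - from, so (%edi,%esi,1) addresses the destination while
+        / only %esi is advanced; %edi is restored after the loop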
+ subl %esi,%edi
+0: movb (%esi),%dl
+ movb %dl,(%edi,%esi,1)
+ addl $1,%esi
+ subl $1,%ecx
+ jnz 0b
+ addl %esi,%edi
+1: movl %eax,%ecx / byte count less prefix
+ shrl $2,%ecx / dword count
+ jz 4f / no dwords to move
+ cmpl $32,%ecx
+ jbe 2f / <= 32 dwords
+ / copy aligned dwords
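+        / smovl is the Sun assembler spelling of movsl; rep; smovl copies
+        / %ecx dwords from (%esi) to (%edi)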
+ rep; smovl
+ jmp 4f
+ / copy aligned dwords
+2: subl %esi,%edi
+ .align 16
+3: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ addl $4,%esi
+ subl $1,%ecx
+ jnz 3b
+ addl %esi,%edi
+4: movl %eax,%ecx / byte count less prefix
+ andl $3,%ecx / suffix byte count
+ jz 7f / no suffix
+ / copy suffix
+5: xorl %eax,%eax
+6: movb (%esi,%eax,1),%dl
+ movb %dl,(%edi,%eax,1)
+ addl $1,%eax
+ subl $1,%ecx
+ jnz 6b
+7: popl %edi
+ popl %esi
+ ret
+ / copy from high to low
+cb_CopyLeft:
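+        / the direction flag is set so rep; smovl and the loops below walk
+        / from high addresses to low; cld restores it before returning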
+ std
+ leal -4(%edi,%ecx),%edi / to + count - 4
+ movl %eax,%esi / from + count - 1
+ movl %ecx,%eax
+ subl $3,%esi / from + count - 4
+ cmpl $3,%ecx
+ jbe 5f / <= 3 bytes
+1: shrl $2,%ecx / dword count
+ jz 4f / no dwords to move
+ cmpl $32,%ecx
+ ja 3f / > 32 dwords
+ / copy dwords, aligned or not
+ subl %esi,%edi
+ .align 16
+2: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ subl $4,%esi
+ subl $1,%ecx
+ jnz 2b
+ addl %esi,%edi
+ jmp 4f
+ / copy dwords, aligned or not
+3: rep; smovl
+4: movl %eax,%ecx / byte count
+ andl $3,%ecx / suffix byte count
+ jz 7f / no suffix
+ / copy suffix
+5: subl %esi,%edi
+ addl $3,%esi
+6: movb (%esi),%dl
+ movb %dl,(%edi,%esi,1)
+ subl $1,%esi
+ subl $1,%ecx
+ jnz 6b
+7: cld
+ popl %edi
+ popl %esi
+ ret
+
+ / Support for void Copy::arrayof_conjoint_bytes(void* from,
+ / void* to,
+ / size_t count)
+ /
+ / Same as _Copy_conjoint_bytes, except no source alignment check.
+ .align 16
+_Copy_arrayof_conjoint_bytes:
+ pushl %esi
+ movl 4+12(%esp),%ecx / count
+ pushl %edi
+ movl 8+ 4(%esp),%esi / from
+ movl 8+ 8(%esp),%edi / to
+ cmpl %esi,%edi
+ leal -1(%esi,%ecx),%eax / from + count - 1
+ jbe acb_CopyRight
+ cmpl %eax,%edi
+ jbe acb_CopyLeft
+ / copy from low to high
+acb_CopyRight:
+ cmpl $3,%ecx
+ jbe 5f
+1: movl %ecx,%eax
+ shrl $2,%ecx
+ jz 4f
+ cmpl $32,%ecx
+ ja 3f
+ / copy aligned dwords
+ subl %esi,%edi
+ .align 16
+2: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ addl $4,%esi
+ subl $1,%ecx
+ jnz 2b
+ addl %esi,%edi
+ jmp 4f
+ / copy aligned dwords
+3: rep; smovl
+4: movl %eax,%ecx
+ andl $3,%ecx
+ jz 7f
+ / copy suffix
+5: xorl %eax,%eax
+6: movb (%esi,%eax,1),%dl
+ movb %dl,(%edi,%eax,1)
+ addl $1,%eax
+ subl $1,%ecx
+ jnz 6b
+7: popl %edi
+ popl %esi
+ ret
+acb_CopyLeft:
+ std
+ leal -4(%edi,%ecx),%edi / to + count - 4
+ movl %eax,%esi / from + count - 1
+ movl %ecx,%eax
+ subl $3,%esi / from + count - 4
+ cmpl $3,%ecx
+ jbe 5f
+1: shrl $2,%ecx
+ jz 4f
+ cmpl $32,%ecx
+ jbe 2f / <= 32 dwords
+ rep; smovl
+ jmp 4f
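+        / .=.+8 advances the location counter, emitting 8 bytes of padding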
+ .=.+8
+2: subl %esi,%edi
+ .align 16
+3: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ subl $4,%esi
+ subl $1,%ecx
+ jnz 3b
+ addl %esi,%edi
+4: movl %eax,%ecx
+ andl $3,%ecx
+ jz 7f
+5: subl %esi,%edi
+ addl $3,%esi
+6: movb (%esi),%dl
+ movb %dl,(%edi,%esi,1)
+ subl $1,%esi
+ subl $1,%ecx
+ jnz 6b
+7: cld
+ popl %edi
+ popl %esi
+ ret
+
+ / Support for void Copy::conjoint_jshorts_atomic(void* from,
+ / void* to,
+ / size_t count)
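+        / "atomic" means each jshort is read and written with a single aligned
+        / access of at least 16 bits, so readers never see a torn element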
+ .align 16
+_Copy_conjoint_jshorts_atomic:
+ pushl %esi
+ movl 4+12(%esp),%ecx / count
+ pushl %edi
+ movl 8+ 4(%esp),%esi / from
+ movl 8+ 8(%esp),%edi / to
+ cmpl %esi,%edi
+ leal -2(%esi,%ecx,2),%eax / from + count*2 - 2
+ jbe cs_CopyRight
+ cmpl %eax,%edi
+ jbe cs_CopyLeft
+ / copy from low to high
+cs_CopyRight:
+ / align source address at dword address boundary
+ movl %esi,%eax / original from
+ andl $3,%eax / either 0 or 2
+ jz 1f / no prefix
+ / copy prefix
+ movw (%esi),%dx
+ movw %dx,(%edi)
+ addl %eax,%esi / %eax == 2
+ addl %eax,%edi
+ subl $1,%ecx
+1: movl %ecx,%eax / word count less prefix
+ sarl %ecx / dword count
+ jz 4f / no dwords to move
+ cmpl $32,%ecx
+ jbe 2f / <= 32 dwords
+ / copy aligned dwords
+ rep; smovl
+ jmp 4f
+ / copy aligned dwords
+2: subl %esi,%edi
+ .align 16
+3: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ addl $4,%esi
+ subl $1,%ecx
+ jnz 3b
+ addl %esi,%edi
+4: andl $1,%eax / suffix count
+ jz 5f / no suffix
+ / copy suffix
+ movw (%esi),%dx
+ movw %dx,(%edi)
+5: popl %edi
+ popl %esi
+ ret
+ / copy from high to low
+cs_CopyLeft:
+ std
+ leal -4(%edi,%ecx,2),%edi / to + count*2 - 4
+ movl %eax,%esi / from + count*2 - 2
+ movl %ecx,%eax
+ subl $2,%esi / from + count*2 - 4
+1: sarl %ecx / dword count
+ jz 4f / no dwords to move
+ cmpl $32,%ecx
+ ja 3f / > 32 dwords
+ subl %esi,%edi
+ .align 16
+2: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ subl $4,%esi
+ subl $1,%ecx
+ jnz 2b
+ addl %esi,%edi
+ jmp 4f
+3: rep; smovl
+4: andl $1,%eax / suffix count
+ jz 5f / no suffix
+ / copy suffix
+ addl $2,%esi
+ addl $2,%edi
+ movw (%esi),%dx
+ movw %dx,(%edi)
+5: cld
+ popl %edi
+ popl %esi
+ ret
+
+ / Support for void Copy::arrayof_conjoint_jshorts(void* from,
+ / void* to,
+ / size_t count)
+ .align 16
+_Copy_arrayof_conjoint_jshorts:
+ pushl %esi
+ movl 4+12(%esp),%ecx / count
+ pushl %edi
+ movl 8+ 4(%esp),%esi / from
+ movl 8+ 8(%esp),%edi / to
+ cmpl %esi,%edi
+ leal -2(%esi,%ecx,2),%eax / from + count*2 - 2
+ jbe acs_CopyRight
+ cmpl %eax,%edi
+ jbe acs_CopyLeft
+acs_CopyRight:
+ movl %ecx,%eax / word count
+ sarl %ecx / dword count
+ jz 4f / no dwords to move
+ cmpl $32,%ecx
+ jbe 2f / <= 32 dwords
+ / copy aligned dwords
+ rep; smovl
+ jmp 4f
+ / copy aligned dwords
+ .=.+5
+2: subl %esi,%edi
+ .align 16
+3: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ addl $4,%esi
+ subl $1,%ecx
+ jnz 3b
+ addl %esi,%edi
+4: andl $1,%eax / suffix count
+ jz 5f / no suffix
+ / copy suffix
+ movw (%esi),%dx
+ movw %dx,(%edi)
+5: popl %edi
+ popl %esi
+ ret
+acs_CopyLeft:
+ std
+ leal -4(%edi,%ecx,2),%edi / to + count*2 - 4
+ movl %eax,%esi / from + count*2 - 2
+ movl %ecx,%eax
+ subl $2,%esi / from + count*2 - 4
+ sarl %ecx / dword count
+ jz 4f / no dwords to move
+ cmpl $32,%ecx
+ ja 3f / > 32 dwords
+ subl %esi,%edi
+ .align 16
+2: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ subl $4,%esi
+ subl $1,%ecx
+ jnz 2b
+ addl %esi,%edi
+ jmp 4f
+3: rep; smovl
+4: andl $1,%eax / suffix count
+ jz 5f / no suffix
+ / copy suffix
+ addl $2,%esi
+ addl $2,%edi
+ movw (%esi),%dx
+ movw %dx,(%edi)
+5: cld
+ popl %edi
+ popl %esi
+ ret
+
+ / Support for void Copy::conjoint_jints_atomic(void* from,
+ / void* to,
+ / size_t count)
+        / Equivalent to arrayof_conjoint_jints
+ .align 16
+_Copy_conjoint_jints_atomic:
+_Copy_arrayof_conjoint_jints:
+ pushl %esi
+ movl 4+12(%esp),%ecx / count
+ pushl %edi
+ movl 8+ 4(%esp),%esi / from
+ movl 8+ 8(%esp),%edi / to
+ cmpl %esi,%edi
+ leal -4(%esi,%ecx,4),%eax / from + count*4 - 4
+ jbe ci_CopyRight
+ cmpl %eax,%edi
+ jbe ci_CopyLeft
+ci_CopyRight:
+ cmpl $32,%ecx
+ jbe 2f / <= 32 dwords
+ rep; smovl
+ popl %edi
+ popl %esi
+ ret
+ .=.+10
+2: subl %esi,%edi
+ .align 16
+3: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ addl $4,%esi
+ subl $1,%ecx
+ jnz 3b
+ popl %edi
+ popl %esi
+ ret
+ci_CopyLeft:
+ std
+ leal -4(%edi,%ecx,4),%edi / to + count*4 - 4
+ cmpl $32,%ecx
+ ja 3f / > 32 dwords
+ subl %eax,%edi / eax == from + count*4 - 4
+ .align 16
+2: movl (%eax),%edx
+ movl %edx,(%edi,%eax,1)
+ subl $4,%eax
+ subl $1,%ecx
+ jnz 2b
+ cld
+ popl %edi
+ popl %esi
+ ret
+3: movl %eax,%esi / from + count*4 - 4
+ rep; smovl
+ cld
+ popl %edi
+ popl %esi
+ ret
+
+ / Support for void Copy::conjoint_jlongs_atomic(jlong* from,
+ / jlong* to,
+ / size_t count)
+ /
+ / 32-bit
+ /
+ / count treated as signed
+ /
+ / if (from > to) {
+ / while (--count >= 0) {
+ / *to++ = *from++;
+ / }
+ / } else {
+ / while (--count >= 0) {
+ / to[count] = from[count];
+ / }
+ / }
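+        / fildll/fistpll move each jlong with a single 64-bit x87 load and
+        / store, keeping the copy atomic per element on IA-32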
+ .align 16
+_Copy_conjoint_jlongs_atomic:
+ movl 4+8(%esp),%ecx / count
+ movl 4+0(%esp),%eax / from
+ movl 4+4(%esp),%edx / to
+ cmpl %eax,%edx
+ jae cla_CopyLeft
+cla_CopyRight:
+ subl %eax,%edx
+ jmp 2f
+ .align 16
+1: fildll (%eax)
+ fistpll (%edx,%eax,1)
+ addl $8,%eax
+2: subl $1,%ecx
+ jge 1b
+ ret
+ .align 16
+3: fildll (%eax,%ecx,8)
+ fistpll (%edx,%ecx,8)
+cla_CopyLeft:
+ subl $1,%ecx
+ jge 3b
+ ret
+
+ / Support for void Copy::arrayof_conjoint_jshorts(void* from,
+ / void* to,
+ / size_t count)
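+        / MMX variant of _Copy_arrayof_conjoint_jshorts; same contract as above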
+ .align 16
+_mmx_Copy_arrayof_conjoint_jshorts:
+ pushl %esi
+ movl 4+12(%esp),%ecx
+ pushl %edi
+ movl 8+ 4(%esp),%esi
+ movl 8+ 8(%esp),%edi
+ cmpl %esi,%edi
+ leal -2(%esi,%ecx,2),%eax
+ jbe mmx_acs_CopyRight
+ cmpl %eax,%edi
+ jbe mmx_acs_CopyLeft
+mmx_acs_CopyRight:
+ movl %ecx,%eax
+ sarl %ecx
+ je 5f
+ cmpl $33,%ecx
+ jae 3f
+1: subl %esi,%edi
+ .align 16
+2: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ addl $4,%esi
+ subl $1,%ecx
+ jnz 2b
+ addl %esi,%edi
+ jmp 5f
+3: smovl / align to 8 bytes; we know we are 4-byte aligned to start
+ subl $1,%ecx
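+        / main loop: move 64 bytes (8 quadwords) per iteration through
+        / %mm0-%mm2; emms afterwards releases the MMX/x87 register state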
+4: .align 16
+ movq 0(%esi),%mm0
+ addl $64,%edi
+ movq 8(%esi),%mm1
+ subl $16,%ecx
+ movq 16(%esi),%mm2
+ movq %mm0,-64(%edi)
+ movq 24(%esi),%mm0
+ movq %mm1,-56(%edi)
+ movq 32(%esi),%mm1
+ movq %mm2,-48(%edi)
+ movq 40(%esi),%mm2
+ movq %mm0,-40(%edi)
+ movq 48(%esi),%mm0
+ movq %mm1,-32(%edi)
+ movq 56(%esi),%mm1
+ movq %mm2,-24(%edi)
+ movq %mm0,-16(%edi)
+ addl $64,%esi
+ movq %mm1,-8(%edi)
+ cmpl $16,%ecx
+ jge 4b
+ emms
+ testl %ecx,%ecx
+ ja 1b
+5: andl $1,%eax
+ je 7f
+6: movw (%esi),%dx
+ movw %dx,(%edi)
+7: popl %edi
+ popl %esi
+ ret
+mmx_acs_CopyLeft:
+ std
+ leal -4(%edi,%ecx,2),%edi
+ movl %eax,%esi
+ movl %ecx,%eax
+ subl $2,%esi
+ sarl %ecx
+ je 4f
+ cmpl $32,%ecx
+ ja 3f
+ subl %esi,%edi
+ .align 16
+2: movl (%esi),%edx
+ movl %edx,(%edi,%esi,1)
+ subl $4,%esi
+ subl $1,%ecx
+ jnz 2b
+ addl %esi,%edi
+ jmp 4f
+3: rep; smovl
+4: andl $1,%eax
+ je 6f
+ addl $2,%esi
+ addl $2,%edi
+5: movw (%esi),%dx
+ movw %dx,(%edi)
+6: cld
+ popl %edi
+ popl %esi
+ ret
+
+
+ / Support for jlong Atomic::cmpxchg(jlong exchange_value,
+ / volatile jlong* dest,
+ / jlong compare_value,
+ / bool is_MP)
+ / Used only for Solaris/gcc builds
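+        / cmpxchg8b compares %edx:%eax with the 8 bytes at (%edi); if equal it
+        / stores %ecx:%ebx there, otherwise it loads the current value into
+        / %edx:%eax. The lock prefix is skipped (je 1f) when is_MP is false.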
+ .align 16
+_Atomic_cmpxchg_long_gcc:
+ / 8(%esp) : return PC
+ pushl %ebx / 4(%esp) : old %ebx
+ pushl %edi / 0(%esp) : old %edi
+ movl 12(%esp), %ebx / 12(%esp) : exchange_value (low)
+ movl 16(%esp), %ecx / 16(%esp) : exchange_value (high)
+ movl 24(%esp), %eax / 24(%esp) : compare_value (low)
+ movl 28(%esp), %edx / 28(%esp) : compare_value (high)
+ movl 20(%esp), %edi / 20(%esp) : dest
+ cmpl $0, 32(%esp) / 32(%esp) : is_MP
+ je 1f
+ lock
+1: cmpxchg8b (%edi)
+ popl %edi
+ popl %ebx
+ ret