33089
|
1 |
/*
|
|
2 |
* Copyright (c) 2015, Intel Corporation.
|
|
3 |
* Intel Math Library (LIBM) Source Code
|
|
4 |
*
|
|
5 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
6 |
*
|
|
7 |
* This code is free software; you can redistribute it and/or modify it
|
|
8 |
* under the terms of the GNU General Public License version 2 only, as
|
|
9 |
* published by the Free Software Foundation.
|
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
|
23 |
* questions.
|
|
24 |
*
|
|
25 |
*/
|
|
26 |
|
|
27 |
/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//      e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//
//  where
//      x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//      m = n*K + j,           m, n, j - signed integers, j in [-K/2..K/2]
//
//  Values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
//  P(y) is a minimax polynomial approximation of exp(y)-1
//  on the small interval [-log(2)/K..log(2)/K]
//  (its coefficients were calculated with Maple V).
//
//  To avoid problems with arithmetic overflow and underflow,
//  the value of 2^n is safely computed as 2^n1 * 2^n2, where
//  n1 is in [-BIAS/2..BIAS/2] and BIAS is the value of the exponent bias.
//
// Special cases:
//  exp(NaN) = NaN
//  exp(+INF) = +INF
//  exp(-INF) = 0
//  exp(x) = 1 for subnormals
//  for finite argument, only exp(0)=1 is exact
//  For IEEE double
//    if x >  709.782712893383973096 then exp(x) overflow
//    if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/
|
|
60 |
|
|
61 |
|
|
62 |
#include "precompiled.hpp"
|
|
63 |
#include "asm/assembler.hpp"
|
|
64 |
#include "asm/assembler.inline.hpp"
|
|
65 |
#include "macroAssembler_x86.hpp"
|
|
66 |
|
|
67 |
// ALIGNED_(x): prefix for a declaration that must be aligned to x bytes.
// Expands to the MSVC spelling when _MSC_VER is defined and to the
// GCC-style attribute otherwise.
#if defined(_MSC_VER)
#  define ALIGNED_(x) __declspec(align(x))
#else
#  define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
|
|
72 |
|
|
73 |
#ifdef _LP64
|
|
74 |
|
|
75 |
// Packed double constants used by the 64-bit fast_exp. Each row is one
// 16-byte vector (two doubles, stored as little-endian 32-bit halves) and is
// loaded by fast_exp at the byte offset shown.
ALIGNED_(16) juint _cv[] = {
    0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,   // +0 : K/log(2) = 64/log(2), both lanes
    0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL,   // +16: high part of log(2)/K
    0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL,   // +32: low part of log(2)/K
    0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL,   // +48: polynomial coefficient, approx. 1/2
    0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,   // +64: polynomial coefficients, approx. 1/720 and 1/24
    0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL    // +80: polynomial coefficients, approx. 1/120 and 1/6
};
|
|
83 |
|
|
84 |
// 0x4338000000000000 (= 1.5 * 2^52) in both lanes. fast_exp adds and then
// subtracts this value so that x*K/log(2) is rounded to an integer whose
// bits can be read from the low dword of the intermediate sum.
ALIGNED_(16) juint _shifter[] = {
    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
};
|
|
88 |
|
|
89 |
// Per-lane mask 0x00000000ffffffc0: keeps bits 6..31 of the rounded integer m
// (drops the 6 table-index bits and the upper dword).
ALIGNED_(16) juint _mmask[] = {
    0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
};
|
|
93 |
|
|
94 |
// Per-lane value 0xffc0 = 1023 << 6: the double exponent bias, pre-shifted to
// line up with the masked m before fast_exp shifts the sum left by 46 bits
// into the exponent field.
ALIGNED_(16) juint _bias[] = {
    0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
};
|
|
98 |
|
|
99 |
// Table T[j] for j = 0..63, one 16-byte entry per row (fast_exp indexes it with
// the byte offset (m & 63) << 4). Each entry holds two doubles as little-endian
// 32-bit halves:
//   dwords 0-1: small correction term added to the polynomial sum (T_lo[j])
//   dwords 2-3: mantissa bits of T_hi[j] with a zero exponent field; fast_exp
//               ORs the exponent for 2^n into this value.
ALIGNED_(16) juint _Tbl_addr[] = {
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,   // j = 0
    0x0e03754dUL, 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL,   // j = 1
    0x3567f613UL, 0x3c8cd252UL, 0xd3158574UL, 0x000059b0UL,   // j = 2
    0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 0x00008745UL,   // j = 3
    0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,   // j = 4
    0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL,   // j = 5
    0x1e63bcd8UL, 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL,   // j = 6
    0x26f0387bUL, 0x3ca4c554UL, 0xaea92ddfUL, 0x0001429aUL,   // j = 7
    0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 0x000172b8UL,   // j = 8
    0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,   // j = 9
    0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL,   // j = 10
    0x44a6c38dUL, 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL,   // j = 11
    0xe3a8a894UL, 0x3c968efdUL, 0x6e756238UL, 0x0002387aUL,   // j = 12
    0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 0x00026b45UL,   // j = 13
    0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,   // j = 14
    0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL,   // j = 15
    0x4db0abb6UL, 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL,   // j = 16
    0x5dd3f84aUL, 0x3c8fdd39UL, 0xb26416ffUL, 0x00033c08UL,   // j = 17
    0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 0x000371a7UL,   // j = 18
    0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,   // j = 19
    0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL,   // j = 20
    0x259d9205UL, 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL,   // j = 21
    0x60c2ac12UL, 0x3c4363edUL, 0x6061892dUL, 0x00044e08UL,   // j = 22
    0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 0x000486a2UL,   // j = 23
    0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,   // j = 24
    0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL,   // j = 25
    0xc1aae707UL, 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL,   // j = 26
    0x18fdd78eUL, 0x3c933505UL, 0x36b527daUL, 0x00056f47UL,   // j = 27
    0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 0x0005ab07UL,   // j = 28
    0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,   // j = 29
    0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL,   // j = 30
    0x0073dc06UL, 0x3c99f087UL, 0x82552224UL, 0x00066238UL,   // j = 31
    0x0da05571UL, 0x3c998d4dUL, 0x667f3bccUL, 0x0006a09eUL,   // j = 32
    0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 0x0006dfb2UL,   // j = 33
    0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,   // j = 34
    0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL,   // j = 35
    0x461e9f86UL, 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL,   // j = 36
    0xabd66c55UL, 0x3c65ebe1UL, 0x36cf4e62UL, 0x0007e2f3UL,   // j = 37
    0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 0x00082589UL,   // j = 38
    0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,   // j = 39
    0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL,   // j = 40
    0x59f35f44UL, 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL,   // j = 41
    0x9c06283cUL, 0x3ca360baUL, 0xb0cdc5e4UL, 0x00093737UL,   // j = 42
    0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 0x00097d82UL,   // j = 43
    0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,   // j = 44
    0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL,   // j = 45
    0x9ab89880UL, 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL,   // j = 46
    0x6e735ab3UL, 0x3c846984UL, 0x5579fdbfUL, 0x000a9e6bUL,   // j = 47
    0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 0x000ae89fUL,   // j = 48
    0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,   // j = 49
    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL,   // j = 50
    0x0a5fddcdUL, 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL,   // j = 51
    0x30af0cb3UL, 0x3c736eaeUL, 0xdd85529cUL, 0x000c199bUL,   // j = 52
    0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 0x000c67f1UL,   // j = 53
    0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,   // j = 54
    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL,   // j = 55
    0xa63d07a7UL, 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL,   // j = 56
    0xd5c192acUL, 0x3c8e5a50UL, 0x03db3285UL, 0x000da9e6UL,   // j = 57
    0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 0x000dfc97UL,   // j = 58
    0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,   // j = 59
    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL,   // j = 60
    0x41aa2008UL, 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL,   // j = 61
    0x31d185eeUL, 0x3c8a64a9UL, 0x5b6e4540UL, 0x000f5076UL,   // j = 62
    0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 0x000fa7c1UL    // j = 63
};
|
|
154 |
|
|
155 |
// 128 set bits. fast_exp shifts this left by a run-time count to build a mask
// that clears low-order bits on the extreme-exponent path.
ALIGNED_(16) juint _ALLONES[] = {
    0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
};
|
|
159 |
|
|
160 |
// 0x3ff0000000000000 (the bit pattern of 1.0, i.e. the biased exponent of 2^0)
// in both lanes. Added to a shifted exponent on the extreme-exponent path.
ALIGNED_(16) juint _ebias[] = {
    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
};
|
|
164 |
|
|
165 |
// 0x7fefffffffffffff: largest finite double. fast_exp squares it to produce
// an overflowing result.
ALIGNED_(4) juint _XMAX[] = {
    0xffffffffUL, 0x7fefffffUL
};
|
|
169 |
|
|
170 |
// 0x0010000000000000: smallest positive normal double. fast_exp squares it to
// produce an underflowing result.
ALIGNED_(4) juint _XMIN[] = {
    0x00000000UL, 0x00100000UL
};
|
|
174 |
|
|
175 |
// 0x7ff0000000000000: +infinity, returned by fast_exp for exp(+INF).
ALIGNED_(4) juint _INF[] = {
    0x00000000UL, 0x7ff00000UL
};
|
|
179 |
|
|
180 |
// +0.0, returned by fast_exp for exp(-INF).
ALIGNED_(4) juint _ZERO[] = {
    0x00000000UL, 0x00000000UL
};
|
|
184 |
|
|
185 |
// 0x3ff0000000000000: 1.0. fast_exp returns 1.0 + x for very small |x|.
ALIGNED_(4) juint _ONE_val[] = {
    0x00000000UL, 0x3ff00000UL
};
|
|
189 |
|
|
190 |
|
|
191 |
// Registers:
|
|
192 |
// input: xmm0
|
|
193 |
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
|
194 |
// rax, rdx, rcx, tmp - r11
|
|
195 |
|
|
196 |
// Code generated by Intel C compiler for LIBM library
|
|
197 |
|
|
198 |
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
|
199 |
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
|
200 |
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
|
201 |
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
|
202 |
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
|
|
203 |
|
|
204 |
assert_different_registers(tmp, eax, ecx, edx);
|
|
205 |
jmp(start);
|
|
206 |
address cv = (address)_cv;
|
|
207 |
address Shifter = (address)_shifter;
|
|
208 |
address mmask = (address)_mmask;
|
|
209 |
address bias = (address)_bias;
|
|
210 |
address Tbl_addr = (address)_Tbl_addr;
|
|
211 |
address ALLONES = (address)_ALLONES;
|
|
212 |
address ebias = (address)_ebias;
|
|
213 |
address XMAX = (address)_XMAX;
|
|
214 |
address XMIN = (address)_XMIN;
|
|
215 |
address INF = (address)_INF;
|
|
216 |
address ZERO = (address)_ZERO;
|
|
217 |
address ONE_val = (address)_ONE_val;
|
|
218 |
|
|
219 |
bind(start);
|
|
220 |
subq(rsp, 24);
|
|
221 |
movsd(Address(rsp, 8), xmm0);
|
|
222 |
unpcklpd(xmm0, xmm0);
|
|
223 |
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
|
|
224 |
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
|
225 |
movdqu(xmm2, ExternalAddress(16+cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
|
|
226 |
movdqu(xmm3, ExternalAddress(32+cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
|
|
227 |
pextrw(eax, xmm0, 3);
|
|
228 |
andl(eax, 32767);
|
|
229 |
movl(edx, 16527);
|
|
230 |
subl(edx, eax);
|
|
231 |
subl(eax, 15504);
|
|
232 |
orl(edx, eax);
|
|
233 |
cmpl(edx, INT_MIN);
|
|
234 |
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
|
235 |
mulpd(xmm1, xmm0);
|
|
236 |
addpd(xmm1, xmm6);
|
|
237 |
movapd(xmm7, xmm1);
|
|
238 |
subpd(xmm1, xmm6);
|
|
239 |
mulpd(xmm2, xmm1);
|
|
240 |
movdqu(xmm4, ExternalAddress(64+cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
|
|
241 |
mulpd(xmm3, xmm1);
|
|
242 |
movdqu(xmm5, ExternalAddress(80+cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
|
243 |
subpd(xmm0, xmm2);
|
|
244 |
movdl(eax, xmm7);
|
|
245 |
movl(ecx, eax);
|
|
246 |
andl(ecx, 63);
|
|
247 |
shll(ecx, 4);
|
|
248 |
sarl(eax, 6);
|
|
249 |
movl(edx, eax);
|
|
250 |
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
|
251 |
pand(xmm7, xmm6);
|
|
252 |
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
|
253 |
paddq(xmm7, xmm6);
|
|
254 |
psllq(xmm7, 46);
|
|
255 |
subpd(xmm0, xmm3);
|
|
256 |
lea(tmp, ExternalAddress(Tbl_addr));
|
|
257 |
movdqu(xmm2, Address(ecx,tmp));
|
|
258 |
mulpd(xmm4, xmm0);
|
|
259 |
movapd(xmm6, xmm0);
|
|
260 |
movapd(xmm1, xmm0);
|
|
261 |
mulpd(xmm6, xmm6);
|
|
262 |
mulpd(xmm0, xmm6);
|
|
263 |
addpd(xmm5, xmm4);
|
|
264 |
mulsd(xmm0, xmm6);
|
|
265 |
mulpd(xmm6, ExternalAddress(48+cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
|
|
266 |
addsd(xmm1, xmm2);
|
|
267 |
unpckhpd(xmm2, xmm2);
|
|
268 |
mulpd(xmm0, xmm5);
|
|
269 |
addsd(xmm1, xmm0);
|
|
270 |
por(xmm2, xmm7);
|
|
271 |
unpckhpd(xmm0, xmm0);
|
|
272 |
addsd(xmm0, xmm1);
|
|
273 |
addsd(xmm0, xmm6);
|
|
274 |
addl(edx, 894);
|
|
275 |
cmpl(edx, 1916);
|
|
276 |
jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
|
|
277 |
mulsd(xmm0, xmm2);
|
|
278 |
addsd(xmm0, xmm2);
|
|
279 |
jmp (B1_5);
|
|
280 |
|
|
281 |
bind(L_2TAG_PACKET_1_0_2);
|
|
282 |
xorpd(xmm3, xmm3);
|
|
283 |
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
|
|
284 |
movl(edx, -1022);
|
|
285 |
subl(edx, eax);
|
|
286 |
movdl(xmm5, edx);
|
|
287 |
psllq(xmm4, xmm5);
|
|
288 |
movl(ecx, eax);
|
|
289 |
sarl(eax, 1);
|
|
290 |
pinsrw(xmm3, eax, 3);
|
|
291 |
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
|
|
292 |
psllq(xmm3, 4);
|
|
293 |
psubd(xmm2, xmm3);
|
|
294 |
mulsd(xmm0, xmm2);
|
|
295 |
cmpl(edx, 52);
|
|
296 |
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
|
|
297 |
pand(xmm4, xmm2);
|
|
298 |
paddd(xmm3, xmm6);
|
|
299 |
subsd(xmm2, xmm4);
|
|
300 |
addsd(xmm0, xmm2);
|
|
301 |
cmpl(ecx, 1023);
|
|
302 |
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
|
|
303 |
pextrw(ecx, xmm0, 3);
|
|
304 |
andl(ecx, 32768);
|
|
305 |
orl(edx, ecx);
|
|
306 |
cmpl(edx, 0);
|
|
307 |
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
|
|
308 |
movapd(xmm6, xmm0);
|
|
309 |
addsd(xmm0, xmm4);
|
|
310 |
mulsd(xmm0, xmm3);
|
|
311 |
pextrw(ecx, xmm0, 3);
|
|
312 |
andl(ecx, 32752);
|
|
313 |
cmpl(ecx, 0);
|
|
314 |
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
|
|
315 |
jmp(B1_5);
|
|
316 |
|
|
317 |
bind(L_2TAG_PACKET_5_0_2);
|
|
318 |
mulsd(xmm6, xmm3);
|
|
319 |
mulsd(xmm4, xmm3);
|
|
320 |
movdqu(xmm0, xmm6);
|
|
321 |
pxor(xmm6, xmm4);
|
|
322 |
psrad(xmm6, 31);
|
|
323 |
pshufd(xmm6, xmm6, 85);
|
|
324 |
psllq(xmm0, 1);
|
|
325 |
psrlq(xmm0, 1);
|
|
326 |
pxor(xmm0, xmm6);
|
|
327 |
psrlq(xmm6, 63);
|
|
328 |
paddq(xmm0, xmm6);
|
|
329 |
paddq(xmm0, xmm4);
|
|
330 |
movl(Address(rsp,0), 15);
|
|
331 |
jmp(L_2TAG_PACKET_6_0_2);
|
|
332 |
|
|
333 |
bind(L_2TAG_PACKET_4_0_2);
|
|
334 |
addsd(xmm0, xmm4);
|
|
335 |
mulsd(xmm0, xmm3);
|
|
336 |
jmp(B1_5);
|
|
337 |
|
|
338 |
bind(L_2TAG_PACKET_3_0_2);
|
|
339 |
addsd(xmm0, xmm4);
|
|
340 |
mulsd(xmm0, xmm3);
|
|
341 |
pextrw(ecx, xmm0, 3);
|
|
342 |
andl(ecx, 32752);
|
|
343 |
cmpl(ecx, 32752);
|
|
344 |
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
|
|
345 |
jmp(B1_5);
|
|
346 |
|
|
347 |
bind(L_2TAG_PACKET_2_0_2);
|
|
348 |
paddd(xmm3, xmm6);
|
|
349 |
addpd(xmm0, xmm2);
|
|
350 |
mulsd(xmm0, xmm3);
|
|
351 |
movl(Address(rsp,0), 15);
|
|
352 |
jmp(L_2TAG_PACKET_6_0_2);
|
|
353 |
|
|
354 |
bind(L_2TAG_PACKET_8_0_2);
|
|
355 |
cmpl(eax, 2146435072);
|
|
356 |
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
|
|
357 |
movl(eax, Address(rsp,12));
|
|
358 |
cmpl(eax, INT_MIN);
|
|
359 |
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
|
|
360 |
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
|
|
361 |
mulsd(xmm0, xmm0);
|
|
362 |
|
|
363 |
bind(L_2TAG_PACKET_7_0_2);
|
|
364 |
movl(Address(rsp,0), 14);
|
|
365 |
jmp(L_2TAG_PACKET_6_0_2);
|
|
366 |
|
|
367 |
bind(L_2TAG_PACKET_10_0_2);
|
|
368 |
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
|
|
369 |
mulsd(xmm0, xmm0);
|
|
370 |
movl(Address(rsp,0), 15);
|
|
371 |
jmp(L_2TAG_PACKET_6_0_2);
|
|
372 |
|
|
373 |
bind(L_2TAG_PACKET_9_0_2);
|
|
374 |
movl(edx, Address(rsp,8));
|
|
375 |
cmpl(eax, 2146435072);
|
|
376 |
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
|
|
377 |
cmpl(edx, 0);
|
|
378 |
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
|
|
379 |
movl(eax, Address(rsp,12));
|
|
380 |
cmpl(eax, 2146435072);
|
|
381 |
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
|
|
382 |
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
|
|
383 |
jmp(B1_5);
|
|
384 |
|
|
385 |
bind(L_2TAG_PACKET_12_0_2);
|
|
386 |
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
|
|
387 |
jmp(B1_5);
|
|
388 |
|
|
389 |
bind(L_2TAG_PACKET_11_0_2);
|
|
390 |
movsd(xmm0, Address(rsp, 8));
|
|
391 |
addsd(xmm0, xmm0);
|
|
392 |
jmp(B1_5);
|
|
393 |
|
|
394 |
bind(L_2TAG_PACKET_0_0_2);
|
|
395 |
movl(eax, Address(rsp, 12));
|
|
396 |
andl(eax, 2147483647);
|
|
397 |
cmpl(eax, 1083179008);
|
|
398 |
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
|
|
399 |
movsd(Address(rsp, 8), xmm0);
|
|
400 |
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
|
|
401 |
jmp(B1_5);
|
|
402 |
|
|
403 |
bind(L_2TAG_PACKET_6_0_2);
|
|
404 |
movq(Address(rsp, 16), xmm0);
|
|
405 |
|
|
406 |
bind(B1_3);
|
|
407 |
movq(xmm0, Address(rsp, 16));
|
|
408 |
|
|
409 |
bind(B1_5);
|
|
410 |
addq(rsp, 24);
|
|
411 |
}
|
|
412 |
#endif
|
|
413 |
|
|
414 |
#ifndef _LP64
|
|
415 |
|
|
416 |
// Single constant table used by the 32-bit fast_exp, which addresses it as
// tmp + byte offset. Rows are 16 bytes (little-endian 32-bit halves of
// doubles); the comment on each row gives its byte offset.
ALIGNED_(16) juint _static_const_table[] = {
    0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL,   // +0   : sign/exponent mask
    0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL,   // +16  : mask keeping bits 6..31 of m
    0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,   // +32  : exponent bias, 1023 << 6
    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL,   // +48  : rounding shifter, 1.5 * 2^52
    0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,   // +64  : K/log(2) = 64/log(2)
    0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL,   // +80  : high part of log(2)/K
    0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL,   // +96  : low part of log(2)/K
    0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL,   // +112 : polynomial coefficient, approx. 1/2
    0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,   // +128 : polynomial coefficients, approx. 1/720 and 1/24
    0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,   // +144 : polynomial coefficients, approx. 1/120 and 1/6

    // +160: table T[j], j = 0..63, 16 bytes per entry
    // (dwords 0-1: correction term, dwords 2-3: mantissa bits of T_hi[j]).
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,   // j = 0
    0x0e03754dUL, 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL,   // j = 1
    0x3567f613UL, 0x3c8cd252UL, 0xd3158574UL, 0x000059b0UL,   // j = 2
    0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 0x00008745UL,   // j = 3
    0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,   // j = 4
    0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL,   // j = 5
    0x1e63bcd8UL, 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL,   // j = 6
    0x26f0387bUL, 0x3ca4c554UL, 0xaea92ddfUL, 0x0001429aUL,   // j = 7
    0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 0x000172b8UL,   // j = 8
    0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,   // j = 9
    0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL,   // j = 10
    0x44a6c38dUL, 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL,   // j = 11
    0xe3a8a894UL, 0x3c968efdUL, 0x6e756238UL, 0x0002387aUL,   // j = 12
    0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 0x00026b45UL,   // j = 13
    0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,   // j = 14
    0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL,   // j = 15
    0x4db0abb6UL, 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL,   // j = 16
    0x5dd3f84aUL, 0x3c8fdd39UL, 0xb26416ffUL, 0x00033c08UL,   // j = 17
    0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 0x000371a7UL,   // j = 18
    0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,   // j = 19
    0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL,   // j = 20
    0x259d9205UL, 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL,   // j = 21
    0x60c2ac12UL, 0x3c4363edUL, 0x6061892dUL, 0x00044e08UL,   // j = 22
    0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 0x000486a2UL,   // j = 23
    0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,   // j = 24
    0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL,   // j = 25
    0xc1aae707UL, 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL,   // j = 26
    0x18fdd78eUL, 0x3c933505UL, 0x36b527daUL, 0x00056f47UL,   // j = 27
    0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 0x0005ab07UL,   // j = 28
    0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,   // j = 29
    0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL,   // j = 30
    0x0073dc06UL, 0x3c99f087UL, 0x82552224UL, 0x00066238UL,   // j = 31
    0x0da05571UL, 0x3c998d4dUL, 0x667f3bccUL, 0x0006a09eUL,   // j = 32
    0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 0x0006dfb2UL,   // j = 33
    0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,   // j = 34
    0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL,   // j = 35
    0x461e9f86UL, 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL,   // j = 36
    0xabd66c55UL, 0x3c65ebe1UL, 0x36cf4e62UL, 0x0007e2f3UL,   // j = 37
    0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 0x00082589UL,   // j = 38
    0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,   // j = 39
    0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL,   // j = 40
    0x59f35f44UL, 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL,   // j = 41
    0x9c06283cUL, 0x3ca360baUL, 0xb0cdc5e4UL, 0x00093737UL,   // j = 42
    0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 0x00097d82UL,   // j = 43
    0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,   // j = 44
    0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL,   // j = 45
    0x9ab89880UL, 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL,   // j = 46
    0x6e735ab3UL, 0x3c846984UL, 0x5579fdbfUL, 0x000a9e6bUL,   // j = 47
    0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 0x000ae89fUL,   // j = 48
    0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,   // j = 49
    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL,   // j = 50
    0x0a5fddcdUL, 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL,   // j = 51
    0x30af0cb3UL, 0x3c736eaeUL, 0xdd85529cUL, 0x000c199bUL,   // j = 52
    0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 0x000c67f1UL,   // j = 53
    0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,   // j = 54
    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL,   // j = 55
    0xa63d07a7UL, 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL,   // j = 56
    0xd5c192acUL, 0x3c8e5a50UL, 0x03db3285UL, 0x000da9e6UL,   // j = 57
    0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 0x000dfc97UL,   // j = 58
    0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,   // j = 59
    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL,   // j = 60
    0x41aa2008UL, 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL,   // j = 61
    0x31d185eeUL, 0x3c8a64a9UL, 0x5b6e4540UL, 0x000f5076UL,   // j = 62
    0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 0x000fa7c1UL,   // j = 63

    // Scalar doubles, 8 bytes each.
    0x00000000UL, 0x3ff00000UL,                               // +1184: 1.0
    0x00000000UL, 0x7ff00000UL,                               // +1192: +INF
    0x00000000UL, 0x00000000UL,                               // +1200: +0.0
    0xffffffffUL, 0x7fefffffUL,                               // +1208: largest finite double
    0x00000000UL, 0x00100000UL                                // +1216: smallest positive normal double
};
|
|
481 |
|
|
482 |
//registers,
|
|
483 |
// input: (rbp + 8)
|
|
484 |
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
|
485 |
// rax, rdx, rcx, rbx (tmp)
|
|
486 |
|
|
487 |
// Code generated by Intel C compiler for LIBM library
|
|
488 |
|
|
489 |
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
|
490 |
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
|
491 |
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
|
492 |
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
|
493 |
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
|
|
494 |
|
|
495 |
assert_different_registers(tmp, eax, ecx, edx);
|
|
496 |
jmp(start);
|
|
497 |
address static_const_table = (address)_static_const_table;
|
|
498 |
|
|
499 |
bind(start);
|
|
500 |
subl(rsp, 120);
|
|
501 |
movl(Address(rsp, 64), tmp);
|
|
502 |
lea(tmp, ExternalAddress(static_const_table));
|
|
503 |
movdqu(xmm0, Address(rsp, 128));
|
|
504 |
unpcklpd(xmm0, xmm0);
|
|
505 |
movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
|
|
506 |
movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
|
507 |
movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
|
|
508 |
movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
|
|
509 |
pextrw(eax, xmm0, 3);
|
|
510 |
andl(eax, 32767);
|
|
511 |
movl(edx, 16527);
|
|
512 |
subl(edx, eax);
|
|
513 |
subl(eax, 15504);
|
|
514 |
orl(edx, eax);
|
|
515 |
cmpl(edx, INT_MIN);
|
|
516 |
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
|
517 |
mulpd(xmm1, xmm0);
|
|
518 |
addpd(xmm1, xmm6);
|
|
519 |
movapd(xmm7, xmm1);
|
|
520 |
subpd(xmm1, xmm6);
|
|
521 |
mulpd(xmm2, xmm1);
|
|
522 |
movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
|
|
523 |
mulpd(xmm3, xmm1);
|
|
524 |
movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
|
525 |
subpd(xmm0, xmm2);
|
|
526 |
movdl(eax, xmm7);
|
|
527 |
movl(ecx, eax);
|
|
528 |
andl(ecx, 63);
|
|
529 |
shll(ecx, 4);
|
|
530 |
sarl(eax, 6);
|
|
531 |
movl(edx, eax);
|
|
532 |
movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
|
533 |
pand(xmm7, xmm6);
|
|
534 |
movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
|
535 |
paddq(xmm7, xmm6);
|
|
536 |
psllq(xmm7, 46);
|
|
537 |
subpd(xmm0, xmm3);
|
|
538 |
movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
|
|
539 |
mulpd(xmm4, xmm0);
|
|
540 |
movapd(xmm6, xmm0);
|
|
541 |
movapd(xmm1, xmm0);
|
|
542 |
mulpd(xmm6, xmm6);
|
|
543 |
mulpd(xmm0, xmm6);
|
|
544 |
addpd(xmm5, xmm4);
|
|
545 |
mulsd(xmm0, xmm6);
|
|
546 |
mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
|
|
547 |
addsd(xmm1, xmm2);
|
|
548 |
unpckhpd(xmm2, xmm2);
|
|
549 |
mulpd(xmm0, xmm5);
|
|
550 |
addsd(xmm1, xmm0);
|
|
551 |
por(xmm2, xmm7);
|
|
552 |
unpckhpd(xmm0, xmm0);
|
|
553 |
addsd(xmm0, xmm1);
|
|
554 |
addsd(xmm0, xmm6);
|
|
555 |
addl(edx, 894);
|
|
556 |
cmpl(edx, 1916);
|
|
557 |
jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
|
|
558 |
mulsd(xmm0, xmm2);
|
|
559 |
addsd(xmm0, xmm2);
|
|
560 |
jmp(L_2TAG_PACKET_2_0_2);
|
|
561 |
|
|
562 |
bind(L_2TAG_PACKET_1_0_2);
|
|
563 |
fnstcw(Address(rsp, 24));
|
|
564 |
movzwl(edx, Address(rsp, 24));
|
|
565 |
orl(edx, 768);
|
|
566 |
movw(Address(rsp, 28), edx);
|
|
567 |
fldcw(Address(rsp, 28));
|
|
568 |
movl(edx, eax);
|
|
569 |
sarl(eax, 1);
|
|
570 |
subl(edx, eax);
|
|
571 |
movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
|
|
572 |
pandn(xmm6, xmm2);
|
|
573 |
addl(eax, 1023);
|
|
574 |
movdl(xmm3, eax);
|
|
575 |
psllq(xmm3, 52);
|
|
576 |
por(xmm6, xmm3);
|
|
577 |
addl(edx, 1023);
|
|
578 |
movdl(xmm4, edx);
|
|
579 |
psllq(xmm4, 52);
|
|
580 |
movsd(Address(rsp, 8), xmm0);
|
|
581 |
fld_d(Address(rsp, 8));
|
|
582 |
movsd(Address(rsp, 16), xmm6);
|
|
583 |
fld_d(Address(rsp, 16));
|
|
584 |
fmula(1);
|
|
585 |
faddp(1);
|
|
586 |
movsd(Address(rsp, 8), xmm4);
|
|
587 |
fld_d(Address(rsp, 8));
|
|
588 |
fmulp(1);
|
|
589 |
fstp_d(Address(rsp, 8));
|
|
590 |
movsd(xmm0,Address(rsp, 8));
|
|
591 |
fldcw(Address(rsp, 24));
|
|
592 |
pextrw(ecx, xmm0, 3);
|
|
593 |
andl(ecx, 32752);
|
|
594 |
cmpl(ecx, 32752);
|
|
595 |
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
|
|
596 |
cmpl(ecx, 0);
|
|
597 |
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
|
|
598 |
jmp(L_2TAG_PACKET_2_0_2);
|
|
599 |
cmpl(ecx, INT_MIN);
|
|
600 |
jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
|
|
601 |
cmpl(ecx, -1064950997);
|
|
602 |
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
|
|
603 |
jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
|
|
604 |
movl(edx, Address(rsp, 128));
|
|
605 |
cmpl(edx ,-17155601);
|
|
606 |
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
|
|
607 |
jmp(L_2TAG_PACKET_4_0_2);
|
|
608 |
|
|
609 |
bind(L_2TAG_PACKET_3_0_2);
|
|
610 |
movl(edx, 14);
|
|
611 |
jmp(L_2TAG_PACKET_5_0_2);
|
|
612 |
|
|
613 |
bind(L_2TAG_PACKET_4_0_2);
|
|
614 |
movl(edx, 15);
|
|
615 |
|
|
616 |
bind(L_2TAG_PACKET_5_0_2);
|
|
617 |
movsd(Address(rsp, 0), xmm0);
|
|
618 |
movsd(xmm0, Address(rsp, 128));
|
|
619 |
fld_d(Address(rsp, 0));
|
|
620 |
jmp(L_2TAG_PACKET_6_0_2);
|
|
621 |
|
|
622 |
bind(L_2TAG_PACKET_7_0_2);
|
|
623 |
cmpl(eax, 2146435072);
|
|
624 |
jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
|
|
625 |
movl(eax, Address(rsp, 132));
|
|
626 |
cmpl(eax, INT_MIN);
|
|
627 |
jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
|
|
628 |
movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
|
|
629 |
mulsd(xmm0, xmm0);
|
|
630 |
movl(edx, 14);
|
|
631 |
jmp(L_2TAG_PACKET_5_0_2);
|
|
632 |
|
|
633 |
bind(L_2TAG_PACKET_9_0_2);
|
|
634 |
movsd(xmm0, Address(tmp, 1216));
|
|
635 |
mulsd(xmm0, xmm0);
|
|
636 |
movl(edx, 15);
|
|
637 |
jmp(L_2TAG_PACKET_5_0_2);
|
|
638 |
|
|
639 |
bind(L_2TAG_PACKET_8_0_2);
|
|
640 |
movl(edx, Address(rsp, 128));
|
|
641 |
cmpl(eax, 2146435072);
|
|
642 |
jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
|
|
643 |
cmpl(edx, 0);
|
|
644 |
jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
|
|
645 |
movl(eax, Address(rsp, 132));
|
|
646 |
cmpl(eax, 2146435072);
|
|
647 |
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
|
|
648 |
movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
|
|
649 |
jmp(L_2TAG_PACKET_2_0_2);
|
|
650 |
|
|
651 |
bind(L_2TAG_PACKET_11_0_2);
|
|
652 |
movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
|
|
653 |
jmp(L_2TAG_PACKET_2_0_2);
|
|
654 |
|
|
655 |
bind(L_2TAG_PACKET_10_0_2);
|
|
656 |
movsd(xmm0, Address(rsp, 128));
|
|
657 |
addsd(xmm0, xmm0);
|
|
658 |
jmp(L_2TAG_PACKET_2_0_2);
|
|
659 |
|
|
660 |
bind(L_2TAG_PACKET_0_0_2);
|
|
661 |
movl(eax, Address(rsp, 132));
|
|
662 |
andl(eax, 2147483647);
|
|
663 |
cmpl(eax, 1083179008);
|
|
664 |
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
|
|
665 |
movsd(xmm0, Address(rsp, 128));
|
|
666 |
addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
|
|
667 |
jmp(L_2TAG_PACKET_2_0_2);
|
|
668 |
|
|
669 |
bind(L_2TAG_PACKET_2_0_2);
|
|
670 |
movsd(Address(rsp, 48), xmm0);
|
|
671 |
fld_d(Address(rsp, 48));
|
|
672 |
|
|
673 |
bind(L_2TAG_PACKET_6_0_2);
|
|
674 |
movl(tmp, Address(rsp, 64));
|
|
675 |
}
|
|
676 |
|
|
677 |
#endif
|