|
1 /* |
|
2 * Copyright (c) 2015, Intel Corporation. |
|
3 * Intel Math Library (LIBM) Source Code |
|
4 * |
|
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
6 * |
|
7 * This code is free software; you can redistribute it and/or modify it |
|
8 * under the terms of the GNU General Public License version 2 only, as |
|
9 * published by the Free Software Foundation. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 * |
|
25 */ |
|
26 |
|
27 #include "precompiled.hpp" |
|
28 #include "asm/assembler.hpp" |
|
29 #include "asm/assembler.inline.hpp" |
|
30 #include "macroAssembler_x86.hpp" |
|
31 |
|
// ALIGNED_(x): compiler-specific spelling of "align this object to x bytes".
// Used below on the constant tables that the generated code loads.
#if defined(_MSC_VER)
#  define ALIGNED_(x) __declspec(align(x))
#else
#  define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
|
37 |
|
38 // The 64 bit code is at most SSE2 compliant |
|
39 |
|
40 /******************************************************************************/ |
|
41 // ALGORITHM DESCRIPTION - EXP() |
|
42 // --------------------- |
|
43 // |
|
44 // Description: |
|
45 // Let K = 64 (table size). |
|
46 // |
|
47 // e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) |
|
48 // where |
|
49 // x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K] |
|
50 // m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2] |
|
51 // |
|
52 // values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]). |
|
53 // |
|
54 // P(y) is a minimax polynomial approximation of exp(y)-1 |
|
55 // on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V). |
|
56 // |
|
57 // To avoid problems with arithmetic overflow and underflow, |
|
58 // |
|
59 // value of 2^n is safely computed as 2^n1 * 2^n2 where n1 in [-BIAS/2..BIAS/2] |
|
60 // where BIAS is a value of exponent bias. |
|
61 // |
|
62 // Special cases: |
|
63 // exp(NaN) = NaN |
|
64 // exp(+INF) = +INF |
|
65 // exp(-INF) = 0 |
|
66 // exp(x) = 1 for subnormals |
|
67 // for finite argument, only exp(0)=1 is exact |
|
68 // For IEEE double |
|
69 // if x > 709.782712893383973096 then exp(x) overflow |
|
70 // if x < -745.133219101941108420 then exp(x) underflow |
|
71 // |
|
72 /******************************************************************************/ |
|
73 |
|
74 ALIGNED_(16) juint _cv[] = |
|
75 { |
|
76 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL, |
|
77 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL, |
|
78 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, |
|
79 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL, |
|
80 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL |
|
81 }; |
|
82 |
|
83 ALIGNED_(16) juint _shifter[] = |
|
84 { |
|
85 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL |
|
86 }; |
|
87 |
|
88 ALIGNED_(16) juint _mmask[] = |
|
89 { |
|
90 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL |
|
91 }; |
|
92 |
|
93 ALIGNED_(16) juint _bias[] = |
|
94 { |
|
95 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL |
|
96 }; |
|
97 |
|
98 ALIGNED_(16) juint _Tbl_addr[] = |
|
99 { |
|
100 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL, |
|
101 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL, |
|
102 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, |
|
103 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL, |
|
104 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL, |
|
105 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL, |
|
106 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, |
|
107 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, |
|
108 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, |
|
109 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, |
|
110 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, |
|
111 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, |
|
112 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, |
|
113 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, |
|
114 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, |
|
115 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, |
|
116 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, |
|
117 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, |
|
118 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, |
|
119 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, |
|
120 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, |
|
121 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, |
|
122 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, |
|
123 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, |
|
124 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, |
|
125 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, |
|
126 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, |
|
127 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, |
|
128 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, |
|
129 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, |
|
130 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, |
|
131 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, |
|
132 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, |
|
133 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, |
|
134 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, |
|
135 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, |
|
136 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, |
|
137 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, |
|
138 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, |
|
139 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL, |
|
140 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL, |
|
141 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL, |
|
142 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, |
|
143 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL, |
|
144 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL, |
|
145 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL, |
|
146 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, |
|
147 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL, |
|
148 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL, |
|
149 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL, |
|
150 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, |
|
151 0x000fa7c1UL |
|
152 }; |
|
153 |
|
154 ALIGNED_(16) juint _ALLONES[] = |
|
155 { |
|
156 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL |
|
157 }; |
|
158 |
|
159 ALIGNED_(16) juint _ebias[] = |
|
160 { |
|
161 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL |
|
162 }; |
|
163 |
|
164 ALIGNED_(4) juint _XMAX[] = |
|
165 { |
|
166 0xffffffffUL, 0x7fefffffUL |
|
167 }; |
|
168 |
|
169 ALIGNED_(4) juint _XMIN[] = |
|
170 { |
|
171 0x00000000UL, 0x00100000UL |
|
172 }; |
|
173 |
|
174 ALIGNED_(4) juint _INF[] = |
|
175 { |
|
176 0x00000000UL, 0x7ff00000UL |
|
177 }; |
|
178 |
|
179 ALIGNED_(4) juint _ZERO[] = |
|
180 { |
|
181 0x00000000UL, 0x00000000UL |
|
182 }; |
|
183 |
|
184 ALIGNED_(4) juint _ONE_val[] = |
|
185 { |
|
186 0x00000000UL, 0x3ff00000UL |
|
187 }; |
|
188 |
|
189 |
|
190 // Registers: |
|
191 // input: xmm0 |
|
192 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
|
193 // rax, rdx, rcx, tmp - r11 |
|
194 |
|
195 // Code generated by Intel C compiler for LIBM library |
|
196 |
|
197 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { |
|
198 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
|
199 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
|
200 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; |
|
201 Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start; |
|
202 |
|
203 assert_different_registers(tmp, eax, ecx, edx); |
|
204 jmp(start); |
|
205 address cv = (address)_cv; |
|
206 address Shifter = (address)_shifter; |
|
207 address mmask = (address)_mmask; |
|
208 address bias = (address)_bias; |
|
209 address Tbl_addr = (address)_Tbl_addr; |
|
210 address ALLONES = (address)_ALLONES; |
|
211 address ebias = (address)_ebias; |
|
212 address XMAX = (address)_XMAX; |
|
213 address XMIN = (address)_XMIN; |
|
214 address INF = (address)_INF; |
|
215 address ZERO = (address)_ZERO; |
|
216 address ONE_val = (address)_ONE_val; |
|
217 |
|
218 bind(start); |
|
219 subq(rsp, 24); |
|
220 movsd(Address(rsp, 8), xmm0); |
|
221 unpcklpd(xmm0, xmm0); |
|
222 movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL |
|
223 movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL |
|
224 movdqu(xmm2, ExternalAddress(16+cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL |
|
225 movdqu(xmm3, ExternalAddress(32+cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL |
|
226 pextrw(eax, xmm0, 3); |
|
227 andl(eax, 32767); |
|
228 movl(edx, 16527); |
|
229 subl(edx, eax); |
|
230 subl(eax, 15504); |
|
231 orl(edx, eax); |
|
232 cmpl(edx, INT_MIN); |
|
233 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
|
234 mulpd(xmm1, xmm0); |
|
235 addpd(xmm1, xmm6); |
|
236 movapd(xmm7, xmm1); |
|
237 subpd(xmm1, xmm6); |
|
238 mulpd(xmm2, xmm1); |
|
239 movdqu(xmm4, ExternalAddress(64+cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL |
|
240 mulpd(xmm3, xmm1); |
|
241 movdqu(xmm5, ExternalAddress(80+cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL |
|
242 subpd(xmm0, xmm2); |
|
243 movdl(eax, xmm7); |
|
244 movl(ecx, eax); |
|
245 andl(ecx, 63); |
|
246 shll(ecx, 4); |
|
247 sarl(eax, 6); |
|
248 movl(edx, eax); |
|
249 movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL |
|
250 pand(xmm7, xmm6); |
|
251 movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL |
|
252 paddq(xmm7, xmm6); |
|
253 psllq(xmm7, 46); |
|
254 subpd(xmm0, xmm3); |
|
255 lea(tmp, ExternalAddress(Tbl_addr)); |
|
256 movdqu(xmm2, Address(ecx,tmp)); |
|
257 mulpd(xmm4, xmm0); |
|
258 movapd(xmm6, xmm0); |
|
259 movapd(xmm1, xmm0); |
|
260 mulpd(xmm6, xmm6); |
|
261 mulpd(xmm0, xmm6); |
|
262 addpd(xmm5, xmm4); |
|
263 mulsd(xmm0, xmm6); |
|
264 mulpd(xmm6, ExternalAddress(48+cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL |
|
265 addsd(xmm1, xmm2); |
|
266 unpckhpd(xmm2, xmm2); |
|
267 mulpd(xmm0, xmm5); |
|
268 addsd(xmm1, xmm0); |
|
269 por(xmm2, xmm7); |
|
270 unpckhpd(xmm0, xmm0); |
|
271 addsd(xmm0, xmm1); |
|
272 addsd(xmm0, xmm6); |
|
273 addl(edx, 894); |
|
274 cmpl(edx, 1916); |
|
275 jcc (Assembler::above, L_2TAG_PACKET_1_0_2); |
|
276 mulsd(xmm0, xmm2); |
|
277 addsd(xmm0, xmm2); |
|
278 jmp (B1_5); |
|
279 |
|
280 bind(L_2TAG_PACKET_1_0_2); |
|
281 xorpd(xmm3, xmm3); |
|
282 movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL |
|
283 movl(edx, -1022); |
|
284 subl(edx, eax); |
|
285 movdl(xmm5, edx); |
|
286 psllq(xmm4, xmm5); |
|
287 movl(ecx, eax); |
|
288 sarl(eax, 1); |
|
289 pinsrw(xmm3, eax, 3); |
|
290 movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL |
|
291 psllq(xmm3, 4); |
|
292 psubd(xmm2, xmm3); |
|
293 mulsd(xmm0, xmm2); |
|
294 cmpl(edx, 52); |
|
295 jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); |
|
296 pand(xmm4, xmm2); |
|
297 paddd(xmm3, xmm6); |
|
298 subsd(xmm2, xmm4); |
|
299 addsd(xmm0, xmm2); |
|
300 cmpl(ecx, 1023); |
|
301 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); |
|
302 pextrw(ecx, xmm0, 3); |
|
303 andl(ecx, 32768); |
|
304 orl(edx, ecx); |
|
305 cmpl(edx, 0); |
|
306 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); |
|
307 movapd(xmm6, xmm0); |
|
308 addsd(xmm0, xmm4); |
|
309 mulsd(xmm0, xmm3); |
|
310 pextrw(ecx, xmm0, 3); |
|
311 andl(ecx, 32752); |
|
312 cmpl(ecx, 0); |
|
313 jcc(Assembler::equal, L_2TAG_PACKET_5_0_2); |
|
314 jmp(B1_5); |
|
315 |
|
316 bind(L_2TAG_PACKET_5_0_2); |
|
317 mulsd(xmm6, xmm3); |
|
318 mulsd(xmm4, xmm3); |
|
319 movdqu(xmm0, xmm6); |
|
320 pxor(xmm6, xmm4); |
|
321 psrad(xmm6, 31); |
|
322 pshufd(xmm6, xmm6, 85); |
|
323 psllq(xmm0, 1); |
|
324 psrlq(xmm0, 1); |
|
325 pxor(xmm0, xmm6); |
|
326 psrlq(xmm6, 63); |
|
327 paddq(xmm0, xmm6); |
|
328 paddq(xmm0, xmm4); |
|
329 movl(Address(rsp,0), 15); |
|
330 jmp(L_2TAG_PACKET_6_0_2); |
|
331 |
|
332 bind(L_2TAG_PACKET_4_0_2); |
|
333 addsd(xmm0, xmm4); |
|
334 mulsd(xmm0, xmm3); |
|
335 jmp(B1_5); |
|
336 |
|
337 bind(L_2TAG_PACKET_3_0_2); |
|
338 addsd(xmm0, xmm4); |
|
339 mulsd(xmm0, xmm3); |
|
340 pextrw(ecx, xmm0, 3); |
|
341 andl(ecx, 32752); |
|
342 cmpl(ecx, 32752); |
|
343 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); |
|
344 jmp(B1_5); |
|
345 |
|
346 bind(L_2TAG_PACKET_2_0_2); |
|
347 paddd(xmm3, xmm6); |
|
348 addpd(xmm0, xmm2); |
|
349 mulsd(xmm0, xmm3); |
|
350 movl(Address(rsp,0), 15); |
|
351 jmp(L_2TAG_PACKET_6_0_2); |
|
352 |
|
353 bind(L_2TAG_PACKET_8_0_2); |
|
354 cmpl(eax, 2146435072); |
|
355 jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2); |
|
356 movl(eax, Address(rsp,12)); |
|
357 cmpl(eax, INT_MIN); |
|
358 jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2); |
|
359 movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL |
|
360 mulsd(xmm0, xmm0); |
|
361 |
|
362 bind(L_2TAG_PACKET_7_0_2); |
|
363 movl(Address(rsp,0), 14); |
|
364 jmp(L_2TAG_PACKET_6_0_2); |
|
365 |
|
366 bind(L_2TAG_PACKET_10_0_2); |
|
367 movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL |
|
368 mulsd(xmm0, xmm0); |
|
369 movl(Address(rsp,0), 15); |
|
370 jmp(L_2TAG_PACKET_6_0_2); |
|
371 |
|
372 bind(L_2TAG_PACKET_9_0_2); |
|
373 movl(edx, Address(rsp,8)); |
|
374 cmpl(eax, 2146435072); |
|
375 jcc(Assembler::above, L_2TAG_PACKET_11_0_2); |
|
376 cmpl(edx, 0); |
|
377 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); |
|
378 movl(eax, Address(rsp,12)); |
|
379 cmpl(eax, 2146435072); |
|
380 jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2); |
|
381 movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL |
|
382 jmp(B1_5); |
|
383 |
|
384 bind(L_2TAG_PACKET_12_0_2); |
|
385 movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL |
|
386 jmp(B1_5); |
|
387 |
|
388 bind(L_2TAG_PACKET_11_0_2); |
|
389 movsd(xmm0, Address(rsp, 8)); |
|
390 addsd(xmm0, xmm0); |
|
391 jmp(B1_5); |
|
392 |
|
393 bind(L_2TAG_PACKET_0_0_2); |
|
394 movl(eax, Address(rsp, 12)); |
|
395 andl(eax, 2147483647); |
|
396 cmpl(eax, 1083179008); |
|
397 jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2); |
|
398 movsd(Address(rsp, 8), xmm0); |
|
399 addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL |
|
400 jmp(B1_5); |
|
401 |
|
402 bind(L_2TAG_PACKET_6_0_2); |
|
403 movq(Address(rsp, 16), xmm0); |
|
404 |
|
405 bind(B1_3); |
|
406 movq(xmm0, Address(rsp, 16)); |
|
407 |
|
408 bind(B1_5); |
|
409 addq(rsp, 24); |
|
410 } |
|
411 |
|
412 /******************************************************************************/ |
|
413 // ALGORITHM DESCRIPTION - LOG() |
|
414 // --------------------- |
|
415 // |
|
416 // x=2^k * mx, mx in [1,2) |
|
417 // |
|
418 // Get B~1/mx based on the output of rcpps instruction (B0) |
|
419 // B = int((B0*2^7+0.5))/2^7 |
|
420 // |
|
421 // Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) |
|
422 // |
|
423 // Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and |
|
424 // p(r) is a degree 7 polynomial |
|
425 // -log(B) read from data table (high, low parts) |
|
426 // Result is formed from high and low parts |
|
427 // |
|
428 // Special cases: |
|
429 // log(NaN) = quiet NaN, and raise invalid exception |
|
430 // log(+INF) = that INF |
|
431 // log(0) = -INF with divide-by-zero exception raised |
|
432 // log(1) = +0 |
|
433 // log(x) = NaN with invalid exception raised if x < -0, including -INF |
|
434 // |
|
435 /******************************************************************************/ |
|
436 |
|
437 ALIGNED_(16) juint _L_tbl[] = |
|
438 { |
|
439 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, |
|
440 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, |
|
441 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, |
|
442 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, |
|
443 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, |
|
444 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, |
|
445 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, |
|
446 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, |
|
447 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, |
|
448 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, |
|
449 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, |
|
450 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, |
|
451 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, |
|
452 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, |
|
453 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, |
|
454 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, |
|
455 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, |
|
456 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, |
|
457 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, |
|
458 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, |
|
459 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, |
|
460 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, |
|
461 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, |
|
462 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, |
|
463 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, |
|
464 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, |
|
465 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, |
|
466 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, |
|
467 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, |
|
468 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, |
|
469 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, |
|
470 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, |
|
471 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, |
|
472 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, |
|
473 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, |
|
474 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, |
|
475 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, |
|
476 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, |
|
477 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, |
|
478 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, |
|
479 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, |
|
480 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, |
|
481 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, |
|
482 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, |
|
483 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, |
|
484 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, |
|
485 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, |
|
486 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, |
|
487 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, |
|
488 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, |
|
489 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, |
|
490 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, |
|
491 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, |
|
492 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, |
|
493 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, |
|
494 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, |
|
495 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, |
|
496 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, |
|
497 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, |
|
498 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, |
|
499 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, |
|
500 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, |
|
501 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, |
|
502 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, |
|
503 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, |
|
504 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, |
|
505 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, |
|
506 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, |
|
507 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, |
|
508 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, |
|
509 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, |
|
510 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, |
|
511 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, |
|
512 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, |
|
513 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, |
|
514 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, |
|
515 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, |
|
516 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, |
|
517 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, |
|
518 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, |
|
519 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, |
|
520 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, |
|
521 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, |
|
522 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, |
|
523 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, |
|
524 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, |
|
525 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, |
|
526 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, |
|
527 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, |
|
528 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, |
|
529 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, |
|
530 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, |
|
531 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, |
|
532 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, |
|
533 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, |
|
534 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, |
|
535 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, |
|
536 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, |
|
537 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, |
|
538 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, |
|
539 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, |
|
540 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, |
|
541 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
|
542 0x80000000UL |
|
543 }; |
|
544 |
|
545 ALIGNED_(16) juint _log2[] = |
|
546 { |
|
547 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL |
|
548 }; |
|
549 |
|
550 ALIGNED_(16) juint _coeff[] = |
|
551 { |
|
552 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, |
|
553 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, |
|
554 0x00000000UL, 0xbfe00000UL |
|
555 }; |
|
556 |
|
557 //registers, |
|
558 // input: xmm0 |
|
559 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
|
560 // rax, rdx, rcx, r8, r11 |
|
561 |
|
562 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) { |
|
563 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
|
564 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
|
565 Label L_2TAG_PACKET_8_0_2; |
|
566 Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; |
|
567 |
|
568 assert_different_registers(tmp1, tmp2, eax, ecx, edx); |
|
569 jmp(start); |
|
570 address L_tbl = (address)_L_tbl; |
|
571 address log2 = (address)_log2; |
|
572 address coeff = (address)_coeff; |
|
573 |
|
574 bind(start); |
|
575 subq(rsp, 24); |
|
576 movsd(Address(rsp, 0), xmm0); |
|
577 mov64(rax, 0x3ff0000000000000); |
|
578 movdq(xmm2, rax); |
|
579 mov64(rdx, 0x77f0000000000000); |
|
580 movdq(xmm3, rdx); |
|
581 movl(ecx, 32768); |
|
582 movdl(xmm4, rcx); |
|
583 mov64(tmp1, 0xffffe00000000000); |
|
584 movdq(xmm5, tmp1); |
|
585 movdqu(xmm1, xmm0); |
|
586 pextrw(eax, xmm0, 3); |
|
587 por(xmm0, xmm2); |
|
588 movl(ecx, 16352); |
|
589 psrlq(xmm0, 27); |
|
590 lea(tmp2, ExternalAddress(L_tbl)); |
|
591 psrld(xmm0, 2); |
|
592 rcpps(xmm0, xmm0); |
|
593 psllq(xmm1, 12); |
|
594 pshufd(xmm6, xmm5, 228); |
|
595 psrlq(xmm1, 12); |
|
596 subl(eax, 16); |
|
597 cmpl(eax, 32736); |
|
598 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
|
599 |
|
600 bind(L_2TAG_PACKET_1_0_2); |
|
601 paddd(xmm0, xmm4); |
|
602 por(xmm1, xmm3); |
|
603 movdl(edx, xmm0); |
|
604 psllq(xmm0, 29); |
|
605 pand(xmm5, xmm1); |
|
606 pand(xmm0, xmm6); |
|
607 subsd(xmm1, xmm5); |
|
608 mulpd(xmm5, xmm0); |
|
609 andl(eax, 32752); |
|
610 subl(eax, ecx); |
|
611 cvtsi2sdl(xmm7, eax); |
|
612 mulsd(xmm1, xmm0); |
|
613 movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL |
|
614 movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL |
|
615 subsd(xmm5, xmm2); |
|
616 andl(edx, 16711680); |
|
617 shrl(edx, 12); |
|
618 movdqu(xmm0, Address(tmp2, edx)); |
|
619 movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL |
|
620 addsd(xmm1, xmm5); |
|
621 movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL |
|
622 mulsd(xmm6, xmm7); |
|
623 if (VM_Version::supports_sse3()) { |
|
624 movddup(xmm5, xmm1); |
|
625 } else { |
|
626 movdqu(xmm5, xmm1); |
|
627 movlhps(xmm5, xmm5); |
|
628 } |
|
629 mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL |
|
630 mulsd(xmm3, xmm1); |
|
631 addsd(xmm0, xmm6); |
|
632 mulpd(xmm4, xmm5); |
|
633 mulpd(xmm5, xmm5); |
|
634 if (VM_Version::supports_sse3()) { |
|
635 movddup(xmm6, xmm0); |
|
636 } else { |
|
637 movdqu(xmm6, xmm0); |
|
638 movlhps(xmm6, xmm6); |
|
639 } |
|
640 addsd(xmm0, xmm1); |
|
641 addpd(xmm4, xmm2); |
|
642 mulpd(xmm3, xmm5); |
|
643 subsd(xmm6, xmm0); |
|
644 mulsd(xmm4, xmm1); |
|
645 pshufd(xmm2, xmm0, 238); |
|
646 addsd(xmm1, xmm6); |
|
647 mulsd(xmm5, xmm5); |
|
648 addsd(xmm7, xmm2); |
|
649 addpd(xmm4, xmm3); |
|
650 addsd(xmm1, xmm7); |
|
651 mulpd(xmm4, xmm5); |
|
652 addsd(xmm1, xmm4); |
|
653 pshufd(xmm5, xmm4, 238); |
|
654 addsd(xmm1, xmm5); |
|
655 addsd(xmm0, xmm1); |
|
656 jmp(B1_5); |
|
657 |
|
658 bind(L_2TAG_PACKET_0_0_2); |
|
659 movq(xmm0, Address(rsp, 0)); |
|
660 movq(xmm1, Address(rsp, 0)); |
|
661 addl(eax, 16); |
|
662 cmpl(eax, 32768); |
|
663 jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); |
|
664 cmpl(eax, 16); |
|
665 jcc(Assembler::below, L_2TAG_PACKET_3_0_2); |
|
666 |
|
667 bind(L_2TAG_PACKET_4_0_2); |
|
668 addsd(xmm0, xmm0); |
|
669 jmp(B1_5); |
|
670 |
|
671 bind(L_2TAG_PACKET_5_0_2); |
|
672 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
|
673 cmpl(edx, 0); |
|
674 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
|
675 jmp(L_2TAG_PACKET_6_0_2); |
|
676 |
|
677 bind(L_2TAG_PACKET_3_0_2); |
|
678 xorpd(xmm1, xmm1); |
|
679 addsd(xmm1, xmm0); |
|
680 movdl(edx, xmm1); |
|
681 psrlq(xmm1, 32); |
|
682 movdl(ecx, xmm1); |
|
683 orl(edx, ecx); |
|
684 cmpl(edx, 0); |
|
685 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
|
686 xorpd(xmm1, xmm1); |
|
687 movl(eax, 18416); |
|
688 pinsrw(xmm1, eax, 3); |
|
689 mulsd(xmm0, xmm1); |
|
690 movdqu(xmm1, xmm0); |
|
691 pextrw(eax, xmm0, 3); |
|
692 por(xmm0, xmm2); |
|
693 psrlq(xmm0, 27); |
|
694 movl(ecx, 18416); |
|
695 psrld(xmm0, 2); |
|
696 rcpps(xmm0, xmm0); |
|
697 psllq(xmm1, 12); |
|
698 pshufd(xmm6, xmm5, 228); |
|
699 psrlq(xmm1, 12); |
|
700 jmp(L_2TAG_PACKET_1_0_2); |
|
701 |
|
702 bind(L_2TAG_PACKET_2_0_2); |
|
703 movdl(edx, xmm1); |
|
704 psrlq(xmm1, 32); |
|
705 movdl(ecx, xmm1); |
|
706 addl(ecx, ecx); |
|
707 cmpl(ecx, -2097152); |
|
708 jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); |
|
709 orl(edx, ecx); |
|
710 cmpl(edx, 0); |
|
711 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
|
712 |
|
713 bind(L_2TAG_PACKET_6_0_2); |
|
714 xorpd(xmm1, xmm1); |
|
715 xorpd(xmm0, xmm0); |
|
716 movl(eax, 32752); |
|
717 pinsrw(xmm1, eax, 3); |
|
718 mulsd(xmm0, xmm1); |
|
719 movl(Address(rsp, 16), 3); |
|
720 jmp(L_2TAG_PACKET_8_0_2); |
|
721 bind(L_2TAG_PACKET_7_0_2); |
|
722 xorpd(xmm1, xmm1); |
|
723 xorpd(xmm0, xmm0); |
|
724 movl(eax, 49136); |
|
725 pinsrw(xmm0, eax, 3); |
|
726 divsd(xmm0, xmm1); |
|
727 movl(Address(rsp, 16), 2); |
|
728 |
|
729 bind(L_2TAG_PACKET_8_0_2); |
|
730 movq(Address(rsp, 8), xmm0); |
|
731 |
|
732 bind(B1_3); |
|
733 movq(xmm0, Address(rsp, 8)); |
|
734 |
|
735 bind(B1_5); |
|
736 addq(rsp, 24); |
|
737 } |
|
738 |
|
739 /******************************************************************************/ |
|
740 // ALGORITHM DESCRIPTION - POW() |
|
741 // --------------------- |
|
742 // |
|
743 // Let x=2^k * mx, mx in [1,2) |
|
744 // |
|
745 // log2(x) calculation: |
|
746 // |
|
747 // Get B~1/mx based on the output of rcpps instruction (B0) |
|
748 // B = int((B0*LH*2^9+0.5))/2^9 |
|
749 // LH is a short approximation for log2(e) |
|
750 // |
|
751 // Reduced argument, scaled by LH: |
|
752 // r=B*mx-LH (computed accurately in high and low parts) |
|
753 // |
|
754 // log2(x) result: k - log2(B) + p(r) |
|
755 // p(r) is a degree 8 polynomial |
|
756 // -log2(B) read from data table (high, low parts) |
|
757 // log2(x) is formed from high and low parts |
|
758 // For |x| in [1-1/32, 1+1/16), a slower but more accurate computation |
|
759 // based on the same table design is performed. |
|
760 // |
|
761 // Main path is taken if | floor(log2(|log2(|x|)|)) + floor(log2(|y|)) | < 8, |
|
762 // to filter out all potential OF/UF cases. |
|
763 // exp2(y*log2(x)) is computed using an 8-bit index table and a degree 5 |
|
764 // polynomial |
|
765 // |
|
766 // Special cases: |
|
767 // pow(-0,y) = -INF and raises the divide-by-zero exception for y an odd |
|
768 // integer < 0. |
|
769 // pow(-0,y) = +INF and raises the divide-by-zero exception for y < 0 and |
|
770 // not an odd integer. |
|
771 // pow(-0,y) = -0 for y an odd integer > 0. |
|
772 // pow(-0,y) = +0 for y > 0 and not an odd integer. |
|
773 // pow(-1,-INF) = NaN. |
|
774 // pow(+1,y) = NaN for any y, even a NaN. |
|
775 // pow(x,-0) = 1 for any x, even a NaN. |
|
776 // pow(x,y) = a NaN and raises the invalid exception for finite x < 0 and |
|
777 // finite non-integer y. |
|
778 // pow(x,-INF) = +INF for |x|<1. |
|
779 // pow(x,-INF) = +0 for |x|>1. |
|
780 // pow(x,+INF) = +0 for |x|<1. |
|
781 // pow(x,+INF) = +INF for |x|>1. |
|
782 // pow(-INF,y) = -0 for y an odd integer < 0. |
|
783 // pow(-INF,y) = +0 for y < 0 and not an odd integer. |
|
784 // pow(-INF,y) = -INF for y an odd integer > 0. |
|
785 // pow(-INF,y) = +INF for y > 0 and not an odd integer. |
|
786 // pow(+INF,y) = +0 for y <0. |
|
787 // pow(+INF,y) = +INF for y >0. |
|
788 // |
|
789 /******************************************************************************/ |
|
790 |
|
// _HIGHSIGMASK: 16-byte-aligned constant made of four 32-bit words.
// Read as two little-endian 64-bit lanes, each lane is 0xfffff80000000000,
// i.e. a mask covering the top 21 bits of a double (sign, 11 exponent bits
// and the 9 leading mantissa bits).
// NOTE(review): the consumer is outside this chunk; presumably this isolates
// the short leading part used when forming B in the POW notes -- confirm.
791 ALIGNED_(16) juint _HIGHSIGMASK[] = |
|
792 { |
|
793 0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL |
|
794 }; |
|
795 |
|
// _LOG2_E: 16-byte-aligned constant made of four 32-bit words.
// Read as two little-endian IEEE-754 doubles these are 0x3ff7200000000000
// (1.4453125) and 0xbf5dabe1161bb241.
// NOTE(review): the name and the "LH" remark in the POW notes suggest these
// relate to log2(e); their exact roles are decided by code outside this
// chunk -- confirm before relying on them.
796 ALIGNED_(16) juint _LOG2_E[] = |
|
797 { |
|
798 0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL |
|
799 }; |
|
800 |
|
// _HIGHMASK_Y: 16-byte-aligned bit mask made of four 32-bit words.
// Read as two little-endian 64-bit lanes: lane 0 is 0xfffffff800000000
// (keeps the upper 29 bits), lane 1 is 0xffffffff00000000 (keeps the upper
// 32 bits).
// NOTE(review): presumably applied to y to split off a short high part; the
// consumer is not visible in this chunk -- confirm.
801 ALIGNED_(16) juint _HIGHMASK_Y[] = |
|
802 { |
|
803 0x00000000UL, 0xfffffff8UL, 0x00000000UL, 0xffffffffUL |
|
804 }; |
|
805 |
|
// _T_exp: 16-byte-aligned table of 1024 32-bit words (204 rows of five words
// plus a final row of four), i.e. 256 groups of four words.
// Read as little-endian IEEE-754 doubles, each group is a pair of doubles:
// the first group starts with 0x3ff0000000000000 (1.0) and the leading value
// of the last group is 0x3fffe9d96b2a23d9 (just below 2.0).
// NOTE(review): this matches the "8-bit index table" mentioned in the POW
// notes; the entries look like 2^(j/256), j = 0..255, each followed by a
// small correction term -- confirm against the code that indexes the table.
// The values are bit-exact constants: do not reformat or round them.
806 ALIGNED_(16) juint _T_exp[] = |
|
807 { |
|
808 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3b700000UL, 0xfa5abcbfUL, |
|
809 0x3ff00b1aUL, 0xa7609f71UL, 0xbc84f6b2UL, 0xa9fb3335UL, 0x3ff0163dUL, |
|
810 0x9ab8cdb7UL, 0x3c9b6129UL, 0x143b0281UL, 0x3ff02168UL, 0x0fc54eb6UL, |
|
811 0xbc82bf31UL, 0x3e778061UL, 0x3ff02c9aUL, 0x535b085dUL, 0xbc719083UL, |
|
812 0x2e11bbccUL, 0x3ff037d4UL, 0xeeade11aUL, 0x3c656811UL, 0xe86e7f85UL, |
|
813 0x3ff04315UL, 0x1977c96eUL, 0xbc90a31cUL, 0x72f654b1UL, 0x3ff04e5fUL, |
|
814 0x3aa0d08cUL, 0x3c84c379UL, 0xd3158574UL, 0x3ff059b0UL, 0xa475b465UL, |
|
815 0x3c8d73e2UL, 0x0e3c1f89UL, 0x3ff0650aUL, 0x5799c397UL, 0xbc95cb7bUL, |
|
816 0x29ddf6deUL, 0x3ff0706bUL, 0xe2b13c27UL, 0xbc8c91dfUL, 0x2b72a836UL, |
|
817 0x3ff07bd4UL, 0x54458700UL, 0x3c832334UL, 0x18759bc8UL, 0x3ff08745UL, |
|
818 0x4bb284ffUL, 0x3c6186beUL, 0xf66607e0UL, 0x3ff092bdUL, 0x800a3fd1UL, |
|
819 0xbc968063UL, 0xcac6f383UL, 0x3ff09e3eUL, 0x18316136UL, 0x3c914878UL, |
|
820 0x9b1f3919UL, 0x3ff0a9c7UL, 0x873d1d38UL, 0x3c85d16cUL, 0x6cf9890fUL, |
|
821 0x3ff0b558UL, 0x4adc610bUL, 0x3c98a62eUL, 0x45e46c85UL, 0x3ff0c0f1UL, |
|
822 0x06d21cefUL, 0x3c94f989UL, 0x2b7247f7UL, 0x3ff0cc92UL, 0x16e24f71UL, |
|
823 0x3c901edcUL, 0x23395decUL, 0x3ff0d83bUL, 0xe43f316aUL, 0xbc9bc14dUL, |
|
824 0x32d3d1a2UL, 0x3ff0e3ecUL, 0x27c57b52UL, 0x3c403a17UL, 0x5fdfa9c5UL, |
|
825 0x3ff0efa5UL, 0xbc54021bUL, 0xbc949db9UL, 0xaffed31bUL, 0x3ff0fb66UL, |
|
826 0xc44ebd7bUL, 0xbc6b9bedUL, 0x28d7233eUL, 0x3ff10730UL, 0x1692fdd5UL, |
|
827 0x3c8d46ebUL, 0xd0125b51UL, 0x3ff11301UL, 0x39449b3aUL, 0xbc96c510UL, |
|
828 0xab5e2ab6UL, 0x3ff11edbUL, 0xf703fb72UL, 0xbc9ca454UL, 0xc06c31ccUL, |
|
829 0x3ff12abdUL, 0xb36ca5c7UL, 0xbc51b514UL, 0x14f204abUL, 0x3ff136a8UL, |
|
830 0xba48dcf0UL, 0xbc67108fUL, 0xaea92de0UL, 0x3ff1429aUL, 0x9af1369eUL, |
|
831 0xbc932fbfUL, 0x934f312eUL, 0x3ff14e95UL, 0x39bf44abUL, 0xbc8b91e8UL, |
|
832 0xc8a58e51UL, 0x3ff15a98UL, 0xb9eeab0aUL, 0x3c82406aUL, 0x5471c3c2UL, |
|
833 0x3ff166a4UL, 0x82ea1a32UL, 0x3c58f23bUL, 0x3c7d517bUL, 0x3ff172b8UL, |
|
834 0xb9d78a76UL, 0xbc819041UL, 0x8695bbc0UL, 0x3ff17ed4UL, 0xe2ac5a64UL, |
|
835 0x3c709e3fUL, 0x388c8deaUL, 0x3ff18af9UL, 0xd1970f6cUL, 0xbc911023UL, |
|
836 0x58375d2fUL, 0x3ff19726UL, 0x85f17e08UL, 0x3c94aaddUL, 0xeb6fcb75UL, |
|
837 0x3ff1a35bUL, 0x7b4968e4UL, 0x3c8e5b4cUL, 0xf8138a1cUL, 0x3ff1af99UL, |
|
838 0xa4b69280UL, 0x3c97bf85UL, 0x84045cd4UL, 0x3ff1bbe0UL, 0x352ef607UL, |
|
839 0xbc995386UL, 0x95281c6bUL, 0x3ff1c82fUL, 0x8010f8c9UL, 0x3c900977UL, |
|
840 0x3168b9aaUL, 0x3ff1d487UL, 0x00a2643cUL, 0x3c9e016eUL, 0x5eb44027UL, |
|
841 0x3ff1e0e7UL, 0x088cb6deUL, 0xbc96fdd8UL, 0x22fcd91dUL, 0x3ff1ed50UL, |
|
842 0x027bb78cUL, 0xbc91df98UL, 0x8438ce4dUL, 0x3ff1f9c1UL, 0xa097af5cUL, |
|
843 0xbc9bf524UL, 0x88628cd6UL, 0x3ff2063bUL, 0x814a8495UL, 0x3c8dc775UL, |
|
844 0x3578a819UL, 0x3ff212beUL, 0x2cfcaac9UL, 0x3c93592dUL, 0x917ddc96UL, |
|
845 0x3ff21f49UL, 0x9494a5eeUL, 0x3c82a97eUL, 0xa27912d1UL, 0x3ff22bddUL, |
|
846 0x5577d69fUL, 0x3c8d34fbUL, 0x6e756238UL, 0x3ff2387aUL, 0xb6c70573UL, |
|
847 0x3c99b07eUL, 0xfb82140aUL, 0x3ff2451fUL, 0x911ca996UL, 0x3c8acfccUL, |
|
848 0x4fb2a63fUL, 0x3ff251ceUL, 0xbef4f4a4UL, 0x3c8ac155UL, 0x711ece75UL, |
|
849 0x3ff25e85UL, 0x4ac31b2cUL, 0x3c93e1a2UL, 0x65e27cddUL, 0x3ff26b45UL, |
|
850 0x9940e9d9UL, 0x3c82bd33UL, 0x341ddf29UL, 0x3ff2780eUL, 0x05f9e76cUL, |
|
851 0x3c9e067cUL, 0xe1f56381UL, 0x3ff284dfUL, 0x8c3f0d7eUL, 0xbc9a4c3aUL, |
|
852 0x7591bb70UL, 0x3ff291baUL, 0x28401cbdUL, 0xbc82cc72UL, 0xf51fdee1UL, |
|
853 0x3ff29e9dUL, 0xafad1255UL, 0x3c8612e8UL, 0x66d10f13UL, 0x3ff2ab8aUL, |
|
854 0x191690a7UL, 0xbc995743UL, 0xd0dad990UL, 0x3ff2b87fUL, 0xd6381aa4UL, |
|
855 0xbc410adcUL, 0x39771b2fUL, 0x3ff2c57eUL, 0xa6eb5124UL, 0xbc950145UL, |
|
856 0xa6e4030bUL, 0x3ff2d285UL, 0x54db41d5UL, 0x3c900247UL, 0x1f641589UL, |
|
857 0x3ff2df96UL, 0xfbbce198UL, 0x3c9d16cfUL, 0xa93e2f56UL, 0x3ff2ecafUL, |
|
858 0x45d52383UL, 0x3c71ca0fUL, 0x4abd886bUL, 0x3ff2f9d2UL, 0x532bda93UL, |
|
859 0xbc653c55UL, 0x0a31b715UL, 0x3ff306feUL, 0xd23182e4UL, 0x3c86f46aUL, |
|
860 0xedeeb2fdUL, 0x3ff31432UL, 0xf3f3fcd1UL, 0x3c8959a3UL, 0xfc4cd831UL, |
|
861 0x3ff32170UL, 0x8e18047cUL, 0x3c8a9ce7UL, 0x3ba8ea32UL, 0x3ff32eb8UL, |
|
862 0x3cb4f318UL, 0xbc9c45e8UL, 0xb26416ffUL, 0x3ff33c08UL, 0x843659a6UL, |
|
863 0x3c932721UL, 0x66e3fa2dUL, 0x3ff34962UL, 0x930881a4UL, 0xbc835a75UL, |
|
864 0x5f929ff1UL, 0x3ff356c5UL, 0x5c4e4628UL, 0xbc8b5ceeUL, 0xa2de883bUL, |
|
865 0x3ff36431UL, 0xa06cb85eUL, 0xbc8c3144UL, 0x373aa9cbUL, 0x3ff371a7UL, |
|
866 0xbf42eae2UL, 0xbc963aeaUL, 0x231e754aUL, 0x3ff37f26UL, 0x9eceb23cUL, |
|
867 0xbc99f5caUL, 0x6d05d866UL, 0x3ff38caeUL, 0x3c9904bdUL, 0xbc9e958dUL, |
|
868 0x1b7140efUL, 0x3ff39a40UL, 0xfc8e2934UL, 0xbc99a9a5UL, 0x34e59ff7UL, |
|
869 0x3ff3a7dbUL, 0xd661f5e3UL, 0xbc75e436UL, 0xbfec6cf4UL, 0x3ff3b57fUL, |
|
870 0xe26fff18UL, 0x3c954c66UL, 0xc313a8e5UL, 0x3ff3c32dUL, 0x375d29c3UL, |
|
871 0xbc9efff8UL, 0x44ede173UL, 0x3ff3d0e5UL, 0x8c284c71UL, 0x3c7fe8d0UL, |
|
872 0x4c123422UL, 0x3ff3dea6UL, 0x11f09ebcUL, 0x3c8ada09UL, 0xdf1c5175UL, |
|
873 0x3ff3ec70UL, 0x7b8c9bcaUL, 0xbc8af663UL, 0x04ac801cUL, 0x3ff3fa45UL, |
|
874 0xf956f9f3UL, 0xbc97d023UL, 0xc367a024UL, 0x3ff40822UL, 0xb6f4d048UL, |
|
875 0x3c8bddf8UL, 0x21f72e2aUL, 0x3ff4160aUL, 0x1c309278UL, 0xbc5ef369UL, |
|
876 0x2709468aUL, 0x3ff423fbUL, 0xc0b314ddUL, 0xbc98462dUL, 0xd950a897UL, |
|
877 0x3ff431f5UL, 0xe35f7999UL, 0xbc81c7ddUL, 0x3f84b9d4UL, 0x3ff43ffaUL, |
|
878 0x9704c003UL, 0x3c8880beUL, 0x6061892dUL, 0x3ff44e08UL, 0x04ef80d0UL, |
|
879 0x3c489b7aUL, 0x42a7d232UL, 0x3ff45c20UL, 0x82fb1f8eUL, 0xbc686419UL, |
|
880 0xed1d0057UL, 0x3ff46a41UL, 0xd1648a76UL, 0x3c9c944bUL, 0x668b3237UL, |
|
881 0x3ff4786dUL, 0xed445733UL, 0xbc9c20f0UL, 0xb5c13cd0UL, 0x3ff486a2UL, |
|
882 0xb69062f0UL, 0x3c73c1a3UL, 0xe192aed2UL, 0x3ff494e1UL, 0x5e499ea0UL, |
|
883 0xbc83b289UL, 0xf0d7d3deUL, 0x3ff4a32aUL, 0xf3d1be56UL, 0x3c99cb62UL, |
|
884 0xea6db7d7UL, 0x3ff4b17dUL, 0x7f2897f0UL, 0xbc8125b8UL, 0xd5362a27UL, |
|
885 0x3ff4bfdaUL, 0xafec42e2UL, 0x3c7d4397UL, 0xb817c114UL, 0x3ff4ce41UL, |
|
886 0x690abd5dUL, 0x3c905e29UL, 0x99fddd0dUL, 0x3ff4dcb2UL, 0xbc6a7833UL, |
|
887 0x3c98ecdbUL, 0x81d8abffUL, 0x3ff4eb2dUL, 0x2e5d7a52UL, 0xbc95257dUL, |
|
888 0x769d2ca7UL, 0x3ff4f9b2UL, 0xd25957e3UL, 0xbc94b309UL, 0x7f4531eeUL, |
|
889 0x3ff50841UL, 0x49b7465fUL, 0x3c7a249bUL, 0xa2cf6642UL, 0x3ff516daUL, |
|
890 0x69bd93efUL, 0xbc8f7685UL, 0xe83f4eefUL, 0x3ff5257dUL, 0x43efef71UL, |
|
891 0xbc7c998dUL, 0x569d4f82UL, 0x3ff5342bUL, 0x1db13cadUL, 0xbc807abeUL, |
|
892 0xf4f6ad27UL, 0x3ff542e2UL, 0x192d5f7eUL, 0x3c87926dUL, 0xca5d920fUL, |
|
893 0x3ff551a4UL, 0xefede59bUL, 0xbc8d689cUL, 0xdde910d2UL, 0x3ff56070UL, |
|
894 0x168eebf0UL, 0xbc90fb6eUL, 0x36b527daUL, 0x3ff56f47UL, 0x011d93adUL, |
|
895 0x3c99bb2cUL, 0xdbe2c4cfUL, 0x3ff57e27UL, 0x8a57b9c4UL, 0xbc90b98cUL, |
|
896 0xd497c7fdUL, 0x3ff58d12UL, 0x5b9a1de8UL, 0x3c8295e1UL, 0x27ff07ccUL, |
|
897 0x3ff59c08UL, 0xe467e60fUL, 0xbc97e2ceUL, 0xdd485429UL, 0x3ff5ab07UL, |
|
898 0x054647adUL, 0x3c96324cUL, 0xfba87a03UL, 0x3ff5ba11UL, 0x4c233e1aUL, |
|
899 0xbc9b77a1UL, 0x8a5946b7UL, 0x3ff5c926UL, 0x816986a2UL, 0x3c3c4b1bUL, |
|
900 0x90998b93UL, 0x3ff5d845UL, 0xa8b45643UL, 0xbc9cd6a7UL, 0x15ad2148UL, |
|
901 0x3ff5e76fUL, 0x3080e65eUL, 0x3c9ba6f9UL, 0x20dceb71UL, 0x3ff5f6a3UL, |
|
902 0xe3cdcf92UL, 0xbc89eaddUL, 0xb976dc09UL, 0x3ff605e1UL, 0x9b56de47UL, |
|
903 0xbc93e242UL, 0xe6cdf6f4UL, 0x3ff6152aUL, 0x4ab84c27UL, 0x3c9e4b3eUL, |
|
904 0xb03a5585UL, 0x3ff6247eUL, 0x7e40b497UL, 0xbc9383c1UL, 0x1d1929fdUL, |
|
905 0x3ff633ddUL, 0xbeb964e5UL, 0x3c984710UL, 0x34ccc320UL, 0x3ff64346UL, |
|
906 0x759d8933UL, 0xbc8c483cUL, 0xfebc8fb7UL, 0x3ff652b9UL, 0xc9a73e09UL, |
|
907 0xbc9ae3d5UL, 0x82552225UL, 0x3ff66238UL, 0x87591c34UL, 0xbc9bb609UL, |
|
908 0xc70833f6UL, 0x3ff671c1UL, 0x586c6134UL, 0xbc8e8732UL, 0xd44ca973UL, |
|
909 0x3ff68155UL, 0x44f73e65UL, 0x3c6038aeUL, 0xb19e9538UL, 0x3ff690f4UL, |
|
910 0x9aeb445dUL, 0x3c8804bdUL, 0x667f3bcdUL, 0x3ff6a09eUL, 0x13b26456UL, |
|
911 0xbc9bdd34UL, 0xfa75173eUL, 0x3ff6b052UL, 0x2c9a9d0eUL, 0x3c7a38f5UL, |
|
912 0x750bdabfUL, 0x3ff6c012UL, 0x67ff0b0dUL, 0xbc728956UL, 0xddd47645UL, |
|
913 0x3ff6cfdcUL, 0xb6f17309UL, 0x3c9c7aa9UL, 0x3c651a2fUL, 0x3ff6dfb2UL, |
|
914 0x683c88abUL, 0xbc6bbe3aUL, 0x98593ae5UL, 0x3ff6ef92UL, 0x9e1ac8b2UL, |
|
915 0xbc90b974UL, 0xf9519484UL, 0x3ff6ff7dUL, 0x25860ef6UL, 0xbc883c0fUL, |
|
916 0x66f42e87UL, 0x3ff70f74UL, 0xd45aa65fUL, 0x3c59d644UL, 0xe8ec5f74UL, |
|
917 0x3ff71f75UL, 0x86887a99UL, 0xbc816e47UL, 0x86ead08aUL, 0x3ff72f82UL, |
|
918 0x2cd62c72UL, 0xbc920aa0UL, 0x48a58174UL, 0x3ff73f9aUL, 0x6c65d53cUL, |
|
919 0xbc90a8d9UL, 0x35d7cbfdUL, 0x3ff74fbdUL, 0x618a6e1cUL, 0x3c9047fdUL, |
|
920 0x564267c9UL, 0x3ff75febUL, 0x57316dd3UL, 0xbc902459UL, 0xb1ab6e09UL, |
|
921 0x3ff77024UL, 0x169147f8UL, 0x3c9b7877UL, 0x4fde5d3fUL, 0x3ff78069UL, |
|
922 0x0a02162dUL, 0x3c9866b8UL, 0x38ac1cf6UL, 0x3ff790b9UL, 0x62aadd3eUL, |
|
923 0x3c9349a8UL, 0x73eb0187UL, 0x3ff7a114UL, 0xee04992fUL, 0xbc841577UL, |
|
924 0x0976cfdbUL, 0x3ff7b17bUL, 0x8468dc88UL, 0xbc9bebb5UL, 0x0130c132UL, |
|
925 0x3ff7c1edUL, 0xd1164dd6UL, 0x3c9f124cUL, 0x62ff86f0UL, 0x3ff7d26aUL, |
|
926 0xfb72b8b4UL, 0x3c91bddbUL, 0x36cf4e62UL, 0x3ff7e2f3UL, 0xba15797eUL, |
|
927 0x3c705d02UL, 0x8491c491UL, 0x3ff7f387UL, 0xcf9311aeUL, 0xbc807f11UL, |
|
928 0x543e1a12UL, 0x3ff80427UL, 0x626d972bUL, 0xbc927c86UL, 0xadd106d9UL, |
|
929 0x3ff814d2UL, 0x0d151d4dUL, 0x3c946437UL, 0x994cce13UL, 0x3ff82589UL, |
|
930 0xd41532d8UL, 0xbc9d4c1dUL, 0x1eb941f7UL, 0x3ff8364cUL, 0x31df2bd5UL, |
|
931 0x3c999b9aUL, 0x4623c7adUL, 0x3ff8471aUL, 0xa341cdfbUL, 0xbc88d684UL, |
|
932 0x179f5b21UL, 0x3ff857f4UL, 0xf8b216d0UL, 0xbc5ba748UL, 0x9b4492edUL, |
|
933 0x3ff868d9UL, 0x9bd4f6baUL, 0xbc9fc6f8UL, 0xd931a436UL, 0x3ff879caUL, |
|
934 0xd2db47bdUL, 0x3c85d2d7UL, 0xd98a6699UL, 0x3ff88ac7UL, 0xf37cb53aUL, |
|
935 0x3c9994c2UL, 0xa478580fUL, 0x3ff89bd0UL, 0x4475202aUL, 0x3c9d5395UL, |
|
936 0x422aa0dbUL, 0x3ff8ace5UL, 0x56864b27UL, 0x3c96e9f1UL, 0xbad61778UL, |
|
937 0x3ff8be05UL, 0xfc43446eUL, 0x3c9ecb5eUL, 0x16b5448cUL, 0x3ff8cf32UL, |
|
938 0x32e9e3aaUL, 0xbc70d55eUL, 0x5e0866d9UL, 0x3ff8e06aUL, 0x6fc9b2e6UL, |
|
939 0xbc97114aUL, 0x99157736UL, 0x3ff8f1aeUL, 0xa2e3976cUL, 0x3c85cc13UL, |
|
940 0xd0282c8aUL, 0x3ff902feUL, 0x85fe3fd2UL, 0x3c9592caUL, 0x0b91ffc6UL, |
|
941 0x3ff9145bUL, 0x2e582524UL, 0xbc9dd679UL, 0x53aa2fe2UL, 0x3ff925c3UL, |
|
942 0xa639db7fUL, 0xbc83455fUL, 0xb0cdc5e5UL, 0x3ff93737UL, 0x81b57ebcUL, |
|
943 0xbc675fc7UL, 0x2b5f98e5UL, 0x3ff948b8UL, 0x797d2d99UL, 0xbc8dc3d6UL, |
|
944 0xcbc8520fUL, 0x3ff95a44UL, 0x96a5f039UL, 0xbc764b7cUL, 0x9a7670b3UL, |
|
945 0x3ff96bddUL, 0x7f19c896UL, 0xbc5ba596UL, 0x9fde4e50UL, 0x3ff97d82UL, |
|
946 0x7c1b85d1UL, 0xbc9d185bUL, 0xe47a22a2UL, 0x3ff98f33UL, 0xa24c78ecUL, |
|
947 0x3c7cabdaUL, 0x70ca07baUL, 0x3ff9a0f1UL, 0x91cee632UL, 0xbc9173bdUL, |
|
948 0x4d53fe0dUL, 0x3ff9b2bbUL, 0x4df6d518UL, 0xbc9dd84eUL, 0x82a3f090UL, |
|
949 0x3ff9c491UL, 0xb071f2beUL, 0x3c7c7c46UL, 0x194bb8d5UL, 0x3ff9d674UL, |
|
950 0xa3dd8233UL, 0xbc9516beUL, 0x19e32323UL, 0x3ff9e863UL, 0x78e64c6eUL, |
|
951 0x3c7824caUL, 0x8d07f29eUL, 0x3ff9fa5eUL, 0xaaf1faceUL, 0xbc84a9ceUL, |
|
952 0x7b5de565UL, 0x3ffa0c66UL, 0x5d1cd533UL, 0xbc935949UL, 0xed8eb8bbUL, |
|
953 0x3ffa1e7aUL, 0xee8be70eUL, 0x3c9c6618UL, 0xec4a2d33UL, 0x3ffa309bUL, |
|
954 0x7ddc36abUL, 0x3c96305cUL, 0x80460ad8UL, 0x3ffa42c9UL, 0x589fb120UL, |
|
955 0xbc9aa780UL, 0xb23e255dUL, 0x3ffa5503UL, 0xdb8d41e1UL, 0xbc9d2f6eUL, |
|
956 0x8af46052UL, 0x3ffa674aUL, 0x30670366UL, 0x3c650f56UL, 0x1330b358UL, |
|
957 0x3ffa799eUL, 0xcac563c7UL, 0x3c9bcb7eUL, 0x53c12e59UL, 0x3ffa8bfeUL, |
|
958 0xb2ba15a9UL, 0xbc94f867UL, 0x5579fdbfUL, 0x3ffa9e6bUL, 0x0ef7fd31UL, |
|
959 0x3c90fac9UL, 0x21356ebaUL, 0x3ffab0e5UL, 0xdae94545UL, 0x3c889c31UL, |
|
960 0xbfd3f37aUL, 0x3ffac36bUL, 0xcae76cd0UL, 0xbc8f9234UL, 0x3a3c2774UL, |
|
961 0x3ffad5ffUL, 0xb6b1b8e5UL, 0x3c97ef3bUL, 0x995ad3adUL, 0x3ffae89fUL, |
|
962 0x345dcc81UL, 0x3c97a1cdUL, 0xe622f2ffUL, 0x3ffafb4cUL, 0x0f315ecdUL, |
|
963 0xbc94b2fcUL, 0x298db666UL, 0x3ffb0e07UL, 0x4c80e425UL, 0xbc9bdef5UL, |
|
964 0x6c9a8952UL, 0x3ffb20ceUL, 0x4a0756ccUL, 0x3c94dd02UL, 0xb84f15fbUL, |
|
965 0x3ffb33a2UL, 0x3084d708UL, 0xbc62805eUL, 0x15b749b1UL, 0x3ffb4684UL, |
|
966 0xe9df7c90UL, 0xbc7f763dUL, 0x8de5593aUL, 0x3ffb5972UL, 0xbbba6de3UL, |
|
967 0xbc9c71dfUL, 0x29f1c52aUL, 0x3ffb6c6eUL, 0x52883f6eUL, 0x3c92a8f3UL, |
|
968 0xf2fb5e47UL, 0x3ffb7f76UL, 0x7e54ac3bUL, 0xbc75584fUL, 0xf22749e4UL, |
|
969 0x3ffb928cUL, 0x54cb65c6UL, 0xbc9b7216UL, 0x30a1064aUL, 0x3ffba5b0UL, |
|
970 0x0e54292eUL, 0xbc9efcd3UL, 0xb79a6f1fUL, 0x3ffbb8e0UL, 0xc9696205UL, |
|
971 0xbc3f52d1UL, 0x904bc1d2UL, 0x3ffbcc1eUL, 0x7a2d9e84UL, 0x3c823dd0UL, |
|
972 0xc3f3a207UL, 0x3ffbdf69UL, 0x60ea5b53UL, 0xbc3c2623UL, 0x5bd71e09UL, |
|
973 0x3ffbf2c2UL, 0x3f6b9c73UL, 0xbc9efdcaUL, 0x6141b33dUL, 0x3ffc0628UL, |
|
974 0xa1fbca34UL, 0xbc8d8a5aUL, 0xdd85529cUL, 0x3ffc199bUL, 0x895048ddUL, |
|
975 0x3c811065UL, 0xd9fa652cUL, 0x3ffc2d1cUL, 0x17c8a5d7UL, 0xbc96e516UL, |
|
976 0x5fffd07aUL, 0x3ffc40abUL, 0xe083c60aUL, 0x3c9b4537UL, 0x78fafb22UL, |
|
977 0x3ffc5447UL, 0x2493b5afUL, 0x3c912f07UL, 0x2e57d14bUL, 0x3ffc67f1UL, |
|
978 0xff483cadUL, 0x3c92884dUL, 0x8988c933UL, 0x3ffc7ba8UL, 0xbe255559UL, |
|
979 0xbc8e76bbUL, 0x9406e7b5UL, 0x3ffc8f6dUL, 0x48805c44UL, 0x3c71acbcUL, |
|
980 0x5751c4dbUL, 0x3ffca340UL, 0xd10d08f5UL, 0xbc87f2beUL, 0xdcef9069UL, |
|
981 0x3ffcb720UL, 0xd1e949dbUL, 0x3c7503cbUL, 0x2e6d1675UL, 0x3ffccb0fUL, |
|
982 0x86009092UL, 0xbc7d220fUL, 0x555dc3faUL, 0x3ffcdf0bUL, 0x53829d72UL, |
|
983 0xbc8dd83bUL, 0x5b5bab74UL, 0x3ffcf315UL, 0xb86dff57UL, 0xbc9a08e9UL, |
|
984 0x4a07897cUL, 0x3ffd072dUL, 0x43797a9cUL, 0xbc9cbc37UL, 0x2b08c968UL, |
|
985 0x3ffd1b53UL, 0x219a36eeUL, 0x3c955636UL, 0x080d89f2UL, 0x3ffd2f87UL, |
|
986 0x719d8578UL, 0xbc9d487bUL, 0xeacaa1d6UL, 0x3ffd43c8UL, 0xbf5a1614UL, |
|
987 0x3c93db53UL, 0xdcfba487UL, 0x3ffd5818UL, 0xd75b3707UL, 0x3c82ed02UL, |
|
988 0xe862e6d3UL, 0x3ffd6c76UL, 0x4a8165a0UL, 0x3c5fe87aUL, 0x16c98398UL, |
|
989 0x3ffd80e3UL, 0x8beddfe8UL, 0xbc911ec1UL, 0x71ff6075UL, 0x3ffd955dUL, |
|
990 0xbb9af6beUL, 0x3c9a052dUL, 0x03db3285UL, 0x3ffda9e6UL, 0x696db532UL, |
|
991 0x3c9c2300UL, 0xd63a8315UL, 0x3ffdbe7cUL, 0x926b8be4UL, 0xbc9b76f1UL, |
|
992 0xf301b460UL, 0x3ffdd321UL, 0x78f018c3UL, 0x3c92da57UL, 0x641c0658UL, |
|
993 0x3ffde7d5UL, 0x8e79ba8fUL, 0xbc9ca552UL, 0x337b9b5fUL, 0x3ffdfc97UL, |
|
994 0x4f184b5cUL, 0xbc91a5cdUL, 0x6b197d17UL, 0x3ffe1167UL, 0xbd5c7f44UL, |
|
995 0xbc72b529UL, 0x14f5a129UL, 0x3ffe2646UL, 0x817a1496UL, 0xbc97b627UL, |
|
996 0x3b16ee12UL, 0x3ffe3b33UL, 0x31fdc68bUL, 0xbc99f4a4UL, 0xe78b3ff6UL, |
|
997 0x3ffe502eUL, 0x80a9cc8fUL, 0x3c839e89UL, 0x24676d76UL, 0x3ffe6539UL, |
|
998 0x7522b735UL, 0xbc863ff8UL, 0xfbc74c83UL, 0x3ffe7a51UL, 0xca0c8de2UL, |
|
999 0x3c92d522UL, 0x77cdb740UL, 0x3ffe8f79UL, 0x80b054b1UL, 0xbc910894UL, |
|
1000 0xa2a490daUL, 0x3ffea4afUL, 0x179c2893UL, 0xbc9e9c23UL, 0x867cca6eUL, |
|
1001 0x3ffeb9f4UL, 0x2293e4f2UL, 0x3c94832fUL, 0x2d8e67f1UL, 0x3ffecf48UL, |
|
1002 0xb411ad8cUL, 0xbc9c93f3UL, 0xa2188510UL, 0x3ffee4aaUL, 0xa487568dUL, |
|
1003 0x3c91c68dUL, 0xee615a27UL, 0x3ffefa1bUL, 0x86a4b6b0UL, 0x3c9dc7f4UL, |
|
1004 0x1cb6412aUL, 0x3fff0f9cUL, 0x65181d45UL, 0xbc932200UL, 0x376bba97UL, |
|
1005 0x3fff252bUL, 0xbf0d8e43UL, 0x3c93a1a5UL, 0x48dd7274UL, 0x3fff3ac9UL, |
|
1006 0x3ed837deUL, 0xbc795a5aUL, 0x5b6e4540UL, 0x3fff5076UL, 0x2dd8a18bUL, |
|
1007 0x3c99d3e1UL, 0x798844f8UL, 0x3fff6632UL, 0x3539343eUL, 0x3c9fa37bUL, |
|
1008 0xad9cbe14UL, 0x3fff7bfdUL, 0xd006350aUL, 0xbc9dbb12UL, 0x02243c89UL, |
|
1009 0x3fff91d8UL, 0xa779f689UL, 0xbc612ea8UL, 0x819e90d8UL, 0x3fffa7c1UL, |
|
1010 0xf3a5931eUL, 0x3c874853UL, 0x3692d514UL, 0x3fffbdbaUL, 0x15098eb6UL, |
|
1011 0xbc796773UL, 0x2b8f71f1UL, 0x3fffd3c2UL, 0x966579e7UL, 0x3c62eb74UL, |
|
1012 0x6b2a23d9UL, 0x3fffe9d9UL, 0x7442fde3UL, 0x3c74a603UL |
|
1013 }; |
|
1014 |
|
// _e_coeff: 16-byte-aligned table of twelve 32-bit words, i.e. six
// little-endian doubles; the last double is 0.0.
// The fifth double, 0x3fe62e42fefa39ef, is the double nearest to ln(2).
// NOTE(review): presumably the coefficients of the degree 5 polynomial that
// the POW notes mention for exp2(y*log2(x)); the order in which the code
// consumes them is not visible in this chunk -- confirm.
1015 ALIGNED_(16) juint _e_coeff[] = |
|
1016 { |
|
1017 0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL, 0x6fba4e77UL, |
|
1018 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL, 0xfefa39efUL, 0x3fe62e42UL, |
|
1019 0x00000000UL, 0x00000000UL |
|
1020 }; |
|
1021 |
|
// _coeff_h: 16-byte-aligned constant of four 32-bit words, i.e. two
// little-endian doubles whose low words are zero: 0xbfd61a0000000000 and
// 0xbf5dabe100000000.
// The second high word (0xbf5dabe1) equals the high word of the value
// 0x161bb241/0xbf5dabe1 stored in _LOG2_E and _coeff_pow.
// NOTE(review): presumably shortened ("high part") versions of polynomial
// coefficients; the consumer is outside this chunk -- confirm.
1022 ALIGNED_(16) juint _coeff_h[] = |
|
1023 { |
|
1024 0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL |
|
1025 }; |
|
1026 |
|
// _HIGHMASK_LOG_X: 16-byte-aligned bit mask of four 32-bit words.
// Read as two little-endian 64-bit lanes: lane 0 is 0xfffffffff8000000
// (clears the low 27 bits), lane 1 is 0xfffff80000000000 (keeps only the
// top 21 bits).
// NOTE(review): presumably used to split log2(x) terms into high and low
// parts; the consumer is not visible in this chunk -- confirm.
1027 ALIGNED_(16) juint _HIGHMASK_LOG_X[] = |
|
1028 { |
|
1029 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xfffff800UL |
|
1030 }; |
|
1031 |
|
// _HALFMASK: bit mask of four 32-bit words. Read as two little-endian 64-bit
// lanes, both lanes are 0xfffffffff8000000, which clears the low 27 bits of
// a 64-bit value.
// NOTE(review): this table is only 8-byte aligned, whereas the neighbouring
// constants use ALIGNED_(16). That is fine for 8-byte loads, but a 16-byte
// SSE memory operand would need 16-byte alignment -- confirm how the
// consumer (outside this chunk) reads it before changing either side.
1032 ALIGNED_(8) juint _HALFMASK[] = |
|
1033 { |
|
1034 0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL |
|
1035 }; |
|
1036 |
|
// _coeff_pow: 16-byte-aligned table of 24 32-bit words, i.e. twelve
// little-endian doubles.
// It contains 0xbf5dabe1161bb241, the same value as the second double of
// _LOG2_E, and the pair 0x9f95985a/0xbfb528db appears twice.
// NOTE(review): presumably the coefficients of the log2 polynomial p(r)
// described in the POW notes; the pairing and order in which the code
// consumes them is not visible in this chunk -- confirm.
1037 ALIGNED_(16) juint _coeff_pow[] = |
|
1038 { |
|
1039 0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL, 0x9f95985aUL, |
|
1040 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL, 0x518775e3UL, 0x3f9004f2UL, |
|
1041 0xac8349bbUL, 0x3fa76c9bUL, 0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, |
|
1042 0xbf5dabe1UL, 0x9f95985aUL, 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL, |
|
1043 0x486ececbUL, 0x3fc4635eUL, 0x412055ccUL, 0xbdd61bb2UL |
|
1044 }; |
|
1045 |
|
1046 ALIGNED_(16) juint _L_tbl_pow[] = |
|
1047 { |
|
1048 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x20000000UL, |
|
1049 0x3feff00aUL, 0x96621f95UL, 0x3e5b1856UL, 0xe0000000UL, 0x3fefe019UL, |
|
1050 0xe5916f9eUL, 0xbe325278UL, 0x00000000UL, 0x3fefd02fUL, 0x859a1062UL, |
|
1051 0x3e595fb7UL, 0xc0000000UL, 0x3fefc049UL, 0xb245f18fUL, 0xbe529c38UL, |
|
1052 0xe0000000UL, 0x3fefb069UL, 0xad2880a7UL, 0xbe501230UL, 0x60000000UL, |
|
1053 0x3fefa08fUL, 0xc8e72420UL, 0x3e597bd1UL, 0x80000000UL, 0x3fef90baUL, |
|
1054 0xc30c4500UL, 0xbe5d6c75UL, 0xe0000000UL, 0x3fef80eaUL, 0x02c63f43UL, |
|
1055 0x3e2e1318UL, 0xc0000000UL, 0x3fef7120UL, 0xb3d4ccccUL, 0xbe44c52aUL, |
|
1056 0x00000000UL, 0x3fef615cUL, 0xdbd91397UL, 0xbe4e7d6cUL, 0xa0000000UL, |
|
1057 0x3fef519cUL, 0x65c5cd68UL, 0xbe522dc8UL, 0xa0000000UL, 0x3fef41e2UL, |
|
1058 0x46d1306cUL, 0xbe5a840eUL, 0xe0000000UL, 0x3fef322dUL, 0xd2980e94UL, |
|
1059 0x3e5071afUL, 0xa0000000UL, 0x3fef227eUL, 0x773abadeUL, 0xbe5891e5UL, |
|
1060 0xa0000000UL, 0x3fef12d4UL, 0xdc6bf46bUL, 0xbe5cccbeUL, 0xe0000000UL, |
|
1061 0x3fef032fUL, 0xbc7247faUL, 0xbe2bab83UL, 0x80000000UL, 0x3feef390UL, |
|
1062 0xbcaa1e46UL, 0xbe53bb3bUL, 0x60000000UL, 0x3feee3f6UL, 0x5f6c682dUL, |
|
1063 0xbe54c619UL, 0x80000000UL, 0x3feed461UL, 0x5141e368UL, 0xbe4b6d86UL, |
|
1064 0xe0000000UL, 0x3feec4d1UL, 0xec678f76UL, 0xbe369af6UL, 0x80000000UL, |
|
1065 0x3feeb547UL, 0x41301f55UL, 0xbe2d4312UL, 0x60000000UL, 0x3feea5c2UL, |
|
1066 0x676da6bdUL, 0xbe4d8dd0UL, 0x60000000UL, 0x3fee9642UL, 0x57a891c4UL, |
|
1067 0x3e51f991UL, 0xa0000000UL, 0x3fee86c7UL, 0xe4eb491eUL, 0x3e579bf9UL, |
|
1068 0x20000000UL, 0x3fee7752UL, 0xfddc4a2cUL, 0xbe3356e6UL, 0xc0000000UL, |
|
1069 0x3fee67e1UL, 0xd75b5bf1UL, 0xbe449531UL, 0x80000000UL, 0x3fee5876UL, |
|
1070 0xbd423b8eUL, 0x3df54fe4UL, 0x60000000UL, 0x3fee4910UL, 0x330e51b9UL, |
|
1071 0x3e54289cUL, 0x80000000UL, 0x3fee39afUL, 0x8651a95fUL, 0xbe55aad6UL, |
|
1072 0xa0000000UL, 0x3fee2a53UL, 0x5e98c708UL, 0xbe2fc4a9UL, 0xe0000000UL, |
|
1073 0x3fee1afcUL, 0x0989328dUL, 0x3e23958cUL, 0x40000000UL, 0x3fee0babUL, |
|
1074 0xee642abdUL, 0xbe425dd8UL, 0xa0000000UL, 0x3fedfc5eUL, 0xc394d236UL, |
|
1075 0x3e526362UL, 0x20000000UL, 0x3feded17UL, 0xe104aa8eUL, 0x3e4ce247UL, |
|
1076 0xc0000000UL, 0x3fedddd4UL, 0x265a9be4UL, 0xbe5bb77aUL, 0x40000000UL, |
|
1077 0x3fedce97UL, 0x0ecac52fUL, 0x3e4a7cb1UL, 0xe0000000UL, 0x3fedbf5eUL, |
|
1078 0x124cb3b8UL, 0x3e257024UL, 0x80000000UL, 0x3fedb02bUL, 0xe6d4febeUL, |
|
1079 0xbe2033eeUL, 0x20000000UL, 0x3feda0fdUL, 0x39cca00eUL, 0xbe3ddabcUL, |
|
1080 0xc0000000UL, 0x3fed91d3UL, 0xef8a552aUL, 0xbe543390UL, 0x40000000UL, |
|
1081 0x3fed82afUL, 0xb8e85204UL, 0x3e513850UL, 0xe0000000UL, 0x3fed738fUL, |
|
1082 0x3d59fe08UL, 0xbe5db728UL, 0x40000000UL, 0x3fed6475UL, 0x3aa7ead1UL, |
|
1083 0x3e58804bUL, 0xc0000000UL, 0x3fed555fUL, 0xf8a35ba9UL, 0xbe5298b0UL, |
|
1084 0x00000000UL, 0x3fed464fUL, 0x9a88dd15UL, 0x3e5a8cdbUL, 0x40000000UL, |
|
1085 0x3fed3743UL, 0xb0b0a190UL, 0x3e598635UL, 0x80000000UL, 0x3fed283cUL, |
|
1086 0xe2113295UL, 0xbe5c1119UL, 0x80000000UL, 0x3fed193aUL, 0xafbf1728UL, |
|
1087 0xbe492e9cUL, 0x60000000UL, 0x3fed0a3dUL, 0xe4a4ccf3UL, 0x3e19b90eUL, |
|
1088 0x20000000UL, 0x3fecfb45UL, 0xba3cbeb8UL, 0x3e406b50UL, 0xc0000000UL, |
|
1089 0x3fecec51UL, 0x110f7dddUL, 0x3e0d6806UL, 0x40000000UL, 0x3fecdd63UL, |
|
1090 0x7dd7d508UL, 0xbe5a8943UL, 0x80000000UL, 0x3fecce79UL, 0x9b60f271UL, |
|
1091 0xbe50676aUL, 0x80000000UL, 0x3fecbf94UL, 0x0b9ad660UL, 0x3e59174fUL, |
|
1092 0x60000000UL, 0x3fecb0b4UL, 0x00823d9cUL, 0x3e5bbf72UL, 0x20000000UL, |
|
1093 0x3feca1d9UL, 0x38a6ec89UL, 0xbe4d38f9UL, 0x80000000UL, 0x3fec9302UL, |
|
1094 0x3a0b7d8eUL, 0x3e53dbfdUL, 0xc0000000UL, 0x3fec8430UL, 0xc6826b34UL, |
|
1095 0xbe27c5c9UL, 0xc0000000UL, 0x3fec7563UL, 0x0c706381UL, 0xbe593653UL, |
|
1096 0x60000000UL, 0x3fec669bUL, 0x7df34ec7UL, 0x3e461ab5UL, 0xe0000000UL, |
|
1097 0x3fec57d7UL, 0x40e5e7e8UL, 0xbe5c3daeUL, 0x00000000UL, 0x3fec4919UL, |
|
1098 0x5602770fUL, 0xbe55219dUL, 0xc0000000UL, 0x3fec3a5eUL, 0xec7911ebUL, |
|
1099 0x3e5a5d25UL, 0x60000000UL, 0x3fec2ba9UL, 0xb39ea225UL, 0xbe53c00bUL, |
|
1100 0x80000000UL, 0x3fec1cf8UL, 0x967a212eUL, 0x3e5a8ddfUL, 0x60000000UL, |
|
1101 0x3fec0e4cUL, 0x580798bdUL, 0x3e5f53abUL, 0x00000000UL, 0x3febffa5UL, |
|
1102 0xb8282df6UL, 0xbe46b874UL, 0x20000000UL, 0x3febf102UL, 0xe33a6729UL, |
|
1103 0x3e54963fUL, 0x00000000UL, 0x3febe264UL, 0x3b53e88aUL, 0xbe3adce1UL, |
|
1104 0x60000000UL, 0x3febd3caUL, 0xc2585084UL, 0x3e5cde9fUL, 0x80000000UL, |
|
1105 0x3febc535UL, 0xa335c5eeUL, 0xbe39fd9cUL, 0x20000000UL, 0x3febb6a5UL, |
|
1106 0x7325b04dUL, 0x3e42ba15UL, 0x60000000UL, 0x3feba819UL, 0x1564540fUL, |
|
1107 0x3e3a9f35UL, 0x40000000UL, 0x3feb9992UL, 0x83fff592UL, 0xbe5465ceUL, |
|
1108 0xa0000000UL, 0x3feb8b0fUL, 0xb9da63d3UL, 0xbe4b1a0aUL, 0x80000000UL, |
|
1109 0x3feb7c91UL, 0x6d6f1ea4UL, 0x3e557657UL, 0x00000000UL, 0x3feb6e18UL, |
|
1110 0x5e80a1bfUL, 0x3e4ddbb6UL, 0x00000000UL, 0x3feb5fa3UL, 0x1c9eacb5UL, |
|
1111 0x3e592877UL, 0xa0000000UL, 0x3feb5132UL, 0x6d40beb3UL, 0xbe51858cUL, |
|
1112 0xa0000000UL, 0x3feb42c6UL, 0xd740c67bUL, 0x3e427ad2UL, 0x40000000UL, |
|
1113 0x3feb345fUL, 0xa3e0cceeUL, 0xbe5c2fc4UL, 0x40000000UL, 0x3feb25fcUL, |
|
1114 0x8e752b50UL, 0xbe3da3c2UL, 0xc0000000UL, 0x3feb179dUL, 0xa892e7deUL, |
|
1115 0x3e1fb481UL, 0xc0000000UL, 0x3feb0943UL, 0x21ed71e9UL, 0xbe365206UL, |
|
1116 0x20000000UL, 0x3feafaeeUL, 0x0e1380a3UL, 0x3e5c5b7bUL, 0x20000000UL, |
|
1117 0x3feaec9dUL, 0x3c3d640eUL, 0xbe5dbbd0UL, 0x60000000UL, 0x3feade50UL, |
|
1118 0x8f97a715UL, 0x3e3a8ec5UL, 0x20000000UL, 0x3fead008UL, 0x23ab2839UL, |
|
1119 0x3e2fe98aUL, 0x40000000UL, 0x3feac1c4UL, 0xf4bbd50fUL, 0x3e54d8f6UL, |
|
1120 0xe0000000UL, 0x3feab384UL, 0x14757c4dUL, 0xbe48774cUL, 0xc0000000UL, |
|
1121 0x3feaa549UL, 0x7c7b0eeaUL, 0x3e5b51bbUL, 0x20000000UL, 0x3fea9713UL, |
|
1122 0xf56f7013UL, 0x3e386200UL, 0xe0000000UL, 0x3fea88e0UL, 0xbe428ebeUL, |
|
1123 0xbe514af5UL, 0xe0000000UL, 0x3fea7ab2UL, 0x8d0e4496UL, 0x3e4f9165UL, |
|
1124 0x60000000UL, 0x3fea6c89UL, 0xdbacc5d5UL, 0xbe5c063bUL, 0x20000000UL, |
|
1125 0x3fea5e64UL, 0x3f19d970UL, 0xbe5a0c8cUL, 0x20000000UL, 0x3fea5043UL, |
|
1126 0x09ea3e6bUL, 0x3e5065dcUL, 0x80000000UL, 0x3fea4226UL, 0x78df246cUL, |
|
1127 0x3e5e05f6UL, 0x40000000UL, 0x3fea340eUL, 0x4057d4a0UL, 0x3e431b2bUL, |
|
1128 0x40000000UL, 0x3fea25faUL, 0x82867bb5UL, 0x3e4b76beUL, 0xa0000000UL, |
|
1129 0x3fea17eaUL, 0x9436f40aUL, 0xbe5aad39UL, 0x20000000UL, 0x3fea09dfUL, |
|
1130 0x4b5253b3UL, 0x3e46380bUL, 0x00000000UL, 0x3fe9fbd8UL, 0x8fc52466UL, |
|
1131 0xbe386f9bUL, 0x20000000UL, 0x3fe9edd5UL, 0x22d3f344UL, 0xbe538347UL, |
|
1132 0x60000000UL, 0x3fe9dfd6UL, 0x1ac33522UL, 0x3e5dbc53UL, 0x00000000UL, |
|
1133 0x3fe9d1dcUL, 0xeabdff1dUL, 0x3e40fc0cUL, 0xe0000000UL, 0x3fe9c3e5UL, |
|
1134 0xafd30e73UL, 0xbe585e63UL, 0xe0000000UL, 0x3fe9b5f3UL, 0xa52f226aUL, |
|
1135 0xbe43e8f9UL, 0x20000000UL, 0x3fe9a806UL, 0xecb8698dUL, 0xbe515b36UL, |
|
1136 0x80000000UL, 0x3fe99a1cUL, 0xf2b4e89dUL, 0x3e48b62bUL, 0x20000000UL, |
|
1137 0x3fe98c37UL, 0x7c9a88fbUL, 0x3e44414cUL, 0x00000000UL, 0x3fe97e56UL, |
|
1138 0xda015741UL, 0xbe5d13baUL, 0xe0000000UL, 0x3fe97078UL, 0x5fdace06UL, |
|
1139 0x3e51b947UL, 0x00000000UL, 0x3fe962a0UL, 0x956ca094UL, 0x3e518785UL, |
|
1140 0x40000000UL, 0x3fe954cbUL, 0x01164c1dUL, 0x3e5d5b57UL, 0xc0000000UL, |
|
1141 0x3fe946faUL, 0xe63b3767UL, 0xbe4f84e7UL, 0x40000000UL, 0x3fe9392eUL, |
|
1142 0xe57cc2a9UL, 0x3e34eda3UL, 0xe0000000UL, 0x3fe92b65UL, 0x8c75b544UL, |
|
1143 0x3e5766a0UL, 0xc0000000UL, 0x3fe91da1UL, 0x37d1d087UL, 0xbe5e2ab1UL, |
|
1144 0x80000000UL, 0x3fe90fe1UL, 0xa953dc20UL, 0x3e5fa1f3UL, 0x80000000UL, |
|
1145 0x3fe90225UL, 0xdbd3f369UL, 0x3e47d6dbUL, 0xa0000000UL, 0x3fe8f46dUL, |
|
1146 0x1c9be989UL, 0xbe5e2b0aUL, 0xa0000000UL, 0x3fe8e6b9UL, 0x3c93d76aUL, |
|
1147 0x3e5c8618UL, 0xe0000000UL, 0x3fe8d909UL, 0x2182fc9aUL, 0xbe41aa9eUL, |
|
1148 0x20000000UL, 0x3fe8cb5eUL, 0xe6b3539dUL, 0xbe530d19UL, 0x60000000UL, |
|
1149 0x3fe8bdb6UL, 0x49e58cc3UL, 0xbe3bb374UL, 0xa0000000UL, 0x3fe8b012UL, |
|
1150 0xa7cfeb8fUL, 0x3e56c412UL, 0x00000000UL, 0x3fe8a273UL, 0x8d52bc19UL, |
|
1151 0x3e1429b8UL, 0x60000000UL, 0x3fe894d7UL, 0x4dc32c6cUL, 0xbe48604cUL, |
|
1152 0xc0000000UL, 0x3fe8873fUL, 0x0c868e56UL, 0xbe564ee5UL, 0x00000000UL, |
|
1153 0x3fe879acUL, 0x56aee828UL, 0x3e5e2fd8UL, 0x60000000UL, 0x3fe86c1cUL, |
|
1154 0x7ceab8ecUL, 0x3e493365UL, 0xc0000000UL, 0x3fe85e90UL, 0x78d4dadcUL, |
|
1155 0xbe4f7f25UL, 0x00000000UL, 0x3fe85109UL, 0x0ccd8280UL, 0x3e31e7a2UL, |
|
1156 0x40000000UL, 0x3fe84385UL, 0x34ba4e15UL, 0x3e328077UL, 0x80000000UL, |
|
1157 0x3fe83605UL, 0xa670975aUL, 0xbe53eee5UL, 0xa0000000UL, 0x3fe82889UL, |
|
1158 0xf61b77b2UL, 0xbe43a20aUL, 0xa0000000UL, 0x3fe81b11UL, 0x13e6643bUL, |
|
1159 0x3e5e5fe5UL, 0xc0000000UL, 0x3fe80d9dUL, 0x82cc94e8UL, 0xbe5ff1f9UL, |
|
1160 0xa0000000UL, 0x3fe8002dUL, 0x8a0c9c5dUL, 0xbe42b0e7UL, 0x60000000UL, |
|
1161 0x3fe7f2c1UL, 0x22a16f01UL, 0x3e5d9ea0UL, 0x20000000UL, 0x3fe7e559UL, |
|
1162 0xc38cd451UL, 0x3e506963UL, 0xc0000000UL, 0x3fe7d7f4UL, 0x9902bc71UL, |
|
1163 0x3e4503d7UL, 0x40000000UL, 0x3fe7ca94UL, 0xdef2a3c0UL, 0x3e3d98edUL, |
|
1164 0xa0000000UL, 0x3fe7bd37UL, 0xed49abb0UL, 0x3e24c1ffUL, 0xe0000000UL, |
|
1165 0x3fe7afdeUL, 0xe3b0be70UL, 0xbe40c467UL, 0x00000000UL, 0x3fe7a28aUL, |
|
1166 0xaf9f193cUL, 0xbe5dff6cUL, 0xe0000000UL, 0x3fe79538UL, 0xb74cf6b6UL, |
|
1167 0xbe258ed0UL, 0xa0000000UL, 0x3fe787ebUL, 0x1d9127c7UL, 0x3e345fb0UL, |
|
1168 0x40000000UL, 0x3fe77aa2UL, 0x1028c21dUL, 0xbe4619bdUL, 0xa0000000UL, |
|
1169 0x3fe76d5cUL, 0x7cb0b5e4UL, 0x3e40f1a2UL, 0xe0000000UL, 0x3fe7601aUL, |
|
1170 0x2b1bc4adUL, 0xbe32e8bbUL, 0xe0000000UL, 0x3fe752dcUL, 0x6839f64eUL, |
|
1171 0x3e41f57bUL, 0xc0000000UL, 0x3fe745a2UL, 0xc4121f7eUL, 0xbe52c40aUL, |
|
1172 0x60000000UL, 0x3fe7386cUL, 0xd6852d72UL, 0xbe5c4e6bUL, 0xc0000000UL, |
|
1173 0x3fe72b39UL, 0x91d690f7UL, 0xbe57f88fUL, 0xe0000000UL, 0x3fe71e0aUL, |
|
1174 0x627a2159UL, 0xbe4425d5UL, 0xc0000000UL, 0x3fe710dfUL, 0x50a54033UL, |
|
1175 0x3e422b7eUL, 0x60000000UL, 0x3fe703b8UL, 0x3b0b5f91UL, 0x3e5d3857UL, |
|
1176 0xe0000000UL, 0x3fe6f694UL, 0x84d628a2UL, 0xbe51f090UL, 0x00000000UL, |
|
1177 0x3fe6e975UL, 0x306d8894UL, 0xbe414d83UL, 0xe0000000UL, 0x3fe6dc58UL, |
|
1178 0x30bf24aaUL, 0xbe4650caUL, 0x80000000UL, 0x3fe6cf40UL, 0xd4628d69UL, |
|
1179 0xbe5db007UL, 0xc0000000UL, 0x3fe6c22bUL, 0xa2aae57bUL, 0xbe31d279UL, |
|
1180 0xc0000000UL, 0x3fe6b51aUL, 0x860edf7eUL, 0xbe2d4c4aUL, 0x80000000UL, |
|
1181 0x3fe6a80dUL, 0xf3559341UL, 0xbe5f7e98UL, 0xe0000000UL, 0x3fe69b03UL, |
|
1182 0xa885899eUL, 0xbe5c2011UL, 0xe0000000UL, 0x3fe68dfdUL, 0x2bdc6d37UL, |
|
1183 0x3e224a82UL, 0xa0000000UL, 0x3fe680fbUL, 0xc12ad1b9UL, 0xbe40cf56UL, |
|
1184 0x00000000UL, 0x3fe673fdUL, 0x1bcdf659UL, 0xbdf52f2dUL, 0x00000000UL, |
|
1185 0x3fe66702UL, 0x5df10408UL, 0x3e5663e0UL, 0xc0000000UL, 0x3fe65a0aUL, |
|
1186 0xa4070568UL, 0xbe40b12fUL, 0x00000000UL, 0x3fe64d17UL, 0x71c54c47UL, |
|
1187 0x3e5f5e8bUL, 0x00000000UL, 0x3fe64027UL, 0xbd4b7e83UL, 0x3e42ead6UL, |
|
1188 0xa0000000UL, 0x3fe6333aUL, 0x61598bd2UL, 0xbe4c48d4UL, 0xc0000000UL, |
|
1189 0x3fe62651UL, 0x6f538d61UL, 0x3e548401UL, 0xa0000000UL, 0x3fe6196cUL, |
|
1190 0x14344120UL, 0xbe529af6UL, 0x00000000UL, 0x3fe60c8bUL, 0x5982c587UL, |
|
1191 0xbe3e1e4fUL, 0x00000000UL, 0x3fe5ffadUL, 0xfe51d4eaUL, 0xbe4c897aUL, |
|
1192 0x80000000UL, 0x3fe5f2d2UL, 0xfd46ebe1UL, 0x3e552e00UL, 0xa0000000UL, |
|
1193 0x3fe5e5fbUL, 0xa4695699UL, 0x3e5ed471UL, 0x60000000UL, 0x3fe5d928UL, |
|
1194 0x80d118aeUL, 0x3e456b61UL, 0xa0000000UL, 0x3fe5cc58UL, 0x304c330bUL, |
|
1195 0x3e54dc29UL, 0x80000000UL, 0x3fe5bf8cUL, 0x0af2dedfUL, 0xbe3aa9bdUL, |
|
1196 0xe0000000UL, 0x3fe5b2c3UL, 0x15fc9258UL, 0xbe479a37UL, 0xc0000000UL, |
|
1197 0x3fe5a5feUL, 0x9292c7eaUL, 0x3e188650UL, 0x20000000UL, 0x3fe5993dUL, |
|
1198 0x33b4d380UL, 0x3e5d6d93UL, 0x20000000UL, 0x3fe58c7fUL, 0x02fd16c7UL, |
|
1199 0x3e2fe961UL, 0xa0000000UL, 0x3fe57fc4UL, 0x4a05edb6UL, 0xbe4d55b4UL, |
|
1200 0xa0000000UL, 0x3fe5730dUL, 0x3d443abbUL, 0xbe5e6954UL, 0x00000000UL, |
|
1201 0x3fe5665aUL, 0x024acfeaUL, 0x3e50e61bUL, 0x00000000UL, 0x3fe559aaUL, |
|
1202 0xcc9edd09UL, 0xbe325403UL, 0x60000000UL, 0x3fe54cfdUL, 0x1fe26950UL, |
|
1203 0x3e5d500eUL, 0x60000000UL, 0x3fe54054UL, 0x6c5ae164UL, 0xbe4a79b4UL, |
|
1204 0xc0000000UL, 0x3fe533aeUL, 0x154b0287UL, 0xbe401571UL, 0xa0000000UL, |
|
1205 0x3fe5270cUL, 0x0673f401UL, 0xbe56e56bUL, 0xe0000000UL, 0x3fe51a6dUL, |
|
1206 0x751b639cUL, 0x3e235269UL, 0xa0000000UL, 0x3fe50dd2UL, 0x7c7b2bedUL, |
|
1207 0x3ddec887UL, 0xc0000000UL, 0x3fe5013aUL, 0xafab4e17UL, 0x3e5e7575UL, |
|
1208 0x60000000UL, 0x3fe4f4a6UL, 0x2e308668UL, 0x3e59aed6UL, 0x80000000UL, |
|
1209 0x3fe4e815UL, 0xf33e2a76UL, 0xbe51f184UL, 0xe0000000UL, 0x3fe4db87UL, |
|
1210 0x839f3e3eUL, 0x3e57db01UL, 0xc0000000UL, 0x3fe4cefdUL, 0xa9eda7bbUL, |
|
1211 0x3e535e0fUL, 0x00000000UL, 0x3fe4c277UL, 0x2a8f66a5UL, 0x3e5ce451UL, |
|
1212 0xc0000000UL, 0x3fe4b5f3UL, 0x05192456UL, 0xbe4e8518UL, 0xc0000000UL, |
|
1213 0x3fe4a973UL, 0x4aa7cd1dUL, 0x3e46784aUL, 0x40000000UL, 0x3fe49cf7UL, |
|
1214 0x8e23025eUL, 0xbe5749f2UL, 0x00000000UL, 0x3fe4907eUL, 0x18d30215UL, |
|
1215 0x3e360f39UL, 0x20000000UL, 0x3fe48408UL, 0x63dcf2f3UL, 0x3e5e00feUL, |
|
1216 0xc0000000UL, 0x3fe47795UL, 0x46182d09UL, 0xbe5173d9UL, 0xa0000000UL, |
|
1217 0x3fe46b26UL, 0x8f0e62aaUL, 0xbe48f281UL, 0xe0000000UL, 0x3fe45ebaUL, |
|
1218 0x5775c40cUL, 0xbe56aad4UL, 0x60000000UL, 0x3fe45252UL, 0x0fe25f69UL, |
|
1219 0x3e48bd71UL, 0x40000000UL, 0x3fe445edUL, 0xe9989ec5UL, 0x3e590d97UL, |
|
1220 0x80000000UL, 0x3fe4398bUL, 0xb3d9ffe3UL, 0x3e479dbcUL, 0x20000000UL, |
|
1221 0x3fe42d2dUL, 0x388e4d2eUL, 0xbe5eed80UL, 0xe0000000UL, 0x3fe420d1UL, |
|
1222 0x6f797c18UL, 0x3e554b4cUL, 0x20000000UL, 0x3fe4147aUL, 0x31048bb4UL, |
|
1223 0xbe5b1112UL, 0x80000000UL, 0x3fe40825UL, 0x2efba4f9UL, 0x3e48ebc7UL, |
|
1224 0x40000000UL, 0x3fe3fbd4UL, 0x50201119UL, 0x3e40b701UL, 0x40000000UL, |
|
1225 0x3fe3ef86UL, 0x0a4db32cUL, 0x3e551de8UL, 0xa0000000UL, 0x3fe3e33bUL, |
|
1226 0x0c9c148bUL, 0xbe50c1f6UL, 0x20000000UL, 0x3fe3d6f4UL, 0xc9129447UL, |
|
1227 0x3e533fa0UL, 0x00000000UL, 0x3fe3cab0UL, 0xaae5b5a0UL, 0xbe22b68eUL, |
|
1228 0x20000000UL, 0x3fe3be6fUL, 0x02305e8aUL, 0xbe54fc08UL, 0x60000000UL, |
|
1229 0x3fe3b231UL, 0x7f908258UL, 0x3e57dc05UL, 0x00000000UL, 0x3fe3a5f7UL, |
|
1230 0x1a09af78UL, 0x3e08038bUL, 0xe0000000UL, 0x3fe399bfUL, 0x490643c1UL, |
|
1231 0xbe5dbe42UL, 0xe0000000UL, 0x3fe38d8bUL, 0x5e8ad724UL, 0xbe3c2b72UL, |
|
1232 0x20000000UL, 0x3fe3815bUL, 0xc67196b6UL, 0x3e1713cfUL, 0xa0000000UL, |
|
1233 0x3fe3752dUL, 0x6182e429UL, 0xbe3ec14cUL, 0x40000000UL, 0x3fe36903UL, |
|
1234 0xab6eb1aeUL, 0x3e5a2cc5UL, 0x40000000UL, 0x3fe35cdcUL, 0xfe5dc064UL, |
|
1235 0xbe5c5878UL, 0x40000000UL, 0x3fe350b8UL, 0x0ba6b9e4UL, 0x3e51619bUL, |
|
1236 0x80000000UL, 0x3fe34497UL, 0x857761aaUL, 0x3e5fff53UL, 0x00000000UL, |
|
1237 0x3fe3387aUL, 0xf872d68cUL, 0x3e484f4dUL, 0xa0000000UL, 0x3fe32c5fUL, |
|
1238 0x087e97c2UL, 0x3e52842eUL, 0x80000000UL, 0x3fe32048UL, 0x73d6d0c0UL, |
|
1239 0xbe503edfUL, 0x80000000UL, 0x3fe31434UL, 0x0c1456a1UL, 0xbe5f72adUL, |
|
1240 0xa0000000UL, 0x3fe30823UL, 0x83a1a4d5UL, 0xbe5e65ccUL, 0xe0000000UL, |
|
1241 0x3fe2fc15UL, 0x855a7390UL, 0xbe506438UL, 0x40000000UL, 0x3fe2f00bUL, |
|
1242 0xa2898287UL, 0x3e3d22a2UL, 0xe0000000UL, 0x3fe2e403UL, 0x8b56f66fUL, |
|
1243 0xbe5aa5fdUL, 0x80000000UL, 0x3fe2d7ffUL, 0x52db119aUL, 0x3e3a2e3dUL, |
|
1244 0x60000000UL, 0x3fe2cbfeUL, 0xe2ddd4c0UL, 0xbe586469UL, 0x40000000UL, |
|
1245 0x3fe2c000UL, 0x6b01bf10UL, 0x3e352b9dUL, 0x40000000UL, 0x3fe2b405UL, |
|
1246 0xb07a1cdfUL, 0x3e5c5cdaUL, 0x80000000UL, 0x3fe2a80dUL, 0xc7b5f868UL, |
|
1247 0xbe5668b3UL, 0xc0000000UL, 0x3fe29c18UL, 0x185edf62UL, 0xbe563d66UL, |
|
1248 0x00000000UL, 0x3fe29027UL, 0xf729e1ccUL, 0x3e59a9a0UL, 0x80000000UL, |
|
1249 0x3fe28438UL, 0x6433c727UL, 0xbe43cc89UL, 0x00000000UL, 0x3fe2784dUL, |
|
1250 0x41782631UL, 0xbe30750cUL, 0xa0000000UL, 0x3fe26c64UL, 0x914911b7UL, |
|
1251 0xbe58290eUL, 0x40000000UL, 0x3fe2607fUL, 0x3dcc73e1UL, 0xbe4269cdUL, |
|
1252 0x00000000UL, 0x3fe2549dUL, 0x2751bf70UL, 0xbe5a6998UL, 0xc0000000UL, |
|
1253 0x3fe248bdUL, 0x4248b9fbUL, 0xbe4ddb00UL, 0x80000000UL, 0x3fe23ce1UL, |
|
1254 0xf35cf82fUL, 0x3e561b71UL, 0x60000000UL, 0x3fe23108UL, 0x8e481a2dUL, |
|
1255 0x3e518fb9UL, 0x60000000UL, 0x3fe22532UL, 0x5ab96edcUL, 0xbe5fafc5UL, |
|
1256 0x40000000UL, 0x3fe2195fUL, 0x80943911UL, 0xbe07f819UL, 0x40000000UL, |
|
1257 0x3fe20d8fUL, 0x386f2d6cUL, 0xbe54ba8bUL, 0x40000000UL, 0x3fe201c2UL, |
|
1258 0xf29664acUL, 0xbe5eb815UL, 0x20000000UL, 0x3fe1f5f8UL, 0x64f03390UL, |
|
1259 0x3e5e320cUL, 0x20000000UL, 0x3fe1ea31UL, 0x747ff696UL, 0x3e5ef0a5UL, |
|
1260 0x40000000UL, 0x3fe1de6dUL, 0x3e9ceb51UL, 0xbe5f8d27UL, 0x20000000UL, |
|
1261 0x3fe1d2acUL, 0x4ae0b55eUL, 0x3e5faa21UL, 0x20000000UL, 0x3fe1c6eeUL, |
|
1262 0x28569a5eUL, 0x3e598a4fUL, 0x20000000UL, 0x3fe1bb33UL, 0x54b33e07UL, |
|
1263 0x3e46130aUL, 0x20000000UL, 0x3fe1af7bUL, 0x024f1078UL, 0xbe4dbf93UL, |
|
1264 0x00000000UL, 0x3fe1a3c6UL, 0xb0783bfaUL, 0x3e419248UL, 0xe0000000UL, |
|
1265 0x3fe19813UL, 0x2f02b836UL, 0x3e4e02b7UL, 0xc0000000UL, 0x3fe18c64UL, |
|
1266 0x28dec9d4UL, 0x3e09064fUL, 0x80000000UL, 0x3fe180b8UL, 0x45cbf406UL, |
|
1267 0x3e5b1f46UL, 0x40000000UL, 0x3fe1750fUL, 0x03d9964cUL, 0x3e5b0a79UL, |
|
1268 0x00000000UL, 0x3fe16969UL, 0x8b5b882bUL, 0xbe238086UL, 0xa0000000UL, |
|
1269 0x3fe15dc5UL, 0x73bad6f8UL, 0xbdf1fca4UL, 0x20000000UL, 0x3fe15225UL, |
|
1270 0x5385769cUL, 0x3e5e8d76UL, 0xa0000000UL, 0x3fe14687UL, 0x1676dc6bUL, |
|
1271 0x3e571d08UL, 0x20000000UL, 0x3fe13aedUL, 0xa8c41c7fUL, 0xbe598a25UL, |
|
1272 0x60000000UL, 0x3fe12f55UL, 0xc4e1aaf0UL, 0x3e435277UL, 0xa0000000UL, |
|
1273 0x3fe123c0UL, 0x403638e1UL, 0xbe21aa7cUL, 0xc0000000UL, 0x3fe1182eUL, |
|
1274 0x557a092bUL, 0xbdd0116bUL, 0xc0000000UL, 0x3fe10c9fUL, 0x7d779f66UL, |
|
1275 0x3e4a61baUL, 0xc0000000UL, 0x3fe10113UL, 0x2b09c645UL, 0xbe5d586eUL, |
|
1276 0x20000000UL, 0x3fe0ea04UL, 0xea2cad46UL, 0x3e5aa97cUL, 0x20000000UL, |
|
1277 0x3fe0d300UL, 0x23190e54UL, 0x3e50f1a7UL, 0xa0000000UL, 0x3fe0bc07UL, |
|
1278 0x1379a5a6UL, 0xbe51619dUL, 0x60000000UL, 0x3fe0a51aUL, 0x926a3d4aUL, |
|
1279 0x3e5cf019UL, 0xa0000000UL, 0x3fe08e38UL, 0xa8c24358UL, 0x3e35241eUL, |
|
1280 0x20000000UL, 0x3fe07762UL, 0x24317e7aUL, 0x3e512cfaUL, 0x00000000UL, |
|
1281 0x3fe06097UL, 0xfd9cf274UL, 0xbe55bef3UL, 0x00000000UL, 0x3fe049d7UL, |
|
1282 0x3689b49dUL, 0xbe36d26dUL, 0x40000000UL, 0x3fe03322UL, 0xf72ef6c4UL, |
|
1283 0xbe54cd08UL, 0xa0000000UL, 0x3fe01c78UL, 0x23702d2dUL, 0xbe5900bfUL, |
|
1284 0x00000000UL, 0x3fe005daUL, 0x3f59c14cUL, 0x3e57d80bUL, 0x40000000UL, |
|
1285 0x3fdfde8dUL, 0xad67766dUL, 0xbe57fad4UL, 0x40000000UL, 0x3fdfb17cUL, |
|
1286 0x644f4ae7UL, 0x3e1ee43bUL, 0x40000000UL, 0x3fdf8481UL, 0x903234d2UL, |
|
1287 0x3e501a86UL, 0x40000000UL, 0x3fdf579cUL, 0xafe9e509UL, 0xbe267c3eUL, |
|
1288 0x00000000UL, 0x3fdf2acdUL, 0xb7dfda0bUL, 0xbe48149bUL, 0x40000000UL, |
|
1289 0x3fdefe13UL, 0x3b94305eUL, 0x3e5f4ea7UL, 0x80000000UL, 0x3fded16fUL, |
|
1290 0x5d95da61UL, 0xbe55c198UL, 0x00000000UL, 0x3fdea4e1UL, 0x406960c9UL, |
|
1291 0xbdd99a19UL, 0x00000000UL, 0x3fde7868UL, 0xd22f3539UL, 0x3e470c78UL, |
|
1292 0x80000000UL, 0x3fde4c04UL, 0x83eec535UL, 0xbe3e1232UL, 0x40000000UL, |
|
1293 0x3fde1fb6UL, 0x3dfbffcbUL, 0xbe4b7d71UL, 0x40000000UL, 0x3fddf37dUL, |
|
1294 0x7e1be4e0UL, 0xbe5b8f8fUL, 0x40000000UL, 0x3fddc759UL, 0x46dae887UL, |
|
1295 0xbe350458UL, 0x80000000UL, 0x3fdd9b4aUL, 0xed6ecc49UL, 0xbe5f0045UL, |
|
1296 0x80000000UL, 0x3fdd6f50UL, 0x2e9e883cUL, 0x3e2915daUL, 0x80000000UL, |
|
1297 0x3fdd436bUL, 0xf0bccb32UL, 0x3e4a68c9UL, 0x80000000UL, 0x3fdd179bUL, |
|
1298 0x9bbfc779UL, 0xbe54a26aUL, 0x00000000UL, 0x3fdcebe0UL, 0x7cea33abUL, |
|
1299 0x3e43c6b7UL, 0x40000000UL, 0x3fdcc039UL, 0xe740fd06UL, 0x3e5526c2UL, |
|
1300 0x40000000UL, 0x3fdc94a7UL, 0x9eadeb1aUL, 0xbe396d8dUL, 0xc0000000UL, |
|
1301 0x3fdc6929UL, 0xf0a8f95aUL, 0xbe5c0ab2UL, 0x80000000UL, 0x3fdc3dc0UL, |
|
1302 0x6ee2693bUL, 0x3e0992e6UL, 0xc0000000UL, 0x3fdc126bUL, 0x5ac6b581UL, |
|
1303 0xbe2834b6UL, 0x40000000UL, 0x3fdbe72bUL, 0x8cc226ffUL, 0x3e3596a6UL, |
|
1304 0x00000000UL, 0x3fdbbbffUL, 0xf92a74bbUL, 0x3e3c5813UL, 0x00000000UL, |
|
1305 0x3fdb90e7UL, 0x479664c0UL, 0xbe50d644UL, 0x00000000UL, 0x3fdb65e3UL, |
|
1306 0x5004975bUL, 0xbe55258fUL, 0x00000000UL, 0x3fdb3af3UL, 0xe4b23194UL, |
|
1307 0xbe588407UL, 0xc0000000UL, 0x3fdb1016UL, 0xe65d4d0aUL, 0x3e527c26UL, |
|
1308 0x80000000UL, 0x3fdae54eUL, 0x814fddd6UL, 0x3e5962a2UL, 0x40000000UL, |
|
1309 0x3fdaba9aUL, 0xe19d0913UL, 0xbe562f4eUL, 0x80000000UL, 0x3fda8ff9UL, |
|
1310 0x43cfd006UL, 0xbe4cfdebUL, 0x40000000UL, 0x3fda656cUL, 0x686f0a4eUL, |
|
1311 0x3e5e47a8UL, 0xc0000000UL, 0x3fda3af2UL, 0x7200d410UL, 0x3e5e1199UL, |
|
1312 0xc0000000UL, 0x3fda108cUL, 0xabd2266eUL, 0x3e5ee4d1UL, 0x40000000UL, |
|
1313 0x3fd9e63aUL, 0x396f8f2cUL, 0x3e4dbffbUL, 0x00000000UL, 0x3fd9bbfbUL, |
|
1314 0xe32b25ddUL, 0x3e5c3a54UL, 0x40000000UL, 0x3fd991cfUL, 0x431e4035UL, |
|
1315 0xbe457925UL, 0x80000000UL, 0x3fd967b6UL, 0x7bed3dd3UL, 0x3e40c61dUL, |
|
1316 0x00000000UL, 0x3fd93db1UL, 0xd7449365UL, 0x3e306419UL, 0x80000000UL, |
|
1317 0x3fd913beUL, 0x1746e791UL, 0x3e56fcfcUL, 0x40000000UL, 0x3fd8e9dfUL, |
|
1318 0xf3a9028bUL, 0xbe5041b9UL, 0xc0000000UL, 0x3fd8c012UL, 0x56840c50UL, |
|
1319 0xbe26e20aUL, 0x40000000UL, 0x3fd89659UL, 0x19763102UL, 0xbe51f466UL, |
|
1320 0x80000000UL, 0x3fd86cb2UL, 0x7032de7cUL, 0xbe4d298aUL, 0x80000000UL, |
|
1321 0x3fd8431eUL, 0xdeb39fabUL, 0xbe4361ebUL, 0x40000000UL, 0x3fd8199dUL, |
|
1322 0x5d01cbe0UL, 0xbe5425b3UL, 0x80000000UL, 0x3fd7f02eUL, 0x3ce99aa9UL, |
|
1323 0x3e146fa8UL, 0x80000000UL, 0x3fd7c6d2UL, 0xd1a262b9UL, 0xbe5a1a69UL, |
|
1324 0xc0000000UL, 0x3fd79d88UL, 0x8606c236UL, 0x3e423a08UL, 0x80000000UL, |
|
1325 0x3fd77451UL, 0x8fd1e1b7UL, 0x3e5a6a63UL, 0xc0000000UL, 0x3fd74b2cUL, |
|
1326 0xe491456aUL, 0x3e42c1caUL, 0x40000000UL, 0x3fd7221aUL, 0x4499a6d7UL, |
|
1327 0x3e36a69aUL, 0x00000000UL, 0x3fd6f91aUL, 0x5237df94UL, 0xbe0f8f02UL, |
|
1328 0x00000000UL, 0x3fd6d02cUL, 0xb6482c6eUL, 0xbe5abcf7UL, 0x00000000UL, |
|
1329 0x3fd6a750UL, 0x1919fd61UL, 0xbe57ade2UL, 0x00000000UL, 0x3fd67e86UL, |
|
1330 0xaa7a994dUL, 0xbe3f3fbdUL, 0x00000000UL, 0x3fd655ceUL, 0x67db014cUL, |
|
1331 0x3e33c550UL, 0x00000000UL, 0x3fd62d28UL, 0xa82856b7UL, 0xbe1409d1UL, |
|
1332 0xc0000000UL, 0x3fd60493UL, 0x1e6a300dUL, 0x3e55d899UL, 0x80000000UL, |
|
1333 0x3fd5dc11UL, 0x1222bd5cUL, 0xbe35bfc0UL, 0xc0000000UL, 0x3fd5b3a0UL, |
|
1334 0x6e8dc2d3UL, 0x3e5d4d79UL, 0x00000000UL, 0x3fd58b42UL, 0xe0e4ace6UL, |
|
1335 0xbe517303UL, 0x80000000UL, 0x3fd562f4UL, 0xb306e0a8UL, 0x3e5edf0fUL, |
|
1336 0xc0000000UL, 0x3fd53ab8UL, 0x6574bc54UL, 0x3e5ee859UL, 0x80000000UL, |
|
1337 0x3fd5128eUL, 0xea902207UL, 0x3e5f6188UL, 0xc0000000UL, 0x3fd4ea75UL, |
|
1338 0x9f911d79UL, 0x3e511735UL, 0x80000000UL, 0x3fd4c26eUL, 0xf9c77397UL, |
|
1339 0xbe5b1643UL, 0x40000000UL, 0x3fd49a78UL, 0x15fc9258UL, 0x3e479a37UL, |
|
1340 0x80000000UL, 0x3fd47293UL, 0xd5a04dd9UL, 0xbe426e56UL, 0xc0000000UL, |
|
1341 0x3fd44abfUL, 0xe04042f5UL, 0x3e56f7c6UL, 0x40000000UL, 0x3fd422fdUL, |
|
1342 0x1d8bf2c8UL, 0x3e5d8810UL, 0x00000000UL, 0x3fd3fb4cUL, 0x88a8ddeeUL, |
|
1343 0xbe311454UL, 0xc0000000UL, 0x3fd3d3abUL, 0x3e3b5e47UL, 0xbe5d1b72UL, |
|
1344 0x40000000UL, 0x3fd3ac1cUL, 0xc2ab5d59UL, 0x3e31b02bUL, 0xc0000000UL, |
|
1345 0x3fd3849dUL, 0xd4e34b9eUL, 0x3e51cb2fUL, 0x40000000UL, 0x3fd35d30UL, |
|
1346 0x177204fbUL, 0xbe2b8cd7UL, 0x80000000UL, 0x3fd335d3UL, 0xfcd38c82UL, |
|
1347 0xbe4356e1UL, 0x80000000UL, 0x3fd30e87UL, 0x64f54accUL, 0xbe4e6224UL, |
|
1348 0x00000000UL, 0x3fd2e74cUL, 0xaa7975d9UL, 0x3e5dc0feUL, 0x80000000UL, |
|
1349 0x3fd2c021UL, 0x516dab3fUL, 0xbe50ffa3UL, 0x40000000UL, 0x3fd29907UL, |
|
1350 0x2bfb7313UL, 0x3e5674a2UL, 0xc0000000UL, 0x3fd271fdUL, 0x0549fc99UL, |
|
1351 0x3e385d29UL, 0xc0000000UL, 0x3fd24b04UL, 0x55b63073UL, 0xbe500c6dUL, |
|
1352 0x00000000UL, 0x3fd2241cUL, 0x3f91953aUL, 0x3e389977UL, 0xc0000000UL, |
|
1353 0x3fd1fd43UL, 0xa1543f71UL, 0xbe3487abUL, 0xc0000000UL, 0x3fd1d67bUL, |
|
1354 0x4ec8867cUL, 0x3df6a2dcUL, 0x00000000UL, 0x3fd1afc4UL, 0x4328e3bbUL, |
|
1355 0x3e41d9c0UL, 0x80000000UL, 0x3fd1891cUL, 0x2e1cda84UL, 0x3e3bdd87UL, |
|
1356 0x40000000UL, 0x3fd16285UL, 0x4b5331aeUL, 0xbe53128eUL, 0x00000000UL, |
|
1357 0x3fd13bfeUL, 0xb9aec164UL, 0xbe52ac98UL, 0xc0000000UL, 0x3fd11586UL, |
|
1358 0xd91e1316UL, 0xbe350630UL, 0x80000000UL, 0x3fd0ef1fUL, 0x7cacc12cUL, |
|
1359 0x3e3f5219UL, 0x40000000UL, 0x3fd0c8c8UL, 0xbce277b7UL, 0x3e3d30c0UL, |
|
1360 0x00000000UL, 0x3fd0a281UL, 0x2a63447dUL, 0xbe541377UL, 0x80000000UL, |
|
1361 0x3fd07c49UL, 0xfac483b5UL, 0xbe5772ecUL, 0xc0000000UL, 0x3fd05621UL, |
|
1362 0x36b8a570UL, 0xbe4fd4bdUL, 0xc0000000UL, 0x3fd03009UL, 0xbae505f7UL, |
|
1363 0xbe450388UL, 0x80000000UL, 0x3fd00a01UL, 0x3e35aeadUL, 0xbe5430fcUL, |
|
1364 0x80000000UL, 0x3fcfc811UL, 0x707475acUL, 0x3e38806eUL, 0x80000000UL, |
|
1365 0x3fcf7c3fUL, 0xc91817fcUL, 0xbe40cceaUL, 0x80000000UL, 0x3fcf308cUL, |
|
1366 0xae05d5e9UL, 0xbe4919b8UL, 0x80000000UL, 0x3fcee4f8UL, 0xae6cc9e6UL, |
|
1367 0xbe530b94UL, 0x00000000UL, 0x3fce9983UL, 0x1efe3e8eUL, 0x3e57747eUL, |
|
1368 0x00000000UL, 0x3fce4e2dUL, 0xda78d9bfUL, 0xbe59a608UL, 0x00000000UL, |
|
1369 0x3fce02f5UL, 0x8abe2c2eUL, 0x3e4a35adUL, 0x00000000UL, 0x3fcdb7dcUL, |
|
1370 0x1495450dUL, 0xbe0872ccUL, 0x80000000UL, 0x3fcd6ce1UL, 0x86ee0ba0UL, |
|
1371 0xbe4f59a0UL, 0x00000000UL, 0x3fcd2205UL, 0xe81ca888UL, 0x3e5402c3UL, |
|
1372 0x00000000UL, 0x3fccd747UL, 0x3b4424b9UL, 0x3e5dfdc3UL, 0x80000000UL, |
|
1373 0x3fcc8ca7UL, 0xd305b56cUL, 0x3e202da6UL, 0x00000000UL, 0x3fcc4226UL, |
|
1374 0x399a6910UL, 0xbe482a1cUL, 0x80000000UL, 0x3fcbf7c2UL, 0x747f7938UL, |
|
1375 0xbe587372UL, 0x80000000UL, 0x3fcbad7cUL, 0x6fc246a0UL, 0x3e50d83dUL, |
|
1376 0x00000000UL, 0x3fcb6355UL, 0xee9e9be5UL, 0xbe5c35bdUL, 0x80000000UL, |
|
1377 0x3fcb194aUL, 0x8416c0bcUL, 0x3e546d4fUL, 0x00000000UL, 0x3fcacf5eUL, |
|
1378 0x49f7f08fUL, 0x3e56da76UL, 0x00000000UL, 0x3fca858fUL, 0x5dc30de2UL, |
|
1379 0x3e5f390cUL, 0x00000000UL, 0x3fca3bdeUL, 0x950583b6UL, 0xbe5e4169UL, |
|
1380 0x80000000UL, 0x3fc9f249UL, 0x33631553UL, 0x3e52aeb1UL, 0x00000000UL, |
|
1381 0x3fc9a8d3UL, 0xde8795a6UL, 0xbe59a504UL, 0x00000000UL, 0x3fc95f79UL, |
|
1382 0x076bf41eUL, 0x3e5122feUL, 0x80000000UL, 0x3fc9163cUL, 0x2914c8e7UL, |
|
1383 0x3e3dd064UL, 0x00000000UL, 0x3fc8cd1dUL, 0x3a30eca3UL, 0xbe21b4aaUL, |
|
1384 0x80000000UL, 0x3fc8841aUL, 0xb2a96650UL, 0xbe575444UL, 0x80000000UL, |
|
1385 0x3fc83b34UL, 0x2376c0cbUL, 0xbe2a74c7UL, 0x80000000UL, 0x3fc7f26bUL, |
|
1386 0xd8a0b653UL, 0xbe5181b6UL, 0x00000000UL, 0x3fc7a9bfUL, 0x32257882UL, |
|
1387 0xbe4a78b4UL, 0x00000000UL, 0x3fc7612fUL, 0x1eee8bd9UL, 0xbe1bfe9dUL, |
|
1388 0x80000000UL, 0x3fc718bbUL, 0x0c603cc4UL, 0x3e36fdc9UL, 0x80000000UL, |
|
1389 0x3fc6d064UL, 0x3728b8cfUL, 0xbe1e542eUL, 0x80000000UL, 0x3fc68829UL, |
|
1390 0xc79a4067UL, 0x3e5c380fUL, 0x00000000UL, 0x3fc6400bUL, 0xf69eac69UL, |
|
1391 0x3e550a84UL, 0x80000000UL, 0x3fc5f808UL, 0xb7a780a4UL, 0x3e5d9224UL, |
|
1392 0x80000000UL, 0x3fc5b022UL, 0xad9dfb1eUL, 0xbe55242fUL, 0x00000000UL, |
|
1393 0x3fc56858UL, 0x659b18beUL, 0xbe4bfda3UL, 0x80000000UL, 0x3fc520a9UL, |
|
1394 0x66ee3631UL, 0xbe57d769UL, 0x80000000UL, 0x3fc4d916UL, 0x1ec62819UL, |
|
1395 0x3e2427f7UL, 0x80000000UL, 0x3fc4919fUL, 0xdec25369UL, 0xbe435431UL, |
|
1396 0x00000000UL, 0x3fc44a44UL, 0xa8acfc4bUL, 0xbe3c62e8UL, 0x00000000UL, |
|
1397 0x3fc40304UL, 0xcf1d3eabUL, 0xbdfba29fUL, 0x80000000UL, 0x3fc3bbdfUL, |
|
1398 0x79aba3eaUL, 0xbdf1b7c8UL, 0x80000000UL, 0x3fc374d6UL, 0xb8d186daUL, |
|
1399 0xbe5130cfUL, 0x80000000UL, 0x3fc32de8UL, 0x9d74f152UL, 0x3e2285b6UL, |
|
1400 0x00000000UL, 0x3fc2e716UL, 0x50ae7ca9UL, 0xbe503920UL, 0x80000000UL, |
|
1401 0x3fc2a05eUL, 0x6caed92eUL, 0xbe533924UL, 0x00000000UL, 0x3fc259c2UL, |
|
1402 0x9cb5034eUL, 0xbe510e31UL, 0x80000000UL, 0x3fc21340UL, 0x12c4d378UL, |
|
1403 0xbe540b43UL, 0x80000000UL, 0x3fc1ccd9UL, 0xcc418706UL, 0x3e59887aUL, |
|
1404 0x00000000UL, 0x3fc1868eUL, 0x921f4106UL, 0xbe528e67UL, 0x80000000UL, |
|
1405 0x3fc1405cUL, 0x3969441eUL, 0x3e5d8051UL, 0x00000000UL, 0x3fc0fa46UL, |
|
1406 0xd941ef5bUL, 0x3e5f9079UL, 0x80000000UL, 0x3fc0b44aUL, 0x5a3e81b2UL, |
|
1407 0xbe567691UL, 0x00000000UL, 0x3fc06e69UL, 0x9d66afe7UL, 0xbe4d43fbUL, |
|
1408 0x00000000UL, 0x3fc028a2UL, 0x0a92a162UL, 0xbe52f394UL, 0x00000000UL, |
|
1409 0x3fbfc5eaUL, 0x209897e5UL, 0x3e529e37UL, 0x00000000UL, 0x3fbf3ac5UL, |
|
1410 0x8458bd7bUL, 0x3e582831UL, 0x00000000UL, 0x3fbeafd5UL, 0xb8d8b4b8UL, |
|
1411 0xbe486b4aUL, 0x00000000UL, 0x3fbe2518UL, 0xe0a3b7b6UL, 0x3e5bafd2UL, |
|
1412 0x00000000UL, 0x3fbd9a90UL, 0x2bf2710eUL, 0x3e383b2bUL, 0x00000000UL, |
|
1413 0x3fbd103cUL, 0x73eb6ab7UL, 0xbe56d78dUL, 0x00000000UL, 0x3fbc861bUL, |
|
1414 0x32ceaff5UL, 0xbe32dc5aUL, 0x00000000UL, 0x3fbbfc2eUL, 0xbee04cb7UL, |
|
1415 0xbe4a71a4UL, 0x00000000UL, 0x3fbb7274UL, 0x35ae9577UL, 0x3e38142fUL, |
|
1416 0x00000000UL, 0x3fbae8eeUL, 0xcbaddab4UL, 0xbe5490f0UL, 0x00000000UL, |
|
1417 0x3fba5f9aUL, 0x95ce1114UL, 0x3e597c71UL, 0x00000000UL, 0x3fb9d67aUL, |
|
1418 0x6d7c0f78UL, 0x3e3abc2dUL, 0x00000000UL, 0x3fb94d8dUL, 0x2841a782UL, |
|
1419 0xbe566cbcUL, 0x00000000UL, 0x3fb8c4d2UL, 0x6ed429c6UL, 0xbe3cfff9UL, |
|
1420 0x00000000UL, 0x3fb83c4aUL, 0xe4a49fbbUL, 0xbe552964UL, 0x00000000UL, |
|
1421 0x3fb7b3f4UL, 0x2193d81eUL, 0xbe42fa72UL, 0x00000000UL, 0x3fb72bd0UL, |
|
1422 0xdd70c122UL, 0x3e527a8cUL, 0x00000000UL, 0x3fb6a3dfUL, 0x03108a54UL, |
|
1423 0xbe450393UL, 0x00000000UL, 0x3fb61c1fUL, 0x30ff7954UL, 0x3e565840UL, |
|
1424 0x00000000UL, 0x3fb59492UL, 0xdedd460cUL, 0xbe5422b5UL, 0x00000000UL, |
|
1425 0x3fb50d36UL, 0x950f9f45UL, 0xbe5313f6UL, 0x00000000UL, 0x3fb4860bUL, |
|
1426 0x582cdcb1UL, 0x3e506d39UL, 0x00000000UL, 0x3fb3ff12UL, 0x7216d3a6UL, |
|
1427 0x3e4aa719UL, 0x00000000UL, 0x3fb3784aUL, 0x57a423fdUL, 0x3e5a9b9fUL, |
|
1428 0x00000000UL, 0x3fb2f1b4UL, 0x7a138b41UL, 0xbe50b418UL, 0x00000000UL, |
|
1429 0x3fb26b4eUL, 0x2fbfd7eaUL, 0x3e23a53eUL, 0x00000000UL, 0x3fb1e519UL, |
|
1430 0x18913ccbUL, 0x3e465fc1UL, 0x00000000UL, 0x3fb15f15UL, 0x7ea24e21UL, |
|
1431 0x3e042843UL, 0x00000000UL, 0x3fb0d941UL, 0x7c6d9c77UL, 0x3e59f61eUL, |
|
1432 0x00000000UL, 0x3fb0539eUL, 0x114efd44UL, 0x3e4ccab7UL, 0x00000000UL, |
|
1433 0x3faf9c56UL, 0x1777f657UL, 0x3e552f65UL, 0x00000000UL, 0x3fae91d2UL, |
|
1434 0xc317b86aUL, 0xbe5a61e0UL, 0x00000000UL, 0x3fad87acUL, 0xb7664efbUL, |
|
1435 0xbe41f64eUL, 0x00000000UL, 0x3fac7de6UL, 0x5d3d03a9UL, 0x3e0807a0UL, |
|
1436 0x00000000UL, 0x3fab7480UL, 0x743c38ebUL, 0xbe3726e1UL, 0x00000000UL, |
|
1437 0x3faa6b78UL, 0x06a253f1UL, 0x3e5ad636UL, 0x00000000UL, 0x3fa962d0UL, |
|
1438 0xa35f541bUL, 0x3e5a187aUL, 0x00000000UL, 0x3fa85a88UL, 0x4b86e446UL, |
|
1439 0xbe508150UL, 0x00000000UL, 0x3fa7529cUL, 0x2589cacfUL, 0x3e52938aUL, |
|
1440 0x00000000UL, 0x3fa64b10UL, 0xaf6b11f2UL, 0xbe3454cdUL, 0x00000000UL, |
|
1441 0x3fa543e2UL, 0x97506fefUL, 0xbe5fdec5UL, 0x00000000UL, 0x3fa43d10UL, |
|
1442 0xe75f7dd9UL, 0xbe388dd3UL, 0x00000000UL, 0x3fa3369cUL, 0xa4139632UL, |
|
1443 0xbdea5177UL, 0x00000000UL, 0x3fa23086UL, 0x352d6f1eUL, 0xbe565ad6UL, |
|
1444 0x00000000UL, 0x3fa12accUL, 0x77449eb7UL, 0xbe50d5c7UL, 0x00000000UL, |
|
1445 0x3fa0256eUL, 0x7478da78UL, 0x3e404724UL, 0x00000000UL, 0x3f9e40dcUL, |
|
1446 0xf59cef7fUL, 0xbe539d0aUL, 0x00000000UL, 0x3f9c3790UL, 0x1511d43cUL, |
|
1447 0x3e53c2c8UL, 0x00000000UL, 0x3f9a2f00UL, 0x9b8bff3cUL, 0xbe43b3e1UL, |
|
1448 0x00000000UL, 0x3f982724UL, 0xad1e22a5UL, 0x3e46f0bdUL, 0x00000000UL, |
|
1449 0x3f962000UL, 0x130d9356UL, 0x3e475ba0UL, 0x00000000UL, 0x3f941994UL, |
|
1450 0x8f86f883UL, 0xbe513d0bUL, 0x00000000UL, 0x3f9213dcUL, 0x914d0dc8UL, |
|
1451 0xbe534335UL, 0x00000000UL, 0x3f900ed8UL, 0x2d73e5e7UL, 0xbe22ba75UL, |
|
1452 0x00000000UL, 0x3f8c1510UL, 0xc5b7d70eUL, 0x3e599c5dUL, 0x00000000UL, |
|
1453 0x3f880de0UL, 0x8a27857eUL, 0xbe3d28c8UL, 0x00000000UL, 0x3f840810UL, |
|
1454 0xda767328UL, 0x3e531b3dUL, 0x00000000UL, 0x3f8003b0UL, 0x77bacaf3UL, |
|
1455 0xbe5f04e3UL, 0x00000000UL, 0x3f780150UL, 0xdf4b0720UL, 0x3e5a8bffUL, |
|
1456 0x00000000UL, 0x3f6ffc40UL, 0x34c48e71UL, 0xbe3fcd99UL, 0x00000000UL, |
|
1457 0x3f5ff6c0UL, 0x1ad218afUL, 0xbe4c78a7UL, 0x00000000UL, 0x00000000UL, |
|
1458 0x00000000UL, 0x80000000UL |
|
1459 }; |
|
1460 |
|
// _log2_pow: four juint words that, read pairwise low-word-first (the
// little-endian order used on x86), form two IEEE-754 double bit patterns:
//   { 0xfefa39ef, 0x3fe62e42 } -> 0x3fe62e42fefa39ef = +ln(2) ~ 0.6931471805599453
//   { 0xfefa39ef, 0xbfe62e42 } -> 0xbfe62e42fefa39ef = -ln(2) (same bits, sign bit set)
// The array is 8-byte aligned through ALIGNED_(8).
// fast_pow() takes its address as the local `log2`.
// NOTE(review): the instruction that actually loads these constants is not
// visible in this part of the file -- confirm how both halves are consumed.
1461 ALIGNED_(8) juint _log2_pow[] = |

1462 { |

1463 0xfefa39efUL, 0x3fe62e42UL, 0xfefa39efUL, 0xbfe62e42UL |

1464 }; |
|
1465 |
|
1466 // Registers: |
|
1467 // input: xmm0, xmm1 |
|
1468 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
|
1469 // rax, rdx, rcx, r8, r11 |
|
1470 |
|
1471 // Code generated by Intel C compiler for LIBM library |
|
1472 |
|
1473 void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2, Register tmp3, Register tmp4) { |
|
1474 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
|
1475 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
|
1476 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; |
|
1477 Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, L_2TAG_PACKET_14_0_2, L_2TAG_PACKET_15_0_2; |
|
1478 Label L_2TAG_PACKET_16_0_2, L_2TAG_PACKET_17_0_2, L_2TAG_PACKET_18_0_2, L_2TAG_PACKET_19_0_2; |
|
1479 Label L_2TAG_PACKET_20_0_2, L_2TAG_PACKET_21_0_2, L_2TAG_PACKET_22_0_2, L_2TAG_PACKET_23_0_2; |
|
1480 Label L_2TAG_PACKET_24_0_2, L_2TAG_PACKET_25_0_2, L_2TAG_PACKET_26_0_2, L_2TAG_PACKET_27_0_2; |
|
1481 Label L_2TAG_PACKET_28_0_2, L_2TAG_PACKET_29_0_2, L_2TAG_PACKET_30_0_2, L_2TAG_PACKET_31_0_2; |
|
1482 Label L_2TAG_PACKET_32_0_2, L_2TAG_PACKET_33_0_2, L_2TAG_PACKET_34_0_2, L_2TAG_PACKET_35_0_2; |
|
1483 Label L_2TAG_PACKET_36_0_2, L_2TAG_PACKET_37_0_2, L_2TAG_PACKET_38_0_2, L_2TAG_PACKET_39_0_2; |
|
1484 Label L_2TAG_PACKET_40_0_2, L_2TAG_PACKET_41_0_2, L_2TAG_PACKET_42_0_2, L_2TAG_PACKET_43_0_2; |
|
1485 Label L_2TAG_PACKET_44_0_2, L_2TAG_PACKET_45_0_2, L_2TAG_PACKET_46_0_2, L_2TAG_PACKET_47_0_2; |
|
1486 Label L_2TAG_PACKET_48_0_2, L_2TAG_PACKET_49_0_2, L_2TAG_PACKET_50_0_2, L_2TAG_PACKET_51_0_2; |
|
1487 Label L_2TAG_PACKET_52_0_2, L_2TAG_PACKET_53_0_2, L_2TAG_PACKET_54_0_2, L_2TAG_PACKET_55_0_2; |
|
1488 Label L_2TAG_PACKET_56_0_2; |
|
1489 Label B1_2, B1_3, B1_5, start; |
|
1490 |
|
1491 assert_different_registers(tmp1, tmp2, eax, ecx, edx); |
|
1492 jmp(start); |
|
1493 address HIGHSIGMASK = (address)_HIGHSIGMASK; |
|
1494 address LOG2_E = (address)_LOG2_E; |
|
1495 address coeff = (address)_coeff_pow; |
|
1496 address L_tbl = (address)_L_tbl_pow; |
|
1497 address HIGHMASK_Y = (address)_HIGHMASK_Y; |
|
1498 address T_exp = (address)_T_exp; |
|
1499 address e_coeff = (address)_e_coeff; |
|
1500 address coeff_h = (address)_coeff_h; |
|
1501 address HIGHMASK_LOG_X = (address)_HIGHMASK_LOG_X; |
|
1502 address HALFMASK = (address)_HALFMASK; |
|
1503 address log2 = (address)_log2_pow; |
|
1504 |
|
1505 |
|
1506 bind(start); |
|
1507 subq(rsp, 40); |
|
1508 movsd(Address(rsp, 8), xmm0); |
|
1509 movsd(Address(rsp, 16), xmm1); |
|
1510 |
|
1511 bind(B1_2); |
|
1512 pextrw(eax, xmm0, 3); |
|
1513 xorpd(xmm2, xmm2); |
|
1514 mov64(tmp2, 0x3ff0000000000000); |
|
1515 movdq(xmm2, tmp2); |
|
1516 movl(tmp1, 1069088768); |
|
1517 movdq(xmm7, tmp1); |
|
1518 xorpd(xmm1, xmm1); |
|
1519 mov64(tmp3, 0x77f0000000000000); |
|
1520 movdq(xmm1, tmp3); |
|
1521 movdqu(xmm3, xmm0); |
|
1522 movl(edx, 32752); |
|
1523 andl(edx, eax); |
|
1524 subl(edx, 16368); |
|
1525 movl(ecx, edx); |
|
1526 sarl(edx, 31); |
|
1527 addl(ecx, edx); |
|
1528 xorl(ecx, edx); |
|
1529 por(xmm0, xmm2); |
|
1530 movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL |
|
1531 psrlq(xmm0, 27); |
|
1532 movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL |
|
1533 psrld(xmm0, 2); |
|
1534 addl(ecx, 16); |
|
1535 bsrl(ecx, ecx); |
|
1536 rcpps(xmm0, xmm0); |
|
1537 psllq(xmm3, 12); |
|
1538 movl(tmp4, 8192); |
|
1539 movdq(xmm4, tmp4); |
|
1540 psrlq(xmm3, 12); |
|
1541 subl(eax, 16); |
|
1542 cmpl(eax, 32736); |
|
1543 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
|
1544 movl(tmp1, 0); |
|
1545 |
|
1546 bind(L_2TAG_PACKET_1_0_2); |
|
1547 mulss(xmm0, xmm7); |
|
1548 movl(edx, -1); |
|
1549 subl(ecx, 4); |
|
1550 shll(edx); |
|
1551 shlq(edx, 32); |
|
1552 movdq(xmm5, edx); |
|
1553 por(xmm3, xmm1); |
|
1554 subl(eax, 16351); |
|
1555 cmpl(eax, 1); |
|
1556 jcc(Assembler::belowEqual, L_2TAG_PACKET_2_0_2); |
|
1557 paddd(xmm0, xmm4); |
|
1558 pand(xmm5, xmm3); |
|
1559 movdl(edx, xmm0); |
|
1560 psllq(xmm0, 29); |
|
1561 |
|
1562 bind(L_2TAG_PACKET_3_0_2); |
|
1563 subsd(xmm3, xmm5); |
|
1564 pand(xmm0, xmm6); |
|
1565 subl(eax, 1); |
|
1566 sarl(eax, 4); |
|
1567 cvtsi2sdl(xmm7, eax); |
|
1568 mulpd(xmm5, xmm0); |
|
1569 |
|
1570 bind(L_2TAG_PACKET_4_0_2); |
|
1571 mulsd(xmm3, xmm0); |
|
1572 movdqu(xmm1, ExternalAddress(coeff)); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL |
|
1573 lea(tmp4, ExternalAddress(L_tbl)); |
|
1574 subsd(xmm5, xmm2); |
|
1575 movdqu(xmm4, ExternalAddress(16 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL |
|
1576 movl(ecx, eax); |
|
1577 sarl(eax, 31); |
|
1578 addl(ecx, eax); |
|
1579 xorl(eax, ecx); |
|
1580 addl(eax, 1); |
|
1581 bsrl(eax, eax); |
|
1582 unpcklpd(xmm5, xmm3); |
|
1583 movdqu(xmm6, ExternalAddress(32 + coeff)); //0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL |
|
1584 addsd(xmm3, xmm5); |
|
1585 andl(edx, 16760832); |
|
1586 shrl(edx, 10); |
|
1587 addpd(xmm5, Address(tmp4, edx, Address::times_1, -3648)); |
|
1588 movdqu(xmm0, ExternalAddress(48 + coeff)); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL |
|
1589 pshufd(xmm2, xmm3, 68); |
|
1590 mulsd(xmm3, xmm3); |
|
1591 mulpd(xmm1, xmm2); |
|
1592 mulpd(xmm4, xmm2); |
|
1593 addsd(xmm5, xmm7); |
|
1594 mulsd(xmm2, xmm3); |
|
1595 addpd(xmm6, xmm1); |
|
1596 mulsd(xmm3, xmm3); |
|
1597 addpd(xmm0, xmm4); |
|
1598 movq(xmm1, Address(rsp, 16)); |
|
1599 movw(ecx, Address(rsp, 22)); |
|
1600 pshufd(xmm7, xmm5, 238); |
|
1601 movq(xmm4, ExternalAddress(HIGHMASK_Y)); //0x00000000UL, 0xfffffff8UL, 0x00000000UL, 0xffffffffUL |
|
1602 mulpd(xmm6, xmm2); |
|
1603 pshufd(xmm3, xmm3, 68); |
|
1604 mulpd(xmm0, xmm2); |
|
1605 shll(eax, 4); |
|
1606 subl(eax, 15872); |
|
1607 andl(ecx, 32752); |
|
1608 addl(eax, ecx); |
|
1609 mulpd(xmm3, xmm6); |
|
1610 cmpl(eax, 624); |
|
1611 jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); |
|
1612 xorpd(xmm6, xmm6); |
|
1613 movl(edx, 17080); |
|
1614 pinsrw(xmm6, edx, 3); |
|
1615 movdqu(xmm2, xmm1); |
|
1616 pand(xmm4, xmm1); |
|
1617 subsd(xmm1, xmm4); |
|
1618 mulsd(xmm4, xmm5); |
|
1619 addsd(xmm0, xmm7); |
|
1620 mulsd(xmm1, xmm5); |
|
1621 movdqu(xmm7, xmm6); |
|
1622 addsd(xmm6, xmm4); |
|
1623 lea(tmp4, ExternalAddress(T_exp)); |
|
1624 addpd(xmm3, xmm0); |
|
1625 movdl(edx, xmm6); |
|
1626 subsd(xmm6, xmm7); |
|
1627 pshufd(xmm0, xmm3, 238); |
|
1628 subsd(xmm4, xmm6); |
|
1629 addsd(xmm0, xmm3); |
|
1630 movl(ecx, edx); |
|
1631 andl(edx, 255); |
|
1632 addl(edx, edx); |
|
1633 movdqu(xmm5, Address(tmp4, edx, Address::times_8, 0)); |
|
1634 addsd(xmm4, xmm1); |
|
1635 mulsd(xmm2, xmm0); |
|
1636 movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL |
|
1637 movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL |
|
1638 shll(ecx, 12); |
|
1639 xorl(ecx, tmp1); |
|
1640 andl(rcx, -1048576); |
|
1641 movdq(xmm6, rcx); |
|
1642 addsd(xmm2, xmm4); |
|
1643 mov64(tmp2, 0x3fe62e42fefa39ef); |
|
1644 movdq(xmm1, tmp2); |
|
1645 pshufd(xmm0, xmm2, 68); |
|
1646 pshufd(xmm4, xmm2, 68); |
|
1647 mulsd(xmm1, xmm2); |
|
1648 pshufd(xmm6, xmm6, 17); |
|
1649 mulpd(xmm0, xmm0); |
|
1650 mulpd(xmm7, xmm4); |
|
1651 paddd(xmm5, xmm6); |
|
1652 mulsd(xmm1, xmm5); |
|
1653 pshufd(xmm6, xmm5, 238); |
|
1654 mulsd(xmm0, xmm0); |
|
1655 addpd(xmm3, xmm7); |
|
1656 addsd(xmm1, xmm6); |
|
1657 mulpd(xmm0, xmm3); |
|
1658 pshufd(xmm3, xmm0, 238); |
|
1659 mulsd(xmm0, xmm5); |
|
1660 mulsd(xmm3, xmm5); |
|
1661 addsd(xmm0, xmm1); |
|
1662 addsd(xmm0, xmm3); |
|
1663 addsd(xmm0, xmm5); |
|
1664 jmp(B1_5); |
|
1665 |
|
1666 bind(L_2TAG_PACKET_0_0_2); |
|
1667 addl(eax, 16); |
|
1668 movl(edx, 32752); |
|
1669 andl(edx, eax); |
|
1670 cmpl(edx, 32752); |
|
1671 jcc(Assembler::equal, L_2TAG_PACKET_6_0_2); |
|
1672 testl(eax, 32768); |
|
1673 jcc(Assembler::notEqual, L_2TAG_PACKET_7_0_2); |
|
1674 |
|
1675 bind(L_2TAG_PACKET_8_0_2); |
|
1676 movq(xmm0, Address(rsp, 8)); |
|
1677 movq(xmm3, Address(rsp, 8)); |
|
1678 movdl(edx, xmm3); |
|
1679 psrlq(xmm3, 32); |
|
1680 movdl(ecx, xmm3); |
|
1681 orl(edx, ecx); |
|
1682 cmpl(edx, 0); |
|
1683 jcc(Assembler::equal, L_2TAG_PACKET_9_0_2); |
|
1684 xorpd(xmm3, xmm3); |
|
1685 movl(eax, 18416); |
|
1686 pinsrw(xmm3, eax, 3); |
|
1687 mulsd(xmm0, xmm3); |
|
1688 xorpd(xmm2, xmm2); |
|
1689 movl(eax, 16368); |
|
1690 pinsrw(xmm2, eax, 3); |
|
1691 movdqu(xmm3, xmm0); |
|
1692 pextrw(eax, xmm0, 3); |
|
1693 por(xmm0, xmm2); |
|
1694 movl(ecx, 18416); |
|
1695 psrlq(xmm0, 27); |
|
1696 movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL |
|
1697 psrld(xmm0, 2); |
|
1698 rcpps(xmm0, xmm0); |
|
1699 psllq(xmm3, 12); |
|
1700 movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL |
|
1701 psrlq(xmm3, 12); |
|
1702 mulss(xmm0, xmm7); |
|
1703 movl(edx, -1024); |
|
1704 movdl(xmm5, edx); |
|
1705 por(xmm3, xmm1); |
|
1706 paddd(xmm0, xmm4); |
|
1707 psllq(xmm5, 32); |
|
1708 movdl(edx, xmm0); |
|
1709 psllq(xmm0, 29); |
|
1710 pand(xmm5, xmm3); |
|
1711 movl(tmp1, 0); |
|
1712 pand(xmm0, xmm6); |
|
1713 subsd(xmm3, xmm5); |
|
1714 andl(eax, 32752); |
|
1715 subl(eax, 18416); |
|
1716 sarl(eax, 4); |
|
1717 cvtsi2sdl(xmm7, eax); |
|
1718 mulpd(xmm5, xmm0); |
|
1719 jmp(L_2TAG_PACKET_4_0_2); |
|
1720 |
|
1721 bind(L_2TAG_PACKET_10_0_2); |
|
1722 movq(xmm0, Address(rsp, 8)); |
|
1723 movq(xmm3, Address(rsp, 8)); |
|
1724 movdl(edx, xmm3); |
|
1725 psrlq(xmm3, 32); |
|
1726 movdl(ecx, xmm3); |
|
1727 orl(edx, ecx); |
|
1728 cmpl(edx, 0); |
|
1729 jcc(Assembler::equal, L_2TAG_PACKET_9_0_2); |
|
1730 xorpd(xmm3, xmm3); |
|
1731 movl(eax, 18416); |
|
1732 pinsrw(xmm3, eax, 3); |
|
1733 mulsd(xmm0, xmm3); |
|
1734 xorpd(xmm2, xmm2); |
|
1735 movl(eax, 16368); |
|
1736 pinsrw(xmm2, eax, 3); |
|
1737 movdqu(xmm3, xmm0); |
|
1738 pextrw(eax, xmm0, 3); |
|
1739 por(xmm0, xmm2); |
|
1740 movl(ecx, 18416); |
|
1741 psrlq(xmm0, 27); |
|
1742 movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL |
|
1743 psrld(xmm0, 2); |
|
1744 rcpps(xmm0, xmm0); |
|
1745 psllq(xmm3, 12); |
|
1746 movdqu(xmm6, ExternalAddress(HIGHSIGMASK)); //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL |
|
1747 psrlq(xmm3, 12); |
|
1748 mulss(xmm0, xmm7); |
|
1749 movl(edx, -1024); |
|
1750 movdl(xmm5, edx); |
|
1751 por(xmm3, xmm1); |
|
1752 paddd(xmm0, xmm4); |
|
1753 psllq(xmm5, 32); |
|
1754 movdl(edx, xmm0); |
|
1755 psllq(xmm0, 29); |
|
1756 pand(xmm5, xmm3); |
|
1757 movl(tmp1, INT_MIN); |
|
1758 pand(xmm0, xmm6); |
|
1759 subsd(xmm3, xmm5); |
|
1760 andl(eax, 32752); |
|
1761 subl(eax, 18416); |
|
1762 sarl(eax, 4); |
|
1763 cvtsi2sdl(xmm7, eax); |
|
1764 mulpd(xmm5, xmm0); |
|
1765 jmp(L_2TAG_PACKET_4_0_2); |
|
1766 |
|
1767 bind(L_2TAG_PACKET_5_0_2); |
|
1768 cmpl(eax, 0); |
|
1769 jcc(Assembler::less, L_2TAG_PACKET_11_0_2); |
|
1770 cmpl(eax, 752); |
|
1771 jcc(Assembler::aboveEqual, L_2TAG_PACKET_12_0_2); |
|
1772 addsd(xmm0, xmm7); |
|
1773 movq(xmm2, ExternalAddress(HALFMASK)); //0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL |
|
1774 addpd(xmm3, xmm0); |
|
1775 xorpd(xmm6, xmm6); |
|
1776 movl(eax, 17080); |
|
1777 pinsrw(xmm6, eax, 3); |
|
1778 pshufd(xmm0, xmm3, 238); |
|
1779 addsd(xmm0, xmm3); |
|
1780 movdqu(xmm3, xmm5); |
|
1781 addsd(xmm5, xmm0); |
|
1782 movdqu(xmm4, xmm2); |
|
1783 subsd(xmm3, xmm5); |
|
1784 movdqu(xmm7, xmm5); |
|
1785 pand(xmm5, xmm2); |
|
1786 movdqu(xmm2, xmm1); |
|
1787 pand(xmm4, xmm1); |
|
1788 subsd(xmm7, xmm5); |
|
1789 addsd(xmm0, xmm3); |
|
1790 subsd(xmm1, xmm4); |
|
1791 mulsd(xmm4, xmm5); |
|
1792 addsd(xmm0, xmm7); |
|
1793 mulsd(xmm2, xmm0); |
|
1794 movdqu(xmm7, xmm6); |
|
1795 mulsd(xmm1, xmm5); |
|
1796 addsd(xmm6, xmm4); |
|
1797 movdl(eax, xmm6); |
|
1798 subsd(xmm6, xmm7); |
|
1799 lea(tmp4, ExternalAddress(T_exp)); |
|
1800 addsd(xmm2, xmm1); |
|
1801 movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL |
|
1802 movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL |
|
1803 subsd(xmm4, xmm6); |
|
1804 pextrw(edx, xmm6, 3); |
|
1805 movl(ecx, eax); |
|
1806 andl(eax, 255); |
|
1807 addl(eax, eax); |
|
1808 movdqu(xmm5, Address(tmp4, rax, Address::times_8, 0)); |
|
1809 addsd(xmm2, xmm4); |
|
1810 sarl(ecx, 8); |
|
1811 movl(eax, ecx); |
|
1812 sarl(ecx, 1); |
|
1813 subl(eax, ecx); |
|
1814 shll(ecx, 20); |
|
1815 xorl(ecx, tmp1); |
|
1816 movdl(xmm6, ecx); |
|
1817 movq(xmm1, ExternalAddress(32 + e_coeff)); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL |
|
1818 andl(edx, 32767); |
|
1819 cmpl(edx, 16529); |
|
1820 jcc(Assembler::above, L_2TAG_PACKET_12_0_2); |
|
1821 pshufd(xmm0, xmm2, 68); |
|
1822 pshufd(xmm4, xmm2, 68); |
|
1823 mulpd(xmm0, xmm0); |
|
1824 mulpd(xmm7, xmm4); |
|
1825 pshufd(xmm6, xmm6, 17); |
|
1826 mulsd(xmm1, xmm2); |
|
1827 mulsd(xmm0, xmm0); |
|
1828 paddd(xmm5, xmm6); |
|
1829 addpd(xmm3, xmm7); |
|
1830 mulsd(xmm1, xmm5); |
|
1831 pshufd(xmm6, xmm5, 238); |
|
1832 mulpd(xmm0, xmm3); |
|
1833 addsd(xmm1, xmm6); |
|
1834 pshufd(xmm3, xmm0, 238); |
|
1835 mulsd(xmm0, xmm5); |
|
1836 mulsd(xmm3, xmm5); |
|
1837 shll(eax, 4); |
|
1838 xorpd(xmm4, xmm4); |
|
1839 addl(eax, 16368); |
|
1840 pinsrw(xmm4, eax, 3); |
|
1841 addsd(xmm0, xmm1); |
|
1842 addsd(xmm0, xmm3); |
|
1843 movdqu(xmm1, xmm0); |
|
1844 addsd(xmm0, xmm5); |
|
1845 mulsd(xmm0, xmm4); |
|
1846 pextrw(eax, xmm0, 3); |
|
1847 andl(eax, 32752); |
|
1848 jcc(Assembler::equal, L_2TAG_PACKET_13_0_2); |
|
1849 cmpl(eax, 32752); |
|
1850 jcc(Assembler::equal, L_2TAG_PACKET_14_0_2); |
|
1851 jmp(B1_5); |
|
1852 |
|
1853 bind(L_2TAG_PACKET_6_0_2); |
|
1854 movq(xmm1, Address(rsp, 16)); |
|
1855 movq(xmm0, Address(rsp, 8)); |
|
1856 movdqu(xmm2, xmm0); |
|
1857 movdl(eax, xmm2); |
|
1858 psrlq(xmm2, 20); |
|
1859 movdl(edx, xmm2); |
|
1860 orl(eax, edx); |
|
1861 jcc(Assembler::equal, L_2TAG_PACKET_15_0_2); |
|
1862 movdl(eax, xmm1); |
|
1863 psrlq(xmm1, 32); |
|
1864 movdl(edx, xmm1); |
|
1865 movl(ecx, edx); |
|
1866 addl(edx, edx); |
|
1867 orl(eax, edx); |
|
1868 jcc(Assembler::equal, L_2TAG_PACKET_16_0_2); |
|
1869 addsd(xmm0, xmm0); |
|
1870 jmp(B1_5); |
|
1871 |
|
1872 bind(L_2TAG_PACKET_16_0_2); |
|
1873 xorpd(xmm0, xmm0); |
|
1874 movl(eax, 16368); |
|
1875 pinsrw(xmm0, eax, 3); |
|
1876 movl(Address(rsp, 0), 29); |
|
1877 jmp(L_2TAG_PACKET_17_0_2); |
|
1878 |
|
1879 bind(L_2TAG_PACKET_18_0_2); |
|
1880 movq(xmm0, Address(rsp, 16)); |
|
1881 addpd(xmm0, xmm0); |
|
1882 jmp(B1_5); |
|
1883 |
|
1884 bind(L_2TAG_PACKET_15_0_2); |
|
1885 movdl(eax, xmm1); |
|
1886 movdqu(xmm2, xmm1); |
|
1887 psrlq(xmm1, 32); |
|
1888 movdl(edx, xmm1); |
|
1889 movl(ecx, edx); |
|
1890 addl(edx, edx); |
|
1891 orl(eax, edx); |
|
1892 jcc(Assembler::equal, L_2TAG_PACKET_19_0_2); |
|
1893 pextrw(eax, xmm2, 3); |
|
1894 andl(eax, 32752); |
|
1895 cmpl(eax, 32752); |
|
1896 jcc(Assembler::notEqual, L_2TAG_PACKET_20_0_2); |
|
1897 movdl(eax, xmm2); |
|
1898 psrlq(xmm2, 20); |
|
1899 movdl(edx, xmm2); |
|
1900 orl(eax, edx); |
|
1901 jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2); |
|
1902 |
|
1903 bind(L_2TAG_PACKET_20_0_2); |
|
1904 pextrw(eax, xmm0, 3); |
|
1905 testl(eax, 32768); |
|
1906 jcc(Assembler::notEqual, L_2TAG_PACKET_21_0_2); |
|
1907 testl(ecx, INT_MIN); |
|
1908 jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2); |
|
1909 jmp(B1_5); |
|
1910 |
|
1911 bind(L_2TAG_PACKET_23_0_2); |
|
1912 movq(xmm1, Address(rsp, 16)); |
|
1913 movdl(eax, xmm1); |
|
1914 testl(eax, 1); |
|
1915 jcc(Assembler::notEqual, L_2TAG_PACKET_24_0_2); |
|
1916 testl(eax, 2); |
|
1917 jcc(Assembler::notEqual, L_2TAG_PACKET_25_0_2); |
|
1918 jmp(L_2TAG_PACKET_24_0_2); |
|
1919 |
|
1920 bind(L_2TAG_PACKET_21_0_2); |
|
1921 shrl(ecx, 20); |
|
1922 andl(ecx, 2047); |
|
1923 cmpl(ecx, 1075); |
|
1924 jcc(Assembler::above, L_2TAG_PACKET_24_0_2); |
|
1925 jcc(Assembler::equal, L_2TAG_PACKET_26_0_2); |
|
1926 cmpl(ecx, 1074); |
|
1927 jcc(Assembler::above, L_2TAG_PACKET_23_0_2); |
|
1928 cmpl(ecx, 1023); |
|
1929 jcc(Assembler::below, L_2TAG_PACKET_24_0_2); |
|
1930 movq(xmm1, Address(rsp, 16)); |
|
1931 movl(eax, 17208); |
|
1932 xorpd(xmm3, xmm3); |
|
1933 pinsrw(xmm3, eax, 3); |
|
1934 movdqu(xmm4, xmm3); |
|
1935 addsd(xmm3, xmm1); |
|
1936 subsd(xmm4, xmm3); |
|
1937 addsd(xmm1, xmm4); |
|
1938 pextrw(eax, xmm1, 3); |
|
1939 andl(eax, 32752); |
|
1940 jcc(Assembler::notEqual, L_2TAG_PACKET_24_0_2); |
|
1941 movdl(eax, xmm3); |
|
1942 andl(eax, 1); |
|
1943 jcc(Assembler::equal, L_2TAG_PACKET_24_0_2); |
|
1944 |
|
1945 bind(L_2TAG_PACKET_25_0_2); |
|
1946 movq(xmm1, Address(rsp, 16)); |
|
1947 pextrw(eax, xmm1, 3); |
|
1948 andl(eax, 32768); |
|
1949 jcc(Assembler::notEqual, L_2TAG_PACKET_27_0_2); |
|
1950 jmp(B1_5); |
|
1951 |
|
1952 bind(L_2TAG_PACKET_27_0_2); |
|
1953 xorpd(xmm0, xmm0); |
|
1954 movl(eax, 32768); |
|
1955 pinsrw(xmm0, eax, 3); |
|
1956 jmp(B1_5); |
|
1957 |
|
1958 bind(L_2TAG_PACKET_24_0_2); |
|
1959 movq(xmm1, Address(rsp, 16)); |
|
1960 pextrw(eax, xmm1, 3); |
|
1961 andl(eax, 32768); |
|
1962 jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2); |
|
1963 xorpd(xmm0, xmm0); |
|
1964 movl(eax, 32752); |
|
1965 pinsrw(xmm0, eax, 3); |
|
1966 jmp(B1_5); |
|
1967 |
|
1968 bind(L_2TAG_PACKET_26_0_2); |
|
1969 movq(xmm1, Address(rsp, 16)); |
|
1970 movdl(eax, xmm1); |
|
1971 andl(eax, 1); |
|
1972 jcc(Assembler::equal, L_2TAG_PACKET_24_0_2); |
|
1973 jmp(L_2TAG_PACKET_25_0_2); |
|
1974 |
|
1975 bind(L_2TAG_PACKET_28_0_2); |
|
1976 movdl(eax, xmm1); |
|
1977 psrlq(xmm1, 20); |
|
1978 movdl(edx, xmm1); |
|
1979 orl(eax, edx); |
|
1980 jcc(Assembler::equal, L_2TAG_PACKET_29_0_2); |
|
1981 movq(xmm0, Address(rsp, 16)); |
|
1982 addsd(xmm0, xmm0); |
|
1983 jmp(B1_5); |
|
1984 |
|
1985 bind(L_2TAG_PACKET_29_0_2); |
|
1986 movq(xmm0, Address(rsp, 8)); |
|
1987 pextrw(eax, xmm0, 3); |
|
1988 cmpl(eax, 49136); |
|
1989 jcc(Assembler::notEqual, L_2TAG_PACKET_30_0_2); |
|
1990 movdl(ecx, xmm0); |
|
1991 psrlq(xmm0, 20); |
|
1992 movdl(edx, xmm0); |
|
1993 orl(ecx, edx); |
|
1994 jcc(Assembler::notEqual, L_2TAG_PACKET_30_0_2); |
|
1995 xorpd(xmm0, xmm0); |
|
1996 movl(eax, 32760); |
|
1997 pinsrw(xmm0, eax, 3); |
|
1998 jmp(B1_5); |
|
1999 |
|
2000 bind(L_2TAG_PACKET_30_0_2); |
|
2001 movq(xmm1, Address(rsp, 16)); |
|
2002 andl(eax, 32752); |
|
2003 subl(eax, 16368); |
|
2004 pextrw(edx, xmm1, 3); |
|
2005 xorpd(xmm0, xmm0); |
|
2006 xorl(eax, edx); |
|
2007 andl(eax, 32768); |
|
2008 jcc(Assembler::equal, L_2TAG_PACKET_31_0_2); |
|
2009 jmp(B1_5); |
|
2010 |
|
2011 bind(L_2TAG_PACKET_31_0_2); |
|
2012 movl(ecx, 32752); |
|
2013 pinsrw(xmm0, ecx, 3); |
|
2014 jmp(B1_5); |
|
2015 |
|
2016 bind(L_2TAG_PACKET_32_0_2); |
|
2017 movdl(eax, xmm1); |
|
2018 cmpl(edx, 17184); |
|
2019 jcc(Assembler::above, L_2TAG_PACKET_33_0_2); |
|
2020 testl(eax, 1); |
|
2021 jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2); |
|
2022 testl(eax, 2); |
|
2023 jcc(Assembler::equal, L_2TAG_PACKET_35_0_2); |
|
2024 jmp(L_2TAG_PACKET_36_0_2); |
|
2025 |
|
2026 bind(L_2TAG_PACKET_33_0_2); |
|
2027 testl(eax, 1); |
|
2028 jcc(Assembler::equal, L_2TAG_PACKET_35_0_2); |
|
2029 jmp(L_2TAG_PACKET_36_0_2); |
|
2030 |
|
2031 bind(L_2TAG_PACKET_7_0_2); |
|
2032 movq(xmm2, Address(rsp, 8)); |
|
2033 movdl(eax, xmm2); |
|
2034 psrlq(xmm2, 31); |
|
2035 movdl(ecx, xmm2); |
|
2036 orl(eax, ecx); |
|
2037 jcc(Assembler::equal, L_2TAG_PACKET_9_0_2); |
|
2038 movq(xmm1, Address(rsp, 16)); |
|
2039 pextrw(edx, xmm1, 3); |
|
2040 movdl(eax, xmm1); |
|
2041 movdqu(xmm2, xmm1); |
|
2042 psrlq(xmm2, 32); |
|
2043 movdl(ecx, xmm2); |
|
2044 addl(ecx, ecx); |
|
2045 orl(ecx, eax); |
|
2046 jcc(Assembler::equal, L_2TAG_PACKET_37_0_2); |
|
2047 andl(edx, 32752); |
|
2048 cmpl(edx, 32752); |
|
2049 jcc(Assembler::equal, L_2TAG_PACKET_28_0_2); |
|
2050 cmpl(edx, 17200); |
|
2051 jcc(Assembler::above, L_2TAG_PACKET_35_0_2); |
|
2052 cmpl(edx, 17184); |
|
2053 jcc(Assembler::aboveEqual, L_2TAG_PACKET_32_0_2); |
|
2054 cmpl(edx, 16368); |
|
2055 jcc(Assembler::below, L_2TAG_PACKET_34_0_2); |
|
2056 movl(eax, 17208); |
|
2057 xorpd(xmm2, xmm2); |
|
2058 pinsrw(xmm2, eax, 3); |
|
2059 movdqu(xmm4, xmm2); |
|
2060 addsd(xmm2, xmm1); |
|
2061 subsd(xmm4, xmm2); |
|
2062 addsd(xmm1, xmm4); |
|
2063 pextrw(eax, xmm1, 3); |
|
2064 andl(eax, 32767); |
|
2065 jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2); |
|
2066 movdl(eax, xmm2); |
|
2067 andl(eax, 1); |
|
2068 jcc(Assembler::equal, L_2TAG_PACKET_35_0_2); |
|
2069 |
|
2070 bind(L_2TAG_PACKET_36_0_2); |
|
2071 xorpd(xmm1, xmm1); |
|
2072 movl(edx, 30704); |
|
2073 pinsrw(xmm1, edx, 3); |
|
2074 movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL |
|
2075 movq(xmm4, Address(rsp, 8)); |
|
2076 pextrw(eax, xmm4, 3); |
|
2077 movl(edx, 8192); |
|
2078 movdl(xmm4, edx); |
|
2079 andl(eax, 32767); |
|
2080 subl(eax, 16); |
|
2081 jcc(Assembler::less, L_2TAG_PACKET_10_0_2); |
|
2082 movl(edx, eax); |
|
2083 andl(edx, 32752); |
|
2084 subl(edx, 16368); |
|
2085 movl(ecx, edx); |
|
2086 sarl(edx, 31); |
|
2087 addl(ecx, edx); |
|
2088 xorl(ecx, edx); |
|
2089 addl(ecx, 16); |
|
2090 bsrl(ecx, ecx); |
|
2091 movl(tmp1, INT_MIN); |
|
2092 jmp(L_2TAG_PACKET_1_0_2); |
|
2093 |
|
2094 bind(L_2TAG_PACKET_34_0_2); |
|
2095 xorpd(xmm1, xmm1); |
|
2096 movl(eax, 32752); |
|
2097 pinsrw(xmm1, eax, 3); |
|
2098 xorpd(xmm0, xmm0); |
|
2099 mulsd(xmm0, xmm1); |
|
2100 movl(Address(rsp, 0), 28); |
|
2101 jmp(L_2TAG_PACKET_17_0_2); |
|
2102 |
|
2103 bind(L_2TAG_PACKET_35_0_2); |
|
2104 xorpd(xmm1, xmm1); |
|
2105 movl(edx, 30704); |
|
2106 pinsrw(xmm1, edx, 3); |
|
2107 movq(xmm2, ExternalAddress(LOG2_E)); //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL |
|
2108 movq(xmm4, Address(rsp, 8)); |
|
2109 pextrw(eax, xmm4, 3); |
|
2110 movl(edx, 8192); |
|
2111 movdl(xmm4, edx); |
|
2112 andl(eax, 32767); |
|
2113 subl(eax, 16); |
|
2114 jcc(Assembler::less, L_2TAG_PACKET_8_0_2); |
|
2115 movl(edx, eax); |
|
2116 andl(edx, 32752); |
|
2117 subl(edx, 16368); |
|
2118 movl(ecx, edx); |
|
2119 sarl(edx, 31); |
|
2120 addl(ecx, edx); |
|
2121 xorl(ecx, edx); |
|
2122 addl(ecx, 16); |
|
2123 bsrl(ecx, ecx); |
|
2124 movl(tmp1, 0); |
|
2125 jmp(L_2TAG_PACKET_1_0_2); |
|
2126 |
|
2127 bind(L_2TAG_PACKET_19_0_2); |
|
2128 xorpd(xmm0, xmm0); |
|
2129 movl(eax, 16368); |
|
2130 pinsrw(xmm0, eax, 3); |
|
2131 jmp(B1_5); |
|
2132 |
|
2133 bind(L_2TAG_PACKET_22_0_2); |
|
2134 xorpd(xmm0, xmm0); |
|
2135 jmp(B1_5); |
|
2136 |
|
2137 bind(L_2TAG_PACKET_11_0_2); |
|
2138 addl(eax, 384); |
|
2139 cmpl(eax, 0); |
|
2140 jcc(Assembler::less, L_2TAG_PACKET_38_0_2); |
|
2141 mulsd(xmm5, xmm1); |
|
2142 addsd(xmm0, xmm7); |
|
2143 shrl(tmp1, 31); |
|
2144 addpd(xmm3, xmm0); |
|
2145 pshufd(xmm0, xmm3, 238); |
|
2146 addsd(xmm3, xmm0); |
|
2147 lea(tmp4, ExternalAddress(log2)); //0xfefa39efUL, 0x3fe62e42UL, 0xfefa39efUL, 0xbfe62e42UL |
|
2148 movq(xmm4, Address(tmp4, tmp1, Address::times_8, 0)); |
|
2149 mulsd(xmm1, xmm3); |
|
2150 xorpd(xmm0, xmm0); |
|
2151 movl(eax, 16368); |
|
2152 shll(tmp1, 15); |
|
2153 orl(eax, tmp1); |
|
2154 pinsrw(xmm0, eax, 3); |
|
2155 addsd(xmm5, xmm1); |
|
2156 mulsd(xmm5, xmm4); |
|
2157 addsd(xmm0, xmm5); |
|
2158 jmp(B1_5); |
|
2159 |
|
2160 bind(L_2TAG_PACKET_38_0_2); |
|
2161 |
|
2162 bind(L_2TAG_PACKET_37_0_2); |
|
2163 xorpd(xmm0, xmm0); |
|
2164 movl(eax, 16368); |
|
2165 pinsrw(xmm0, eax, 3); |
|
2166 jmp(B1_5); |
|
2167 |
|
2168 bind(L_2TAG_PACKET_39_0_2); |
|
2169 xorpd(xmm0, xmm0); |
|
2170 movl(eax, 16368); |
|
2171 pinsrw(xmm0, eax, 3); |
|
2172 movl(Address(rsp, 0), 26); |
|
2173 jmp(L_2TAG_PACKET_17_0_2); |
|
2174 |
|
2175 bind(L_2TAG_PACKET_9_0_2); |
|
2176 movq(xmm1, Address(rsp, 16)); |
|
2177 movdqu(xmm2, xmm1); |
|
2178 pextrw(eax, xmm1, 3); |
|
2179 andl(eax, 32752); |
|
2180 cmpl(eax, 32752); |
|
2181 jcc(Assembler::notEqual, L_2TAG_PACKET_40_0_2); |
|
2182 movdl(eax, xmm2); |
|
2183 psrlq(xmm2, 20); |
|
2184 movdl(edx, xmm2); |
|
2185 orl(eax, edx); |
|
2186 jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2); |
|
2187 |
|
2188 bind(L_2TAG_PACKET_40_0_2); |
|
2189 movdl(eax, xmm1); |
|
2190 psrlq(xmm1, 32); |
|
2191 movdl(edx, xmm1); |
|
2192 movl(ecx, edx); |
|
2193 addl(edx, edx); |
|
2194 orl(eax, edx); |
|
2195 jcc(Assembler::equal, L_2TAG_PACKET_39_0_2); |
|
2196 shrl(edx, 21); |
|
2197 cmpl(edx, 1075); |
|
2198 jcc(Assembler::above, L_2TAG_PACKET_41_0_2); |
|
2199 jcc(Assembler::equal, L_2TAG_PACKET_42_0_2); |
|
2200 cmpl(edx, 1023); |
|
2201 jcc(Assembler::below, L_2TAG_PACKET_41_0_2); |
|
2202 movq(xmm1, Address(rsp, 16)); |
|
2203 movl(eax, 17208); |
|
2204 xorpd(xmm3, xmm3); |
|
2205 pinsrw(xmm3, eax, 3); |
|
2206 movdqu(xmm4, xmm3); |
|
2207 addsd(xmm3, xmm1); |
|
2208 subsd(xmm4, xmm3); |
|
2209 addsd(xmm1, xmm4); |
|
2210 pextrw(eax, xmm1, 3); |
|
2211 andl(eax, 32752); |
|
2212 jcc(Assembler::notEqual, L_2TAG_PACKET_41_0_2); |
|
2213 movdl(eax, xmm3); |
|
2214 andl(eax, 1); |
|
2215 jcc(Assembler::equal, L_2TAG_PACKET_41_0_2); |
|
2216 |
|
2217 bind(L_2TAG_PACKET_43_0_2); |
|
2218 movq(xmm0, Address(rsp, 8)); |
|
2219 testl(ecx, INT_MIN); |
|
2220 jcc(Assembler::notEqual, L_2TAG_PACKET_44_0_2); |
|
2221 jmp(B1_5); |
|
2222 |
|
2223 bind(L_2TAG_PACKET_42_0_2); |
|
2224 movq(xmm1, Address(rsp, 16)); |
|
2225 movdl(eax, xmm1); |
|
2226 testl(eax, 1); |
|
2227 jcc(Assembler::notEqual, L_2TAG_PACKET_43_0_2); |
|
2228 |
|
2229 bind(L_2TAG_PACKET_41_0_2); |
|
2230 testl(ecx, INT_MIN); |
|
2231 jcc(Assembler::equal, L_2TAG_PACKET_22_0_2); |
|
2232 xorpd(xmm0, xmm0); |
|
2233 |
|
2234 bind(L_2TAG_PACKET_44_0_2); |
|
2235 movl(eax, 16368); |
|
2236 xorpd(xmm1, xmm1); |
|
2237 pinsrw(xmm1, eax, 3); |
|
2238 divsd(xmm1, xmm0); |
|
2239 movdqu(xmm0, xmm1); |
|
2240 movl(Address(rsp, 0), 27); |
|
2241 jmp(L_2TAG_PACKET_17_0_2); |
|
2242 |
|
2243 bind(L_2TAG_PACKET_12_0_2); |
|
2244 movq(xmm2, Address(rsp, 8)); |
|
2245 movq(xmm6, Address(rsp, 16)); |
|
2246 pextrw(eax, xmm2, 3); |
|
2247 pextrw(edx, xmm6, 3); |
|
2248 movl(ecx, 32752); |
|
2249 andl(ecx, edx); |
|
2250 cmpl(ecx, 32752); |
|
2251 jcc(Assembler::equal, L_2TAG_PACKET_45_0_2); |
|
2252 andl(eax, 32752); |
|
2253 subl(eax, 16368); |
|
2254 xorl(edx, eax); |
|
2255 testl(edx, 32768); |
|
2256 jcc(Assembler::notEqual, L_2TAG_PACKET_46_0_2); |
|
2257 |
|
2258 bind(L_2TAG_PACKET_47_0_2); |
|
2259 movl(eax, 32736); |
|
2260 pinsrw(xmm0, eax, 3); |
|
2261 shrl(tmp1, 16); |
|
2262 orl(eax, tmp1); |
|
2263 pinsrw(xmm1, eax, 3); |
|
2264 mulsd(xmm0, xmm1); |
|
2265 |
|
2266 bind(L_2TAG_PACKET_14_0_2); |
|
2267 movl(Address(rsp, 0), 24); |
|
2268 jmp(L_2TAG_PACKET_17_0_2); |
|
2269 |
|
2270 bind(L_2TAG_PACKET_46_0_2); |
|
2271 movl(eax, 16); |
|
2272 pinsrw(xmm0, eax, 3); |
|
2273 mulsd(xmm0, xmm0); |
|
2274 testl(tmp1, INT_MIN); |
|
2275 jcc(Assembler::equal, L_2TAG_PACKET_48_0_2); |
|
2276 mov64(tmp2, 0x8000000000000000); |
|
2277 movdq(xmm2, tmp2); |
|
2278 xorpd(xmm0, xmm2); |
|
2279 |
|
2280 bind(L_2TAG_PACKET_48_0_2); |
|
2281 movl(Address(rsp, 0), 25); |
|
2282 jmp(L_2TAG_PACKET_17_0_2); |
|
2283 |
|
2284 bind(L_2TAG_PACKET_13_0_2); |
|
2285 pextrw(ecx, xmm5, 3); |
|
2286 pextrw(edx, xmm4, 3); |
|
2287 movl(eax, -1); |
|
2288 andl(ecx, 32752); |
|
2289 subl(ecx, 16368); |
|
2290 andl(edx, 32752); |
|
2291 addl(edx, ecx); |
|
2292 movl(ecx, -31); |
|
2293 sarl(edx, 4); |
|
2294 subl(ecx, edx); |
|
2295 jcc(Assembler::lessEqual, L_2TAG_PACKET_49_0_2); |
|
2296 cmpl(ecx, 20); |
|
2297 jcc(Assembler::above, L_2TAG_PACKET_50_0_2); |
|
2298 shll(eax); |
|
2299 |
|
2300 bind(L_2TAG_PACKET_49_0_2); |
|
2301 movdl(xmm0, eax); |
|
2302 psllq(xmm0, 32); |
|
2303 pand(xmm0, xmm5); |
|
2304 subsd(xmm5, xmm0); |
|
2305 addsd(xmm5, xmm1); |
|
2306 mulsd(xmm0, xmm4); |
|
2307 mulsd(xmm5, xmm4); |
|
2308 addsd(xmm0, xmm5); |
|
2309 |
|
2310 bind(L_2TAG_PACKET_50_0_2); |
|
2311 jmp(L_2TAG_PACKET_48_0_2); |
|
2312 |
|
2313 bind(L_2TAG_PACKET_2_0_2); |
|
2314 movw(ecx, Address(rsp, 22)); |
|
2315 movl(edx, INT_MIN); |
|
2316 movdl(xmm1, rdx); |
|
2317 xorpd(xmm7, xmm7); |
|
2318 paddd(xmm0, xmm4); |
|
2319 movdl(edx, xmm0); |
|
2320 psllq(xmm0, 29); |
|
2321 paddq(xmm1, xmm3); |
|
2322 pand(xmm5, xmm1); |
|
2323 andl(ecx, 32752); |
|
2324 cmpl(ecx, 16560); |
|
2325 jcc(Assembler::less, L_2TAG_PACKET_3_0_2); |
|
2326 pand(xmm0, xmm6); |
|
2327 subsd(xmm3, xmm5); |
|
2328 addl(eax, 16351); |
|
2329 shrl(eax, 4); |
|
2330 subl(eax, 1022); |
|
2331 cvtsi2sdl(xmm7, eax); |
|
2332 mulpd(xmm5, xmm0); |
|
2333 lea(r11, ExternalAddress(L_tbl)); |
|
2334 movq(xmm4, ExternalAddress(coeff_h)); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL |
|
2335 mulsd(xmm3, xmm0); |
|
2336 movq(xmm6, ExternalAddress(coeff_h)); //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL |
|
2337 subsd(xmm5, xmm2); |
|
2338 movq(xmm1, ExternalAddress(8 + coeff_h)); //0x00000000UL, 0xbf5dabe1UL |
|
2339 pshufd(xmm2, xmm3, 68); |
|
2340 unpcklpd(xmm5, xmm3); |
|
2341 addsd(xmm3, xmm5); |
|
2342 movq(xmm0, ExternalAddress(8 + coeff_h)); //0x00000000UL, 0xbf5dabe1UL |
|
2343 andl(edx, 16760832); |
|
2344 shrl(edx, 10); |
|
2345 addpd(xmm7, Address(tmp4, edx, Address::times_1, -3648)); |
|
2346 mulsd(xmm4, xmm5); |
|
2347 mulsd(xmm0, xmm5); |
|
2348 mulsd(xmm6, xmm2); |
|
2349 mulsd(xmm1, xmm2); |
|
2350 movdqu(xmm2, xmm5); |
|
2351 mulsd(xmm4, xmm5); |
|
2352 addsd(xmm5, xmm0); |
|
2353 movdqu(xmm0, xmm7); |
|
2354 addsd(xmm2, xmm3); |
|
2355 addsd(xmm7, xmm5); |
|
2356 mulsd(xmm6, xmm2); |
|
2357 subsd(xmm0, xmm7); |
|
2358 movdqu(xmm2, xmm7); |
|
2359 addsd(xmm7, xmm4); |
|
2360 addsd(xmm0, xmm5); |
|
2361 subsd(xmm2, xmm7); |
|
2362 addsd(xmm4, xmm2); |
|
2363 pshufd(xmm2, xmm5, 238); |
|
2364 movdqu(xmm5, xmm7); |
|
2365 addsd(xmm7, xmm2); |
|
2366 addsd(xmm4, xmm0); |
|
2367 movdqu(xmm0, ExternalAddress(coeff)); //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL |
|
2368 subsd(xmm5, xmm7); |
|
2369 addsd(xmm6, xmm4); |
|
2370 movdqu(xmm4, xmm7); |
|
2371 addsd(xmm5, xmm2); |
|
2372 addsd(xmm7, xmm1); |
|
2373 movdqu(xmm2, ExternalAddress(64 + coeff)); //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL |
|
2374 subsd(xmm4, xmm7); |
|
2375 addsd(xmm6, xmm5); |
|
2376 addsd(xmm4, xmm1); |
|
2377 pshufd(xmm5, xmm7, 238); |
|
2378 movapd(xmm1, xmm7); |
|
2379 addsd(xmm7, xmm5); |
|
2380 subsd(xmm1, xmm7); |
|
2381 addsd(xmm1, xmm5); |
|
2382 movdqu(xmm5, ExternalAddress(80 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL |
|
2383 pshufd(xmm3, xmm3, 68); |
|
2384 addsd(xmm6, xmm4); |
|
2385 addsd(xmm6, xmm1); |
|
2386 movdqu(xmm1, ExternalAddress(32 + coeff)); //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL |
|
2387 mulpd(xmm0, xmm3); |
|
2388 mulpd(xmm2, xmm3); |
|
2389 pshufd(xmm4, xmm3, 68); |
|
2390 mulpd(xmm3, xmm3); |
|
2391 addpd(xmm0, xmm1); |
|
2392 addpd(xmm5, xmm2); |
|
2393 mulsd(xmm4, xmm3); |
|
2394 movq(xmm2, ExternalAddress(HIGHMASK_LOG_X)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xfffff800UL |
|
2395 mulpd(xmm3, xmm3); |
|
2396 movq(xmm1, Address(rsp, 16)); |
|
2397 movw(ecx, Address(rsp, 22)); |
|
2398 mulpd(xmm0, xmm4); |
|
2399 pextrw(eax, xmm7, 3); |
|
2400 mulpd(xmm5, xmm4); |
|
2401 mulpd(xmm0, xmm3); |
|
2402 movq(xmm4, ExternalAddress(8 + HIGHMASK_Y)); //0x00000000UL, 0xffffffffUL |
|
2403 pand(xmm2, xmm7); |
|
2404 addsd(xmm5, xmm6); |
|
2405 subsd(xmm7, xmm2); |
|
2406 addpd(xmm5, xmm0); |
|
2407 andl(eax, 32752); |
|
2408 subl(eax, 16368); |
|
2409 andl(ecx, 32752); |
|
2410 cmpl(ecx, 32752); |
|
2411 jcc(Assembler::equal, L_2TAG_PACKET_45_0_2); |
|
2412 addl(ecx, eax); |
|
2413 cmpl(ecx, 16576); |
|
2414 jcc(Assembler::aboveEqual, L_2TAG_PACKET_51_0_2); |
|
2415 pshufd(xmm0, xmm5, 238); |
|
2416 pand(xmm4, xmm1); |
|
2417 movdqu(xmm3, xmm1); |
|
2418 addsd(xmm5, xmm0); |
|
2419 subsd(xmm1, xmm4); |
|
2420 xorpd(xmm6, xmm6); |
|
2421 movl(edx, 17080); |
|
2422 pinsrw(xmm6, edx, 3); |
|
2423 addsd(xmm7, xmm5); |
|
2424 mulsd(xmm4, xmm2); |
|
2425 mulsd(xmm1, xmm2); |
|
2426 movdqu(xmm5, xmm6); |
|
2427 mulsd(xmm3, xmm7); |
|
2428 addsd(xmm6, xmm4); |
|
2429 addsd(xmm1, xmm3); |
|
2430 movdqu(xmm7, ExternalAddress(e_coeff)); //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL |
|
2431 movdl(edx, xmm6); |
|
2432 subsd(xmm6, xmm5); |
|
2433 lea(tmp4, ExternalAddress(T_exp)); |
|
2434 movdqu(xmm3, ExternalAddress(16 + e_coeff)); //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL |
|
2435 movq(xmm2, ExternalAddress(32 + e_coeff)); //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL |
|
2436 subsd(xmm4, xmm6); |
|
2437 movl(ecx, edx); |
|
2438 andl(edx, 255); |
|
2439 addl(edx, edx); |
|
2440 movdqu(xmm5, Address(tmp4, edx, Address::times_8, 0)); |
|
2441 addsd(xmm4, xmm1); |
|
2442 pextrw(edx, xmm6, 3); |
|
2443 shrl(ecx, 8); |
|
2444 movl(eax, ecx); |
|
2445 shrl(ecx, 1); |
|
2446 subl(eax, ecx); |
|
2447 shll(ecx, 20); |
|
2448 movdl(xmm6, ecx); |
|
2449 pshufd(xmm0, xmm4, 68); |
|
2450 pshufd(xmm1, xmm4, 68); |
|
2451 mulpd(xmm0, xmm0); |
|
2452 mulpd(xmm7, xmm1); |
|
2453 pshufd(xmm6, xmm6, 17); |
|
2454 mulsd(xmm2, xmm4); |
|
2455 andl(edx, 32767); |
|
2456 cmpl(edx, 16529); |
|
2457 jcc(Assembler::above, L_2TAG_PACKET_12_0_2); |
|
2458 mulsd(xmm0, xmm0); |
|
2459 paddd(xmm5, xmm6); |
|
2460 addpd(xmm3, xmm7); |
|
2461 mulsd(xmm2, xmm5); |
|
2462 pshufd(xmm6, xmm5, 238); |
|
2463 mulpd(xmm0, xmm3); |
|
2464 addsd(xmm2, xmm6); |
|
2465 pshufd(xmm3, xmm0, 238); |
|
2466 addl(eax, 1023); |
|
2467 shll(eax, 20); |
|
2468 orl(eax, tmp1); |
|
2469 movdl(xmm4, eax); |
|
2470 mulsd(xmm0, xmm5); |
|
2471 mulsd(xmm3, xmm5); |
|
2472 addsd(xmm0, xmm2); |
|
2473 psllq(xmm4, 32); |
|
2474 addsd(xmm0, xmm3); |
|
2475 movdqu(xmm1, xmm0); |
|
2476 addsd(xmm0, xmm5); |
|
2477 mulsd(xmm0, xmm4); |
|
2478 pextrw(eax, xmm0, 3); |
|
2479 andl(eax, 32752); |
|
2480 jcc(Assembler::equal, L_2TAG_PACKET_13_0_2); |
|
2481 cmpl(eax, 32752); |
|
2482 jcc(Assembler::equal, L_2TAG_PACKET_14_0_2); |
|
2483 |
|
2484 bind(L_2TAG_PACKET_52_0_2); |
|
2485 jmp(B1_5); |
|
2486 |
|
2487 bind(L_2TAG_PACKET_45_0_2); |
|
2488 movq(xmm0, Address(rsp, 8)); |
|
2489 xorpd(xmm2, xmm2); |
|
2490 movl(eax, 49136); |
|
2491 pinsrw(xmm2, eax, 3); |
|
2492 addsd(xmm2, xmm0); |
|
2493 pextrw(eax, xmm2, 3); |
|
2494 cmpl(eax, 0); |
|
2495 jcc(Assembler::notEqual, L_2TAG_PACKET_53_0_2); |
|
2496 xorpd(xmm0, xmm0); |
|
2497 movl(eax, 32760); |
|
2498 pinsrw(xmm0, eax, 3); |
|
2499 jmp(B1_5); |
|
2500 |
|
2501 bind(L_2TAG_PACKET_53_0_2); |
|
2502 movq(xmm1, Address(rsp, 16)); |
|
2503 movdl(edx, xmm1); |
|
2504 movdqu(xmm3, xmm1); |
|
2505 psrlq(xmm3, 20); |
|
2506 movdl(ecx, xmm3); |
|
2507 orl(ecx, edx); |
|
2508 jcc(Assembler::equal, L_2TAG_PACKET_54_0_2); |
|
2509 addsd(xmm1, xmm1); |
|
2510 movdqu(xmm0, xmm1); |
|
2511 jmp(B1_5); |
|
2512 |
|
2513 bind(L_2TAG_PACKET_51_0_2); |
|
2514 pextrw(eax, xmm1, 3); |
|
2515 pextrw(ecx, xmm2, 3); |
|
2516 xorl(eax, ecx); |
|
2517 testl(eax, 32768); |
|
2518 jcc(Assembler::equal, L_2TAG_PACKET_47_0_2); |
|
2519 jmp(L_2TAG_PACKET_46_0_2); |
|
2520 |
|
2521 bind(L_2TAG_PACKET_54_0_2); |
|
2522 pextrw(eax, xmm0, 3); |
|
2523 andl(eax, 32752); |
|
2524 pextrw(edx, xmm1, 3); |
|
2525 xorpd(xmm0, xmm0); |
|
2526 subl(eax, 16368); |
|
2527 xorl(eax, edx); |
|
2528 testl(eax, 32768); |
|
2529 jcc(Assembler::equal, L_2TAG_PACKET_55_0_2); |
|
2530 jmp(B1_5); |
|
2531 |
|
2532 bind(L_2TAG_PACKET_55_0_2); |
|
2533 movl(edx, 32752); |
|
2534 pinsrw(xmm0, edx, 3); |
|
2535 jmp(B1_5); |
|
2536 |
|
2537 bind(L_2TAG_PACKET_17_0_2); |
|
2538 movq(Address(rsp, 24), xmm0); |
|
2539 |
|
2540 bind(B1_3); |
|
2541 movq(xmm0, Address(rsp, 24)); |
|
2542 |
|
2543 bind(L_2TAG_PACKET_56_0_2); |
|
2544 |
|
2545 bind(B1_5); |
|
2546 addq(rsp, 40); |
|
2547 } |
|
2548 |
|
2549 /******************************************************************************/ |
|
2550 // ALGORITHM DESCRIPTION - SIN() |
|
2551 // --------------------- |
|
2552 // |
|
2553 // 1. RANGE REDUCTION |
|
2554 // |
|
2555 // We perform an initial range reduction from X to r with |
|
2556 // |
|
2557 // X =~= N * pi/32 + r |
|
2558 // |
|
2559 // so that |r| <= pi/64 + epsilon. We restrict inputs to those |
|
2560 // where |N| <= 932560. Beyond this, the range reduction is |
|
2561 // insufficiently accurate. For extremely small inputs, |
|
2562 // denormalization can occur internally, impacting performance. |
|
2563 // This means that the main path is actually only taken for |
|
2564 // 2^-252 <= |X| < 90112. |
|
2565 // |
|
2566 // To avoid branches, we perform the range reduction to full |
|
2567 // accuracy each time. |
|
2568 // |
|
2569 // X - N * (P_1 + P_2 + P_3) |
|
2570 // |
|
2571 // where P_1 and P_2 are 32-bit numbers (so multiplication by N |
|
2572 // is exact) and P_3 is a 53-bit number. Together, these |
|
2573 // approximate pi/32 well enough for all cases in the restricted |
|
2574 // range. |
|
2575 // |
|
2576 // The main reduction sequence is: |
|
2577 // |
|
2578 // y = 32/pi * x |
|
2579 // N = integer(y) |
|
2580 // (computed by adding and subtracting off SHIFTER) |
|
2581 // |
|
2582 // m_1 = N * P_1 |
|
2583 // m_2 = N * P_2 |
|
2584 // r_1 = x - m_1 |
|
2585 // r = r_1 - m_2 |
|
2586 // (this r can be used for most of the calculation) |
|
2587 // |
|
2588 // c_1 = r_1 - r |
|
2589 // m_3 = N * P_3 |
|
2590 // c_2 = c_1 - m_2 |
|
2591 // c = c_2 - m_3 |
|
2592 // |
|
2593 // 2. MAIN ALGORITHM |
|
2594 // |
|
2595 // The algorithm uses a table lookup based on B = M * pi / 32 |
|
2596 // where M = N mod 64. The stored values are: |
|
2597 // sigma closest power of 2 to cos(B) |
|
2598 // C_hl 53-bit cos(B) - sigma |
|
2599 // S_hi + S_lo 2 * 53-bit sin(B) |
|
2600 // |
|
2601 // The computation is organized as follows: |
|
2602 // |
|
2603 // sin(B + r + c) = [sin(B) + sigma * r] + |
|
2604 // r * (cos(B) - sigma) + |
|
2605 // sin(B) * [cos(r + c) - 1] + |
|
2606 // cos(B) * [sin(r + c) - r] |
|
2607 // |
|
2608 // which is approximately: |
|
2609 // |
|
2610 // [S_hi + sigma * r] + |
|
2611 // C_hl * r + |
|
2612 // S_lo + S_hi * [(cos(r) - 1) - r * c] + |
|
2613 // (C_hl + sigma) * [(sin(r) - r) + c] |
|
2614 // |
|
2615 // and this is what is actually computed. We separate this sum |
|
2616 // into four parts: |
|
2617 // |
|
2618 // hi + med + pols + corr |
|
2619 // |
|
2620 // where |
|
2621 // |
|
2622 // hi = S_hi + sigma r |
|
2623 // med = C_hl * r |
|
2624 // pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) |
|
2625 // corr = S_lo + c * ((C_hl + sigma) - S_hi * r) |
|
2626 // |
|
2627 // 3. POLYNOMIAL |
|
2628 // |
|
2629 // The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * |
|
2630 // (sin(r) - r) can be rearranged freely, since it is quite |
|
2631 // small, so we exploit parallelism to the fullest. |
|
2632 // |
|
2633 // psc4 = SC_4 * r_1 |
|
2634 // msc4 = psc4 * r |
|
2635 // r2 = r * r |
|
2636 // msc2 = SC_2 * r2 |
|
2637 // r4 = r2 * r2 |
|
2638 // psc3 = SC_3 + msc4 |
|
2639 // psc1 = SC_1 + msc2 |
|
2640 // msc3 = r4 * psc3 |
|
2641 // sincospols = psc1 + msc3 |
|
2642 // pols = sincospols * |
|
2643 // <S_hi * r^2 | (C_hl + sigma) * r^3> |
|
2644 // |
|
2645 // 4. CORRECTION TERM |
|
2646 // |
|
2647 // This is where the "c" component of the range reduction is |
|
2648 // taken into account; recall that just "r" is used for most of |
|
2649 // the calculation. |
|
2650 // |
|
2651 // -c = m_3 - c_2 |
|
2652 // -d = S_hi * r - (C_hl + sigma) |
|
2653 // corr = -c * -d + S_lo |
|
2654 // |
|
2655 // 5. COMPENSATED SUMMATIONS |
|
2656 // |
|
2657 // The two successive compensated summations add up the high |
|
2658 // and medium parts, leaving just the low parts to add up at |
|
2659 // the end. |
|
2660 // |
|
2661 // rs = sigma * r |
|
2662 // res_int = S_hi + rs |
|
2663 // k_0 = S_hi - res_int |
|
2664 // k_2 = k_0 + rs |
|
2665 // med = C_hl * r |
|
2666 // res_hi = res_int + med |
|
2667 // k_1 = res_int - res_hi |
|
2668 // k_3 = k_1 + med |
|
2669 // |
|
2670 // 6. FINAL SUMMATION |
|
2671 // |
|
2672 // We now add up all the small parts: |
|
2673 // |
|
2674 // res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3 |
|
2675 // |
|
2676 // Now the overall result is just: |
|
2677 // |
|
2678 // res_hi + res_lo |
|
2679 // |
|
2680 // 7. SMALL ARGUMENTS |
|
2681 // |
|
2682 // If |x| < SNN (SNN meaning the smallest normal number), we |
|
2683 // simply perform 0.1111111 ... 1111 * x. For SNN <= |x|, we |
|
2684 // do 2^-55 * (2^55 * x - x). |
|
2685 // |
|
2686 // Special cases: |
|
2687 // sin(NaN) = quiet NaN, and raise invalid exception |
|
2688 // sin(INF) = NaN and raise invalid exception |
|
2689 // sin(+/-0) = +/-0 |
|
2690 // |
|
2691 /******************************************************************************/ |
|
2692 |
|
2693 ALIGNED_(16) juint _ONEHALF[] = |
|
2694 { |
|
2695 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL |
|
2696 }; |
|
2697 |
|
2698 ALIGNED_(16) juint _P_2[] = |
|
2699 { |
|
2700 0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL |
|
2701 }; |
|
2702 |
|
2703 ALIGNED_(16) juint _SC_4[] = |
|
2704 { |
|
2705 0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL |
|
2706 }; |
|
2707 |
|
2708 ALIGNED_(16) juint _Ctable[] = |
|
2709 { |
|
2710 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
|
2711 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, |
|
2712 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, |
|
2713 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, |
|
2714 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, |
|
2715 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, |
|
2716 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, |
|
2717 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, |
|
2718 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, |
|
2719 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, |
|
2720 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, |
|
2721 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, |
|
2722 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, |
|
2723 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, |
|
2724 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, |
|
2725 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, |
|
2726 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, |
|
2727 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, |
|
2728 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, |
|
2729 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, |
|
2730 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, |
|
2731 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, |
|
2732 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, |
|
2733 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, |
|
2734 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, |
|
2735 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, |
|
2736 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
|
2737 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, |
|
2738 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, |
|
2739 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, |
|
2740 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, |
|
2741 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, |
|
2742 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, |
|
2743 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, |
|
2744 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, |
|
2745 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, |
|
2746 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, |
|
2747 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, |
|
2748 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, |
|
2749 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, |
|
2750 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, |
|
2751 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, |
|
2752 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, |
|
2753 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, |
|
2754 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, |
|
2755 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, |
|
2756 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, |
|
2757 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, |
|
2758 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, |
|
2759 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, |
|
2760 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, |
|
2761 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
|
2762 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, |
|
2763 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, |
|
2764 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, |
|
2765 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, |
|
2766 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, |
|
2767 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, |
|
2768 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, |
|
2769 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, |
|
2770 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, |
|
2771 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, |
|
2772 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, |
|
2773 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, |
|
2774 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, |
|
2775 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, |
|
2776 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, |
|
2777 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, |
|
2778 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, |
|
2779 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, |
|
2780 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, |
|
2781 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, |
|
2782 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, |
|
2783 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, |
|
2784 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, |
|
2785 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, |
|
2786 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, |
|
2787 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, |
|
2788 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, |
|
2789 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, |
|
2790 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, |
|
2791 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, |
|
2792 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, |
|
2793 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, |
|
2794 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, |
|
2795 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, |
|
2796 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, |
|
2797 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, |
|
2798 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, |
|
2799 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, |
|
2800 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, |
|
2801 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, |
|
2802 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, |
|
2803 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, |
|
2804 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, |
|
2805 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, |
|
2806 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, |
|
2807 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, |
|
2808 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, |
|
2809 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, |
|
2810 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, |
|
2811 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, |
|
2812 0x00000000UL, 0x3ff00000UL |
|
2813 }; |
|
2814 |
|
2815 ALIGNED_(16) juint _SC_2[] = |
|
2816 { |
|
2817 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL |
|
2818 }; |
|
2819 |
|
2820 ALIGNED_(16) juint _SC_3[] = |
|
2821 { |
|
2822 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL |
|
2823 }; |
|
2824 |
|
2825 ALIGNED_(16) juint _SC_1[] = |
|
2826 { |
|
2827 0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL |
|
2828 }; |
|
2829 |
|
2830 ALIGNED_(16) juint _PI_INV_TABLE[] = |
|
2831 { |
|
2832 0x00000000UL, 0x00000000UL, 0xa2f9836eUL, 0x4e441529UL, 0xfc2757d1UL, |
|
2833 0xf534ddc0UL, 0xdb629599UL, 0x3c439041UL, 0xfe5163abUL, 0xdebbc561UL, |
|
2834 0xb7246e3aUL, 0x424dd2e0UL, 0x06492eeaUL, 0x09d1921cUL, 0xfe1deb1cUL, |
|
2835 0xb129a73eUL, 0xe88235f5UL, 0x2ebb4484UL, 0xe99c7026UL, 0xb45f7e41UL, |
|
2836 0x3991d639UL, 0x835339f4UL, 0x9c845f8bUL, 0xbdf9283bUL, 0x1ff897ffUL, |
|
2837 0xde05980fUL, 0xef2f118bUL, 0x5a0a6d1fUL, 0x6d367ecfUL, 0x27cb09b7UL, |
|
2838 0x4f463f66UL, 0x9e5fea2dUL, 0x7527bac7UL, 0xebe5f17bUL, 0x3d0739f7UL, |
|
2839 0x8a5292eaUL, 0x6bfb5fb1UL, 0x1f8d5d08UL, 0x56033046UL, 0xfc7b6babUL, |
|
2840 0xf0cfbc21UL |
|
2841 }; |
|
2842 |
|
2843 ALIGNED_(8) juint _PI_4[] = |
|
2844 { |
|
2845 0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL |
|
2846 }; |
|
2847 |
|
2848 ALIGNED_(8) juint _PI32INV[] = |
|
2849 { |
|
2850 0x6dc9c883UL, 0x40245f30UL |
|
2851 }; |
|
2852 |
|
2853 ALIGNED_(8) juint _SHIFTER[] = |
|
2854 { |
|
2855 0x00000000UL, 0x43380000UL |
|
2856 }; |
|
2857 |
|
2858 ALIGNED_(8) juint _SIGN_MASK[] = |
|
2859 { |
|
2860 0x00000000UL, 0x80000000UL |
|
2861 }; |
|
2862 |
|
2863 ALIGNED_(8) juint _P_3[] = |
|
2864 { |
|
2865 0x2e037073UL, 0x3b63198aUL |
|
2866 }; |
|
2867 |
|
2868 ALIGNED_(8) juint _ALL_ONES[] = |
|
2869 { |
|
2870 0xffffffffUL, 0x3fefffffUL |
|
2871 }; |
|
2872 |
|
2873 ALIGNED_(8) juint _TWO_POW_55[] = |
|
2874 { |
|
2875 0x00000000UL, 0x43600000UL |
|
2876 }; |
|
2877 |
|
2878 ALIGNED_(8) juint _TWO_POW_M55[] = |
|
2879 { |
|
2880 0x00000000UL, 0x3c800000ULL |
|
2881 }; |
|
2882 |
|
2883 ALIGNED_(8) juint _P_1[] = |
|
2884 { |
|
2885 0x54400000UL, 0x3fb921fbUL |
|
2886 }; |
|
2887 |
|
2888 ALIGNED_(8) juint _NEG_ZERO[] = |
|
2889 { |
|
2890 0x00000000UL, 0x80000000UL |
|
2891 }; |
|
2892 |
|
2893 void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ebx, Register ecx, Register edx, Register tmp1, Register tmp2, Register tmp3, Register tmp4) { |
|
2894 Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; |
|
2895 Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; |
|
2896 Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; |
|
2897 Label L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1; |
|
2898 Label L_2TAG_PACKET_12_0_1, B1_1, B1_2, B1_4, start; |
|
2899 |
|
2900 assert_different_registers(tmp1, tmp2, tmp3, tmp4, eax, ebx, ecx, edx); |
|
2901 address ONEHALF = (address)_ONEHALF; |
|
2902 address P_2 = (address)_P_2; |
|
2903 address SC_4 = (address)_SC_4; |
|
2904 address Ctable = (address)_Ctable; |
|
2905 address SC_2 = (address)_SC_2; |
|
2906 address SC_3 = (address)_SC_3; |
|
2907 address SC_1 = (address)_SC_1; |
|
2908 address PI_INV_TABLE = (address)_PI_INV_TABLE; |
|
2909 address PI_4 = (address)_PI_4; |
|
2910 address PI32INV = (address)_PI32INV; |
|
2911 address SHIFTER = (address)_SHIFTER; |
|
2912 address SIGN_MASK = (address)_SIGN_MASK; |
|
2913 address P_3 = (address)_P_3; |
|
2914 address ALL_ONES = (address)_ALL_ONES; |
|
2915 address TWO_POW_55 = (address)_TWO_POW_55; |
|
2916 address TWO_POW_M55 = (address)_TWO_POW_M55; |
|
2917 address P_1 = (address)_P_1; |
|
2918 address NEG_ZERO = (address)_NEG_ZERO; |
|
2919 |
|
2920 bind(start); |
|
2921 push(rbx); |
|
2922 subq(rsp, 16); |
|
2923 movsd(Address(rsp, 8), xmm0); |
|
2924 movl(eax, Address(rsp, 12)); |
|
2925 movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL |
|
2926 movq(xmm2, ExternalAddress(SHIFTER)); //0x00000000UL, 0x43380000UL |
|
2927 andl(eax, 2147418112); |
|
2928 subl(eax, 808452096); |
|
2929 cmpl(eax, 281346048); |
|
2930 jcc(Assembler::above, L_2TAG_PACKET_0_0_1); |
|
2931 mulsd(xmm1, xmm0); |
|
2932 movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL |
|
2933 movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL |
|
2934 pand(xmm4, xmm0); |
|
2935 por(xmm5, xmm4); |
|
2936 addpd(xmm1, xmm5); |
|
2937 cvttsd2sil(edx, xmm1); |
|
2938 cvtsi2sdl(xmm1, edx); |
|
2939 movdqu(xmm6, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL |
|
2940 mov64(r8, 0x3fb921fb54400000); |
|
2941 movdq(xmm3, r8); |
|
2942 movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL |
|
2943 pshufd(xmm4, xmm0, 68); |
|
2944 mulsd(xmm3, xmm1); |
|
2945 movddup(xmm1, xmm1); |
|
2946 andl(edx, 63); |
|
2947 shll(edx, 5); |
|
2948 lea(rax, ExternalAddress(Ctable)); |
|
2949 addq(rax, rdx); |
|
2950 mulpd(xmm6, xmm1); |
|
2951 mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL |
|
2952 subsd(xmm4, xmm3); |
|
2953 movq(xmm7, Address(rax, 8)); |
|
2954 subsd(xmm0, xmm3); |
|
2955 movddup(xmm3, xmm4); |
|
2956 subsd(xmm4, xmm6); |
|
2957 pshufd(xmm0, xmm0, 68); |
|
2958 movdqu(xmm2, Address(rax, 0)); |
|
2959 mulpd(xmm5, xmm0); |
|
2960 subpd(xmm0, xmm6); |
|
2961 mulsd(xmm7, xmm4); |
|
2962 subsd(xmm3, xmm4); |
|
2963 mulpd(xmm5, xmm0); |
|
2964 mulpd(xmm0, xmm0); |
|
2965 subsd(xmm3, xmm6); |
|
2966 movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL |
|
2967 subsd(xmm1, xmm3); |
|
2968 movq(xmm3, Address(rax, 24)); |
|
2969 addsd(xmm2, xmm3); |
|
2970 subsd(xmm7, xmm2); |
|
2971 mulsd(xmm2, xmm4); |
|
2972 mulpd(xmm6, xmm0); |
|
2973 mulsd(xmm3, xmm4); |
|
2974 mulpd(xmm2, xmm0); |
|
2975 mulpd(xmm0, xmm0); |
|
2976 addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL |
|
2977 mulsd(xmm4, Address(rax, 0)); |
|
2978 addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL |
|
2979 mulpd(xmm5, xmm0); |
|
2980 movdqu(xmm0, xmm3); |
|
2981 addsd(xmm3, Address(rax, 8)); |
|
2982 mulpd(xmm1, xmm7); |
|
2983 movdqu(xmm7, xmm4); |
|
2984 addsd(xmm4, xmm3); |
|
2985 addpd(xmm6, xmm5); |
|
2986 movq(xmm5, Address(rax, 8)); |
|
2987 subsd(xmm5, xmm3); |
|
2988 subsd(xmm3, xmm4); |
|
2989 addsd(xmm1, Address(rax, 16)); |
|
2990 mulpd(xmm6, xmm2); |
|
2991 addsd(xmm5, xmm0); |
|
2992 addsd(xmm3, xmm7); |
|
2993 addsd(xmm1, xmm5); |
|
2994 addsd(xmm1, xmm3); |
|
2995 addsd(xmm1, xmm6); |
|
2996 unpckhpd(xmm6, xmm6); |
|
2997 movdqu(xmm0, xmm4); |
|
2998 addsd(xmm1, xmm6); |
|
2999 addsd(xmm0, xmm1); |
|
3000 jmp(B1_4); |
|
3001 |
|
3002 bind(L_2TAG_PACKET_0_0_1); |
|
3003 jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); |
|
3004 shrl(eax, 20); |
|
3005 cmpl(eax, 3325); |
|
3006 jcc(Assembler::notEqual, L_2TAG_PACKET_2_0_1); |
|
3007 mulsd(xmm0, ExternalAddress(ALL_ONES)); //0xffffffffUL, 0x3fefffffUL |
|
3008 jmp(B1_4); |
|
3009 |
|
3010 bind(L_2TAG_PACKET_2_0_1); |
|
3011 movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL |
|
3012 mulsd(xmm3, xmm0); |
|
3013 subsd(xmm3, xmm0); |
|
3014 mulsd(xmm3, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL |
|
3015 jmp(B1_4); |
|
3016 |
|
3017 bind(L_2TAG_PACKET_1_0_1); |
|
3018 pextrw(eax, xmm0, 3); |
|
3019 andl(eax, 32752); |
|
3020 cmpl(eax, 32752); |
|
3021 jcc(Assembler::equal, L_2TAG_PACKET_3_0_1); |
|
3022 pextrw(ecx, xmm0, 3); |
|
3023 andl(ecx, 32752); |
|
3024 subl(ecx, 16224); |
|
3025 shrl(ecx, 7); |
|
3026 andl(ecx, 65532); |
|
3027 lea(r11, ExternalAddress(PI_INV_TABLE)); |
|
3028 addq(rcx, r11); |
|
3029 movdq(rax, xmm0); |
|
3030 movl(r10, Address(rcx, 20)); |
|
3031 movl(r8, Address(rcx, 24)); |
|
3032 movl(edx, eax); |
|
3033 shrq(rax, 21); |
|
3034 orl(eax, INT_MIN); |
|
3035 shrl(eax, 11); |
|
3036 movl(r9, r10); |
|
3037 imulq(r10, rdx); |
|
3038 imulq(r9, rax); |
|
3039 imulq(r8, rax); |
|
3040 movl(rsi, Address(rcx, 16)); |
|
3041 movl(rdi, Address(rcx, 12)); |
|
3042 movl(r11, r10); |
|
3043 shrq(r10, 32); |
|
3044 addq(r9, r10); |
|
3045 addq(r11, r8); |
|
3046 movl(r8, r11); |
|
3047 shrq(r11, 32); |
|
3048 addq(r9, r11); |
|
3049 movl(r10, rsi); |
|
3050 imulq(rsi, rdx); |
|
3051 imulq(r10, rax); |
|
3052 movl(r11, rdi); |
|
3053 imulq(rdi, rdx); |
|
3054 movl(ebx, rsi); |
|
3055 shrq(rsi, 32); |
|
3056 addq(r9, rbx); |
|
3057 movl(ebx, r9); |
|
3058 shrq(r9, 32); |
|
3059 addq(r10, rsi); |
|
3060 addq(r10, r9); |
|
3061 shlq(rbx, 32); |
|
3062 orq(r8, rbx); |
|
3063 imulq(r11, rax); |
|
3064 movl(r9, Address(rcx, 8)); |
|
3065 movl(rsi, Address(rcx, 4)); |
|
3066 movl(ebx, rdi); |
|
3067 shrq(rdi, 32); |
|
3068 addq(r10, rbx); |
|
3069 movl(ebx, r10); |
|
3070 shrq(r10, 32); |
|
3071 addq(r11, rdi); |
|
3072 addq(r11, r10); |
|
3073 movq(rdi, r9); |
|
3074 imulq(r9, rdx); |
|
3075 imulq(rdi, rax); |
|
3076 movl(r10, r9); |
|
3077 shrq(r9, 32); |
|
3078 addq(r11, r10); |
|
3079 movl(r10, r11); |
|
3080 shrq(r11, 32); |
|
3081 addq(rdi, r9); |
|
3082 addq(rdi, r11); |
|
3083 movq(r9, rsi); |
|
3084 imulq(rsi, rdx); |
|
3085 imulq(r9, rax); |
|
3086 shlq(r10, 32); |
|
3087 orq(r10, rbx); |
|
3088 movl(eax, Address(rcx, 0)); |
|
3089 movl(r11, rsi); |
|
3090 shrq(rsi, 32); |
|
3091 addq(rdi, r11); |
|
3092 movl(r11, rdi); |
|
3093 shrq(rdi, 32); |
|
3094 addq(r9, rsi); |
|
3095 addq(r9, rdi); |
|
3096 imulq(rdx, rax); |
|
3097 pextrw(ebx, xmm0, 3); |
|
3098 lea(rdi, ExternalAddress(PI_INV_TABLE)); |
|
3099 subq(rcx, rdi); |
|
3100 addl(ecx, ecx); |
|
3101 addl(ecx, ecx); |
|
3102 addl(ecx, ecx); |
|
3103 addl(ecx, 19); |
|
3104 movl(rsi, 32768); |
|
3105 andl(rsi, ebx); |
|
3106 shrl(ebx, 4); |
|
3107 andl(ebx, 2047); |
|
3108 subl(ebx, 1023); |
|
3109 subl(ecx, ebx); |
|
3110 addq(r9, rdx); |
|
3111 movl(edx, ecx); |
|
3112 addl(edx, 32); |
|
3113 cmpl(ecx, 1); |
|
3114 jcc(Assembler::less, L_2TAG_PACKET_4_0_1); |
|
3115 negl(ecx); |
|
3116 addl(ecx, 29); |
|
3117 shll(r9); |
|
3118 movl(rdi, r9); |
|
3119 andl(r9, 536870911); |
|
3120 testl(r9, 268435456); |
|
3121 jcc(Assembler::notEqual, L_2TAG_PACKET_5_0_1); |
|
3122 shrl(r9); |
|
3123 movl(ebx, 0); |
|
3124 shlq(r9, 32); |
|
3125 orq(r9, r11); |
|
3126 |
|
3127 bind(L_2TAG_PACKET_6_0_1); |
|
3128 |
|
3129 bind(L_2TAG_PACKET_7_0_1); |
|
3130 |
|
3131 cmpq(r9, 0); |
|
3132 jcc(Assembler::equal, L_2TAG_PACKET_8_0_1); |
|
3133 |
|
3134 bind(L_2TAG_PACKET_9_0_1); |
|
3135 bsrq(r11, r9); |
|
3136 movl(ecx, 29); |
|
3137 subl(ecx, r11); |
|
3138 jcc(Assembler::lessEqual, L_2TAG_PACKET_10_0_1); |
|
3139 shlq(r9); |
|
3140 movq(rax, r10); |
|
3141 shlq(r10); |
|
3142 addl(edx, ecx); |
|
3143 negl(ecx); |
|
3144 addl(ecx, 64); |
|
3145 shrq(rax); |
|
3146 shrq(r8); |
|
3147 orq(r9, rax); |
|
3148 orq(r10, r8); |
|
3149 |
|
3150 bind(L_2TAG_PACKET_11_0_1); |
|
3151 cvtsi2sdq(xmm0, r9); |
|
3152 shrq(r10, 1); |
|
3153 cvtsi2sdq(xmm3, r10); |
|
3154 xorpd(xmm4, xmm4); |
|
3155 shll(edx, 4); |
|
3156 negl(edx); |
|
3157 addl(edx, 16368); |
|
3158 orl(edx, rsi); |
|
3159 xorl(edx, ebx); |
|
3160 pinsrw(xmm4, edx, 3); |
|
3161 movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL |
|
3162 movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL |
|
3163 xorpd(xmm5, xmm5); |
|
3164 subl(edx, 1008); |
|
3165 pinsrw(xmm5, edx, 3); |
|
3166 mulsd(xmm0, xmm4); |
|
3167 shll(rsi, 16); |
|
3168 sarl(rsi, 31); |
|
3169 mulsd(xmm3, xmm5); |
|
3170 movdqu(xmm1, xmm0); |
|
3171 mulsd(xmm0, xmm2); |
|
3172 shrl(rdi, 29); |
|
3173 addsd(xmm1, xmm3); |
|
3174 mulsd(xmm3, xmm2); |
|
3175 addl(rdi, rsi); |
|
3176 xorl(rdi, rsi); |
|
3177 mulsd(xmm6, xmm1); |
|
3178 movl(eax, rdi); |
|
3179 addsd(xmm6, xmm3); |
|
3180 movdqu(xmm2, xmm0); |
|
3181 addsd(xmm0, xmm6); |
|
3182 subsd(xmm2, xmm0); |
|
3183 addsd(xmm6, xmm2); |
|
3184 |
|
3185 bind(L_2TAG_PACKET_12_0_1); |
|
3186 movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL |
|
3187 mulsd(xmm1, xmm0); |
|
3188 movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL |
|
3189 movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL |
|
3190 pand(xmm4, xmm0); |
|
3191 por(xmm5, xmm4); |
|
3192 addpd(xmm1, xmm5); |
|
3193 cvttsd2sil(edx, xmm1); |
|
3194 cvtsi2sdl(xmm1, edx); |
|
3195 movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL |
|
3196 movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL |
|
3197 mulsd(xmm3, xmm1); |
|
3198 unpcklpd(xmm1, xmm1); |
|
3199 shll(eax, 3); |
|
3200 addl(edx, 1865216); |
|
3201 movdqu(xmm4, xmm0); |
|
3202 addl(edx, eax); |
|
3203 andl(edx, 63); |
|
3204 movdqu(xmm5, ExternalAddress(SC_4)); //0x54400000UL, 0x3fb921fbUL |
|
3205 lea(rax, ExternalAddress(Ctable)); |
|
3206 shll(edx, 5); |
|
3207 addq(rax, rdx); |
|
3208 mulpd(xmm2, xmm1); |
|
3209 subsd(xmm0, xmm3); |
|
3210 mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL |
|
3211 subsd(xmm4, xmm3); |
|
3212 movq(xmm7, Address(rax, 8)); |
|
3213 unpcklpd(xmm0, xmm0); |
|
3214 movdqu(xmm3, xmm4); |
|
3215 subsd(xmm4, xmm2); |
|
3216 mulpd(xmm5, xmm0); |
|
3217 subpd(xmm0, xmm2); |
|
3218 mulsd(xmm7, xmm4); |
|
3219 subsd(xmm3, xmm4); |
|
3220 mulpd(xmm5, xmm0); |
|
3221 mulpd(xmm0, xmm0); |
|
3222 subsd(xmm3, xmm2); |
|
3223 movdqu(xmm2, Address(rax, 0)); |
|
3224 subsd(xmm1, xmm3); |
|
3225 movq(xmm3, Address(rax, 24)); |
|
3226 addsd(xmm2, xmm3); |
|
3227 subsd(xmm7, xmm2); |
|
3228 subsd(xmm1, xmm6); |
|
3229 movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL |
|
3230 mulsd(xmm2, xmm4); |
|
3231 mulpd(xmm6, xmm0); |
|
3232 mulsd(xmm3, xmm4); |
|
3233 mulpd(xmm2, xmm0); |
|
3234 mulpd(xmm0, xmm0); |
|
3235 addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL |
|
3236 mulsd(xmm4, Address(rax, 0)); |
|
3237 addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL |
|
3238 mulpd(xmm5, xmm0); |
|
3239 movdqu(xmm0, xmm3); |
|
3240 addsd(xmm3, Address(rax, 8)); |
|
3241 mulpd(xmm1, xmm7); |
|
3242 movdqu(xmm7, xmm4); |
|
3243 addsd(xmm4, xmm3); |
|
3244 addpd(xmm6, xmm5); |
|
3245 movq(xmm5, Address(rax, 8)); |
|
3246 subsd(xmm5, xmm3); |
|
3247 subsd(xmm3, xmm4); |
|
3248 addsd(xmm1, Address(rax, 16)); |
|
3249 mulpd(xmm6, xmm2); |
|
3250 addsd(xmm5, xmm0); |
|
3251 addsd(xmm3, xmm7); |
|
3252 addsd(xmm1, xmm5); |
|
3253 addsd(xmm1, xmm3); |
|
3254 addsd(xmm1, xmm6); |
|
3255 unpckhpd(xmm6, xmm6); |
|
3256 movdqu(xmm0, xmm4); |
|
3257 addsd(xmm1, xmm6); |
|
3258 addsd(xmm0, xmm1); |
|
3259 jmp(B1_4); |
|
3260 |
|
3261 bind(L_2TAG_PACKET_8_0_1); |
|
3262 addl(edx, 64); |
|
3263 movq(r9, r10); |
|
3264 movq(r10, r8); |
|
3265 movl(r8, 0); |
|
3266 cmpq(r9, 0); |
|
3267 jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_1); |
|
3268 addl(edx, 64); |
|
3269 movq(r9, r10); |
|
3270 movq(r10, r8); |
|
3271 cmpq(r9, 0); |
|
3272 jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_1); |
|
3273 xorpd(xmm0, xmm0); |
|
3274 xorpd(xmm6, xmm6); |
|
3275 jmp(L_2TAG_PACKET_12_0_1); |
|
3276 |
|
3277 bind(L_2TAG_PACKET_10_0_1); |
|
3278 jcc(Assembler::equal, L_2TAG_PACKET_11_0_1); |
|
3279 negl(ecx); |
|
3280 shrq(r10); |
|
3281 movq(rax, r9); |
|
3282 shrq(r9); |
|
3283 subl(edx, ecx); |
|
3284 negl(ecx); |
|
3285 addl(ecx, 64); |
|
3286 shlq(rax); |
|
3287 orq(r10, rax); |
|
3288 jmp(L_2TAG_PACKET_11_0_1); |
|
3289 |
|
3290 bind(L_2TAG_PACKET_4_0_1); |
|
3291 negl(ecx); |
|
3292 shlq(r9, 32); |
|
3293 orq(r9, r11); |
|
3294 shlq(r9); |
|
3295 movq(rdi, r9); |
|
3296 testl(r9, INT_MIN); |
|
3297 jcc(Assembler::notEqual, L_2TAG_PACKET_13_0_1); |
|
3298 shrl(r9); |
|
3299 movl(ebx, 0); |
|
3300 shrq(rdi, 3); |
|
3301 jmp(L_2TAG_PACKET_7_0_1); |
|
3302 |
|
3303 bind(L_2TAG_PACKET_5_0_1); |
|
3304 shrl(r9); |
|
3305 movl(ebx, 536870912); |
|
3306 shrl(ebx); |
|
3307 shlq(r9, 32); |
|
3308 orq(r9, r11); |
|
3309 shlq(rbx, 32); |
|
3310 addl(rdi, 536870912); |
|
3311 movl(rcx, 0); |
|
3312 movl(r11, 0); |
|
3313 subq(rcx, r8); |
|
3314 sbbq(r11, r10); |
|
3315 sbbq(rbx, r9); |
|
3316 movq(r8, rcx); |
|
3317 movq(r10, r11); |
|
3318 movq(r9, rbx); |
|
3319 movl(ebx, 32768); |
|
3320 jmp(L_2TAG_PACKET_6_0_1); |
|
3321 |
|
3322 bind(L_2TAG_PACKET_13_0_1); |
|
3323 shrl(r9); |
|
3324 mov64(rbx, 0x100000000); |
|
3325 shrq(rbx); |
|
3326 movl(rcx, 0); |
|
3327 movl(r11, 0); |
|
3328 subq(rcx, r8); |
|
3329 sbbq(r11, r10); |
|
3330 sbbq(rbx, r9); |
|
3331 movq(r8, rcx); |
|
3332 movq(r10, r11); |
|
3333 movq(r9, rbx); |
|
3334 movl(ebx, 32768); |
|
3335 shrq(rdi, 3); |
|
3336 addl(rdi, 536870912); |
|
3337 jmp(L_2TAG_PACKET_7_0_1); |
|
3338 |
|
3339 bind(L_2TAG_PACKET_3_0_1); |
|
3340 movq(xmm0, Address(rsp, 8)); |
|
3341 mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL |
|
3342 movq(Address(rsp, 0), xmm0); |
|
3343 |
|
3344 bind(L_2TAG_PACKET_14_0_1); |
|
3345 |
|
3346 bind(B1_4); |
|
3347 addq(rsp, 16); |
|
3348 pop(rbx); |
|
3349 } |
|
3350 |
|
3351 /******************************************************************************/ |
|
3352 // ALGORITHM DESCRIPTION - COS() |
|
3353 // --------------------- |
|
3354 // |
|
3355 // 1. RANGE REDUCTION |
|
3356 // |
|
3357 // We perform an initial range reduction from X to r with |
|
3358 // |
|
3359 // X =~= N * pi/32 + r |
|
3360 // |
|
3361 // so that |r| <= pi/64 + epsilon. We restrict inputs to those |
|
3362 // where |N| <= 932560. Beyond this, the range reduction is |
|
3363 // insufficiently accurate. For extremely small inputs, |
|
3364 // denormalization can occur internally, impacting performance. |
|
3365 // This means that the main path is actually only taken for |
|
3366 // 2^-252 <= |X| < 90112. |
|
3367 // |
|
3368 // To avoid branches, we perform the range reduction to full |
|
3369 // accuracy each time. |
|
3370 // |
|
3371 // X - N * (P_1 + P_2 + P_3) |
|
3372 // |
|
3373 // where P_1 and P_2 are 32-bit numbers (so multiplication by N |
|
3374 // is exact) and P_3 is a 53-bit number. Together, these |
|
3375 // approximate pi/32 well enough for all cases in the restricted |
|
3376 // range. |
|
3377 // |
|
3378 // The main reduction sequence is: |
|
3379 // |
|
3380 // y = 32/pi * x |
|
3381 // N = integer(y) |
|
3382 // (computed by adding and subtracting off SHIFTER) |
|
3383 // |
|
3384 // m_1 = N * P_1 |
|
3385 // m_2 = N * P_2 |
|
3386 // r_1 = x - m_1 |
|
3387 // r = r_1 - m_2 |
|
3388 // (this r can be used for most of the calculation) |
|
3389 // |
|
3390 // c_1 = r_1 - r |
|
3391 // m_3 = N * P_3 |
|
3392 // c_2 = c_1 - m_2 |
|
3393 // c = c_2 - m_3 |
|
3394 // |
|
3395 // 2. MAIN ALGORITHM |
|
3396 // |
|
3397 // The algorithm uses a table lookup based on B = M * pi / 32 |
|
3398 // where M = N mod 64. The stored values are: |
|
3399 // sigma closest power of 2 to cos(B) |
|
3400 // C_hl 53-bit cos(B) - sigma |
|
3401 // S_hi + S_lo 2 * 53-bit sin(B) |
|
3402 // |
|
3403 // The computation is organized as follows: |
|
3404 // |
|
3405 // sin(B + r + c) = [sin(B) + sigma * r] + |
|
3406 // r * (cos(B) - sigma) + |
|
3407 // sin(B) * [cos(r + c) - 1] + |
|
3408 // cos(B) * [sin(r + c) - r] |
|
3409 // |
|
3410 // which is approximately: |
|
3411 // |
|
3412 // [S_hi + sigma * r] + |
|
3413 // C_hl * r + |
|
3414 // S_lo + S_hi * [(cos(r) - 1) - r * c] + |
|
3415 // (C_hl + sigma) * [(sin(r) - r) + c] |
|
3416 // |
|
3417 // and this is what is actually computed. We separate this sum |
|
3418 // into four parts: |
|
3419 // |
|
3420 // hi + med + pols + corr |
|
3421 // |
|
3422 // where |
|
3423 // |
|
3424 // hi = S_hi + sigma r |
|
3425 // med = C_hl * r |
|
3426 // pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) |
|
3427 // corr = S_lo + c * ((C_hl + sigma) - S_hi * r) |
|
3428 // |
|
3429 // 3. POLYNOMIAL |
|
3430 // |
|
3431 // The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * |
|
3432 // (sin(r) - r) can be rearranged freely, since it is quite |
|
3433 // small, so we exploit parallelism to the fullest. |
|
3434 // |
|
3435 // psc4 = SC_4 * r_1 |
|
3436 // msc4 = psc4 * r |
|
3437 // r2 = r * r |
|
3438 // msc2 = SC_2 * r2 |
|
3439 // r4 = r2 * r2 |
|
3440 // psc3 = SC_3 + msc4 |
|
3441 // psc1 = SC_1 + msc2 |
|
3442 // msc3 = r4 * psc3 |
|
3443 // sincospols = psc1 + msc3 |
|
3444 // pols = sincospols * |
|
3445 // <S_hi * r^2 | (C_hl + sigma) * r^3> |
|
3446 // |
|
3447 // 4. CORRECTION TERM |
|
3448 // |
|
3449 // This is where the "c" component of the range reduction is |
|
3450 // taken into account; recall that just "r" is used for most of |
|
3451 // the calculation. |
|
3452 // |
|
3453 // -c = m_3 - c_2 |
|
3454 // -d = S_hi * r - (C_hl + sigma) |
|
3455 // corr = -c * -d + S_lo |
|
3456 // |
|
3457 // 5. COMPENSATED SUMMATIONS |
|
3458 // |
|
3459 // The two successive compensated summations add up the high |
|
3460 // and medium parts, leaving just the low parts to add up at |
|
3461 // the end. |
|
3462 // |
|
3463 // rs = sigma * r |
|
3464 // res_int = S_hi + rs |
|
3465 // k_0 = S_hi - res_int |
|
3466 // k_2 = k_0 + rs |
|
3467 // med = C_hl * r |
|
3468 // res_hi = res_int + med |
|
3469 // k_1 = res_int - res_hi |
|
3470 // k_3 = k_1 + med |
|
3471 // |
|
3472 // 6. FINAL SUMMATION |
|
3473 // |
|
3474 // We now add up all the small parts: |
|
3475 // |
|
3476 // res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 |
|
3477 // |
|
3478 // Now the overall result is just: |
|
3479 // |
|
3480 // res_hi + res_lo |
|
3481 // |
|
3482 // 7. SMALL ARGUMENTS |
|
3483 // |
|
3484 // Inputs with |X| < 2^-252 are treated specially as |
|
3485 // 1 - |x|. |
|
3486 // |
|
3487 // Special cases: |
|
3488 // cos(NaN) = quiet NaN, and raise invalid exception |
|
3489 // cos(INF) = NaN and raise invalid exception |
|
3490 // cos(0) = 1 |
|
3491 // |
|
3492 /******************************************************************************/ |
|
3493 |
|
3494 ALIGNED_(8) juint _ONE[] = |
|
3495 { |
|
3496 0x00000000UL, 0x3ff00000UL |
|
3497 }; |
|
3498 |
|
3499 void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) { |
|
3500 Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; |
|
3501 Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; |
|
3502 Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; |
|
3503 Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start; |
|
3504 |
|
3505 assert_different_registers(r8, r9, r10, r11, eax, ecx, edx); |
|
3506 |
|
3507 address ONEHALF = (address)_ONEHALF; |
|
3508 address P_2 = (address)_P_2; |
|
3509 address SC_4 = (address)_SC_4; |
|
3510 address Ctable = (address)_Ctable; |
|
3511 address SC_2 = (address)_SC_2; |
|
3512 address SC_3 = (address)_SC_3; |
|
3513 address SC_1 = (address)_SC_1; |
|
3514 address PI_INV_TABLE = (address)_PI_INV_TABLE; |
|
3515 address PI_4 = (address)_PI_4; |
|
3516 address PI32INV = (address)_PI32INV; |
|
3517 address SIGN_MASK = (address)_SIGN_MASK; |
|
3518 address P_1 = (address)_P_1; |
|
3519 address P_3 = (address)_P_3; |
|
3520 address ONE = (address)_ONE; |
|
3521 address NEG_ZERO = (address)_NEG_ZERO; |
|
3522 |
|
3523 bind(start); |
|
3524 push(rbx); |
|
3525 subq(rsp, 16); |
|
3526 movsd(Address(rsp, 8), xmm0); |
|
3527 |
|
3528 bind(B1_2); |
|
3529 movl(eax, Address(rsp, 12)); |
|
3530 movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL |
|
3531 andl(eax, 2147418112); |
|
3532 subl(eax, 808452096); |
|
3533 cmpl(eax, 281346048); |
|
3534 jcc(Assembler::above, L_2TAG_PACKET_0_0_1); |
|
3535 mulsd(xmm1, xmm0); |
|
3536 movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL |
|
3537 movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL |
|
3538 pand(xmm4, xmm0); |
|
3539 por(xmm5, xmm4); |
|
3540 addpd(xmm1, xmm5); |
|
3541 cvttsd2sil(edx, xmm1); |
|
3542 cvtsi2sdl(xmm1, edx); |
|
3543 movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL |
|
3544 movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL |
|
3545 mulsd(xmm3, xmm1); |
|
3546 unpcklpd(xmm1, xmm1); |
|
3547 addq(rdx, 1865232); |
|
3548 movdqu(xmm4, xmm0); |
|
3549 andq(rdx, 63); |
|
3550 movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL |
|
3551 lea(rax, ExternalAddress(Ctable)); |
|
3552 shlq(rdx, 5); |
|
3553 addq(rax, rdx); |
|
3554 mulpd(xmm2, xmm1); |
|
3555 subsd(xmm0, xmm3); |
|
3556 mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL |
|
3557 subsd(xmm4, xmm3); |
|
3558 movq(xmm7, Address(rax, 8)); |
|
3559 unpcklpd(xmm0, xmm0); |
|
3560 movdqu(xmm3, xmm4); |
|
3561 subsd(xmm4, xmm2); |
|
3562 mulpd(xmm5, xmm0); |
|
3563 subpd(xmm0, xmm2); |
|
3564 movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL |
|
3565 mulsd(xmm7, xmm4); |
|
3566 subsd(xmm3, xmm4); |
|
3567 mulpd(xmm5, xmm0); |
|
3568 mulpd(xmm0, xmm0); |
|
3569 subsd(xmm3, xmm2); |
|
3570 movdqu(xmm2, Address(rax, 0)); |
|
3571 subsd(xmm1, xmm3); |
|
3572 movq(xmm3, Address(rax, 24)); |
|
3573 addsd(xmm2, xmm3); |
|
3574 subsd(xmm7, xmm2); |
|
3575 mulsd(xmm2, xmm4); |
|
3576 mulpd(xmm6, xmm0); |
|
3577 mulsd(xmm3, xmm4); |
|
3578 mulpd(xmm2, xmm0); |
|
3579 mulpd(xmm0, xmm0); |
|
3580 addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL |
|
3581 mulsd(xmm4, Address(rax, 0)); |
|
3582 addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL |
|
3583 mulpd(xmm5, xmm0); |
|
3584 movdqu(xmm0, xmm3); |
|
3585 addsd(xmm3, Address(rax, 8)); |
|
3586 mulpd(xmm1, xmm7); |
|
3587 movdqu(xmm7, xmm4); |
|
3588 addsd(xmm4, xmm3); |
|
3589 addpd(xmm6, xmm5); |
|
3590 movq(xmm5, Address(rax, 8)); |
|
3591 subsd(xmm5, xmm3); |
|
3592 subsd(xmm3, xmm4); |
|
3593 addsd(xmm1, Address(rax, 16)); |
|
3594 mulpd(xmm6, xmm2); |
|
3595 addsd(xmm0, xmm5); |
|
3596 addsd(xmm3, xmm7); |
|
3597 addsd(xmm0, xmm1); |
|
3598 addsd(xmm0, xmm3); |
|
3599 addsd(xmm0, xmm6); |
|
3600 unpckhpd(xmm6, xmm6); |
|
3601 addsd(xmm0, xmm6); |
|
3602 addsd(xmm0, xmm4); |
|
3603 jmp(B1_4); |
|
3604 |
|
3605 bind(L_2TAG_PACKET_0_0_1); |
|
3606 jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); |
|
3607 pextrw(eax, xmm0, 3); |
|
3608 andl(eax, 32767); |
|
3609 pinsrw(xmm0, eax, 3); |
|
3610 movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL |
|
3611 subsd(xmm1, xmm0); |
|
3612 movdqu(xmm0, xmm1); |
|
3613 jmp(B1_4); |
|
3614 |
|
3615 bind(L_2TAG_PACKET_1_0_1); |
|
3616 pextrw(eax, xmm0, 3); |
|
3617 andl(eax, 32752); |
|
3618 cmpl(eax, 32752); |
|
3619 jcc(Assembler::equal, L_2TAG_PACKET_2_0_1); |
|
3620 pextrw(ecx, xmm0, 3); |
|
3621 andl(ecx, 32752); |
|
3622 subl(ecx, 16224); |
|
3623 shrl(ecx, 7); |
|
3624 andl(ecx, 65532); |
|
3625 lea(r11, ExternalAddress(PI_INV_TABLE)); |
|
3626 addq(rcx, r11); |
|
3627 movdq(rax, xmm0); |
|
3628 movl(r10, Address(rcx, 20)); |
|
3629 movl(r8, Address(rcx, 24)); |
|
3630 movl(edx, eax); |
|
3631 shrq(rax, 21); |
|
3632 orl(eax, INT_MIN); |
|
3633 shrl(eax, 11); |
|
3634 movl(r9, r10); |
|
3635 imulq(r10, rdx); |
|
3636 imulq(r9, rax); |
|
3637 imulq(r8, rax); |
|
3638 movl(rsi, Address(rcx, 16)); |
|
3639 movl(rdi, Address(rcx, 12)); |
|
3640 movl(r11, r10); |
|
3641 shrq(r10, 32); |
|
3642 addq(r9, r10); |
|
3643 addq(r11, r8); |
|
3644 movl(r8, r11); |
|
3645 shrq(r11, 32); |
|
3646 addq(r9, r11); |
|
3647 movl(r10, rsi); |
|
3648 imulq(rsi, rdx); |
|
3649 imulq(r10, rax); |
|
3650 movl(r11, rdi); |
|
3651 imulq(rdi, rdx); |
|
3652 movl(rbx, rsi); |
|
3653 shrq(rsi, 32); |
|
3654 addq(r9, rbx); |
|
3655 movl(rbx, r9); |
|
3656 shrq(r9, 32); |
|
3657 addq(r10, rsi); |
|
3658 addq(r10, r9); |
|
3659 shlq(rbx, 32); |
|
3660 orq(r8, rbx); |
|
3661 imulq(r11, rax); |
|
3662 movl(r9, Address(rcx, 8)); |
|
3663 movl(rsi, Address(rcx, 4)); |
|
3664 movl(rbx, rdi); |
|
3665 shrq(rdi, 32); |
|
3666 addq(r10, rbx); |
|
3667 movl(rbx, r10); |
|
3668 shrq(r10, 32); |
|
3669 addq(r11, rdi); |
|
3670 addq(r11, r10); |
|
3671 movq(rdi, r9); |
|
3672 imulq(r9, rdx); |
|
3673 imulq(rdi, rax); |
|
3674 movl(r10, r9); |
|
3675 shrq(r9, 32); |
|
3676 addq(r11, r10); |
|
3677 movl(r10, r11); |
|
3678 shrq(r11, 32); |
|
3679 addq(rdi, r9); |
|
3680 addq(rdi, r11); |
|
3681 movq(r9, rsi); |
|
3682 imulq(rsi, rdx); |
|
3683 imulq(r9, rax); |
|
3684 shlq(r10, 32); |
|
3685 orq(r10, rbx); |
|
3686 movl(eax, Address(rcx, 0)); |
|
3687 movl(r11, rsi); |
|
3688 shrq(rsi, 32); |
|
3689 addq(rdi, r11); |
|
3690 movl(r11, rdi); |
|
3691 shrq(rdi, 32); |
|
3692 addq(r9, rsi); |
|
3693 addq(r9, rdi); |
|
3694 imulq(rdx, rax); |
|
3695 pextrw(rbx, xmm0, 3); |
|
3696 lea(rdi, ExternalAddress(PI_INV_TABLE)); |
|
3697 subq(rcx, rdi); |
|
3698 addl(ecx, ecx); |
|
3699 addl(ecx, ecx); |
|
3700 addl(ecx, ecx); |
|
3701 addl(ecx, 19); |
|
3702 movl(rsi, 32768); |
|
3703 andl(rsi, rbx); |
|
3704 shrl(rbx, 4); |
|
3705 andl(rbx, 2047); |
|
3706 subl(rbx, 1023); |
|
3707 subl(ecx, rbx); |
|
3708 addq(r9, rdx); |
|
3709 movl(edx, ecx); |
|
3710 addl(edx, 32); |
|
3711 cmpl(ecx, 1); |
|
3712 jcc(Assembler::less, L_2TAG_PACKET_3_0_1); |
|
3713 negl(ecx); |
|
3714 addl(ecx, 29); |
|
3715 shll(r9); |
|
3716 movl(rdi, r9); |
|
3717 andl(r9, 536870911); |
|
3718 testl(r9, 268435456); |
|
3719 jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1); |
|
3720 shrl(r9); |
|
3721 movl(rbx, 0); |
|
3722 shlq(r9, 32); |
|
3723 orq(r9, r11); |
|
3724 |
|
3725 bind(L_2TAG_PACKET_5_0_1); |
|
3726 |
|
3727 bind(L_2TAG_PACKET_6_0_1); |
|
3728 cmpq(r9, 0); |
|
3729 jcc(Assembler::equal, L_2TAG_PACKET_7_0_1); |
|
3730 |
|
3731 bind(L_2TAG_PACKET_8_0_1); |
|
3732 bsrq(r11, r9); |
|
3733 movl(ecx, 29); |
|
3734 subl(ecx, r11); |
|
3735 jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1); |
|
3736 shlq(r9); |
|
3737 movq(rax, r10); |
|
3738 shlq(r10); |
|
3739 addl(edx, ecx); |
|
3740 negl(ecx); |
|
3741 addl(ecx, 64); |
|
3742 shrq(rax); |
|
3743 shrq(r8); |
|
3744 orq(r9, rax); |
|
3745 orq(r10, r8); |
|
3746 |
|
3747 bind(L_2TAG_PACKET_10_0_1); |
|
3748 cvtsi2sdq(xmm0, r9); |
|
3749 shrq(r10, 1); |
|
3750 cvtsi2sdq(xmm3, r10); |
|
3751 xorpd(xmm4, xmm4); |
|
3752 shll(edx, 4); |
|
3753 negl(edx); |
|
3754 addl(edx, 16368); |
|
3755 orl(edx, rsi); |
|
3756 xorl(edx, rbx); |
|
3757 pinsrw(xmm4, edx, 3); |
|
3758 movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL |
|
3759 movq(xmm6, ExternalAddress(8 + PI_4)); //0x18469899UL, 0x3e64442dUL |
|
3760 xorpd(xmm5, xmm5); |
|
3761 subl(edx, 1008); |
|
3762 pinsrw(xmm5, edx, 3); |
|
3763 mulsd(xmm0, xmm4); |
|
3764 shll(rsi, 16); |
|
3765 sarl(rsi, 31); |
|
3766 mulsd(xmm3, xmm5); |
|
3767 movdqu(xmm1, xmm0); |
|
3768 mulsd(xmm0, xmm2); |
|
3769 shrl(rdi, 29); |
|
3770 addsd(xmm1, xmm3); |
|
3771 mulsd(xmm3, xmm2); |
|
3772 addl(rdi, rsi); |
|
3773 xorl(rdi, rsi); |
|
3774 mulsd(xmm6, xmm1); |
|
3775 movl(eax, rdi); |
|
3776 addsd(xmm6, xmm3); |
|
3777 movdqu(xmm2, xmm0); |
|
3778 addsd(xmm0, xmm6); |
|
3779 subsd(xmm2, xmm0); |
|
3780 addsd(xmm6, xmm2); |
|
3781 |
|
3782 bind(L_2TAG_PACKET_11_0_1); |
|
3783 movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL |
|
3784 mulsd(xmm1, xmm0); |
|
3785 movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL |
|
3786 movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL |
|
3787 pand(xmm4, xmm0); |
|
3788 por(xmm5, xmm4); |
|
3789 addpd(xmm1, xmm5); |
|
3790 cvttsd2siq(rdx, xmm1); |
|
3791 cvtsi2sdq(xmm1, rdx); |
|
3792 movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL |
|
3793 movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL |
|
3794 mulsd(xmm3, xmm1); |
|
3795 unpcklpd(xmm1, xmm1); |
|
3796 shll(eax, 3); |
|
3797 addl(edx, 1865232); |
|
3798 movdqu(xmm4, xmm0); |
|
3799 addl(edx, eax); |
|
3800 andl(edx, 63); |
|
3801 movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL |
|
3802 lea(rax, ExternalAddress(Ctable)); |
|
3803 shll(edx, 5); |
|
3804 addq(rax, rdx); |
|
3805 mulpd(xmm2, xmm1); |
|
3806 subsd(xmm0, xmm3); |
|
3807 mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL |
|
3808 subsd(xmm4, xmm3); |
|
3809 movq(xmm7, Address(rax, 8)); |
|
3810 unpcklpd(xmm0, xmm0); |
|
3811 movdqu(xmm3, xmm4); |
|
3812 subsd(xmm4, xmm2); |
|
3813 mulpd(xmm5, xmm0); |
|
3814 subpd(xmm0, xmm2); |
|
3815 mulsd(xmm7, xmm4); |
|
3816 subsd(xmm3, xmm4); |
|
3817 mulpd(xmm5, xmm0); |
|
3818 mulpd(xmm0, xmm0); |
|
3819 subsd(xmm3, xmm2); |
|
3820 movdqu(xmm2, Address(rax, 0)); |
|
3821 subsd(xmm1, xmm3); |
|
3822 movq(xmm3, Address(rax, 24)); |
|
3823 addsd(xmm2, xmm3); |
|
3824 subsd(xmm7, xmm2); |
|
3825 subsd(xmm1, xmm6); |
|
3826 movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL |
|
3827 mulsd(xmm2, xmm4); |
|
3828 mulpd(xmm6, xmm0); |
|
3829 mulsd(xmm3, xmm4); |
|
3830 mulpd(xmm2, xmm0); |
|
3831 mulpd(xmm0, xmm0); |
|
3832 addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL |
|
3833 mulsd(xmm4, Address(rax, 0)); |
|
3834 addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL |
|
3835 mulpd(xmm5, xmm0); |
|
3836 movdqu(xmm0, xmm3); |
|
3837 addsd(xmm3, Address(rax, 8)); |
|
3838 mulpd(xmm1, xmm7); |
|
3839 movdqu(xmm7, xmm4); |
|
3840 addsd(xmm4, xmm3); |
|
3841 addpd(xmm6, xmm5); |
|
3842 movq(xmm5, Address(rax, 8)); |
|
3843 subsd(xmm5, xmm3); |
|
3844 subsd(xmm3, xmm4); |
|
3845 addsd(xmm1, Address(rax, 16)); |
|
3846 mulpd(xmm6, xmm2); |
|
3847 addsd(xmm5, xmm0); |
|
3848 addsd(xmm3, xmm7); |
|
3849 addsd(xmm1, xmm5); |
|
3850 addsd(xmm1, xmm3); |
|
3851 addsd(xmm1, xmm6); |
|
3852 unpckhpd(xmm6, xmm6); |
|
3853 movdqu(xmm0, xmm4); |
|
3854 addsd(xmm1, xmm6); |
|
3855 addsd(xmm0, xmm1); |
|
3856 jmp(B1_4); |
|
3857 |
|
3858 bind(L_2TAG_PACKET_7_0_1); |
|
3859 addl(edx, 64); |
|
3860 movq(r9, r10); |
|
3861 movq(r10, r8); |
|
3862 movl(r8, 0); |
|
3863 cmpq(r9, 0); |
|
3864 jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1); |
|
3865 addl(edx, 64); |
|
3866 movq(r9, r10); |
|
3867 movq(r10, r8); |
|
3868 cmpq(r9, 0); |
|
3869 jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1); |
|
3870 xorpd(xmm0, xmm0); |
|
3871 xorpd(xmm6, xmm6); |
|
3872 jmp(L_2TAG_PACKET_11_0_1); |
|
3873 |
|
3874 bind(L_2TAG_PACKET_9_0_1); |
|
3875 jcc(Assembler::equal, L_2TAG_PACKET_10_0_1); |
|
3876 negl(ecx); |
|
3877 shrq(r10); |
|
3878 movq(rax, r9); |
|
3879 shrq(r9); |
|
3880 subl(edx, ecx); |
|
3881 negl(ecx); |
|
3882 addl(ecx, 64); |
|
3883 shlq(rax); |
|
3884 orq(r10, rax); |
|
3885 jmp(L_2TAG_PACKET_10_0_1); |
|
3886 bind(L_2TAG_PACKET_3_0_1); |
|
3887 negl(ecx); |
|
3888 shlq(r9, 32); |
|
3889 orq(r9, r11); |
|
3890 shlq(r9); |
|
3891 movq(rdi, r9); |
|
3892 testl(r9, INT_MIN); |
|
3893 jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1); |
|
3894 shrl(r9); |
|
3895 movl(rbx, 0); |
|
3896 shrq(rdi, 3); |
|
3897 jmp(L_2TAG_PACKET_6_0_1); |
|
3898 |
|
3899 bind(L_2TAG_PACKET_4_0_1); |
|
3900 shrl(r9); |
|
3901 movl(rbx, 536870912); |
|
3902 shrl(rbx); |
|
3903 shlq(r9, 32); |
|
3904 orq(r9, r11); |
|
3905 shlq(rbx, 32); |
|
3906 addl(rdi, 536870912); |
|
3907 movl(rcx, 0); |
|
3908 movl(r11, 0); |
|
3909 subq(rcx, r8); |
|
3910 sbbq(r11, r10); |
|
3911 sbbq(rbx, r9); |
|
3912 movq(r8, rcx); |
|
3913 movq(r10, r11); |
|
3914 movq(r9, rbx); |
|
3915 movl(rbx, 32768); |
|
3916 jmp(L_2TAG_PACKET_5_0_1); |
|
3917 |
|
3918 bind(L_2TAG_PACKET_12_0_1); |
|
3919 shrl(r9); |
|
3920 mov64(rbx, 0x100000000); |
|
3921 shrq(rbx); |
|
3922 movl(rcx, 0); |
|
3923 movl(r11, 0); |
|
3924 subq(rcx, r8); |
|
3925 sbbq(r11, r10); |
|
3926 sbbq(rbx, r9); |
|
3927 movq(r8, rcx); |
|
3928 movq(r10, r11); |
|
3929 movq(r9, rbx); |
|
3930 movl(rbx, 32768); |
|
3931 shrq(rdi, 3); |
|
3932 addl(rdi, 536870912); |
|
3933 jmp(L_2TAG_PACKET_6_0_1); |
|
3934 |
|
3935 bind(L_2TAG_PACKET_2_0_1); |
|
3936 movsd(xmm0, Address(rsp, 8)); |
|
3937 mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL |
|
3938 movq(Address(rsp, 0), xmm0); |
|
3939 |
|
3940 bind(L_2TAG_PACKET_13_0_1); |
|
3941 |
|
3942 bind(B1_4); |
|
3943 addq(rsp, 16); |
|
3944 pop(rbx); |
|
3945 } |