|
1 /* |
|
2 * Copyright (c) 2016, Intel Corporation. |
|
3 * Intel Math Library (LIBM) Source Code |
|
4 * |
|
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
6 * |
|
7 * This code is free software; you can redistribute it and/or modify it |
|
8 * under the terms of the GNU General Public License version 2 only, as |
|
9 * published by the Free Software Foundation. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 * |
|
25 */ |
|
26 |
|
27 #include "precompiled.hpp" |
|
28 #include "asm/assembler.hpp" |
|
29 #include "asm/assembler.inline.hpp" |
|
30 #include "runtime/stubRoutines.hpp" |
|
31 #include "macroAssembler_x86.hpp" |
|
32 |
|
// ALIGNED_(x): requests x-byte alignment for the object declared after it,
// spelled the way the current compiler expects.
#if defined(_MSC_VER)
#  define ALIGNED_(x) __declspec(align(x))
#else
#  define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
|
38 |
|
39 /******************************************************************************/ |
|
40 // ALGORITHM DESCRIPTION - LOG10() |
|
41 // --------------------- |
|
42 // |
|
43 // Let x=2^k * mx, mx in [1,2) |
|
44 // |
|
45 // Get B~1/mx based on the output of rcpss instruction (B0) |
|
46 // B = int((B0*LH*2^7+0.5))/2^7 |
|
47 // LH is a short approximation for log10(e) |
|
48 // |
|
49 // Reduced argument: r=B*mx-LH (computed accurately in high and low parts) |
|
50 // |
|
51 // Result: k*log10(2) - log(B) + p(r) |
|
52 // p(r) is a degree 7 polynomial |
|
53 // -log(B) read from data table (high, low parts) |
|
54 // Result is formed from high and low parts |
|
55 // |
|
56 // Special cases: |
|
57 // log10(0) = -INF with divide-by-zero exception raised |
|
58 // log10(1) = +0 |
|
59 // log10(x) = NaN with invalid exception raised if x < -0, including -INF |
|
60 // log10(+INF) = +INF |
|
61 // |
|
62 /******************************************************************************/ |
|
63 |
|
64 #ifdef _LP64 |
|
65 // The 64 bit code is at most SSE2 compliant |
|
66 ALIGNED_(16) juint _HIGHSIGMASK_log10[] = |
|
67 { |
|
68 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL |
|
69 }; |
|
70 |
|
71 ALIGNED_(16) juint _LOG10_E[] = |
|
72 { |
|
73 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL |
|
74 }; |
|
75 |
|
76 ALIGNED_(16) juint _L_tbl_log10[] = |
|
77 { |
|
78 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, |
|
79 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, |
|
80 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, |
|
81 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, |
|
82 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, |
|
83 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, |
|
84 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, |
|
85 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, |
|
86 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, |
|
87 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, |
|
88 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, |
|
89 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, |
|
90 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, |
|
91 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, |
|
92 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, |
|
93 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, |
|
94 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, |
|
95 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, |
|
96 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, |
|
97 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, |
|
98 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, |
|
99 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, |
|
100 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, |
|
101 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, |
|
102 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, |
|
103 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, |
|
104 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, |
|
105 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, |
|
106 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, |
|
107 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, |
|
108 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, |
|
109 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, |
|
110 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, |
|
111 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, |
|
112 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, |
|
113 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, |
|
114 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, |
|
115 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, |
|
116 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, |
|
117 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, |
|
118 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, |
|
119 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, |
|
120 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, |
|
121 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, |
|
122 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, |
|
123 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, |
|
124 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, |
|
125 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, |
|
126 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, |
|
127 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, |
|
128 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, |
|
129 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, |
|
130 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, |
|
131 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, |
|
132 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, |
|
133 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, |
|
134 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, |
|
135 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, |
|
136 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, |
|
137 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, |
|
138 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, |
|
139 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, |
|
140 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, |
|
141 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, |
|
142 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, |
|
143 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, |
|
144 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, |
|
145 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, |
|
146 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, |
|
147 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, |
|
148 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, |
|
149 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, |
|
150 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, |
|
151 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, |
|
152 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, |
|
153 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, |
|
154 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, |
|
155 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, |
|
156 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, |
|
157 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, |
|
158 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, |
|
159 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, |
|
160 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, |
|
161 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, |
|
162 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, |
|
163 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, |
|
164 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, |
|
165 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, |
|
166 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, |
|
167 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, |
|
168 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, |
|
169 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, |
|
170 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, |
|
171 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, |
|
172 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, |
|
173 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, |
|
174 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, |
|
175 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, |
|
176 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, |
|
177 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, |
|
178 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, |
|
179 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, |
|
180 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
|
181 0x00000000UL |
|
182 }; |
|
183 |
|
184 ALIGNED_(16) juint _log2_log10[] = |
|
185 { |
|
186 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL |
|
187 }; |
|
188 |
|
189 ALIGNED_(16) juint _coeff_log10[] = |
|
190 { |
|
191 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, |
|
192 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, |
|
193 0xdc77b115UL, 0xbff27af2UL |
|
194 }; |
|
195 |
|
196 // Registers: |
|
197 // input: xmm0 |
|
198 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
|
199 // rax, rdx, rcx, tmp - r11 |
|
200 |
|
201 // Code generated by Intel C compiler for LIBM library |
|
202 |
|
203 void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) { |
|
204 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
|
205 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
|
206 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start; |
|
207 |
|
208 assert_different_registers(r11, eax, ecx, edx); |
|
209 |
|
210 address HIGHSIGMASK = (address)_HIGHSIGMASK_log10; |
|
211 address LOG10_E = (address)_LOG10_E; |
|
212 address L_tbl = (address)_L_tbl_log10; |
|
213 address log2 = (address)_log2_log10; |
|
214 address coeff = (address)_coeff_log10; |
|
215 |
|
216 bind(start); |
|
217 subq(rsp, 24); |
|
218 movsd(Address(rsp, 0), xmm0); |
|
219 |
|
220 bind(B1_2); |
|
221 xorpd(xmm2, xmm2); |
|
222 movl(eax, 16368); |
|
223 pinsrw(xmm2, eax, 3); |
|
224 movl(ecx, 1054736384); |
|
225 movdl(xmm7, ecx); |
|
226 xorpd(xmm3, xmm3); |
|
227 movl(edx, 30704); |
|
228 pinsrw(xmm3, edx, 3); |
|
229 movdqu(xmm1, xmm0); |
|
230 movl(edx, 32768); |
|
231 movdl(xmm4, edx); |
|
232 movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL |
|
233 pextrw(eax, xmm0, 3); |
|
234 por(xmm0, xmm2); |
|
235 movl(ecx, 16352); |
|
236 psrlq(xmm0, 27); |
|
237 movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL |
|
238 psrld(xmm0, 2); |
|
239 rcpps(xmm0, xmm0); |
|
240 psllq(xmm1, 12); |
|
241 pshufd(xmm6, xmm5, 78); |
|
242 psrlq(xmm1, 12); |
|
243 subl(eax, 16); |
|
244 cmpl(eax, 32736); |
|
245 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
|
246 |
|
247 bind(L_2TAG_PACKET_1_0_2); |
|
248 mulss(xmm0, xmm7); |
|
249 por(xmm1, xmm3); |
|
250 lea(r11, ExternalAddress(L_tbl)); |
|
251 andpd(xmm5, xmm1); |
|
252 paddd(xmm0, xmm4); |
|
253 subsd(xmm1, xmm5); |
|
254 movdl(edx, xmm0); |
|
255 psllq(xmm0, 29); |
|
256 andpd(xmm0, xmm6); |
|
257 andl(eax, 32752); |
|
258 subl(eax, ecx); |
|
259 cvtsi2sdl(xmm7, eax); |
|
260 mulpd(xmm5, xmm0); |
|
261 mulsd(xmm1, xmm0); |
|
262 movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL |
|
263 movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL |
|
264 subsd(xmm5, xmm2); |
|
265 andl(edx, 16711680); |
|
266 shrl(edx, 12); |
|
267 movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504)); |
|
268 movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL |
|
269 addsd(xmm1, xmm5); |
|
270 movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL |
|
271 mulsd(xmm6, xmm7); |
|
272 pshufd(xmm5, xmm1, 68); |
|
273 mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL |
|
274 mulsd(xmm3, xmm1); |
|
275 addsd(xmm0, xmm6); |
|
276 mulpd(xmm4, xmm5); |
|
277 movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL |
|
278 mulpd(xmm5, xmm5); |
|
279 addpd(xmm4, xmm2); |
|
280 mulpd(xmm3, xmm5); |
|
281 pshufd(xmm2, xmm0, 228); |
|
282 addsd(xmm0, xmm1); |
|
283 mulsd(xmm4, xmm1); |
|
284 subsd(xmm2, xmm0); |
|
285 mulsd(xmm6, xmm1); |
|
286 addsd(xmm1, xmm2); |
|
287 pshufd(xmm2, xmm0, 238); |
|
288 mulsd(xmm5, xmm5); |
|
289 addsd(xmm7, xmm2); |
|
290 addsd(xmm1, xmm6); |
|
291 addpd(xmm4, xmm3); |
|
292 addsd(xmm1, xmm7); |
|
293 mulpd(xmm4, xmm5); |
|
294 addsd(xmm1, xmm4); |
|
295 pshufd(xmm5, xmm4, 238); |
|
296 addsd(xmm1, xmm5); |
|
297 addsd(xmm0, xmm1); |
|
298 jmp(B1_5); |
|
299 |
|
300 bind(L_2TAG_PACKET_0_0_2); |
|
301 movq(xmm0, Address(rsp, 0)); |
|
302 movq(xmm1, Address(rsp, 0)); |
|
303 addl(eax, 16); |
|
304 cmpl(eax, 32768); |
|
305 jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); |
|
306 cmpl(eax, 16); |
|
307 jcc(Assembler::below, L_2TAG_PACKET_3_0_2); |
|
308 |
|
309 bind(L_2TAG_PACKET_4_0_2); |
|
310 addsd(xmm0, xmm0); |
|
311 jmp(B1_5); |
|
312 |
|
313 bind(L_2TAG_PACKET_5_0_2); |
|
314 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
|
315 cmpl(edx, 0); |
|
316 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
|
317 jmp(L_2TAG_PACKET_6_0_2); |
|
318 |
|
319 bind(L_2TAG_PACKET_3_0_2); |
|
320 xorpd(xmm1, xmm1); |
|
321 addsd(xmm1, xmm0); |
|
322 movdl(edx, xmm1); |
|
323 psrlq(xmm1, 32); |
|
324 movdl(ecx, xmm1); |
|
325 orl(edx, ecx); |
|
326 cmpl(edx, 0); |
|
327 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
|
328 xorpd(xmm1, xmm1); |
|
329 movl(eax, 18416); |
|
330 pinsrw(xmm1, eax, 3); |
|
331 mulsd(xmm0, xmm1); |
|
332 xorpd(xmm2, xmm2); |
|
333 movl(eax, 16368); |
|
334 pinsrw(xmm2, eax, 3); |
|
335 movdqu(xmm1, xmm0); |
|
336 pextrw(eax, xmm0, 3); |
|
337 por(xmm0, xmm2); |
|
338 movl(ecx, 18416); |
|
339 psrlq(xmm0, 27); |
|
340 movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL |
|
341 psrld(xmm0, 2); |
|
342 rcpps(xmm0, xmm0); |
|
343 psllq(xmm1, 12); |
|
344 pshufd(xmm6, xmm5, 78); |
|
345 psrlq(xmm1, 12); |
|
346 jmp(L_2TAG_PACKET_1_0_2); |
|
347 |
|
348 bind(L_2TAG_PACKET_2_0_2); |
|
349 movdl(edx, xmm1); |
|
350 psrlq(xmm1, 32); |
|
351 movdl(ecx, xmm1); |
|
352 addl(ecx, ecx); |
|
353 cmpl(ecx, -2097152); |
|
354 jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); |
|
355 orl(edx, ecx); |
|
356 cmpl(edx, 0); |
|
357 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
|
358 |
|
359 bind(L_2TAG_PACKET_6_0_2); |
|
360 xorpd(xmm1, xmm1); |
|
361 xorpd(xmm0, xmm0); |
|
362 movl(eax, 32752); |
|
363 pinsrw(xmm1, eax, 3); |
|
364 mulsd(xmm0, xmm1); |
|
365 movl(Address(rsp, 16), 9); |
|
366 jmp(L_2TAG_PACKET_8_0_2); |
|
367 |
|
368 bind(L_2TAG_PACKET_7_0_2); |
|
369 xorpd(xmm1, xmm1); |
|
370 xorpd(xmm0, xmm0); |
|
371 movl(eax, 49136); |
|
372 pinsrw(xmm0, eax, 3); |
|
373 divsd(xmm0, xmm1); |
|
374 movl(Address(rsp, 16), 8); |
|
375 |
|
376 bind(L_2TAG_PACKET_8_0_2); |
|
377 movq(Address(rsp, 8), xmm0); |
|
378 |
|
379 bind(B1_3); |
|
380 movq(xmm0, Address(rsp, 8)); |
|
381 |
|
382 bind(L_2TAG_PACKET_9_0_2); |
|
383 |
|
384 bind(B1_5); |
|
385 addq(rsp, 24); |
|
386 |
|
387 } |
|
388 #else |
|
389 // The 32 bit code is at most SSE2 compliant |
|
390 ALIGNED_(16) juint _static_const_table_log10[] = |
|
391 { |
|
392 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, |
|
393 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, |
|
394 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, |
|
395 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, |
|
396 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, |
|
397 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, |
|
398 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, |
|
399 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, |
|
400 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, |
|
401 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, |
|
402 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, |
|
403 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, |
|
404 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, |
|
405 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, |
|
406 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, |
|
407 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, |
|
408 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, |
|
409 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, |
|
410 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, |
|
411 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, |
|
412 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, |
|
413 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, |
|
414 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, |
|
415 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, |
|
416 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, |
|
417 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, |
|
418 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, |
|
419 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, |
|
420 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, |
|
421 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, |
|
422 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, |
|
423 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, |
|
424 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, |
|
425 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, |
|
426 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, |
|
427 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, |
|
428 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, |
|
429 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, |
|
430 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, |
|
431 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, |
|
432 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, |
|
433 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, |
|
434 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, |
|
435 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, |
|
436 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, |
|
437 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, |
|
438 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, |
|
439 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, |
|
440 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, |
|
441 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, |
|
442 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, |
|
443 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, |
|
444 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, |
|
445 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, |
|
446 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, |
|
447 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, |
|
448 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, |
|
449 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, |
|
450 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, |
|
451 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, |
|
452 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, |
|
453 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, |
|
454 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, |
|
455 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, |
|
456 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, |
|
457 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, |
|
458 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, |
|
459 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, |
|
460 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, |
|
461 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, |
|
462 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, |
|
463 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, |
|
464 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, |
|
465 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, |
|
466 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, |
|
467 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, |
|
468 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, |
|
469 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, |
|
470 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, |
|
471 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, |
|
472 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, |
|
473 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, |
|
474 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, |
|
475 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, |
|
476 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, |
|
477 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, |
|
478 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, |
|
479 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, |
|
480 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, |
|
481 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, |
|
482 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, |
|
483 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, |
|
484 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, |
|
485 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, |
|
486 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, |
|
487 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, |
|
488 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, |
|
489 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, |
|
490 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, |
|
491 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, |
|
492 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, |
|
493 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, |
|
494 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
|
495 0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL, |
|
496 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, |
|
497 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, |
|
498 0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL, |
|
499 0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL |
|
500 }; |
|
501 //registers, |
|
502 // input: xmm0 |
|
503 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
|
504 // rax, rdx, rcx, rbx (tmp) |
|
505 |
|
506 void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { |
|
507 |
|
508 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
|
509 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
|
510 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start; |
|
511 |
|
512 assert_different_registers(tmp, eax, ecx, edx); |
|
513 |
|
514 address static_const_table_log10 = (address)_static_const_table_log10; |
|
515 |
|
516 bind(start); |
|
517 subl(rsp, 104); |
|
518 movl(Address(rsp, 40), tmp); |
|
519 lea(tmp, ExternalAddress(static_const_table_log10)); |
|
520 xorpd(xmm2, xmm2); |
|
521 movl(eax, 16368); |
|
522 pinsrw(xmm2, eax, 3); |
|
523 movl(ecx, 1054736384); |
|
524 movdl(xmm7, ecx); |
|
525 xorpd(xmm3, xmm3); |
|
526 movl(edx, 30704); |
|
527 pinsrw(xmm3, edx, 3); |
|
528 movsd(xmm0, Address(rsp, 112)); |
|
529 movdqu(xmm1, xmm0); |
|
530 movl(edx, 32768); |
|
531 movdl(xmm4, edx); |
|
532 movdqu(xmm5, Address(tmp, 2128)); //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL |
|
533 pextrw(eax, xmm0, 3); |
|
534 por(xmm0, xmm2); |
|
535 movl(ecx, 16352); |
|
536 psllq(xmm0, 5); |
|
537 movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL |
|
538 psrlq(xmm0, 34); |
|
539 rcpss(xmm0, xmm0); |
|
540 psllq(xmm1, 12); |
|
541 pshufd(xmm6, xmm5, 78); |
|
542 psrlq(xmm1, 12); |
|
543 subl(eax, 16); |
|
544 cmpl(eax, 32736); |
|
545 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
|
546 |
|
547 bind(L_2TAG_PACKET_1_0_2); |
|
548 mulss(xmm0, xmm7); |
|
549 por(xmm1, xmm3); |
|
550 andpd(xmm5, xmm1); |
|
551 paddd(xmm0, xmm4); |
|
552 subsd(xmm1, xmm5); |
|
553 movdl(edx, xmm0); |
|
554 psllq(xmm0, 29); |
|
555 andpd(xmm0, xmm6); |
|
556 andl(eax, 32752); |
|
557 subl(eax, ecx); |
|
558 cvtsi2sdl(xmm7, eax); |
|
559 mulpd(xmm5, xmm0); |
|
560 mulsd(xmm1, xmm0); |
|
561 movsd(xmm6, Address(tmp, 2064)); //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL |
|
562 movdqu(xmm3, Address(tmp, 2080)); //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL |
|
563 subsd(xmm5, xmm2); |
|
564 andl(edx, 16711680); |
|
565 shrl(edx, 12); |
|
566 movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504)); |
|
567 movdqu(xmm4, Address(tmp, 2096)); //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL |
|
568 addsd(xmm1, xmm5); |
|
569 movdqu(xmm2, Address(tmp, 2112)); //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL |
|
570 mulsd(xmm6, xmm7); |
|
571 pshufd(xmm5, xmm1, 68); |
|
572 mulsd(xmm7, Address(tmp, 2072)); //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL |
|
573 mulsd(xmm3, xmm1); |
|
574 addsd(xmm0, xmm6); |
|
575 mulpd(xmm4, xmm5); |
|
576 movsd(xmm6, Address(tmp, 2152)); //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL |
|
577 mulpd(xmm5, xmm5); |
|
578 addpd(xmm4, xmm2); |
|
579 mulpd(xmm3, xmm5); |
|
580 pshufd(xmm2, xmm0, 228); |
|
581 addsd(xmm0, xmm1); |
|
582 mulsd(xmm4, xmm1); |
|
583 subsd(xmm2, xmm0); |
|
584 mulsd(xmm6, xmm1); |
|
585 addsd(xmm1, xmm2); |
|
586 pshufd(xmm2, xmm0, 238); |
|
587 mulsd(xmm5, xmm5); |
|
588 addsd(xmm7, xmm2); |
|
589 addsd(xmm1, xmm6); |
|
590 addpd(xmm4, xmm3); |
|
591 addsd(xmm1, xmm7); |
|
592 mulpd(xmm4, xmm5); |
|
593 addsd(xmm1, xmm4); |
|
594 pshufd(xmm5, xmm4, 238); |
|
595 addsd(xmm1, xmm5); |
|
596 addsd(xmm0, xmm1); |
|
597 jmp(L_2TAG_PACKET_2_0_2); |
|
598 |
|
599 bind(L_2TAG_PACKET_0_0_2); |
|
600 movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL |
|
601 movdqu(xmm1, xmm0); |
|
602 addl(eax, 16); |
|
603 cmpl(eax, 32768); |
|
604 jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); |
|
605 cmpl(eax, 16); |
|
606 jcc(Assembler::below, L_2TAG_PACKET_4_0_2); |
|
607 |
|
608 bind(L_2TAG_PACKET_5_0_2); |
|
609 addsd(xmm0, xmm0); |
|
610 jmp(L_2TAG_PACKET_2_0_2); |
|
611 |
|
612 bind(L_2TAG_PACKET_6_0_2); |
|
613 jcc(Assembler::above, L_2TAG_PACKET_5_0_2); |
|
614 cmpl(edx, 0); |
|
615 jcc(Assembler::above, L_2TAG_PACKET_5_0_2); |
|
616 jmp(L_2TAG_PACKET_7_0_2); |
|
617 |
|
618 bind(L_2TAG_PACKET_3_0_2); |
|
619 movdl(edx, xmm1); |
|
620 psrlq(xmm1, 32); |
|
621 movdl(ecx, xmm1); |
|
622 addl(ecx, ecx); |
|
623 cmpl(ecx, -2097152); |
|
624 jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); |
|
625 orl(edx, ecx); |
|
626 cmpl(edx, 0); |
|
627 jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); |
|
628 |
|
629 bind(L_2TAG_PACKET_7_0_2); |
|
630 xorpd(xmm1, xmm1); |
|
631 xorpd(xmm0, xmm0); |
|
632 movl(eax, 32752); |
|
633 pinsrw(xmm1, eax, 3); |
|
634 movl(edx, 9); |
|
635 mulsd(xmm0, xmm1); |
|
636 |
|
637 bind(L_2TAG_PACKET_9_0_2); |
|
638 movsd(Address(rsp, 0), xmm0); |
|
639 movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL |
|
640 fld_d(Address(rsp, 0)); |
|
641 jmp(L_2TAG_PACKET_10_0_2); |
|
642 |
|
643 bind(L_2TAG_PACKET_8_0_2); |
|
644 xorpd(xmm1, xmm1); |
|
645 xorpd(xmm0, xmm0); |
|
646 movl(eax, 49136); |
|
647 pinsrw(xmm0, eax, 3); |
|
648 divsd(xmm0, xmm1); |
|
649 movl(edx, 8); |
|
650 jmp(L_2TAG_PACKET_9_0_2); |
|
651 |
|
652 bind(L_2TAG_PACKET_4_0_2); |
|
653 movdl(edx, xmm1); |
|
654 psrlq(xmm1, 32); |
|
655 movdl(ecx, xmm1); |
|
656 orl(edx, ecx); |
|
657 cmpl(edx, 0); |
|
658 jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); |
|
659 xorpd(xmm1, xmm1); |
|
660 movl(eax, 18416); |
|
661 pinsrw(xmm1, eax, 3); |
|
662 mulsd(xmm0, xmm1); |
|
663 xorpd(xmm2, xmm2); |
|
664 movl(eax, 16368); |
|
665 pinsrw(xmm2, eax, 3); |
|
666 movdqu(xmm1, xmm0); |
|
667 pextrw(eax, xmm0, 3); |
|
668 por(xmm0, xmm2); |
|
669 movl(ecx, 18416); |
|
670 psllq(xmm0, 5); |
|
671 movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL |
|
672 psrlq(xmm0, 34); |
|
673 rcpss(xmm0, xmm0); |
|
674 psllq(xmm1, 12); |
|
675 pshufd(xmm6, xmm5, 78); |
|
676 psrlq(xmm1, 12); |
|
677 jmp(L_2TAG_PACKET_1_0_2); |
|
678 |
|
679 bind(L_2TAG_PACKET_2_0_2); |
|
680 movsd(Address(rsp, 24), xmm0); |
|
681 fld_d(Address(rsp, 24)); |
|
682 |
|
683 bind(L_2TAG_PACKET_10_0_2); |
|
684 movl(tmp, Address(rsp, 40)); |
|
685 |
|
686 } |
|
687 #endif |