|
1 /* |
|
2 * Copyright (c) 2016, Intel Corporation. |
|
3 * Intel Math Library (LIBM) Source Code |
|
4 * |
|
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
6 * |
|
7 * This code is free software; you can redistribute it and/or modify it |
|
8 * under the terms of the GNU General Public License version 2 only, as |
|
9 * published by the Free Software Foundation. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 * |
|
25 */ |
|
26 |
|
27 #include "precompiled.hpp" |
|
28 #include "asm/assembler.hpp" |
|
29 #include "asm/assembler.inline.hpp" |
|
30 #include "macroAssembler_x86.hpp" |
|
31 |
|
32 #ifdef _MSC_VER |
|
33 #define ALIGNED_(x) __declspec(align(x)) |
|
34 #else |
|
35 #define ALIGNED_(x) __attribute__ ((aligned(x))) |
|
36 #endif |
|
37 |
|
38 /******************************************************************************/ |
|
39 // ALGORITHM DESCRIPTION - LOG() |
|
40 // --------------------- |
|
41 // |
|
42 // x=2^k * mx, mx in [1,2) |
|
43 // |
|
44 // Get B~1/mx based on the output of rcpss instruction (B0) |
|
45 // B = int((B0*2^7+0.5))/2^7 |
|
46 // |
|
47 // Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) |
|
48 // |
|
49 // Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and |
|
50 // p(r) is a degree 7 polynomial |
|
51 // -log(B) read from data table (high, low parts) |
|
52 // Result is formed from high and low parts |
|
53 // |
|
54 // Special cases: |
|
55 // log(NaN) = quiet NaN, and raise invalid exception |
|
56 // log(+INF) = that INF |
|
57 // log(0) = -INF with divide-by-zero exception raised |
|
58 // log(1) = +0 |
|
59 // log(x) = NaN with invalid exception raised if x < -0, including -INF |
|
60 // |
|
61 /******************************************************************************/ |
|
62 |
|
63 #ifdef _LP64 |
|
// The 64 bit code needs only SSE2; SSE3 (movddup) is used when VM_Version::supports_sse3() reports it
|
65 ALIGNED_(16) juint _L_tbl[] = |
|
66 { |
|
67 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, |
|
68 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, |
|
69 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, |
|
70 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, |
|
71 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, |
|
72 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, |
|
73 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, |
|
74 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, |
|
75 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, |
|
76 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, |
|
77 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, |
|
78 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, |
|
79 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, |
|
80 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, |
|
81 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, |
|
82 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, |
|
83 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, |
|
84 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, |
|
85 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, |
|
86 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, |
|
87 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, |
|
88 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, |
|
89 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, |
|
90 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, |
|
91 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, |
|
92 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, |
|
93 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, |
|
94 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, |
|
95 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, |
|
96 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, |
|
97 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, |
|
98 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, |
|
99 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, |
|
100 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, |
|
101 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, |
|
102 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, |
|
103 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, |
|
104 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, |
|
105 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, |
|
106 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, |
|
107 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, |
|
108 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, |
|
109 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, |
|
110 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, |
|
111 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, |
|
112 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, |
|
113 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, |
|
114 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, |
|
115 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, |
|
116 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, |
|
117 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, |
|
118 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, |
|
119 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, |
|
120 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, |
|
121 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, |
|
122 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, |
|
123 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, |
|
124 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, |
|
125 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, |
|
126 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, |
|
127 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, |
|
128 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, |
|
129 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, |
|
130 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, |
|
131 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, |
|
132 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, |
|
133 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, |
|
134 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, |
|
135 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, |
|
136 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, |
|
137 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, |
|
138 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, |
|
139 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, |
|
140 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, |
|
141 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, |
|
142 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, |
|
143 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, |
|
144 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, |
|
145 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, |
|
146 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, |
|
147 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, |
|
148 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, |
|
149 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, |
|
150 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, |
|
151 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, |
|
152 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, |
|
153 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, |
|
154 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, |
|
155 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, |
|
156 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, |
|
157 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, |
|
158 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, |
|
159 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, |
|
160 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, |
|
161 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, |
|
162 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, |
|
163 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, |
|
164 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, |
|
165 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, |
|
166 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, |
|
167 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, |
|
168 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, |
|
169 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
|
170 0x80000000UL |
|
171 }; |
|
172 |
|
173 ALIGNED_(16) juint _log2[] = |
|
174 { |
|
175 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL |
|
176 }; |
|
177 |
|
178 ALIGNED_(16) juint _coeff[] = |
|
179 { |
|
180 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, |
|
181 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, |
|
182 0x00000000UL, 0xbfe00000UL |
|
183 }; |
|
184 |
|
185 //registers, |
|
186 // input: xmm0 |
|
187 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
|
188 // rax, rdx, rcx, r8, r11 |
|
189 |
|
190 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) { |
|
191 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
|
192 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
|
193 Label L_2TAG_PACKET_8_0_2; |
|
194 Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; |
|
195 |
|
196 assert_different_registers(tmp1, tmp2, eax, ecx, edx); |
|
197 jmp(start); |
|
198 address L_tbl = (address)_L_tbl; |
|
199 address log2 = (address)_log2; |
|
200 address coeff = (address)_coeff; |
|
201 |
|
202 bind(start); |
|
203 subq(rsp, 24); |
|
204 movsd(Address(rsp, 0), xmm0); |
|
205 mov64(rax, 0x3ff0000000000000); |
|
206 movdq(xmm2, rax); |
|
207 mov64(rdx, 0x77f0000000000000); |
|
208 movdq(xmm3, rdx); |
|
209 movl(ecx, 32768); |
|
210 movdl(xmm4, rcx); |
|
211 mov64(tmp1, 0xffffe00000000000); |
|
212 movdq(xmm5, tmp1); |
|
213 movdqu(xmm1, xmm0); |
|
214 pextrw(eax, xmm0, 3); |
|
215 por(xmm0, xmm2); |
|
216 movl(ecx, 16352); |
|
217 psrlq(xmm0, 27); |
|
218 lea(tmp2, ExternalAddress(L_tbl)); |
|
219 psrld(xmm0, 2); |
|
220 rcpps(xmm0, xmm0); |
|
221 psllq(xmm1, 12); |
|
222 pshufd(xmm6, xmm5, 228); |
|
223 psrlq(xmm1, 12); |
|
224 subl(eax, 16); |
|
225 cmpl(eax, 32736); |
|
226 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
|
227 |
|
228 bind(L_2TAG_PACKET_1_0_2); |
|
229 paddd(xmm0, xmm4); |
|
230 por(xmm1, xmm3); |
|
231 movdl(edx, xmm0); |
|
232 psllq(xmm0, 29); |
|
233 pand(xmm5, xmm1); |
|
234 pand(xmm0, xmm6); |
|
235 subsd(xmm1, xmm5); |
|
236 mulpd(xmm5, xmm0); |
|
237 andl(eax, 32752); |
|
238 subl(eax, ecx); |
|
239 cvtsi2sdl(xmm7, eax); |
|
240 mulsd(xmm1, xmm0); |
|
241 movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL |
|
242 movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL |
|
243 subsd(xmm5, xmm2); |
|
244 andl(edx, 16711680); |
|
245 shrl(edx, 12); |
|
246 movdqu(xmm0, Address(tmp2, edx)); |
|
247 movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL |
|
248 addsd(xmm1, xmm5); |
|
249 movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL |
|
250 mulsd(xmm6, xmm7); |
|
251 if (VM_Version::supports_sse3()) { |
|
252 movddup(xmm5, xmm1); |
|
253 } |
|
254 else { |
|
255 movdqu(xmm5, xmm1); |
|
256 movlhps(xmm5, xmm5); |
|
257 } |
|
258 mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL |
|
259 mulsd(xmm3, xmm1); |
|
260 addsd(xmm0, xmm6); |
|
261 mulpd(xmm4, xmm5); |
|
262 mulpd(xmm5, xmm5); |
|
263 if (VM_Version::supports_sse3()) { |
|
264 movddup(xmm6, xmm0); |
|
265 } |
|
266 else { |
|
267 movdqu(xmm6, xmm0); |
|
268 movlhps(xmm6, xmm6); |
|
269 } |
|
270 addsd(xmm0, xmm1); |
|
271 addpd(xmm4, xmm2); |
|
272 mulpd(xmm3, xmm5); |
|
273 subsd(xmm6, xmm0); |
|
274 mulsd(xmm4, xmm1); |
|
275 pshufd(xmm2, xmm0, 238); |
|
276 addsd(xmm1, xmm6); |
|
277 mulsd(xmm5, xmm5); |
|
278 addsd(xmm7, xmm2); |
|
279 addpd(xmm4, xmm3); |
|
280 addsd(xmm1, xmm7); |
|
281 mulpd(xmm4, xmm5); |
|
282 addsd(xmm1, xmm4); |
|
283 pshufd(xmm5, xmm4, 238); |
|
284 addsd(xmm1, xmm5); |
|
285 addsd(xmm0, xmm1); |
|
286 jmp(B1_5); |
|
287 |
|
288 bind(L_2TAG_PACKET_0_0_2); |
|
289 movq(xmm0, Address(rsp, 0)); |
|
290 movq(xmm1, Address(rsp, 0)); |
|
291 addl(eax, 16); |
|
292 cmpl(eax, 32768); |
|
293 jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); |
|
294 cmpl(eax, 16); |
|
295 jcc(Assembler::below, L_2TAG_PACKET_3_0_2); |
|
296 |
|
297 bind(L_2TAG_PACKET_4_0_2); |
|
298 addsd(xmm0, xmm0); |
|
299 jmp(B1_5); |
|
300 |
|
301 bind(L_2TAG_PACKET_5_0_2); |
|
302 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
|
303 cmpl(edx, 0); |
|
304 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
|
305 jmp(L_2TAG_PACKET_6_0_2); |
|
306 |
|
307 bind(L_2TAG_PACKET_3_0_2); |
|
308 xorpd(xmm1, xmm1); |
|
309 addsd(xmm1, xmm0); |
|
310 movdl(edx, xmm1); |
|
311 psrlq(xmm1, 32); |
|
312 movdl(ecx, xmm1); |
|
313 orl(edx, ecx); |
|
314 cmpl(edx, 0); |
|
315 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
|
316 xorpd(xmm1, xmm1); |
|
317 movl(eax, 18416); |
|
318 pinsrw(xmm1, eax, 3); |
|
319 mulsd(xmm0, xmm1); |
|
320 movdqu(xmm1, xmm0); |
|
321 pextrw(eax, xmm0, 3); |
|
322 por(xmm0, xmm2); |
|
323 psrlq(xmm0, 27); |
|
324 movl(ecx, 18416); |
|
325 psrld(xmm0, 2); |
|
326 rcpps(xmm0, xmm0); |
|
327 psllq(xmm1, 12); |
|
328 pshufd(xmm6, xmm5, 228); |
|
329 psrlq(xmm1, 12); |
|
330 jmp(L_2TAG_PACKET_1_0_2); |
|
331 |
|
332 bind(L_2TAG_PACKET_2_0_2); |
|
333 movdl(edx, xmm1); |
|
334 psrlq(xmm1, 32); |
|
335 movdl(ecx, xmm1); |
|
336 addl(ecx, ecx); |
|
337 cmpl(ecx, -2097152); |
|
338 jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); |
|
339 orl(edx, ecx); |
|
340 cmpl(edx, 0); |
|
341 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
|
342 |
|
343 bind(L_2TAG_PACKET_6_0_2); |
|
344 xorpd(xmm1, xmm1); |
|
345 xorpd(xmm0, xmm0); |
|
346 movl(eax, 32752); |
|
347 pinsrw(xmm1, eax, 3); |
|
348 mulsd(xmm0, xmm1); |
|
349 movl(Address(rsp, 16), 3); |
|
350 jmp(L_2TAG_PACKET_8_0_2); |
|
351 bind(L_2TAG_PACKET_7_0_2); |
|
352 xorpd(xmm1, xmm1); |
|
353 xorpd(xmm0, xmm0); |
|
354 movl(eax, 49136); |
|
355 pinsrw(xmm0, eax, 3); |
|
356 divsd(xmm0, xmm1); |
|
357 movl(Address(rsp, 16), 2); |
|
358 |
|
359 bind(L_2TAG_PACKET_8_0_2); |
|
360 movq(Address(rsp, 8), xmm0); |
|
361 |
|
362 bind(B1_3); |
|
363 movq(xmm0, Address(rsp, 8)); |
|
364 |
|
365 bind(B1_5); |
|
366 addq(rsp, 24); |
|
367 } |
|
368 #else |
|
369 // The 32 bit code is at most SSE2 compliant |
|
370 ALIGNED_(16) juint _static_const_table_log[] = |
|
371 { |
|
372 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, |
|
373 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, |
|
374 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, |
|
375 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, |
|
376 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, |
|
377 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, |
|
378 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, |
|
379 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, |
|
380 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, |
|
381 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, |
|
382 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, |
|
383 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, |
|
384 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, |
|
385 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, |
|
386 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, |
|
387 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, |
|
388 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, |
|
389 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, |
|
390 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, |
|
391 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, |
|
392 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, |
|
393 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, |
|
394 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, |
|
395 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, |
|
396 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, |
|
397 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, |
|
398 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, |
|
399 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, |
|
400 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, |
|
401 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, |
|
402 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, |
|
403 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, |
|
404 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, |
|
405 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, |
|
406 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, |
|
407 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, |
|
408 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, |
|
409 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, |
|
410 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, |
|
411 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, |
|
412 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, |
|
413 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, |
|
414 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, |
|
415 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, |
|
416 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, |
|
417 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, |
|
418 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, |
|
419 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, |
|
420 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, |
|
421 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, |
|
422 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, |
|
423 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, |
|
424 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, |
|
425 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, |
|
426 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, |
|
427 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, |
|
428 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, |
|
429 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, |
|
430 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, |
|
431 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, |
|
432 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, |
|
433 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, |
|
434 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, |
|
435 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, |
|
436 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, |
|
437 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, |
|
438 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, |
|
439 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, |
|
440 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, |
|
441 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, |
|
442 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, |
|
443 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, |
|
444 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, |
|
445 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, |
|
446 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, |
|
447 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, |
|
448 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, |
|
449 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, |
|
450 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, |
|
451 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, |
|
452 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, |
|
453 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, |
|
454 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, |
|
455 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, |
|
456 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, |
|
457 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, |
|
458 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, |
|
459 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, |
|
460 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, |
|
461 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, |
|
462 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, |
|
463 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, |
|
464 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, |
|
465 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, |
|
466 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, |
|
467 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, |
|
468 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, |
|
469 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, |
|
470 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, |
|
471 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, |
|
472 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, |
|
473 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, |
|
474 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
|
475 0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL, |
|
476 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, |
|
477 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, |
|
478 0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL, |
|
479 0xffffe000UL |
|
480 }; |
|
481 //registers, |
|
482 // input: xmm0 |
|
483 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
|
484 // rax, rdx, rcx, rbx (tmp) |
|
485 |
|
486 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { |
|
487 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
|
488 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
|
489 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2; |
|
490 Label L_2TAG_PACKET_10_0_2, start; |
|
491 |
|
492 assert_different_registers(tmp, eax, ecx, edx); |
|
493 jmp(start); |
|
494 address static_const_table = (address)_static_const_table_log; |
|
495 |
|
496 bind(start); |
|
497 subl(rsp, 104); |
|
498 movl(Address(rsp, 40), tmp); |
|
499 lea(tmp, ExternalAddress(static_const_table)); |
|
500 xorpd(xmm2, xmm2); |
|
501 movl(eax, 16368); |
|
502 pinsrw(xmm2, eax, 3); |
|
503 xorpd(xmm3, xmm3); |
|
504 movl(edx, 30704); |
|
505 pinsrw(xmm3, edx, 3); |
|
506 movsd(xmm0, Address(rsp, 112)); |
|
507 movapd(xmm1, xmm0); |
|
508 movl(ecx, 32768); |
|
509 movdl(xmm4, ecx); |
|
510 movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL |
|
511 pextrw(eax, xmm0, 3); |
|
512 por(xmm0, xmm2); |
|
513 psllq(xmm0, 5); |
|
514 movl(ecx, 16352); |
|
515 psrlq(xmm0, 34); |
|
516 rcpss(xmm0, xmm0); |
|
517 psllq(xmm1, 12); |
|
518 pshufd(xmm6, xmm5, 228); |
|
519 psrlq(xmm1, 12); |
|
520 subl(eax, 16); |
|
521 cmpl(eax, 32736); |
|
522 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
|
523 |
|
524 bind(L_2TAG_PACKET_1_0_2); |
|
525 paddd(xmm0, xmm4); |
|
526 por(xmm1, xmm3); |
|
527 movdl(edx, xmm0); |
|
528 psllq(xmm0, 29); |
|
529 pand(xmm5, xmm1); |
|
530 pand(xmm0, xmm6); |
|
531 subsd(xmm1, xmm5); |
|
532 mulpd(xmm5, xmm0); |
|
533 andl(eax, 32752); |
|
534 subl(eax, ecx); |
|
535 cvtsi2sdl(xmm7, eax); |
|
536 mulsd(xmm1, xmm0); |
|
537 movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL |
|
538 movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL |
|
539 subsd(xmm5, xmm2); |
|
540 andl(edx, 16711680); |
|
541 shrl(edx, 12); |
|
542 movdqu(xmm0, Address(tmp, edx)); |
|
543 movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL |
|
544 addsd(xmm1, xmm5); |
|
545 movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL |
|
546 mulsd(xmm6, xmm7); |
|
547 pshufd(xmm5, xmm1, 68); |
|
548 mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL |
|
549 mulsd(xmm3, xmm1); |
|
550 addsd(xmm0, xmm6); |
|
551 mulpd(xmm4, xmm5); |
|
552 mulpd(xmm5, xmm5); |
|
553 pshufd(xmm6, xmm0, 228); |
|
554 addsd(xmm0, xmm1); |
|
555 addpd(xmm4, xmm2); |
|
556 mulpd(xmm3, xmm5); |
|
557 subsd(xmm6, xmm0); |
|
558 mulsd(xmm4, xmm1); |
|
559 pshufd(xmm2, xmm0, 238); |
|
560 addsd(xmm1, xmm6); |
|
561 mulsd(xmm5, xmm5); |
|
562 addsd(xmm7, xmm2); |
|
563 addpd(xmm4, xmm3); |
|
564 addsd(xmm1, xmm7); |
|
565 mulpd(xmm4, xmm5); |
|
566 addsd(xmm1, xmm4); |
|
567 pshufd(xmm5, xmm4, 238); |
|
568 addsd(xmm1, xmm5); |
|
569 addsd(xmm0, xmm1); |
|
570 jmp(L_2TAG_PACKET_2_0_2); |
|
571 |
|
572 bind(L_2TAG_PACKET_0_0_2); |
|
573 movsd(xmm0, Address(rsp, 112)); |
|
574 movdqu(xmm1, xmm0); |
|
575 addl(eax, 16); |
|
576 cmpl(eax, 32768); |
|
577 jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); |
|
578 cmpl(eax, 16); |
|
579 jcc(Assembler::below, L_2TAG_PACKET_4_0_2); |
|
580 |
|
581 bind(L_2TAG_PACKET_5_0_2); |
|
582 addsd(xmm0, xmm0); |
|
583 jmp(L_2TAG_PACKET_2_0_2); |
|
584 |
|
585 bind(L_2TAG_PACKET_6_0_2); |
|
586 jcc(Assembler::above, L_2TAG_PACKET_5_0_2); |
|
587 cmpl(edx, 0); |
|
588 jcc(Assembler::above, L_2TAG_PACKET_5_0_2); |
|
589 jmp(L_2TAG_PACKET_7_0_2); |
|
590 |
|
591 bind(L_2TAG_PACKET_3_0_2); |
|
592 movdl(edx, xmm1); |
|
593 psrlq(xmm1, 32); |
|
594 movdl(ecx, xmm1); |
|
595 addl(ecx, ecx); |
|
596 cmpl(ecx, -2097152); |
|
597 jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); |
|
598 orl(edx, ecx); |
|
599 cmpl(edx, 0); |
|
600 jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); |
|
601 |
|
602 bind(L_2TAG_PACKET_7_0_2); |
|
603 xorpd(xmm1, xmm1); |
|
604 xorpd(xmm0, xmm0); |
|
605 movl(eax, 32752); |
|
606 pinsrw(xmm1, eax, 3); |
|
607 movl(edx, 3); |
|
608 mulsd(xmm0, xmm1); |
|
609 |
|
610 bind(L_2TAG_PACKET_9_0_2); |
|
611 movsd(Address(rsp, 0), xmm0); |
|
612 movsd(xmm0, Address(rsp, 112)); |
|
613 fld_d(Address(rsp, 0)); |
|
614 jmp(L_2TAG_PACKET_10_0_2); |
|
615 |
|
616 bind(L_2TAG_PACKET_8_0_2); |
|
617 xorpd(xmm1, xmm1); |
|
618 xorpd(xmm0, xmm0); |
|
619 movl(eax, 49136); |
|
620 pinsrw(xmm0, eax, 3); |
|
621 divsd(xmm0, xmm1); |
|
622 movl(edx, 2); |
|
623 jmp(L_2TAG_PACKET_9_0_2); |
|
624 |
|
625 bind(L_2TAG_PACKET_4_0_2); |
|
626 movdl(edx, xmm1); |
|
627 psrlq(xmm1, 32); |
|
628 movdl(ecx, xmm1); |
|
629 orl(edx, ecx); |
|
630 cmpl(edx, 0); |
|
631 jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); |
|
632 xorpd(xmm1, xmm1); |
|
633 movl(eax, 18416); |
|
634 pinsrw(xmm1, eax, 3); |
|
635 mulsd(xmm0, xmm1); |
|
636 movapd(xmm1, xmm0); |
|
637 pextrw(eax, xmm0, 3); |
|
638 por(xmm0, xmm2); |
|
639 psllq(xmm0, 5); |
|
640 movl(ecx, 18416); |
|
641 psrlq(xmm0, 34); |
|
642 rcpss(xmm0, xmm0); |
|
643 psllq(xmm1, 12); |
|
644 pshufd(xmm6, xmm5, 228); |
|
645 psrlq(xmm1, 12); |
|
646 jmp(L_2TAG_PACKET_1_0_2); |
|
647 |
|
648 bind(L_2TAG_PACKET_2_0_2); |
|
649 movsd(Address(rsp, 24), xmm0); |
|
650 fld_d(Address(rsp, 24)); |
|
651 |
|
652 bind(L_2TAG_PACKET_10_0_2); |
|
653 movl(tmp, Address(rsp, 40)); |
|
654 } |
|
655 #endif |