|
1 /* |
|
2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. |
|
3 * Copyright (c) 2016, Intel Corporation. All rights reserved. |
|
4 * Intel Math Library (LIBM) Source Code |
|
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
6 * |
|
7 * This code is free software; you can redistribute it and/or modify it |
|
8 * under the terms of the GNU General Public License version 2 only, as |
|
9 * published by the Free Software Foundation. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 |
|
27 package org.graalvm.compiler.lir.amd64; |
|
28 |
|
29 import static jdk.vm.ci.amd64.AMD64.r11; |
|
30 import static jdk.vm.ci.amd64.AMD64.r8; |
|
31 import static jdk.vm.ci.amd64.AMD64.rax; |
|
32 import static jdk.vm.ci.amd64.AMD64.rcx; |
|
33 import static jdk.vm.ci.amd64.AMD64.rdx; |
|
34 import static jdk.vm.ci.amd64.AMD64.rsp; |
|
35 import static jdk.vm.ci.amd64.AMD64.xmm0; |
|
36 import static jdk.vm.ci.amd64.AMD64.xmm1; |
|
37 import static jdk.vm.ci.amd64.AMD64.xmm2; |
|
38 import static jdk.vm.ci.amd64.AMD64.xmm3; |
|
39 import static jdk.vm.ci.amd64.AMD64.xmm4; |
|
40 import static jdk.vm.ci.amd64.AMD64.xmm5; |
|
41 import static jdk.vm.ci.amd64.AMD64.xmm6; |
|
42 import static jdk.vm.ci.amd64.AMD64.xmm7; |
|
43 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant; |
|
44 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress; |
|
45 |
|
46 import org.graalvm.compiler.asm.Label; |
|
47 import org.graalvm.compiler.asm.amd64.AMD64Address; |
|
48 import org.graalvm.compiler.asm.amd64.AMD64Assembler; |
|
49 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; |
|
50 import org.graalvm.compiler.lir.LIRInstructionClass; |
|
51 import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; |
|
52 import org.graalvm.compiler.lir.asm.CompilationResultBuilder; |
|
53 |
|
54 /** |
|
55 * <pre> |
|
56 * ALGORITHM DESCRIPTION - LOG10() |
|
57 * --------------------- |
|
58 * |
|
59 * Let x=2^k * mx, mx in [1,2) |
|
60 * |
|
61 * Get B~1/mx based on the output of rcpss instruction (B0) |
|
62 * B = int((B0*LH*2^7+0.5))/2^7 |
|
63 * LH is a short approximation for log10(e) |
|
64 * |
|
65 * Reduced argument: r=B*mx-LH (computed accurately in high and low parts) |
|
66 * |
|
67 * Result: k*log10(2) - log(B) + p(r) |
|
68 * p(r) is a degree 7 polynomial |
|
69 * -log(B) read from data table (high, low parts) |
|
70 * Result is formed from high and low parts. |
|
71 * |
|
72 * Special cases: |
|
73 * log10(0) = -INF with divide-by-zero exception raised |
|
74 * log10(1) = +0 |
|
75 * log10(x) = NaN with invalid exception raised if x < -0, including -INF |
|
76 * log10(+INF) = +INF |
|
77 * </pre> |
|
78 */ |
|
79 public final class AMD64MathLog10Op extends AMD64MathIntrinsicUnaryOp { |
|
80 |
|
81 public static final LIRInstructionClass<AMD64MathLog10Op> TYPE = LIRInstructionClass.create(AMD64MathLog10Op.class); |
|
82 |
|
83 public AMD64MathLog10Op() { |
|
84 super(TYPE, /* GPR */ rax, rcx, rdx, r8, r11, |
|
85 /* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7); |
|
86 } |
|
87 |
|
88 private ArrayDataPointerConstant highsigmask = pointerConstant(16, new int[]{ |
|
89 // @formatter:off |
|
90 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000 |
|
91 // @formatter:on |
|
92 }); |
|
93 |
|
94 private ArrayDataPointerConstant log10E = pointerConstant(8, new int[]{ |
|
95 // @formatter:off |
|
96 0x00000000, 0x3fdbc000, |
|
97 }); |
|
98 private ArrayDataPointerConstant log10E8 = pointerConstant(8, new int[]{ |
|
99 0xbf2e4108, 0x3f5a7a6c |
|
100 // @formatter:on |
|
101 }); |
|
102 |
|
103 private ArrayDataPointerConstant lTbl = pointerConstant(16, new int[]{ |
|
104 // @formatter:off |
|
105 0x509f7800, 0x3fd34413, 0x1f12b358, 0x3d1fef31, 0x80333400, |
|
106 0x3fd32418, 0xc671d9d0, 0xbcf542bf, 0x51195000, 0x3fd30442, |
|
107 0x78a4b0c3, 0x3d18216a, 0x6fc79400, 0x3fd2e490, 0x80fa389d, |
|
108 0xbc902869, 0x89d04000, 0x3fd2c502, 0x75c2f564, 0x3d040754, |
|
109 0x4ddd1c00, 0x3fd2a598, 0xd219b2c3, 0xbcfa1d84, 0x6baa7c00, |
|
110 0x3fd28651, 0xfd9abec1, 0x3d1be6d3, 0x94028800, 0x3fd2672d, |
|
111 0xe289a455, 0xbd1ede5e, 0x78b86400, 0x3fd2482c, 0x6734d179, |
|
112 0x3d1fe79b, 0xcca3c800, 0x3fd2294d, 0x981a40b8, 0xbced34ea, |
|
113 0x439c5000, 0x3fd20a91, 0xcc392737, 0xbd1a9cc3, 0x92752c00, |
|
114 0x3fd1ebf6, 0x03c9afe7, 0x3d1e98f8, 0x6ef8dc00, 0x3fd1cd7d, |
|
115 0x71dae7f4, 0x3d08a86c, 0x8fe4dc00, 0x3fd1af25, 0xee9185a1, |
|
116 0xbcff3412, 0xace59400, 0x3fd190ee, 0xc2cab353, 0x3cf17ed9, |
|
117 0x7e925000, 0x3fd172d8, 0x6952c1b2, 0x3cf1521c, 0xbe694400, |
|
118 0x3fd154e2, 0xcacb79ca, 0xbd0bdc78, 0x26cbac00, 0x3fd1370d, |
|
119 0xf71f4de1, 0xbd01f8be, 0x72fa0800, 0x3fd11957, 0x55bf910b, |
|
120 0x3c946e2b, 0x5f106000, 0x3fd0fbc1, 0x39e639c1, 0x3d14a84b, |
|
121 0xa802a800, 0x3fd0de4a, 0xd3f31d5d, 0xbd178385, 0x0b992000, |
|
122 0x3fd0c0f3, 0x3843106f, 0xbd1f602f, 0x486ce800, 0x3fd0a3ba, |
|
123 0x8819497c, 0x3cef987a, 0x1de49400, 0x3fd086a0, 0x1caa0467, |
|
124 0x3d0faec7, 0x4c30cc00, 0x3fd069a4, 0xa4424372, 0xbd1618fc, |
|
125 0x94490000, 0x3fd04cc6, 0x946517d2, 0xbd18384b, 0xb7e84000, |
|
126 0x3fd03006, 0xe0109c37, 0xbd19a6ac, 0x798a0c00, 0x3fd01364, |
|
127 0x5121e864, 0xbd164cf7, 0x38ce8000, 0x3fcfedbf, 0x46214d1a, |
|
128 0xbcbbc402, 0xc8e62000, 0x3fcfb4ef, 0xdab93203, 0x3d1e0176, |
|
129 0x2cb02800, 0x3fcf7c5a, 0x2a2ea8e4, 0xbcfec86a, 0xeeeaa000, |
|
130 0x3fcf43fd, 0xc18e49a4, 0x3cf110a8, 0x9bb6e800, 0x3fcf0bda, |
|
131 0x923cc9c0, 0xbd15ce99, 0xc093f000, 0x3fced3ef, 0x4d4b51e9, |
|
132 0x3d1a04c7, 0xec58f800, 0x3fce9c3c, 0x163cad59, 0x3cac8260, |
|
133 0x9a907000, 0x3fce2d7d, 0x3fa93646, 0x3ce4a1c0, 0x37311000, |
|
134 0x3fcdbf99, 0x32abd1fd, 0x3d07ea9d, 0x6744b800, 0x3fcd528c, |
|
135 0x4dcbdfd4, 0xbd1b08e2, 0xe36de800, 0x3fcce653, 0x0b7b7f7f, |
|
136 0xbd1b8f03, 0x77506800, 0x3fcc7aec, 0xa821c9fb, 0x3d13c163, |
|
137 0x00ff8800, 0x3fcc1053, 0x536bca76, 0xbd074ee5, 0x70719800, |
|
138 0x3fcba684, 0xd7da9b6b, 0xbd1fbf16, 0xc6f8d800, 0x3fcb3d7d, |
|
139 0xe2220bb3, 0x3d1a295d, 0x16c15800, 0x3fcad53c, 0xe724911e, |
|
140 0xbcf55822, 0x82533800, 0x3fca6dbc, 0x6d982371, 0x3cac567c, |
|
141 0x3c19e800, 0x3fca06fc, 0x84d17d80, 0x3d1da204, 0x85ef8000, |
|
142 0x3fc9a0f8, 0x54466a6a, 0xbd002204, 0xb0ac2000, 0x3fc93bae, |
|
143 0xd601fd65, 0x3d18840c, 0x1bb9b000, 0x3fc8d71c, 0x7bf58766, |
|
144 0xbd14f897, 0x34aae800, 0x3fc8733e, 0x3af6ac24, 0xbd0f5c45, |
|
145 0x76d68000, 0x3fc81012, 0x4303e1a1, 0xbd1f9a80, 0x6af57800, |
|
146 0x3fc7ad96, 0x43fbcb46, 0x3cf4c33e, 0xa6c51000, 0x3fc74bc7, |
|
147 0x70f0eac5, 0xbd192e3b, 0xccab9800, 0x3fc6eaa3, 0xc0093dfe, |
|
148 0xbd0faf15, 0x8b60b800, 0x3fc68a28, 0xde78d5fd, 0xbc9ea4ee, |
|
149 0x9d987000, 0x3fc62a53, 0x962bea6e, 0xbd194084, 0xc9b0e800, |
|
150 0x3fc5cb22, 0x888dd999, 0x3d1fe201, 0xe1634800, 0x3fc56c93, |
|
151 0x16ada7ad, 0x3d1b1188, 0xc176c000, 0x3fc50ea4, 0x4159b5b5, |
|
152 0xbcf09c08, 0x51766000, 0x3fc4b153, 0x84393d23, 0xbcf6a89c, |
|
153 0x83695000, 0x3fc4549d, 0x9f0b8bbb, 0x3d1c4b8c, 0x538d5800, |
|
154 0x3fc3f881, 0xf49df747, 0x3cf89b99, 0xc8138000, 0x3fc39cfc, |
|
155 0xd503b834, 0xbd13b99f, 0xf0df0800, 0x3fc3420d, 0xf011b386, |
|
156 0xbd05d8be, 0xe7466800, 0x3fc2e7b2, 0xf39c7bc2, 0xbd1bb94e, |
|
157 0xcdd62800, 0x3fc28de9, 0x05e6d69b, 0xbd10ed05, 0xd015d800, |
|
158 0x3fc234b0, 0xe29b6c9d, 0xbd1ff967, 0x224ea800, 0x3fc1dc06, |
|
159 0x727711fc, 0xbcffb30d, 0x01540000, 0x3fc183e8, 0x39786c5a, |
|
160 0x3cc23f57, 0xb24d9800, 0x3fc12c54, 0xc905a342, 0x3d003a1d, |
|
161 0x82835800, 0x3fc0d54a, 0x9b9920c0, 0x3d03b25a, 0xc72ac000, |
|
162 0x3fc07ec7, 0x46f26a24, 0x3cf0fa41, 0xdd35d800, 0x3fc028ca, |
|
163 0x41d9d6dc, 0x3d034a65, 0x52474000, 0x3fbfa6a4, 0x44f66449, |
|
164 0x3d19cad3, 0x2da3d000, 0x3fbefcb8, 0x67832999, 0x3d18400f, |
|
165 0x32a10000, 0x3fbe53ce, 0x9c0e3b1a, 0xbcff62fd, 0x556b7000, |
|
166 0x3fbdabe3, 0x02976913, 0xbcf8243b, 0x97e88000, 0x3fbd04f4, |
|
167 0xec793797, 0x3d1c0578, 0x09647000, 0x3fbc5eff, 0x05fc0565, |
|
168 0xbd1d799e, 0xc6426000, 0x3fbbb9ff, 0x4625f5ed, 0x3d1f5723, |
|
169 0xf7afd000, 0x3fbb15f3, 0xdd5aae61, 0xbd1a7e1e, 0xd358b000, |
|
170 0x3fba72d8, 0x3314e4d3, 0x3d17bc91, 0x9b1f5000, 0x3fb9d0ab, |
|
171 0x9a4d514b, 0x3cf18c9b, 0x9cd4e000, 0x3fb92f69, 0x7e4496ab, |
|
172 0x3cf1f96d, 0x31f4f000, 0x3fb88f10, 0xf56479e7, 0x3d165818, |
|
173 0xbf628000, 0x3fb7ef9c, 0x26bf486d, 0xbd1113a6, 0xb526b000, |
|
174 0x3fb7510c, 0x1a1c3384, 0x3ca9898d, 0x8e31e000, 0x3fb6b35d, |
|
175 0xb3875361, 0xbd0661ac, 0xd01de000, 0x3fb6168c, 0x2a7cacfa, |
|
176 0xbd1bdf10, 0x0af23000, 0x3fb57a98, 0xff868816, 0x3cf046d0, |
|
177 0xd8ea0000, 0x3fb4df7c, 0x1515fbe7, 0xbd1fd529, 0xde3b2000, |
|
178 0x3fb44538, 0x6e59a132, 0x3d1faeee, 0xc8df9000, 0x3fb3abc9, |
|
179 0xf1322361, 0xbd198807, 0x505f1000, 0x3fb3132d, 0x0888e6ab, |
|
180 0x3d1e5380, 0x359bd000, 0x3fb27b61, 0xdfbcbb22, 0xbcfe2724, |
|
181 0x429ee000, 0x3fb1e463, 0x6eb4c58c, 0xbcfe4dd6, 0x4a673000, |
|
182 0x3fb14e31, 0x4ce1ac9b, 0x3d1ba691, 0x28b96000, 0x3fb0b8c9, |
|
183 0x8c7813b8, 0xbd0b3872, 0xc1f08000, 0x3fb02428, 0xc2bc8c2c, |
|
184 0x3cb5ea6b, 0x05a1a000, 0x3faf209c, 0x72e8f18e, 0xbce8df84, |
|
185 0xc0b5e000, 0x3fadfa6d, 0x9fdef436, 0x3d087364, 0xaf416000, |
|
186 0x3facd5c2, 0x1068c3a9, 0x3d0827e7, 0xdb356000, 0x3fabb296, |
|
187 0x120a34d3, 0x3d101a9f, 0x5dfea000, 0x3faa90e6, 0xdaded264, |
|
188 0xbd14c392, 0x6034c000, 0x3fa970ad, 0x1c9d06a9, 0xbd1b705e, |
|
189 0x194c6000, 0x3fa851e8, 0x83996ad9, 0xbd0117bc, 0xcf4ac000, |
|
190 0x3fa73492, 0xb1a94a62, 0xbca5ea42, 0xd67b4000, 0x3fa618a9, |
|
191 0x75aed8ca, 0xbd07119b, 0x9126c000, 0x3fa4fe29, 0x5291d533, |
|
192 0x3d12658f, 0x6f4d4000, 0x3fa3e50e, 0xcd2c5cd9, 0x3d1d5c70, |
|
193 0xee608000, 0x3fa2cd54, 0xd1008489, 0x3d1a4802, 0x9900e000, |
|
194 0x3fa1b6f9, 0x54fb5598, 0xbd16593f, 0x06bb6000, 0x3fa0a1f9, |
|
195 0x64ef57b4, 0xbd17636b, 0xb7940000, 0x3f9f1c9f, 0xee6a4737, |
|
196 0x3cb5d479, 0x91aa0000, 0x3f9cf7f5, 0x3a16373c, 0x3d087114, |
|
197 0x156b8000, 0x3f9ad5ed, 0x836c554a, 0x3c6900b0, 0xd4764000, |
|
198 0x3f98b67f, 0xed12f17b, 0xbcffc974, 0x77dec000, 0x3f9699a7, |
|
199 0x232ce7ea, 0x3d1e35bb, 0xbfbf4000, 0x3f947f5d, 0xd84ffa6e, |
|
200 0x3d0e0a49, 0x82c7c000, 0x3f92679c, 0x8d170e90, 0xbd14d9f2, |
|
201 0xadd20000, 0x3f90525d, 0x86d9f88e, 0x3cdeb986, 0x86f10000, |
|
202 0x3f8c7f36, 0xb9e0a517, 0x3ce29faa, 0xb75c8000, 0x3f885e9e, |
|
203 0x542568cb, 0xbd1f7bdb, 0x46b30000, 0x3f8442e8, 0xb954e7d9, |
|
204 0x3d1e5287, 0xb7e60000, 0x3f802c07, 0x22da0b17, 0xbd19fb27, |
|
205 0x6c8b0000, 0x3f7833e3, 0x821271ef, 0xbd190f96, 0x29910000, |
|
206 0x3f701936, 0xbc3491a5, 0xbd1bcf45, 0x354a0000, 0x3f600fe3, |
|
207 0xc0ff520a, 0xbd19d71c, 0x00000000, 0x00000000, 0x00000000, |
|
208 0x00000000 |
|
209 // @formatter:on |
|
210 }); |
|
211 |
|
212 private ArrayDataPointerConstant log2 = pointerConstant(8, new int[]{ |
|
213 // @formatter:off |
|
214 0x509f7800, 0x3f934413, |
|
215 }); |
|
216 private ArrayDataPointerConstant log28 = pointerConstant(8, new int[]{ |
|
217 0x1f12b358, 0x3cdfef31 |
|
218 // @formatter:on |
|
219 }); |
|
220 |
|
221 private ArrayDataPointerConstant coeff = pointerConstant(16, new int[]{ |
|
222 // @formatter:off |
|
223 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309, |
|
224 }); |
|
225 private ArrayDataPointerConstant coeff16 = pointerConstant(16, new int[]{ |
|
226 0x385593b1, 0xc025c917, 0xdc963467, 0x3ffc6a02, |
|
227 }); |
|
228 private ArrayDataPointerConstant coeff32 = pointerConstant(16, new int[]{ |
|
229 0x7f9d3aa1, 0x4016ab9f, 0xdc77b115, 0xbff27af2 |
|
230 // @formatter:on |
|
231 }); |
|
232 |
|
233 @Override |
|
234 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { |
|
235 // Registers: |
|
236 // input: xmm0 |
|
237 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
|
238 // rax, rdx, rcx, tmp - r11 |
|
239 // Code generated by Intel C compiler for LIBM library |
|
240 |
|
241 Label block0 = new Label(); |
|
242 Label block1 = new Label(); |
|
243 Label block2 = new Label(); |
|
244 Label block3 = new Label(); |
|
245 Label block4 = new Label(); |
|
246 Label block5 = new Label(); |
|
247 Label block6 = new Label(); |
|
248 Label block7 = new Label(); |
|
249 Label block8 = new Label(); |
|
250 Label block9 = new Label(); |
|
251 |
|
252 masm.subq(rsp, 24); |
|
253 masm.movsd(new AMD64Address(rsp, 0), xmm0); |
|
254 |
|
255 masm.xorpd(xmm2, xmm2); |
|
256 masm.movl(rax, 16368); |
|
257 masm.pinsrw(xmm2, rax, 3); |
|
258 masm.movl(rcx, 1054736384); |
|
259 masm.movdl(xmm7, rcx); |
|
260 masm.xorpd(xmm3, xmm3); |
|
261 masm.movl(rdx, 30704); |
|
262 masm.pinsrw(xmm3, rdx, 3); |
|
263 masm.movdqu(xmm1, xmm0); |
|
264 masm.movl(rdx, 32768); |
|
265 masm.movdl(xmm4, rdx); |
|
266 masm.movdqu(xmm5, recordExternalAddress(crb, highsigmask)); // 0xf8000000, 0xffffffff, |
|
267 // 0x00000000, 0xffffe000 |
|
268 masm.pextrw(rax, xmm0, 3); |
|
269 masm.por(xmm0, xmm2); |
|
270 masm.movl(rcx, 16352); |
|
271 masm.psrlq(xmm0, 27); |
|
272 masm.movdqu(xmm2, recordExternalAddress(crb, log10E)); // 0x00000000, 0x3fdbc000, |
|
273 // 0xbf2e4108, 0x3f5a7a6c |
|
274 masm.psrld(xmm0, 2); |
|
275 masm.rcpps(xmm0, xmm0); |
|
276 masm.psllq(xmm1, 12); |
|
277 masm.pshufd(xmm6, xmm5, 78); |
|
278 masm.psrlq(xmm1, 12); |
|
279 masm.subl(rax, 16); |
|
280 masm.cmpl(rax, 32736); |
|
281 masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block0); |
|
282 |
|
283 masm.bind(block1); |
|
284 masm.mulss(xmm0, xmm7); |
|
285 masm.por(xmm1, xmm3); |
|
286 masm.leaq(r11, recordExternalAddress(crb, lTbl)); |
|
287 masm.andpd(xmm5, xmm1); |
|
288 masm.paddd(xmm0, xmm4); |
|
289 masm.subsd(xmm1, xmm5); |
|
290 masm.movdl(rdx, xmm0); |
|
291 masm.psllq(xmm0, 29); |
|
292 masm.andpd(xmm0, xmm6); |
|
293 masm.andl(rax, 32752); |
|
294 masm.subl(rax, rcx); |
|
295 masm.cvtsi2sdl(xmm7, rax); |
|
296 masm.mulpd(xmm5, xmm0); |
|
297 masm.mulsd(xmm1, xmm0); |
|
298 masm.movq(xmm6, recordExternalAddress(crb, log2)); // 0x509f7800, 0x3f934413, |
|
299 // 0x1f12b358, 0x3cdfef31 |
|
300 masm.movdqu(xmm3, recordExternalAddress(crb, coeff)); // 0xc1a5f12e, 0x40358874, |
|
301 // 0x64d4ef0d, 0xc0089309 |
|
302 masm.subsd(xmm5, xmm2); |
|
303 masm.andl(rdx, 16711680); |
|
304 masm.shrl(rdx, 12); |
|
305 masm.movdqu(xmm0, new AMD64Address(r11, rdx, AMD64Address.Scale.Times1, -1504)); |
|
306 masm.movdqu(xmm4, recordExternalAddress(crb, coeff16)); // 0x385593b1, 0xc025c917, |
|
307 // 0xdc963467, 0x3ffc6a02 |
|
308 masm.addsd(xmm1, xmm5); |
|
309 masm.movdqu(xmm2, recordExternalAddress(crb, coeff32)); // 0x7f9d3aa1, 0x4016ab9f, |
|
310 // 0xdc77b115, 0xbff27af2 |
|
311 masm.mulsd(xmm6, xmm7); |
|
312 masm.pshufd(xmm5, xmm1, 68); |
|
313 masm.mulsd(xmm7, recordExternalAddress(crb, log28)); // 0x1f12b358, 0x3cdfef31 |
|
314 masm.mulsd(xmm3, xmm1); |
|
315 masm.addsd(xmm0, xmm6); |
|
316 masm.mulpd(xmm4, xmm5); |
|
317 masm.movq(xmm6, recordExternalAddress(crb, log10E8)); // 0xbf2e4108, 0x3f5a7a6c |
|
318 masm.mulpd(xmm5, xmm5); |
|
319 masm.addpd(xmm4, xmm2); |
|
320 masm.mulpd(xmm3, xmm5); |
|
321 masm.pshufd(xmm2, xmm0, 228); |
|
322 masm.addsd(xmm0, xmm1); |
|
323 masm.mulsd(xmm4, xmm1); |
|
324 masm.subsd(xmm2, xmm0); |
|
325 masm.mulsd(xmm6, xmm1); |
|
326 masm.addsd(xmm1, xmm2); |
|
327 masm.pshufd(xmm2, xmm0, 238); |
|
328 masm.mulsd(xmm5, xmm5); |
|
329 masm.addsd(xmm7, xmm2); |
|
330 masm.addsd(xmm1, xmm6); |
|
331 masm.addpd(xmm4, xmm3); |
|
332 masm.addsd(xmm1, xmm7); |
|
333 masm.mulpd(xmm4, xmm5); |
|
334 masm.addsd(xmm1, xmm4); |
|
335 masm.pshufd(xmm5, xmm4, 238); |
|
336 masm.addsd(xmm1, xmm5); |
|
337 masm.addsd(xmm0, xmm1); |
|
338 masm.jmp(block9); |
|
339 |
|
340 masm.bind(block0); |
|
341 masm.movq(xmm0, new AMD64Address(rsp, 0)); |
|
342 masm.movq(xmm1, new AMD64Address(rsp, 0)); |
|
343 masm.addl(rax, 16); |
|
344 masm.cmpl(rax, 32768); |
|
345 masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block2); |
|
346 masm.cmpl(rax, 16); |
|
347 masm.jcc(AMD64Assembler.ConditionFlag.Below, block3); |
|
348 |
|
349 masm.bind(block4); |
|
350 masm.addsd(xmm0, xmm0); |
|
351 masm.jmp(block9); |
|
352 |
|
353 masm.bind(block5); |
|
354 masm.jcc(AMD64Assembler.ConditionFlag.Above, block4); |
|
355 masm.cmpl(rdx, 0); |
|
356 masm.jcc(AMD64Assembler.ConditionFlag.Above, block4); |
|
357 masm.jmp(block6); |
|
358 |
|
359 masm.bind(block3); |
|
360 masm.xorpd(xmm1, xmm1); |
|
361 masm.addsd(xmm1, xmm0); |
|
362 masm.movdl(rdx, xmm1); |
|
363 masm.psrlq(xmm1, 32); |
|
364 masm.movdl(rcx, xmm1); |
|
365 masm.orl(rdx, rcx); |
|
366 masm.cmpl(rdx, 0); |
|
367 masm.jcc(AMD64Assembler.ConditionFlag.Equal, block7); |
|
368 masm.xorpd(xmm1, xmm1); |
|
369 masm.movl(rax, 18416); |
|
370 masm.pinsrw(xmm1, rax, 3); |
|
371 masm.mulsd(xmm0, xmm1); |
|
372 masm.xorpd(xmm2, xmm2); |
|
373 masm.movl(rax, 16368); |
|
374 masm.pinsrw(xmm2, rax, 3); |
|
375 masm.movdqu(xmm1, xmm0); |
|
376 masm.pextrw(rax, xmm0, 3); |
|
377 masm.por(xmm0, xmm2); |
|
378 masm.movl(rcx, 18416); |
|
379 masm.psrlq(xmm0, 27); |
|
380 masm.movdqu(xmm2, recordExternalAddress(crb, log10E)); // 0x00000000, 0x3fdbc000, |
|
381 // 0xbf2e4108, 0x3f5a7a6c |
|
382 masm.psrld(xmm0, 2); |
|
383 masm.rcpps(xmm0, xmm0); |
|
384 masm.psllq(xmm1, 12); |
|
385 masm.pshufd(xmm6, xmm5, 78); |
|
386 masm.psrlq(xmm1, 12); |
|
387 masm.jmp(block1); |
|
388 |
|
389 masm.bind(block2); |
|
390 masm.movdl(rdx, xmm1); |
|
391 masm.psrlq(xmm1, 32); |
|
392 masm.movdl(rcx, xmm1); |
|
393 masm.addl(rcx, rcx); |
|
394 masm.cmpl(rcx, -2097152); |
|
395 masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block5); |
|
396 masm.orl(rdx, rcx); |
|
397 masm.cmpl(rdx, 0); |
|
398 masm.jcc(AMD64Assembler.ConditionFlag.Equal, block7); |
|
399 |
|
400 masm.bind(block6); |
|
401 masm.xorpd(xmm1, xmm1); |
|
402 masm.xorpd(xmm0, xmm0); |
|
403 masm.movl(rax, 32752); |
|
404 masm.pinsrw(xmm1, rax, 3); |
|
405 masm.mulsd(xmm0, xmm1); |
|
406 masm.movl(new AMD64Address(rsp, 16), 9); |
|
407 masm.jmp(block8); |
|
408 |
|
409 masm.bind(block7); |
|
410 masm.xorpd(xmm1, xmm1); |
|
411 masm.xorpd(xmm0, xmm0); |
|
412 masm.movl(rax, 49136); |
|
413 masm.pinsrw(xmm0, rax, 3); |
|
414 masm.divsd(xmm0, xmm1); |
|
415 masm.movl(new AMD64Address(rsp, 16), 8); |
|
416 |
|
417 masm.bind(block8); |
|
418 masm.movq(new AMD64Address(rsp, 8), xmm0); |
|
419 masm.movq(xmm0, new AMD64Address(rsp, 8)); |
|
420 |
|
421 masm.bind(block9); |
|
422 masm.addq(rsp, 24); |
|
423 } |
|
424 } |