hotspot/src/cpu/x86/vm/macroAssembler_x86_log.cpp
changeset 38018 1dc6c6f21231
equal deleted inserted replaced
38017:55047d16f141 38018:1dc6c6f21231
       
     1 /*
       
     2 * Copyright (c) 2016, Intel Corporation.
       
     3 * Intel Math Library (LIBM) Source Code
       
     4 *
       
     5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     6 *
       
     7 * This code is free software; you can redistribute it and/or modify it
       
     8 * under the terms of the GNU General Public License version 2 only, as
       
     9 * published by the Free Software Foundation.
       
    10 *
       
    11 * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14 * version 2 for more details (a copy is included in the LICENSE file that
       
    15 * accompanied this code).
       
    16 *
       
    17 * You should have received a copy of the GNU General Public License version
       
    18 * 2 along with this work; if not, write to the Free Software Foundation,
       
    19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20 *
       
    21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22 * or visit www.oracle.com if you need additional information or have any
       
    23 * questions.
       
    24 *
       
    25 */
       
    26 
       
    27 #include "precompiled.hpp"
       
    28 #include "asm/assembler.hpp"
       
    29 #include "asm/assembler.inline.hpp"
       
    30 #include "macroAssembler_x86.hpp"
       
    31 
       
    32 #ifdef _MSC_VER
       
    33 #define ALIGNED_(x) __declspec(align(x))
       
    34 #else
       
    35 #define ALIGNED_(x) __attribute__ ((aligned(x)))
       
    36 #endif
       
    37 
       
    38 /******************************************************************************/
       
    39 //                     ALGORITHM DESCRIPTION - LOG()
       
    40 //                     ---------------------
       
    41 //
       
    42 //    x=2^k * mx, mx in [1,2)
       
    43 //
       
    44 //    Get B~1/mx based on the output of rcpss instruction (B0)
       
    45 //    B = int((B0*2^7+0.5))/2^7
       
    46 //
       
    47 //    Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
       
    48 //
       
    49 //    Result:  k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6)  and
       
    50 //             p(r) is a degree 7 polynomial
       
    51 //             -log(B) read from data table (high, low parts)
       
    52 //             Result is formed from high and low parts
       
    53 //
       
    54 // Special cases:
       
    55 //  log(NaN) = quiet NaN, and raise invalid exception
       
    56 //  log(+INF) = that INF
       
    57 //  log(0) = -INF with divide-by-zero exception raised
       
    58 //  log(1) = +0
       
    59 //  log(x) = NaN with invalid exception raised if x < -0, including -INF
       
    60 //
       
    61 /******************************************************************************/
       
    62 
       
    63 #ifdef _LP64
       
     64 // The 64 bit code is at most SSE2 compliant
       
        // _L_tbl: lookup table read by the 64-bit fast_log() with
        //   movdqu(xmm0, Address(tmp2, edx))
        // where edx has been masked with 0x00ff0000 and shifted right by 12, so the
        // byte offset is always a multiple of 16.  The array holds 516 32-bit words,
        // i.e. 129 entries of 16 bytes.  Each entry is a pair of doubles stored low
        // word first; per the algorithm description above these are the high and low
        // parts of -log(B).  Entry 0 starts with 0x3fe62e42fefa3800 (the leading bits
        // of log(2)); the last entry is (+0.0, -0.0).
     65 ALIGNED_(16) juint _L_tbl[] =
       
     66 {
       
     67     0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
       
     68     0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
       
     69     0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
       
     70     0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
       
     71     0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
       
     72     0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
       
     73     0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
       
     74     0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
       
     75     0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
       
     76     0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
       
     77     0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
       
     78     0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
       
     79     0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
       
     80     0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
       
     81     0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
       
     82     0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
       
     83     0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
       
     84     0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
       
     85     0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
       
     86     0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
       
     87     0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
       
     88     0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
       
     89     0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
       
     90     0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
       
     91     0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
       
     92     0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
       
     93     0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
       
     94     0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
       
     95     0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
       
     96     0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
       
     97     0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
       
     98     0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
       
     99     0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
       
    100     0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
       
    101     0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
       
    102     0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
       
    103     0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
       
    104     0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
       
    105     0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
       
    106     0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
       
    107     0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
       
    108     0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
       
    109     0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
       
    110     0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
       
    111     0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
       
    112     0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
       
    113     0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
       
    114     0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
       
    115     0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
       
    116     0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
       
    117     0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
       
    118     0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
       
    119     0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
       
    120     0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
       
    121     0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
       
    122     0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
       
    123     0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
       
    124     0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
       
    125     0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
       
    126     0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
       
    127     0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
       
    128     0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
       
    129     0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
       
    130     0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
       
    131     0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
       
    132     0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
       
    133     0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
       
    134     0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
       
    135     0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
       
    136     0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
       
    137     0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
       
    138     0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
       
    139     0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
       
    140     0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
       
    141     0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
       
    142     0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
       
    143     0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
       
    144     0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
       
    145     0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
       
    146     0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
       
    147     0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
       
    148     0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
       
    149     0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
       
    150     0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
       
    151     0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
       
    152     0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
       
    153     0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
       
    154     0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
       
    155     0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
       
    156     0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
       
    157     0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
       
    158     0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
       
    159     0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
       
    160     0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
       
    161     0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
       
    162     0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
       
    163     0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
       
    164     0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
       
    165     0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
       
    166     0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
       
    167     0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
       
    168     0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
       
    169     0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
       
    170     0x80000000UL
       
    171 };
       
    172 
       
        // _log2: two doubles stored low word first: 0x3fa62e42fefa3800 (high part,
        // read at offset 0) and 0x3ceef35793c76730 (low part, read at offset 8).
        // The 64-bit fast_log() multiplies both by the integer 16 * (unbiased
        // exponent of x), so the pair acts as log(2)/16 split into high and low parts.
    173 ALIGNED_(16) juint _log2[] =
       
    174 {
       
    175     0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
       
    176 };
       
    177 
       
        // _coeff: six doubles (stored low word first) that the 64-bit fast_log()
        // loads pairwise at byte offsets 0, 16 and 32:
        //   +0 : 0x3fc2492492492492 (~  1/7),  0xbfd0000000000000 (-1/4)
        //   +16: 0xbfc5555e3d6fb175 (~ -1/6),  0x3fd5555555555555 (~ 1/3)
        //   +32: 0x3fc999999999999a (~  1/5),  0xbfe0000000000000 (-1/2)
        // These are the coefficients of the degree 7 polynomial p(r) named in the
        // algorithm description (the linear term r is added separately).
    178 ALIGNED_(16) juint _coeff[] =
       
    179 {
       
    180     0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
       
    181     0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
       
    182     0x00000000UL, 0xbfe00000UL
       
    183 };
       
   184 
       
   185 //registers,
       
   186 // input: xmm0
       
   187 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
       
   188 //          rax, rdx, rcx, r8, r11
       
   189 
       
   190 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
       
   191   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
       
   192   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
       
   193   Label L_2TAG_PACKET_8_0_2;
       
   194   Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
       
   195 
       
   196   assert_different_registers(tmp1, tmp2, eax, ecx, edx);
       
   197   jmp(start);
       
   198   address L_tbl = (address)_L_tbl;
       
   199   address log2 = (address)_log2;
       
   200   address coeff = (address)_coeff;
       
   201 
       
   202   bind(start);
       
   203   subq(rsp, 24);
       
   204   movsd(Address(rsp, 0), xmm0);
       
   205   mov64(rax, 0x3ff0000000000000);
       
   206   movdq(xmm2, rax);
       
   207   mov64(rdx, 0x77f0000000000000);
       
   208   movdq(xmm3, rdx);
       
   209   movl(ecx, 32768);
       
   210   movdl(xmm4, rcx);
       
   211   mov64(tmp1, 0xffffe00000000000);
       
   212   movdq(xmm5, tmp1);
       
   213   movdqu(xmm1, xmm0);
       
   214   pextrw(eax, xmm0, 3);
       
   215   por(xmm0, xmm2);
       
   216   movl(ecx, 16352);
       
   217   psrlq(xmm0, 27);
       
   218   lea(tmp2, ExternalAddress(L_tbl));
       
   219   psrld(xmm0, 2);
       
   220   rcpps(xmm0, xmm0);
       
   221   psllq(xmm1, 12);
       
   222   pshufd(xmm6, xmm5, 228);
       
   223   psrlq(xmm1, 12);
       
   224   subl(eax, 16);
       
   225   cmpl(eax, 32736);
       
   226   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
       
   227 
       
   228   bind(L_2TAG_PACKET_1_0_2);
       
   229   paddd(xmm0, xmm4);
       
   230   por(xmm1, xmm3);
       
   231   movdl(edx, xmm0);
       
   232   psllq(xmm0, 29);
       
   233   pand(xmm5, xmm1);
       
   234   pand(xmm0, xmm6);
       
   235   subsd(xmm1, xmm5);
       
   236   mulpd(xmm5, xmm0);
       
   237   andl(eax, 32752);
       
   238   subl(eax, ecx);
       
   239   cvtsi2sdl(xmm7, eax);
       
   240   mulsd(xmm1, xmm0);
       
   241   movq(xmm6, ExternalAddress(log2));       // 0xfefa3800UL, 0x3fa62e42UL
       
   242   movdqu(xmm3, ExternalAddress(coeff));    // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
       
   243   subsd(xmm5, xmm2);
       
   244   andl(edx, 16711680);
       
   245   shrl(edx, 12);
       
   246   movdqu(xmm0, Address(tmp2, edx));
       
   247   movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
       
   248   addsd(xmm1, xmm5);
       
   249   movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
       
   250   mulsd(xmm6, xmm7);
       
   251   if (VM_Version::supports_sse3()) {
       
   252     movddup(xmm5, xmm1);
       
   253   }
       
   254   else {
       
   255     movdqu(xmm5, xmm1);
       
   256     movlhps(xmm5, xmm5);
       
   257   }
       
   258   mulsd(xmm7, ExternalAddress(8 + log2));    // 0x93c76730UL, 0x3ceef357UL
       
   259   mulsd(xmm3, xmm1);
       
   260   addsd(xmm0, xmm6);
       
   261   mulpd(xmm4, xmm5);
       
   262   mulpd(xmm5, xmm5);
       
   263   if (VM_Version::supports_sse3()) {
       
   264     movddup(xmm6, xmm0);
       
   265   }
       
   266   else {
       
   267     movdqu(xmm6, xmm0);
       
   268     movlhps(xmm6, xmm6);
       
   269   }
       
   270   addsd(xmm0, xmm1);
       
   271   addpd(xmm4, xmm2);
       
   272   mulpd(xmm3, xmm5);
       
   273   subsd(xmm6, xmm0);
       
   274   mulsd(xmm4, xmm1);
       
   275   pshufd(xmm2, xmm0, 238);
       
   276   addsd(xmm1, xmm6);
       
   277   mulsd(xmm5, xmm5);
       
   278   addsd(xmm7, xmm2);
       
   279   addpd(xmm4, xmm3);
       
   280   addsd(xmm1, xmm7);
       
   281   mulpd(xmm4, xmm5);
       
   282   addsd(xmm1, xmm4);
       
   283   pshufd(xmm5, xmm4, 238);
       
   284   addsd(xmm1, xmm5);
       
   285   addsd(xmm0, xmm1);
       
   286   jmp(B1_5);
       
   287 
       
   288   bind(L_2TAG_PACKET_0_0_2);
       
   289   movq(xmm0, Address(rsp, 0));
       
   290   movq(xmm1, Address(rsp, 0));
       
   291   addl(eax, 16);
       
   292   cmpl(eax, 32768);
       
   293   jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
       
   294   cmpl(eax, 16);
       
   295   jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
       
   296 
       
   297   bind(L_2TAG_PACKET_4_0_2);
       
   298   addsd(xmm0, xmm0);
       
   299   jmp(B1_5);
       
   300 
       
   301   bind(L_2TAG_PACKET_5_0_2);
       
   302   jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
       
   303   cmpl(edx, 0);
       
   304   jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
       
   305   jmp(L_2TAG_PACKET_6_0_2);
       
   306 
       
   307   bind(L_2TAG_PACKET_3_0_2);
       
   308   xorpd(xmm1, xmm1);
       
   309   addsd(xmm1, xmm0);
       
   310   movdl(edx, xmm1);
       
   311   psrlq(xmm1, 32);
       
   312   movdl(ecx, xmm1);
       
   313   orl(edx, ecx);
       
   314   cmpl(edx, 0);
       
   315   jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
       
   316   xorpd(xmm1, xmm1);
       
   317   movl(eax, 18416);
       
   318   pinsrw(xmm1, eax, 3);
       
   319   mulsd(xmm0, xmm1);
       
   320   movdqu(xmm1, xmm0);
       
   321   pextrw(eax, xmm0, 3);
       
   322   por(xmm0, xmm2);
       
   323   psrlq(xmm0, 27);
       
   324   movl(ecx, 18416);
       
   325   psrld(xmm0, 2);
       
   326   rcpps(xmm0, xmm0);
       
   327   psllq(xmm1, 12);
       
   328   pshufd(xmm6, xmm5, 228);
       
   329   psrlq(xmm1, 12);
       
   330   jmp(L_2TAG_PACKET_1_0_2);
       
   331 
       
   332   bind(L_2TAG_PACKET_2_0_2);
       
   333   movdl(edx, xmm1);
       
   334   psrlq(xmm1, 32);
       
   335   movdl(ecx, xmm1);
       
   336   addl(ecx, ecx);
       
   337   cmpl(ecx, -2097152);
       
   338   jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
       
   339   orl(edx, ecx);
       
   340   cmpl(edx, 0);
       
   341   jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
       
   342 
       
   343   bind(L_2TAG_PACKET_6_0_2);
       
   344   xorpd(xmm1, xmm1);
       
   345   xorpd(xmm0, xmm0);
       
   346   movl(eax, 32752);
       
   347   pinsrw(xmm1, eax, 3);
       
   348   mulsd(xmm0, xmm1);
       
   349   movl(Address(rsp, 16), 3);
       
   350   jmp(L_2TAG_PACKET_8_0_2);
       
   351   bind(L_2TAG_PACKET_7_0_2);
       
   352   xorpd(xmm1, xmm1);
       
   353   xorpd(xmm0, xmm0);
       
   354   movl(eax, 49136);
       
   355   pinsrw(xmm0, eax, 3);
       
   356   divsd(xmm0, xmm1);
       
   357   movl(Address(rsp, 16), 2);
       
   358 
       
   359   bind(L_2TAG_PACKET_8_0_2);
       
   360   movq(Address(rsp, 8), xmm0);
       
   361 
       
   362   bind(B1_3);
       
   363   movq(xmm0, Address(rsp, 8));
       
   364 
       
   365   bind(B1_5);
       
   366   addq(rsp, 24);
       
   367 }
       
   368 #else
       
    369 // The 32 bit code is at most SSE2 compliant
       
        // _static_const_table_log: single constant pool for the 32-bit fast_log(),
        // which loads its address into tmp and reads it as Address(tmp, offset).
        // 536 32-bit words = 2144 bytes, laid out as:
        //   bytes    0..2063  129 entries of 16 bytes, read with
        //                     movdqu(xmm0, Address(tmp, edx))
        //   bytes 2064..2079  read at offsets 2064 and 2072 (same words as _log2 in
        //                     the 64-bit variant)
        //   bytes 2080..2127  read at offsets 2080, 2096 and 2112 (same words as
        //                     _coeff in the 64-bit variant)
        //   bytes 2128..2143  0xffffe00000000000 twice; read at offset 2128
    370 ALIGNED_(16) juint _static_const_table_log[] =
       
    371 {
       
    372     0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
       
    373     0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
       
    374     0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
       
    375     0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
       
    376     0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
       
    377     0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
       
    378     0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
       
    379     0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
       
    380     0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
       
    381     0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
       
    382     0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
       
    383     0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
       
    384     0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
       
    385     0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
       
    386     0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
       
    387     0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
       
    388     0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
       
    389     0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
       
    390     0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
       
    391     0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
       
    392     0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
       
    393     0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
       
    394     0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
       
    395     0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
       
    396     0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
       
    397     0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
       
    398     0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
       
    399     0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
       
    400     0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
       
    401     0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
       
    402     0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
       
    403     0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
       
    404     0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
       
    405     0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
       
    406     0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
       
    407     0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
       
    408     0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
       
    409     0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
       
    410     0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
       
    411     0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
       
    412     0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
       
    413     0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
       
    414     0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
       
    415     0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
       
    416     0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
       
    417     0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
       
    418     0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
       
    419     0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
       
    420     0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
       
    421     0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
       
    422     0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
       
    423     0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
       
    424     0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
       
    425     0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
       
    426     0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
       
    427     0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
       
    428     0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
       
    429     0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
       
    430     0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
       
    431     0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
       
    432     0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
       
    433     0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
       
    434     0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
       
    435     0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
       
    436     0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
       
    437     0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
       
    438     0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
       
    439     0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
       
    440     0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
       
    441     0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
       
    442     0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
       
    443     0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
       
    444     0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
       
    445     0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
       
    446     0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
       
    447     0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
       
    448     0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
       
    449     0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
       
    450     0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
       
    451     0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
       
    452     0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
       
    453     0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
       
    454     0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
       
    455     0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
       
    456     0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
       
    457     0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
       
    458     0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
       
    459     0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
       
    460     0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
       
    461     0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
       
    462     0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
       
    463     0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
       
    464     0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
       
    465     0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
       
    466     0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
       
    467     0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
       
    468     0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
       
    469     0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
       
    470     0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
       
    471     0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
       
    472     0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
       
    473     0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
       
    474     0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
       
    475     0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
       
    476     0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
       
    477     0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
       
    478     0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
       
    479     0xffffe000UL
       
    480 };
       
   481 // fast_log: emits the instruction sequence that evaluates log(x) for a double x.
       // The operations are 32-bit (subl on rsp, movl save/restore of tmp, x87 result);
       // NOTE(review): presumably the 32-bit variant -- the selecting #if is outside this view.
       //
       // registers and stack,
       
   482 // input: the double at Address(rsp, 112) after rsp has been lowered by 104, i.e.
       //        8 bytes above rsp on entry. xmm0 is loaded from there; it is not an input.
       // output: the result is pushed on the x87 stack with fld_d.
       
   483 // scratch: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
       
   484 //          eax, ecx, edx (rax, rdx, rcx); tmp (rbx) holds the constant table base
       //          and is saved to / restored from Address(rsp, 40).
       
   485 // NOTE(review): rsp is lowered by 104 and never raised again here; presumably the caller's epilogue restores it -- confirm.
       
   486 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
       
   487   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
       
   488   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
       
   489   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
       
   490   Label L_2TAG_PACKET_10_0_2, start;
       
   491 
       
   492   assert_different_registers(tmp, eax, ecx, edx);
       
   493   jmp(start);                              // 'start' is bound just below; no instruction is emitted in between
       
   494   address static_const_table = (address)_static_const_table_log;
       
   495 
       
   496   bind(start);
       
   497   subl(rsp, 104);                          // reserve 104 bytes of scratch stack
       
   498   movl(Address(rsp, 40), tmp);             // save tmp; reloaded at L_2TAG_PACKET_10_0_2
       
   499   lea(tmp, ExternalAddress(static_const_table));  // tmp = base address of the constant table
       
   500   xorpd(xmm2, xmm2);
       
   501   movl(eax, 16368);                        // 0x3ff0
       
   502   pinsrw(xmm2, eax, 3);                    // xmm2 low qword = 0x3ff0000000000000 = 1.0
       
   503   xorpd(xmm3, xmm3);
       
   504   movl(edx, 30704);                        // 0x77f0
       
   505   pinsrw(xmm3, edx, 3);                    // xmm3 low qword = 0x77f0000000000000; OR-ed onto the mantissa below
       
   506   movsd(xmm0, Address(rsp, 112));          // load the argument x
       
   507   movapd(xmm1, xmm0);
       
   508   movl(ecx, 32768);                        // 0x8000
       
   509   movdl(xmm4, ecx);                        // added to the rcpss result by paddd below
       
   510   movsd(xmm5, Address(tmp, 2128));         // 0x00000000UL, 0xffffe000UL
       
   511   pextrw(eax, xmm0, 3);                    // eax = bits 48..63 of x: sign, exponent, top 4 mantissa bits
       
   512   por(xmm0, xmm2);
       
   513   psllq(xmm0, 5);
       
   514   movl(ecx, 16352);                        // 0x3fe0; subtracted from the exponent field on the main path
       
   515   psrlq(xmm0, 34);                         // leaves 30 upper bits of (x | 1.0) in the low dword for rcpss
       
   516   rcpss(xmm0, xmm0);                       // single-precision approximate reciprocal
       
   517   psllq(xmm1, 12);
       
   518   pshufd(xmm6, xmm5, 228);                 // 228 = 0xe4 is the identity shuffle: xmm6 = copy of xmm5
       
   519   psrlq(xmm1, 12);                         // with the psllq above: clears the top 12 bits (sign, exponent) of xmm1
       
   520   subl(eax, 16);
       
   521   cmpl(eax, 32736);                        // 0x7fe0
       
   522   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);  // unsigned: taken when the extracted word was < 0x10 or >= 0x7ff0
       
   523   // Main path; also re-entered from L_2TAG_PACKET_4_0_2 after a denormal has been scaled.
       
   524   bind(L_2TAG_PACKET_1_0_2);
       
   525   paddd(xmm0, xmm4);
       
   526   por(xmm1, xmm3);                         // mantissa bits of x combined with the exponent pattern in xmm3
       
   527   movdl(edx, xmm0);                        // edx supplies the table index extracted below
       
   528   psllq(xmm0, 29);
       
   529   pand(xmm5, xmm1);                        // xmm5 = xmm1 masked with the constant loaded from tmp+2128
       
   530   pand(xmm0, xmm6);
       
   531   subsd(xmm1, xmm5);                       // xmm1 = remainder of xmm1 after removing the masked part
       
   532   mulpd(xmm5, xmm0);
       
   533   andl(eax, 32752);                        // 0x7ff0: keep the exponent field only
       
   534   subl(eax, ecx);
       
   535   cvtsi2sdl(xmm7, eax);                    // exponent term as a double (still multiplied by 16)
       
   536   mulsd(xmm1, xmm0);
       
   537   movsd(xmm6, Address(tmp, 2064));         // 0xfefa3800UL, 0x3fa62e42UL
       
   538   movdqu(xmm3, Address(tmp, 2080));        // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
       
   539   subsd(xmm5, xmm2);                       // xmm2 still holds 1.0
       
   540   andl(edx, 16711680);                     // 0xff0000
       
   541   shrl(edx, 12);                           // edx = 8-bit index * 16 (byte offset of a 16-byte entry)
       
   542   movdqu(xmm0, Address(tmp, edx));         // load the 16-byte table entry selected by edx
       
   543   movdqu(xmm4, Address(tmp, 2096));        // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
       
   544   addsd(xmm1, xmm5);
       
   545   movdqu(xmm2, Address(tmp, 2112));        // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
       
   546   mulsd(xmm6, xmm7);
       
   547   pshufd(xmm5, xmm1, 68);                  // 68 = 0x44: broadcast the low qword of xmm1 to both halves
       
   548   mulsd(xmm7, Address(tmp, 2072));         // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
       
   549   mulsd(xmm3, xmm1);
       
   550   addsd(xmm0, xmm6);
       
   551   mulpd(xmm4, xmm5);
       
   552   mulpd(xmm5, xmm5);
       
   553   pshufd(xmm6, xmm0, 228);                 // identity shuffle: xmm6 = copy of xmm0
       
   554   addsd(xmm0, xmm1);
       
   555   addpd(xmm4, xmm2);
       
   556   mulpd(xmm3, xmm5);
       
   557   subsd(xmm6, xmm0);
       
   558   mulsd(xmm4, xmm1);
       
   559   pshufd(xmm2, xmm0, 238);                 // 238 = 0xee: high qword of xmm0 into both halves of xmm2
       
   560   addsd(xmm1, xmm6);
       
   561   mulsd(xmm5, xmm5);
       
   562   addsd(xmm7, xmm2);
       
   563   addpd(xmm4, xmm3);
       
   564   addsd(xmm1, xmm7);
       
   565   mulpd(xmm4, xmm5);
       
   566   addsd(xmm1, xmm4);
       
   567   pshufd(xmm5, xmm4, 238);                 // high qword of xmm4 into xmm5
       
   568   addsd(xmm1, xmm5);
       
   569   addsd(xmm0, xmm1);                       // final sum: result in the low qword of xmm0
       
   570   jmp(L_2TAG_PACKET_2_0_2);
       
   571   // Special operands: the extracted word was < 0x10 or >= 0x7ff0.
       
   572   bind(L_2TAG_PACKET_0_0_2);
       
   573   movsd(xmm0, Address(rsp, 112));          // reload the argument x
       
   574   movdqu(xmm1, xmm0);
       
   575   addl(eax, 16);                           // undo the subl(eax, 16) done before the branch
       
   576   cmpl(eax, 32768);                        // 0x8000
       
   577   jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);  // sign bit of x is set
       
   578   cmpl(eax, 16);
       
   579   jcc(Assembler::below, L_2TAG_PACKET_4_0_2);       // exponent field is zero: +0 or positive denormal
       
   580   // Remaining case: word in 0x7ff0..0x7fff, i.e. +Inf or NaN. Also the target for sign-bit-set NaNs.
       
   581   bind(L_2TAG_PACKET_5_0_2);
       
   582   addsd(xmm0, xmm0);                       // result = x + x
       
   583   jmp(L_2TAG_PACKET_2_0_2);
       
   584   // Reached only via the jcc after cmpl(ecx, -2097152) below; the first jcc here reuses those flags.
       
   585   bind(L_2TAG_PACKET_6_0_2);
       
   586   jcc(Assembler::above, L_2TAG_PACKET_5_0_2);       // mantissa bits in the high dword are nonzero: NaN
       
   587   cmpl(edx, 0);                            // edx = low dword of x here
       
   588   jcc(Assembler::above, L_2TAG_PACKET_5_0_2);       // low dword nonzero: NaN
       
   589   jmp(L_2TAG_PACKET_7_0_2);                // otherwise x is -Inf
       
   590   // Sign bit set: classify x from its two dwords.
       
   591   bind(L_2TAG_PACKET_3_0_2);
       
   592   movdl(edx, xmm1);                        // edx = low dword of x
       
   593   psrlq(xmm1, 32);
       
   594   movdl(ecx, xmm1);                        // ecx = high dword of x
       
   595   addl(ecx, ecx);                          // shift the sign bit out
       
   596   cmpl(ecx, -2097152);                     // 0xffe00000: exponent field all ones
       
   597   jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);  // -Inf or NaN
       
   598   orl(edx, ecx);
       
   599   cmpl(edx, 0);
       
   600   jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);       // every bit except the sign is zero: x is -0.0
       
   601   // Negative nonzero operand (finite or -Inf): the result is 0.0 * Inf.
       
   602   bind(L_2TAG_PACKET_7_0_2);
       
   603   xorpd(xmm1, xmm1);
       
   604   xorpd(xmm0, xmm0);
       
   605   movl(eax, 32752);                        // 0x7ff0
       
   606   pinsrw(xmm1, eax, 3);                    // xmm1 = 0x7ff0000000000000 = +Inf
       
   607   movl(edx, 3);                            // edx is not read again within this function
       
   608   mulsd(xmm0, xmm1);                       // 0.0 * Inf
       
   609   // Common exit for the special results produced above and at L_2TAG_PACKET_8_0_2.
       
   610   bind(L_2TAG_PACKET_9_0_2);
       
   611   movsd(Address(rsp, 0), xmm0);            // spill the result so it can be loaded by fld_d
       
   612   movsd(xmm0, Address(rsp, 112));          // xmm0 = original argument again
       
   613   fld_d(Address(rsp, 0));                  // push the result on the x87 stack
       
   614   jmp(L_2TAG_PACKET_10_0_2);
       
   615   // Zero operand (+0 or -0): the result is -1.0 / 0.0.
       
   616   bind(L_2TAG_PACKET_8_0_2);
       
   617   xorpd(xmm1, xmm1);
       
   618   xorpd(xmm0, xmm0);
       
   619   movl(eax, 49136);                        // 0xbff0
       
   620   pinsrw(xmm0, eax, 3);                    // xmm0 = 0xbff0000000000000 = -1.0
       
   621   divsd(xmm0, xmm1);                       // -1.0 / 0.0
       
   622   movl(edx, 2);                            // edx is not read again within this function
       
   623   jmp(L_2TAG_PACKET_9_0_2);
       
   624   // Exponent field zero and sign clear: +0 or a positive denormal.
       
   625   bind(L_2TAG_PACKET_4_0_2);
       
   626   movdl(edx, xmm1);                        // edx = low dword of x
       
   627   psrlq(xmm1, 32);
       
   628   movdl(ecx, xmm1);                        // ecx = high dword of x
       
   629   orl(edx, ecx);
       
   630   cmpl(edx, 0);
       
   631   jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);       // all bits zero: x is +0
       
   632   xorpd(xmm1, xmm1);
       
   633   movl(eax, 18416);                        // 0x47f0
       
   634   pinsrw(xmm1, eax, 3);                    // xmm1 = 0x47f0000000000000 = 2^128
       
   635   mulsd(xmm0, xmm1);                       // scale the denormal by 2^128
       
   636   movapd(xmm1, xmm0);                      // from here: same preparation as at the top, on the scaled value
       
   637   pextrw(eax, xmm0, 3);
       
   638   por(xmm0, xmm2);                         // xmm2 still holds 1.0
       
   639   psllq(xmm0, 5);
       
   640   movl(ecx, 18416);                        // 0x47f0 = 0x3ff0 + 128*16: bias that also removes the 2^128 scaling
       
   641   psrlq(xmm0, 34);
       
   642   rcpss(xmm0, xmm0);
       
   643   psllq(xmm1, 12);
       
   644   pshufd(xmm6, xmm5, 228);                 // identity shuffle: xmm6 = copy of the mask in xmm5
       
   645   psrlq(xmm1, 12);
       
   646   jmp(L_2TAG_PACKET_1_0_2);
       
   647   // Normal exit: move the double result from xmm0 to the x87 stack through memory.
       
   648   bind(L_2TAG_PACKET_2_0_2);
       
   649   movsd(Address(rsp, 24), xmm0);
       
   650   fld_d(Address(rsp, 24));
       
   651 
       
   652   bind(L_2TAG_PACKET_10_0_2);
       
   653   movl(tmp, Address(rsp, 40));             // restore tmp; rsp itself is not adjusted back here
       
   654 }
       
   655 #endif