hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp
changeset 1 489c9b5090e2
child 5377 bcf55c5acf4e
equal deleted inserted replaced
0:fd16c54261b3 1:489c9b5090e2
       
     1 /*
       
     2  * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    21  * have any questions.
       
    22  *
       
    23  */
       
    24 
       
    25 #include "incls/_precompiled.incl"
       
    26 #include "incls/_sharedRuntimeTrig.cpp.incl"
       
    27 
       
    28 // This file contains copies of the fdlibm routines used by
       
    29 // StrictMath. It turns out that it is almost always required to use
       
    30 // these runtime routines; the Intel CPU doesn't meet the Java
       
    31 // specification for sin/cos outside a certain limited argument range,
       
    32 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
       
    33 // also turns out that avoiding the indirect call through function
       
    34 // pointer out to libjava.so in SharedRuntime speeds these routines up
       
    35 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
       
    36 
       
    37 // Enabling optimizations in this file causes incorrect code to be
       
    38 // generated; can not figure out how to turn down optimization for one
       
    39 // file in the IDE on Windows
       
    40 #ifdef WIN32
       
    41 # pragma optimize ( "", off )
       
    42 #endif
       
    43 
       
    44 #include <math.h>
       
    45 
       
    46 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
       
    47 // [jk] this is not 100% correct because the float word order may different
       
    48 // from the byte order (e.g. on ARM)
       
    49 #ifdef VM_LITTLE_ENDIAN
       
    50 # define __HI(x) *(1+(int*)&x)
       
    51 # define __LO(x) *(int*)&x
       
    52 #else
       
    53 # define __HI(x) *(int*)&x
       
    54 # define __LO(x) *(1+(int*)&x)
       
    55 #endif
       
    56 
       
    57 static double copysignA(double x, double y) {
       
    58   __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
       
    59   return x;
       
    60 }
       
    61 
       
    62 /*
       
    63  * scalbn (double x, int n)
       
    64  * scalbn(x,n) returns x* 2**n  computed by  exponent
       
    65  * manipulation rather than by actually performing an
       
    66  * exponentiation or a multiplication.
       
    67  */
       
    68 
       
    69 static const double
       
    70 two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
       
    71 twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
       
    72 hugeX  = 1.0e+300,
       
    73 tiny   = 1.0e-300;
       
    74 
       
    75 static double scalbnA (double x, int n) {
       
    76   int  k,hx,lx;
       
    77   hx = __HI(x);
       
    78   lx = __LO(x);
       
    79   k = (hx&0x7ff00000)>>20;              /* extract exponent */
       
    80   if (k==0) {                           /* 0 or subnormal x */
       
    81     if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
       
    82     x *= two54;
       
    83     hx = __HI(x);
       
    84     k = ((hx&0x7ff00000)>>20) - 54;
       
    85     if (n< -50000) return tiny*x;       /*underflow*/
       
    86   }
       
    87   if (k==0x7ff) return x+x;             /* NaN or Inf */
       
    88   k = k+n;
       
    89   if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
       
    90   if (k > 0)                            /* normal result */
       
    91     {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
       
    92   if (k <= -54) {
       
    93     if (n > 50000)      /* in case integer overflow in n+k */
       
    94       return hugeX*copysignA(hugeX,x);  /*overflow*/
       
    95     else return tiny*copysignA(tiny,x); /*underflow*/
       
    96   }
       
    97   k += 54;                              /* subnormal result */
       
    98   __HI(x) = (hx&0x800fffff)|(k<<20);
       
    99   return x*twom54;
       
   100 }
       
   101 
       
   102 /*
       
   103  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
       
   104  * double x[],y[]; int e0,nx,prec; int ipio2[];
       
   105  *
       
   106  * __kernel_rem_pio2 return the last three digits of N with
       
   107  *              y = x - N*pi/2
       
   108  * so that |y| < pi/2.
       
   109  *
       
   110  * The method is to compute the integer (mod 8) and fraction parts of
       
   111  * (2/pi)*x without doing the full multiplication. In general we
       
   112  * skip the part of the product that are known to be a huge integer (
       
   113  * more accurately, = 0 mod 8 ). Thus the number of operations are
       
   114  * independent of the exponent of the input.
       
   115  *
       
   116  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
       
   117  *
       
   118  * Input parameters:
       
   119  *      x[]     The input value (must be positive) is broken into nx
       
   120  *              pieces of 24-bit integers in double precision format.
       
   121  *              x[i] will be the i-th 24 bit of x. The scaled exponent
       
   122  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
       
   123  *              match x's up to 24 bits.
       
   124  *
       
   125  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
       
   126  *                      e0 = ilogb(z)-23
       
   127  *                      z  = scalbn(z,-e0)
       
   128  *              for i = 0,1,2
       
   129  *                      x[i] = floor(z)
       
   130  *                      z    = (z-x[i])*2**24
       
   131  *
       
   132  *
       
   133  *      y[]     ouput result in an array of double precision numbers.
       
   134  *              The dimension of y[] is:
       
   135  *                      24-bit  precision       1
       
   136  *                      53-bit  precision       2
       
   137  *                      64-bit  precision       2
       
   138  *                      113-bit precision       3
       
   139  *              The actual value is the sum of them. Thus for 113-bit
       
   140  *              precsion, one may have to do something like:
       
   141  *
       
   142  *              long double t,w,r_head, r_tail;
       
   143  *              t = (long double)y[2] + (long double)y[1];
       
   144  *              w = (long double)y[0];
       
   145  *              r_head = t+w;
       
   146  *              r_tail = w - (r_head - t);
       
   147  *
       
   148  *      e0      The exponent of x[0]
       
   149  *
       
   150  *      nx      dimension of x[]
       
   151  *
       
   152  *      prec    an interger indicating the precision:
       
   153  *                      0       24  bits (single)
       
   154  *                      1       53  bits (double)
       
   155  *                      2       64  bits (extended)
       
   156  *                      3       113 bits (quad)
       
   157  *
       
   158  *      ipio2[]
       
   159  *              integer array, contains the (24*i)-th to (24*i+23)-th
       
   160  *              bit of 2/pi after binary point. The corresponding
       
   161  *              floating value is
       
   162  *
       
   163  *                      ipio2[i] * 2^(-24(i+1)).
       
   164  *
       
   165  * External function:
       
   166  *      double scalbn(), floor();
       
   167  *
       
   168  *
       
   169  * Here is the description of some local variables:
       
   170  *
       
   171  *      jk      jk+1 is the initial number of terms of ipio2[] needed
       
   172  *              in the computation. The recommended value is 2,3,4,
       
   173  *              6 for single, double, extended,and quad.
       
   174  *
       
   175  *      jz      local integer variable indicating the number of
       
   176  *              terms of ipio2[] used.
       
   177  *
       
   178  *      jx      nx - 1
       
   179  *
       
   180  *      jv      index for pointing to the suitable ipio2[] for the
       
   181  *              computation. In general, we want
       
   182  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
       
   183  *              is an integer. Thus
       
   184  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
       
   185  *              Hence jv = max(0,(e0-3)/24).
       
   186  *
       
   187  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
       
   188  *
       
   189  *      q[]     double array with integral value, representing the
       
   190  *              24-bits chunk of the product of x and 2/pi.
       
   191  *
       
   192  *      q0      the corresponding exponent of q[0]. Note that the
       
   193  *              exponent for q[i] would be q0-24*i.
       
   194  *
       
   195  *      PIo2[]  double precision array, obtained by cutting pi/2
       
   196  *              into 24 bits chunks.
       
   197  *
       
   198  *      f[]     ipio2[] in floating point
       
   199  *
       
   200  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
       
   201  *
       
   202  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
       
   203  *
       
   204  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence
       
   205  *              it also indicates the *sign* of the result.
       
   206  *
       
   207  */
       
   208 
       
   209 
       
   210 /*
       
   211  * Constants:
       
   212  * The hexadecimal values are the intended ones for the following
       
   213  * constants. The decimal values may be used, provided that the
       
   214  * compiler will convert from decimal to binary accurately enough
       
   215  * to produce the hexadecimal values shown.
       
   216  */
       
   217 
       
   218 
       
   219 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
       
   220 
       
   221 static const double PIo2[] = {
       
   222   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
       
   223   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
       
   224   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
       
   225   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
       
   226   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
       
   227   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
       
   228   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
       
   229   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
       
   230 };
       
   231 
       
   232 static const double
       
   233 zeroB   = 0.0,
       
   234 one     = 1.0,
       
   235 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
       
   236 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
       
   237 
       
   238 static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
       
   239   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
       
   240   double z,fw,f[20],fq[20],q[20];
       
   241 
       
   242   /* initialize jk*/
       
   243   jk = init_jk[prec];
       
   244   jp = jk;
       
   245 
       
   246   /* determine jx,jv,q0, note that 3>q0 */
       
   247   jx =  nx-1;
       
   248   jv = (e0-3)/24; if(jv<0) jv=0;
       
   249   q0 =  e0-24*(jv+1);
       
   250 
       
   251   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
       
   252   j = jv-jx; m = jx+jk;
       
   253   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
       
   254 
       
   255   /* compute q[0],q[1],...q[jk] */
       
   256   for (i=0;i<=jk;i++) {
       
   257     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
       
   258   }
       
   259 
       
   260   jz = jk;
       
   261 recompute:
       
   262   /* distill q[] into iq[] reversingly */
       
   263   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
       
   264     fw    =  (double)((int)(twon24* z));
       
   265     iq[i] =  (int)(z-two24B*fw);
       
   266     z     =  q[j-1]+fw;
       
   267   }
       
   268 
       
   269   /* compute n */
       
   270   z  = scalbnA(z,q0);           /* actual value of z */
       
   271   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
       
   272   n  = (int) z;
       
   273   z -= (double)n;
       
   274   ih = 0;
       
   275   if(q0>0) {    /* need iq[jz-1] to determine n */
       
   276     i  = (iq[jz-1]>>(24-q0)); n += i;
       
   277     iq[jz-1] -= i<<(24-q0);
       
   278     ih = iq[jz-1]>>(23-q0);
       
   279   }
       
   280   else if(q0==0) ih = iq[jz-1]>>23;
       
   281   else if(z>=0.5) ih=2;
       
   282 
       
   283   if(ih>0) {    /* q > 0.5 */
       
   284     n += 1; carry = 0;
       
   285     for(i=0;i<jz ;i++) {        /* compute 1-q */
       
   286       j = iq[i];
       
   287       if(carry==0) {
       
   288         if(j!=0) {
       
   289           carry = 1; iq[i] = 0x1000000- j;
       
   290         }
       
   291       } else  iq[i] = 0xffffff - j;
       
   292     }
       
   293     if(q0>0) {          /* rare case: chance is 1 in 12 */
       
   294       switch(q0) {
       
   295       case 1:
       
   296         iq[jz-1] &= 0x7fffff; break;
       
   297       case 2:
       
   298         iq[jz-1] &= 0x3fffff; break;
       
   299       }
       
   300     }
       
   301     if(ih==2) {
       
   302       z = one - z;
       
   303       if(carry!=0) z -= scalbnA(one,q0);
       
   304     }
       
   305   }
       
   306 
       
   307   /* check if recomputation is needed */
       
   308   if(z==zeroB) {
       
   309     j = 0;
       
   310     for (i=jz-1;i>=jk;i--) j |= iq[i];
       
   311     if(j==0) { /* need recomputation */
       
   312       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
       
   313 
       
   314       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
       
   315         f[jx+i] = (double) ipio2[jv+i];
       
   316         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
       
   317         q[i] = fw;
       
   318       }
       
   319       jz += k;
       
   320       goto recompute;
       
   321     }
       
   322   }
       
   323 
       
   324   /* chop off zero terms */
       
   325   if(z==0.0) {
       
   326     jz -= 1; q0 -= 24;
       
   327     while(iq[jz]==0) { jz--; q0-=24;}
       
   328   } else { /* break z into 24-bit if neccessary */
       
   329     z = scalbnA(z,-q0);
       
   330     if(z>=two24B) {
       
   331       fw = (double)((int)(twon24*z));
       
   332       iq[jz] = (int)(z-two24B*fw);
       
   333       jz += 1; q0 += 24;
       
   334       iq[jz] = (int) fw;
       
   335     } else iq[jz] = (int) z ;
       
   336   }
       
   337 
       
   338   /* convert integer "bit" chunk to floating-point value */
       
   339   fw = scalbnA(one,q0);
       
   340   for(i=jz;i>=0;i--) {
       
   341     q[i] = fw*(double)iq[i]; fw*=twon24;
       
   342   }
       
   343 
       
   344   /* compute PIo2[0,...,jp]*q[jz,...,0] */
       
   345   for(i=jz;i>=0;i--) {
       
   346     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
       
   347     fq[jz-i] = fw;
       
   348   }
       
   349 
       
   350   /* compress fq[] into y[] */
       
   351   switch(prec) {
       
   352   case 0:
       
   353     fw = 0.0;
       
   354     for (i=jz;i>=0;i--) fw += fq[i];
       
   355     y[0] = (ih==0)? fw: -fw;
       
   356     break;
       
   357   case 1:
       
   358   case 2:
       
   359     fw = 0.0;
       
   360     for (i=jz;i>=0;i--) fw += fq[i];
       
   361     y[0] = (ih==0)? fw: -fw;
       
   362     fw = fq[0]-fw;
       
   363     for (i=1;i<=jz;i++) fw += fq[i];
       
   364     y[1] = (ih==0)? fw: -fw;
       
   365     break;
       
   366   case 3:       /* painful */
       
   367     for (i=jz;i>0;i--) {
       
   368       fw      = fq[i-1]+fq[i];
       
   369       fq[i]  += fq[i-1]-fw;
       
   370       fq[i-1] = fw;
       
   371     }
       
   372     for (i=jz;i>1;i--) {
       
   373       fw      = fq[i-1]+fq[i];
       
   374       fq[i]  += fq[i-1]-fw;
       
   375       fq[i-1] = fw;
       
   376     }
       
   377     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
       
   378     if(ih==0) {
       
   379       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
       
   380     } else {
       
   381       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
       
   382     }
       
   383   }
       
   384   return n&7;
       
   385 }
       
   386 
       
   387 
       
   388 /*
       
   389  * ====================================================
       
   390  * Copyright 13 Dec 1993 Sun Microsystems, Inc.  All Rights Reserved.
       
   391  *
       
   392  * Developed at SunPro, a Sun Microsystems, Inc. business.
       
   393  * Permission to use, copy, modify, and distribute this
       
   394  * software is freely granted, provided that this notice
       
   395  * is preserved.
       
   396  * ====================================================
       
   397  *
       
   398  */
       
   399 
       
   400 /* __ieee754_rem_pio2(x,y)
       
   401  *
       
   402  * return the remainder of x rem pi/2 in y[0]+y[1]
       
   403  * use __kernel_rem_pio2()
       
   404  */
       
   405 
       
   406 /*
       
   407  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
       
   408  */
       
   409 static const int two_over_pi[] = {
       
   410   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
       
   411   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
       
   412   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
       
   413   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
       
   414   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
       
   415   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
       
   416   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
       
   417   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
       
   418   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
       
   419   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
       
   420   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
       
   421 };
       
   422 
       
   423 static const int npio2_hw[] = {
       
   424   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
       
   425   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
       
   426   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
       
   427   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
       
   428   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
       
   429   0x404858EB, 0x404921FB,
       
   430 };
       
   431 
       
   432 /*
       
   433  * invpio2:  53 bits of 2/pi
       
   434  * pio2_1:   first  33 bit of pi/2
       
   435  * pio2_1t:  pi/2 - pio2_1
       
   436  * pio2_2:   second 33 bit of pi/2
       
   437  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
       
   438  * pio2_3:   third  33 bit of pi/2
       
   439  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
       
   440  */
       
   441 
       
   442 static const double
       
   443 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
       
   444 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
       
   445 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
       
   446 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
       
   447 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
       
   448 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
       
   449 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
       
   450 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
       
   451 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
       
   452 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
       
   453 
       
   454 static int __ieee754_rem_pio2(double x, double *y) {
       
   455   double z,w,t,r,fn;
       
   456   double tx[3];
       
   457   int e0,i,j,nx,n,ix,hx,i0;
       
   458 
       
   459   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
       
   460   hx = *(i0+(int*)&x);          /* high word of x */
       
   461   ix = hx&0x7fffffff;
       
   462   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
       
   463     {y[0] = x; y[1] = 0; return 0;}
       
   464   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
       
   465     if(hx>0) {
       
   466       z = x - pio2_1;
       
   467       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
       
   468         y[0] = z - pio2_1t;
       
   469         y[1] = (z-y[0])-pio2_1t;
       
   470       } else {                /* near pi/2, use 33+33+53 bit pi */
       
   471         z -= pio2_2;
       
   472         y[0] = z - pio2_2t;
       
   473         y[1] = (z-y[0])-pio2_2t;
       
   474       }
       
   475       return 1;
       
   476     } else {    /* negative x */
       
   477       z = x + pio2_1;
       
   478       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
       
   479         y[0] = z + pio2_1t;
       
   480         y[1] = (z-y[0])+pio2_1t;
       
   481       } else {                /* near pi/2, use 33+33+53 bit pi */
       
   482         z += pio2_2;
       
   483         y[0] = z + pio2_2t;
       
   484         y[1] = (z-y[0])+pio2_2t;
       
   485       }
       
   486       return -1;
       
   487     }
       
   488   }
       
   489   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
       
   490     t  = fabsd(x);
       
   491     n  = (int) (t*invpio2+half);
       
   492     fn = (double)n;
       
   493     r  = t-fn*pio2_1;
       
   494     w  = fn*pio2_1t;    /* 1st round good to 85 bit */
       
   495     if(n<32&&ix!=npio2_hw[n-1]) {
       
   496       y[0] = r-w;       /* quick check no cancellation */
       
   497     } else {
       
   498       j  = ix>>20;
       
   499       y[0] = r-w;
       
   500       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
       
   501       if(i>16) {  /* 2nd iteration needed, good to 118 */
       
   502         t  = r;
       
   503         w  = fn*pio2_2;
       
   504         r  = t-w;
       
   505         w  = fn*pio2_2t-((t-r)-w);
       
   506         y[0] = r-w;
       
   507         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
       
   508         if(i>49)  {     /* 3rd iteration need, 151 bits acc */
       
   509           t  = r;       /* will cover all possible cases */
       
   510           w  = fn*pio2_3;
       
   511           r  = t-w;
       
   512           w  = fn*pio2_3t-((t-r)-w);
       
   513           y[0] = r-w;
       
   514         }
       
   515       }
       
   516     }
       
   517     y[1] = (r-y[0])-w;
       
   518     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
       
   519     else         return n;
       
   520   }
       
   521   /*
       
   522    * all other (large) arguments
       
   523    */
       
   524   if(ix>=0x7ff00000) {          /* x is inf or NaN */
       
   525     y[0]=y[1]=x-x; return 0;
       
   526   }
       
   527   /* set z = scalbn(|x|,ilogb(x)-23) */
       
   528   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
       
   529   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
       
   530   *(i0+(int*)&z) = ix - (e0<<20);
       
   531   for(i=0;i<2;i++) {
       
   532     tx[i] = (double)((int)(z));
       
   533     z     = (z-tx[i])*two24A;
       
   534   }
       
   535   tx[2] = z;
       
   536   nx = 3;
       
   537   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
       
   538   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
       
   539   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
       
   540   return n;
       
   541 }
       
   542 
       
   543 
       
   544 /* __kernel_sin( x, y, iy)
       
   545  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
       
   546  * Input x is assumed to be bounded by ~pi/4 in magnitude.
       
   547  * Input y is the tail of x.
       
   548  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
       
   549  *
       
   550  * Algorithm
       
   551  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
       
   552  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
       
   553  *      3. sin(x) is approximated by a polynomial of degree 13 on
       
   554  *         [0,pi/4]
       
   555  *                               3            13
       
   556  *              sin(x) ~ x + S1*x + ... + S6*x
       
   557  *         where
       
   558  *
       
   559  *      |sin(x)         2     4     6     8     10     12  |     -58
       
   560  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
       
   561  *      |  x                                               |
       
   562  *
       
   563  *      4. sin(x+y) = sin(x) + sin'(x')*y
       
   564  *                  ~ sin(x) + (1-x*x/2)*y
       
   565  *         For better accuracy, let
       
   566  *                   3      2      2      2      2
       
   567  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
       
   568  *         then                   3    2
       
   569  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
       
   570  */
       
   571 
       
   572 static const double
       
   573 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
       
   574 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
       
   575 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
       
   576 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
       
   577 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
       
   578 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
       
   579 
       
   580 static double __kernel_sin(double x, double y, int iy)
       
   581 {
       
   582         double z,r,v;
       
   583         int ix;
       
   584         ix = __HI(x)&0x7fffffff;        /* high word of x */
       
   585         if(ix<0x3e400000)                       /* |x| < 2**-27 */
       
   586            {if((int)x==0) return x;}            /* generate inexact */
       
   587         z       =  x*x;
       
   588         v       =  z*x;
       
   589         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
       
   590         if(iy==0) return x+v*(S1+z*r);
       
   591         else      return x-((z*(half*y-v*r)-y)-v*S1);
       
   592 }
       
   593 
       
   594 /*
       
   595  * __kernel_cos( x,  y )
       
   596  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
       
   597  * Input x is assumed to be bounded by ~pi/4 in magnitude.
       
   598  * Input y is the tail of x.
       
   599  *
       
   600  * Algorithm
       
   601  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
       
   602  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
       
   603  *      3. cos(x) is approximated by a polynomial of degree 14 on
       
   604  *         [0,pi/4]
       
   605  *                                       4            14
       
   606  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
       
   607  *         where the remez error is
       
   608  *
       
   609  *      |              2     4     6     8     10    12     14 |     -58
       
   610  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
       
   611  *      |                                                      |
       
   612  *
       
   613  *                     4     6     8     10    12     14
       
   614  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
       
   615  *             cos(x) = 1 - x*x/2 + r
       
   616  *         since cos(x+y) ~ cos(x) - sin(x)*y
       
   617  *                        ~ cos(x) - x*y,
       
   618  *         a correction term is necessary in cos(x) and hence
       
   619  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
       
   620  *         For better accuracy when x > 0.3, let qx = |x|/4 with
       
   621  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
       
   622  *         Then
       
   623  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
       
   624  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
       
   625  *         magnitude of the latter is at least a quarter of x*x/2,
       
   626  *         thus, reducing the rounding error in the subtraction.
       
   627  */
       
   628 
       
   629 static const double
       
   630 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
       
   631 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
       
   632 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
       
   633 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
       
   634 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
       
   635 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
       
   636 
       
   637 static double __kernel_cos(double x, double y)
       
   638 {
       
   639   double a,hz,z,r,qx;
       
   640   int ix;
       
   641   ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/
       
   642   if(ix<0x3e400000) {                   /* if x < 2**27 */
       
   643     if(((int)x)==0) return one;         /* generate inexact */
       
   644   }
       
   645   z  = x*x;
       
   646   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
       
   647   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
       
   648     return one - (0.5*z - (z*r - x*y));
       
   649   else {
       
   650     if(ix > 0x3fe90000) {               /* x > 0.78125 */
       
   651       qx = 0.28125;
       
   652     } else {
       
   653       __HI(qx) = ix-0x00200000; /* x/4 */
       
   654       __LO(qx) = 0;
       
   655     }
       
   656     hz = 0.5*z-qx;
       
   657     a  = one-qx;
       
   658     return a - (hz - (z*r-x*y));
       
   659   }
       
   660 }
       
   661 
       
   662 /* __kernel_tan( x, y, k )
       
   663  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
       
   664  * Input x is assumed to be bounded by ~pi/4 in magnitude.
       
   665  * Input y is the tail of x.
       
   666  * Input k indicates whether tan (if k=1) or
       
   667  * -1/tan (if k= -1) is returned.
       
   668  *
       
   669  * Algorithm
       
   670  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
       
   671  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
       
   672  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
       
   673  *         [0,0.67434]
       
   674  *                               3             27
       
   675  *              tan(x) ~ x + T1*x + ... + T13*x
       
   676  *         where
       
   677  *
       
   678  *              |tan(x)         2     4            26   |     -59.2
       
   679  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
       
   680  *              |  x                                    |
       
   681  *
       
   682  *         Note: tan(x+y) = tan(x) + tan'(x)*y
       
   683  *                        ~ tan(x) + (1+x*x)*y
       
   684  *         Therefore, for better accuracy in computing tan(x+y), let
       
   685  *                   3      2      2       2       2
       
   686  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
       
   687  *         then
       
   688  *                                  3    2
       
   689  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
       
   690  *
       
   691  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
       
   692  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
       
   693  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
       
   694  */
       
   695 
       
   696 static const double
       
   697 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
       
   698 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
       
   699 T[] =  {
       
   700   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
       
   701   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
       
   702   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
       
   703   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
       
   704   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
       
   705   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
       
   706   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
       
   707   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
       
   708   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
       
   709   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
       
   710   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
       
   711  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
       
   712   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
       
   713 };
       
   714 
       
   715 static double __kernel_tan(double x, double y, int iy)
       
   716 {
       
   717   double z,r,v,w,s;
       
   718   int ix,hx;
       
   719   hx = __HI(x);   /* high word of x */
       
   720   ix = hx&0x7fffffff;     /* high word of |x| */
       
   721   if(ix<0x3e300000) {                     /* x < 2**-28 */
       
   722     if((int)x==0) {                       /* generate inexact */
       
   723       if (((ix | __LO(x)) | (iy + 1)) == 0)
       
   724         return one / fabsd(x);
       
   725       else {
       
   726         if (iy == 1)
       
   727           return x;
       
   728         else {    /* compute -1 / (x+y) carefully */
       
   729           double a, t;
       
   730 
       
   731           z = w = x + y;
       
   732           __LO(z) = 0;
       
   733           v = y - (z - x);
       
   734           t = a = -one / w;
       
   735           __LO(t) = 0;
       
   736           s = one + t * z;
       
   737           return t + a * (s + t * v);
       
   738         }
       
   739       }
       
   740     }
       
   741   }
       
   742   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
       
   743     if(hx<0) {x = -x; y = -y;}
       
   744     z = pio4-x;
       
   745     w = pio4lo-y;
       
   746     x = z+w; y = 0.0;
       
   747   }
       
   748   z       =  x*x;
       
   749   w       =  z*z;
       
   750   /* Break x^5*(T[1]+x^2*T[2]+...) into
       
   751    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
       
   752    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
       
   753    */
       
   754   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
       
   755   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
       
   756   s = z*x;
       
   757   r = y + z*(s*(r+v)+y);
       
   758   r += T[0]*s;
       
   759   w = x+r;
       
   760   if(ix>=0x3FE59428) {
       
   761     v = (double)iy;
       
   762     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
       
   763   }
       
   764   if(iy==1) return w;
       
   765   else {          /* if allow error up to 2 ulp,
       
   766                      simply return -1.0/(x+r) here */
       
   767     /*  compute -1.0/(x+r) accurately */
       
   768     double a,t;
       
   769     z  = w;
       
   770     __LO(z) = 0;
       
   771     v  = r-(z - x);     /* z+v = r+x */
       
   772     t = a  = -1.0/w;    /* a = -1.0/w */
       
   773     __LO(t) = 0;
       
   774     s  = 1.0+t*z;
       
   775     return t+a*(s+t*v);
       
   776   }
       
   777 }
       
   778 
       
   779 
       
   780 //----------------------------------------------------------------------
       
   781 //
       
   782 // Routines for new sin/cos implementation
       
   783 //
       
   784 //----------------------------------------------------------------------
       
   785 
       
   786 /* sin(x)
       
   787  * Return sine function of x.
       
   788  *
       
   789  * kernel function:
       
   790  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
       
   791  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
       
   792  *      __ieee754_rem_pio2      ... argument reduction routine
       
   793  *
       
   794  * Method.
       
   795  *      Let S,C and T denote the sin, cos and tan respectively on
       
   796  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
       
   797  *      in [-pi/4 , +pi/4], and let n = k mod 4.
       
   798  *      We have
       
   799  *
       
   800  *          n        sin(x)      cos(x)        tan(x)
       
   801  *     ----------------------------------------------------------
       
   802  *          0          S           C             T
       
   803  *          1          C          -S            -1/T
       
   804  *          2         -S          -C             T
       
   805  *          3         -C           S            -1/T
       
   806  *     ----------------------------------------------------------
       
   807  *
       
   808  * Special cases:
       
   809  *      Let trig be any of sin, cos, or tan.
       
   810  *      trig(+-INF)  is NaN, with signals;
       
   811  *      trig(NaN)    is that NaN;
       
   812  *
       
   813  * Accuracy:
       
   814  *      TRIG(x) returns trig(x) nearly rounded
       
   815  */
       
   816 
       
   817 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
       
   818   double y[2],z=0.0;
       
   819   int n, ix;
       
   820 
       
   821   /* High word of x. */
       
   822   ix = __HI(x);
       
   823 
       
   824   /* |x| ~< pi/4 */
       
   825   ix &= 0x7fffffff;
       
   826   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
       
   827 
       
   828   /* sin(Inf or NaN) is NaN */
       
   829   else if (ix>=0x7ff00000) return x-x;
       
   830 
       
   831   /* argument reduction needed */
       
   832   else {
       
   833     n = __ieee754_rem_pio2(x,y);
       
   834     switch(n&3) {
       
   835     case 0: return  __kernel_sin(y[0],y[1],1);
       
   836     case 1: return  __kernel_cos(y[0],y[1]);
       
   837     case 2: return -__kernel_sin(y[0],y[1],1);
       
   838     default:
       
   839       return -__kernel_cos(y[0],y[1]);
       
   840     }
       
   841   }
       
   842 JRT_END
       
   843 
       
   844 /* cos(x)
       
   845  * Return cosine function of x.
       
   846  *
       
   847  * kernel function:
       
   848  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
       
   849  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
       
   850  *      __ieee754_rem_pio2      ... argument reduction routine
       
   851  *
       
   852  * Method.
       
   853  *      Let S,C and T denote the sin, cos and tan respectively on
       
   854  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
       
   855  *      in [-pi/4 , +pi/4], and let n = k mod 4.
       
   856  *      We have
       
   857  *
       
   858  *          n        sin(x)      cos(x)        tan(x)
       
   859  *     ----------------------------------------------------------
       
   860  *          0          S           C             T
       
   861  *          1          C          -S            -1/T
       
   862  *          2         -S          -C             T
       
   863  *          3         -C           S            -1/T
       
   864  *     ----------------------------------------------------------
       
   865  *
       
   866  * Special cases:
       
   867  *      Let trig be any of sin, cos, or tan.
       
   868  *      trig(+-INF)  is NaN, with signals;
       
   869  *      trig(NaN)    is that NaN;
       
   870  *
       
   871  * Accuracy:
       
   872  *      TRIG(x) returns trig(x) nearly rounded
       
   873  */
       
   874 
       
   875 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
       
   876   double y[2],z=0.0;
       
   877   int n, ix;
       
   878 
       
   879   /* High word of x. */
       
   880   ix = __HI(x);
       
   881 
       
   882   /* |x| ~< pi/4 */
       
   883   ix &= 0x7fffffff;
       
   884   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
       
   885 
       
   886   /* cos(Inf or NaN) is NaN */
       
   887   else if (ix>=0x7ff00000) return x-x;
       
   888 
       
   889   /* argument reduction needed */
       
   890   else {
       
   891     n = __ieee754_rem_pio2(x,y);
       
   892     switch(n&3) {
       
   893     case 0: return  __kernel_cos(y[0],y[1]);
       
   894     case 1: return -__kernel_sin(y[0],y[1],1);
       
   895     case 2: return -__kernel_cos(y[0],y[1]);
       
   896     default:
       
   897       return  __kernel_sin(y[0],y[1],1);
       
   898     }
       
   899   }
       
   900 JRT_END
       
   901 
       
   902 /* tan(x)
       
   903  * Return tangent function of x.
       
   904  *
       
   905  * kernel function:
       
   906  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
       
   907  *      __ieee754_rem_pio2      ... argument reduction routine
       
   908  *
       
   909  * Method.
       
   910  *      Let S,C and T denote the sin, cos and tan respectively on
       
   911  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
       
   912  *      in [-pi/4 , +pi/4], and let n = k mod 4.
       
   913  *      We have
       
   914  *
       
   915  *          n        sin(x)      cos(x)        tan(x)
       
   916  *     ----------------------------------------------------------
       
   917  *          0          S           C             T
       
   918  *          1          C          -S            -1/T
       
   919  *          2         -S          -C             T
       
   920  *          3         -C           S            -1/T
       
   921  *     ----------------------------------------------------------
       
   922  *
       
   923  * Special cases:
       
   924  *      Let trig be any of sin, cos, or tan.
       
   925  *      trig(+-INF)  is NaN, with signals;
       
   926  *      trig(NaN)    is that NaN;
       
   927  *
       
   928  * Accuracy:
       
   929  *      TRIG(x) returns trig(x) nearly rounded
       
   930  */
       
   931 
       
   932 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
       
   933   double y[2],z=0.0;
       
   934   int n, ix;
       
   935 
       
   936   /* High word of x. */
       
   937   ix = __HI(x);
       
   938 
       
   939   /* |x| ~< pi/4 */
       
   940   ix &= 0x7fffffff;
       
   941   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
       
   942 
       
   943   /* tan(Inf or NaN) is NaN */
       
   944   else if (ix>=0x7ff00000) return x-x;            /* NaN */
       
   945 
       
   946   /* argument reduction needed */
       
   947   else {
       
   948     n = __ieee754_rem_pio2(x,y);
       
   949     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
       
   950                                                      -1 -- n odd */
       
   951   }
       
   952 JRT_END
       
   953 
       
   954 
       
   955 #ifdef WIN32
       
   956 # pragma optimize ( "", on )
       
   957 #endif