amath  1.8.5
Simple command line calculator
kremp2.c
Go to the documentation of this file.
1 /*-
2  * Copyright (c) 2014-2018 Carsten Sonne Larsen <cs@innolan.net>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  * Project homepage:
26  * https://amath.innolan.net
27  *
28  * The original source code can be obtained from:
29  * http://www.netlib.org/fdlibm/k_rem_pio2.c
30  *
31  * =================================================================
32  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
33  *
34  * Developed at SunSoft, a Sun Microsystems, Inc. business.
35  * Permission to use, copy, modify, and distribute this
36  * software is freely granted, provided that this notice
37  * is preserved.
38  * =================================================================
39  */
40 
41 /**
42  * @file kremp2.c
43  * @brief Kernel reduction function
44  */
45 
46 #include "prim.h"
47 
48 /*
49  * Constants:
50  * The hexadecimal values are the intended ones for the following
51  * constants. The decimal values may be used, provided that the
52  * compiler will convert from decimal to binary accurately enough
53  * to produce the hexadecimal values shown.
54  */
55 
56 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
57 
58 static const double PIo2[] = {
59  1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
60  7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
61  5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
62  3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
63  1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
64  1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
65  2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
66  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
67 };
68 
69 static const double
70 zero = 0.0,
71 one = 1.0,
72 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
73 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
74 
75 /**
76  * @brief Kernel reduction function
77  * @details
78  * <pre>
79  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
80  * double x[],y[]; int e0,nx,prec; int ipio2[];
81  *
82  * __kernel_rem_pio2 return the last three digits of N with
83  * y = x - N*pi/2
84  * so that |y| < pi/2.
85  *
86  * The method is to compute the integer (mod 8) and fraction parts of
87  * (2/pi)*x without doing the full multiplication. In general we
88  * skip the part of the product that are known to be a huge integer (
89  * more accurately, = 0 mod 8 ). Thus the number of operations are
90  * independent of the exponent of the input.
91  *
92  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
93  *
94  * Input parameters:
95  * x[] The input value (must be positive) is broken into nx
96  * pieces of 24-bit integers in double precision format.
97  * x[i] will be the i-th 24 bit of x. The scaled exponent
98  * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
99  * match x's up to 24 bits.
100  *
101  * Example of breaking a double positive z into x[0]+x[1]+x[2]:
102  * e0 = ilogb(z)-23
103  * z = scalbn(z,-e0)
104  * for i = 0,1,2
105  * x[i] = floor(z)
106  * z = (z-x[i])*2**24
107  *
108  *
109  * y[] ouput result in an array of double precision numbers.
110  * The dimension of y[] is:
111  * 24-bit precision 1
112  * 53-bit precision 2
113  * 64-bit precision 2
114  * 113-bit precision 3
115  * The actual value is the sum of them. Thus for 113-bit
116  * precison, one may have to do something like:
117  *
118  * long double t,w,r_head, r_tail;
119  * t = (long double)y[2] + (long double)y[1];
120  * w = (long double)y[0];
121  * r_head = t+w;
122  * r_tail = w - (r_head - t);
123  *
124  * e0 The exponent of x[0]
125  *
126  * nx dimension of x[]
127  *
128  * prec an integer indicating the precision:
129  * 0 24 bits (single)
130  * 1 53 bits (double)
131  * 2 64 bits (extended)
132  * 3 113 bits (quad)
133  *
134  * ipio2[]
135  * integer array, contains the (24*i)-th to (24*i+23)-th
136  * bit of 2/pi after binary point. The corresponding
137  * floating value is
138  *
139  * ipio2[i] * 2^(-24(i+1)).
140  *
141  * External function:
142  * double scalbn(), floor();
143  *
144  *
145  * Here is the description of some local variables:
146  *
147  * jk jk+1 is the initial number of terms of ipio2[] needed
148  * in the computation. The recommended value is 2,3,4,
149  * 6 for single, double, extended,and quad.
150  *
151  * jz local integer variable indicating the number of
152  * terms of ipio2[] used.
153  *
154  * jx nx - 1
155  *
156  * jv index for pointing to the suitable ipio2[] for the
157  * computation. In general, we want
158  * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
159  * is an integer. Thus
160  * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
161  * Hence jv = max(0,(e0-3)/24).
162  *
163  * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
164  *
165  * q[] double array with integral value, representing the
166  * 24-bits chunk of the product of x and 2/pi.
167  *
168  * q0 the corresponding exponent of q[0]. Note that the
169  * exponent for q[i] would be q0-24*i.
170  *
171  * PIo2[] double precision array, obtained by cutting pi/2
172  * into 24 bits chunks.
173  *
174  * f[] ipio2[] in floating point
175  *
176  * iq[] integer array by breaking up q[] in 24-bits chunk.
177  *
178  * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
179  *
180  * ih integer. If >0 it indicates q[] is >= 0.5, hence
181  * it also indicates the *sign* of the result.
182  * </pre>
183  */
184 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2)
185 {
186  int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
187  double z,fw,f[20],fq[20],q[20];
188 
189  /* initialize jk*/
190  jk = init_jk[prec];
191  jp = jk;
192 
193  /* determine jx,jv,q0, note that 3>q0 */
194  jx = nx-1;
195  jv = (e0-3)/24;
196  if(jv<0) jv=0;
197  q0 = e0-24*(jv+1);
198 
199  /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
200  j = jv-jx;
201  m = jx+jk;
202  for(i=0; i<=m; i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
203 
204  /* compute q[0],q[1],...q[jk] */
205  for (i=0; i<=jk; i++) {
206  for(j=0,fw=0.0; j<=jx; j++) fw += x[j]*f[jx+i-j];
207  q[i] = fw;
208  }
209 
210  jz = jk;
211 recompute:
212  /* distill q[] into iq[] reversingly */
213  for(i=0,j=jz,z=q[jz]; j>0; i++,j--) {
214  fw = (double)((int)(twon24* z));
215  iq[i] = (int)(z-two24*fw);
216  z = q[j-1]+fw;
217  }
218 
219  /* compute n */
220  z = scalbn(z,q0); /* actual value of z */
221  z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
222  n = (int) z;
223  z -= (double)n;
224  ih = 0;
225  if(q0>0) { /* need iq[jz-1] to determine n */
226  i = (iq[jz-1]>>(24-q0));
227  n += i;
228  iq[jz-1] -= i<<(24-q0);
229  ih = iq[jz-1]>>(23-q0);
230  }
231  else if(q0==0) ih = iq[jz-1]>>23;
232  else if(z>=0.5) ih=2;
233 
234  if(ih>0) { /* q > 0.5 */
235  n += 1;
236  carry = 0;
237  for(i=0; i<jz ; i++) { /* compute 1-q */
238  j = iq[i];
239  if(carry==0) {
240  if(j!=0) {
241  carry = 1;
242  iq[i] = 0x1000000- j;
243  }
244  } else iq[i] = 0xffffff - j;
245  }
246  if(q0>0) { /* rare case: chance is 1 in 12 */
247  switch(q0) {
248  case 1:
249  iq[jz-1] &= 0x7fffff;
250  break;
251  case 2:
252  iq[jz-1] &= 0x3fffff;
253  break;
254  }
255  }
256  if(ih==2) {
257  z = one - z;
258  if(carry!=0) z -= scalbn(one,q0);
259  }
260  }
261 
262  /* check if recomputation is needed */
263  if(z==zero) {
264  j = 0;
265  for (i=jz-1; i>=jk; i--) j |= iq[i];
266  if(j==0) { /* need recomputation */
267  for(k=1; iq[jk-k]==0; k++); /* k = no. of terms needed */
268 
269  for(i=jz+1; i<=jz+k; i++) { /* add q[jz+1] to q[jz+k] */
270  f[jx+i] = (double) ipio2[jv+i];
271  for(j=0,fw=0.0; j<=jx; j++) fw += x[j]*f[jx+i-j];
272  q[i] = fw;
273  }
274  jz += k;
275  goto recompute;
276  }
277  }
278 
279  /* chop off zero terms */
280  if(z==0.0) {
281  jz -= 1;
282  q0 -= 24;
283  while(iq[jz]==0) {
284  jz--;
285  q0-=24;
286  }
287  } else { /* break z into 24-bit if necessary */
288  z = scalbn(z,-q0);
289  if(z>=two24) {
290  fw = (double)((int)(twon24*z));
291  iq[jz] = (int)(z-two24*fw);
292  jz += 1;
293  q0 += 24;
294  iq[jz] = (int) fw;
295  } else iq[jz] = (int) z ;
296  }
297 
298  /* convert integer "bit" chunk to floating-point value */
299  fw = scalbn(one,q0);
300  for(i=jz; i>=0; i--) {
301  q[i] = fw*(double)iq[i];
302  fw*=twon24;
303  }
304 
305  /* compute PIo2[0,...,jp]*q[jz,...,0] */
306  for(i=jz; i>=0; i--) {
307  for(fw=0.0,k=0; k<=jp&&k<=jz-i; k++) fw += PIo2[k]*q[i+k];
308  fq[jz-i] = fw;
309  }
310 
311  /* compress fq[] into y[] */
312  switch(prec) {
313  case 0:
314  fw = 0.0;
315  for (i=jz; i>=0; i--) fw += fq[i];
316  y[0] = (ih==0)? fw: -fw;
317  break;
318  case 1:
319  case 2:
320  fw = 0.0;
321  for (i=jz; i>=0; i--) fw += fq[i];
322  y[0] = (ih==0)? fw: -fw;
323  fw = fq[0]-fw;
324  for (i=1; i<=jz; i++) fw += fq[i];
325  y[1] = (ih==0)? fw: -fw;
326  break;
327  case 3: /* painful */
328  for (i=jz; i>0; i--) {
329  fw = fq[i-1]+fq[i];
330  fq[i] += fq[i-1]-fw;
331  fq[i-1] = fw;
332  }
333  for (i=jz; i>1; i--) {
334  fw = fq[i-1]+fq[i];
335  fq[i] += fq[i-1]-fw;
336  fq[i-1] = fw;
337  }
338  for (fw=0.0,i=jz; i>=2; i--) fw += fq[i];
339  if(ih==0) {
340  y[0] = fq[0];
341  y[1] = fq[1];
342  y[2] = fw;
343  } else {
344  y[0] = -fq[0];
345  y[1] = -fq[1];
346  y[2] = -fw;
347  }
348  }
349  return n&7;
350 }
double scalbn(double x, int32_t n)
Definition: scalbn.c:56
static const double two24
Definition: kremp2.c:72
double floor(double x)
Floor function.
Definition: floor.c:62
static const double twon24
Definition: kremp2.c:73
static const int init_jk[]
Definition: kremp2.c:56
static const double PIo2[]
Definition: kremp2.c:58
static const double zero
Definition: kremp2.c:70
static const double one
Definition: kremp2.c:71
int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2)
Kernel reduction function.
Definition: kremp2.c:184