amath  1.8.5
Simple command line calculator
kremp2.c File Reference

Kernel reduction function. More...

#include "prim.h"
Include dependency graph for kremp2.c:

Go to the source code of this file.

Functions

int __kernel_rem_pio2 (double *x, double *y, int e0, int nx, int prec, const int *ipio2)
 Kernel reduction function. More...
 

Variables

static const int init_jk [] = {2,3,4,6}
 
static const double PIo2 []
 
static const double zero = 0.0
 
static const double one = 1.0
 
static const double two24 = 1.67772160000000000000e+07
 
static const double twon24 = 5.96046447753906250000e-08
 

Detailed Description

Kernel reduction function.

Definition in file kremp2.c.

Function Documentation

◆ __kernel_rem_pio2()

int __kernel_rem_pio2 ( double *  x,
double *  y,
int  e0,
int  nx,
int  prec,
const int *  ipio2 
)

Kernel reduction function.

__kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
double x[],y[]; int e0,nx,prec; int ipio2[];
__kernel_rem_pio2 return the last three digits of N with
    y = x - N*pi/2
so that |y| < pi/2.
The method is to compute the integer (mod 8) and fraction parts of
(2/pi)*x without doing the full multiplication. In general we
skip the part of the product that are known to be a huge integer (
more accurately, = 0 mod 8 ). Thus the number of operations are
independent of the exponent of the input.
(2/pi) is represented by an array of 24-bit integers in ipio2[].
Input parameters:
    x[] The input value (must be positive) is broken into nx
    pieces of 24-bit integers in double precision format.
    x[i] will be the i-th 24 bit of x. The scaled exponent
    of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
    match x's up to 24 bits.
    Example of breaking a double positive z into x[0]+x[1]+x[2]:
        e0 = ilogb(z)-23
        z  = scalbn(z,-e0)
    for i = 0,1,2
        x[i] = floor(z)
        z    = (z-x[i])*2**24
 y[]    ouput result in an array of double precision numbers.
    The dimension of y[] is:
        24-bit  precision   1
        53-bit  precision   2
        64-bit  precision   2
        113-bit precision   3
    The actual value is the sum of them. Thus for 113-bit
    precison, one may have to do something like:
    long double t,w,r_head, r_tail;
    t = (long double)y[2] + (long double)y[1];
    w = (long double)y[0];
    r_head = t+w;
    r_tail = w - (r_head - t);
 e0 The exponent of x[0]
 nx dimension of x[]
    prec    an integer indicating the precision:
        0   24  bits (single)
        1   53  bits (double)
        2   64  bits (extended)
        3   113 bits (quad)
 ipio2[]
    integer array, contains the (24*i)-th to (24*i+23)-th
    bit of 2/pi after binary point. The corresponding
    floating value is
ipio2[i] * 2^(-24(i+1)).
External function:
 double scalbn(), floor();
Here is the description of some local variables:
jk  jk+1 is the initial number of terms of ipio2[] needed
in the computation. The recommended value is 2,3,4,
6 for single, double, extended,and quad.

jz  local integer variable indicating the number of
terms of ipio2[] used.
 jx nx - 1
 jv index for pointing to the suitable ipio2[] for the
    computation. In general, we want
        ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
    is an integer. Thus
        e0-3-24*jv >= 0 or (e0-3)/24 >= jv
    Hence jv = max(0,(e0-3)/24).
 jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
    q[] double array with integral value, representing the
    24-bits chunk of the product of x and 2/pi.
 q0 the corresponding exponent of q[0]. Note that the
    exponent for q[i] would be q0-24*i.
 PIo2[] double precision array, obtained by cutting pi/2
    into 24 bits chunks.
 f[]    ipio2[] in floating point
 iq[]   integer array by breaking up q[] in 24-bits chunk.
 fq[]   final product of x*(2/pi) in fq[0],..,fq[jk]
 ih integer. If >0 it indicates q[] is >= 0.5, hence
    it also indicates the sign of the result.

Definition at line 184 of file kremp2.c.

References floor(), init_jk, one, PIo2, scalbn(), two24, twon24, and zero.

Referenced by rempio2().

185 {
186  int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
187  double z,fw,f[20],fq[20],q[20];
188 
189  /* initialize jk*/
190  jk = init_jk[prec];
191  jp = jk;
192 
193  /* determine jx,jv,q0, note that 3>q0 */
194  jx = nx-1;
195  jv = (e0-3)/24;
196  if(jv<0) jv=0;
197  q0 = e0-24*(jv+1);
198 
199  /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
200  j = jv-jx;
201  m = jx+jk;
202  for(i=0; i<=m; i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
203 
204  /* compute q[0],q[1],...q[jk] */
205  for (i=0; i<=jk; i++) {
206  for(j=0,fw=0.0; j<=jx; j++) fw += x[j]*f[jx+i-j];
207  q[i] = fw;
208  }
209 
210  jz = jk;
211 recompute:
212  /* distill q[] into iq[] reversingly */
213  for(i=0,j=jz,z=q[jz]; j>0; i++,j--) {
214  fw = (double)((int)(twon24* z));
215  iq[i] = (int)(z-two24*fw);
216  z = q[j-1]+fw;
217  }
218 
219  /* compute n */
220  z = scalbn(z,q0); /* actual value of z */
221  z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
222  n = (int) z;
223  z -= (double)n;
224  ih = 0;
225  if(q0>0) { /* need iq[jz-1] to determine n */
226  i = (iq[jz-1]>>(24-q0));
227  n += i;
228  iq[jz-1] -= i<<(24-q0);
229  ih = iq[jz-1]>>(23-q0);
230  }
231  else if(q0==0) ih = iq[jz-1]>>23;
232  else if(z>=0.5) ih=2;
233 
234  if(ih>0) { /* q > 0.5 */
235  n += 1;
236  carry = 0;
237  for(i=0; i<jz ; i++) { /* compute 1-q */
238  j = iq[i];
239  if(carry==0) {
240  if(j!=0) {
241  carry = 1;
242  iq[i] = 0x1000000- j;
243  }
244  } else iq[i] = 0xffffff - j;
245  }
246  if(q0>0) { /* rare case: chance is 1 in 12 */
247  switch(q0) {
248  case 1:
249  iq[jz-1] &= 0x7fffff;
250  break;
251  case 2:
252  iq[jz-1] &= 0x3fffff;
253  break;
254  }
255  }
256  if(ih==2) {
257  z = one - z;
258  if(carry!=0) z -= scalbn(one,q0);
259  }
260  }
261 
262  /* check if recomputation is needed */
263  if(z==zero) {
264  j = 0;
265  for (i=jz-1; i>=jk; i--) j |= iq[i];
266  if(j==0) { /* need recomputation */
267  for(k=1; iq[jk-k]==0; k++); /* k = no. of terms needed */
268 
269  for(i=jz+1; i<=jz+k; i++) { /* add q[jz+1] to q[jz+k] */
270  f[jx+i] = (double) ipio2[jv+i];
271  for(j=0,fw=0.0; j<=jx; j++) fw += x[j]*f[jx+i-j];
272  q[i] = fw;
273  }
274  jz += k;
275  goto recompute;
276  }
277  }
278 
279  /* chop off zero terms */
280  if(z==0.0) {
281  jz -= 1;
282  q0 -= 24;
283  while(iq[jz]==0) {
284  jz--;
285  q0-=24;
286  }
287  } else { /* break z into 24-bit if necessary */
288  z = scalbn(z,-q0);
289  if(z>=two24) {
290  fw = (double)((int)(twon24*z));
291  iq[jz] = (int)(z-two24*fw);
292  jz += 1;
293  q0 += 24;
294  iq[jz] = (int) fw;
295  } else iq[jz] = (int) z ;
296  }
297 
298  /* convert integer "bit" chunk to floating-point value */
299  fw = scalbn(one,q0);
300  for(i=jz; i>=0; i--) {
301  q[i] = fw*(double)iq[i];
302  fw*=twon24;
303  }
304 
305  /* compute PIo2[0,...,jp]*q[jz,...,0] */
306  for(i=jz; i>=0; i--) {
307  for(fw=0.0,k=0; k<=jp&&k<=jz-i; k++) fw += PIo2[k]*q[i+k];
308  fq[jz-i] = fw;
309  }
310 
311  /* compress fq[] into y[] */
312  switch(prec) {
313  case 0:
314  fw = 0.0;
315  for (i=jz; i>=0; i--) fw += fq[i];
316  y[0] = (ih==0)? fw: -fw;
317  break;
318  case 1:
319  case 2:
320  fw = 0.0;
321  for (i=jz; i>=0; i--) fw += fq[i];
322  y[0] = (ih==0)? fw: -fw;
323  fw = fq[0]-fw;
324  for (i=1; i<=jz; i++) fw += fq[i];
325  y[1] = (ih==0)? fw: -fw;
326  break;
327  case 3: /* painful */
328  for (i=jz; i>0; i--) {
329  fw = fq[i-1]+fq[i];
330  fq[i] += fq[i-1]-fw;
331  fq[i-1] = fw;
332  }
333  for (i=jz; i>1; i--) {
334  fw = fq[i-1]+fq[i];
335  fq[i] += fq[i-1]-fw;
336  fq[i-1] = fw;
337  }
338  for (fw=0.0,i=jz; i>=2; i--) fw += fq[i];
339  if(ih==0) {
340  y[0] = fq[0];
341  y[1] = fq[1];
342  y[2] = fw;
343  } else {
344  y[0] = -fq[0];
345  y[1] = -fq[1];
346  y[2] = -fw;
347  }
348  }
349  return n&7;
350 }
static const double two24
Definition: kremp2.c:72
static const double twon24
Definition: kremp2.c:73
double scalbn(double x, int n)
static const int init_jk[]
Definition: kremp2.c:56
static const double PIo2[]
Definition: kremp2.c:58
static const double zero
Definition: kremp2.c:70
static const double one
Definition: kremp2.c:71
double floor(double x)
Floor function.
Definition: floor.c:62
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ init_jk

const int init_jk[] = {2,3,4,6}
static

Definition at line 56 of file kremp2.c.

Referenced by __kernel_rem_pio2().

◆ one

const double one = 1.0
static

Definition at line 71 of file kremp2.c.

Referenced by __kernel_rem_pio2().

◆ PIo2

const double PIo2[]
static
Initial value:
= {
1.57079625129699707031e+00,
7.54978941586159635335e-08,
5.39030252995776476554e-15,
3.28200341580791294123e-22,
1.27065575308067607349e-29,
1.22933308981111328932e-36,
2.73370053816464559624e-44,
2.16741683877804819444e-51,
}

Definition at line 58 of file kremp2.c.

Referenced by __kernel_rem_pio2().

◆ two24

const double two24 = 1.67772160000000000000e+07
static

Definition at line 72 of file kremp2.c.

Referenced by __kernel_rem_pio2().

◆ twon24

const double twon24 = 5.96046447753906250000e-08
static

Definition at line 73 of file kremp2.c.

Referenced by __kernel_rem_pio2().

◆ zero

const double zero = 0.0
static

Definition at line 70 of file kremp2.c.

Referenced by __kernel_rem_pio2().