1 module rmathd.nchisq; 2 3 public import rmathd.common; 4 public import rmathd.gamma; 5 public import rmathd.chisq; 6 public import rmathd.poisson; 7 //import rmathd.normal; 8 9 /* 10 ** dmd nchisq.d common.d normal.d chisq.d && ./nchisq 11 */ 12 13 /* 14 * Compute the log of a sum from logs of terms, i.e., 15 * 16 * log (exp (logx) + exp (logy)) 17 * 18 * without causing overflows and without throwing away large handfuls 19 * of accuracy. 20 */ 21 T logspace_add(T)(T logx, T logy) 22 { 23 return fmax2!T(logx, logy) + log1p(exp(-fabs(logx - logy))); 24 } 25 26 static const double _dbl_min_exp = M_LN2 * DBL_MIN_EXP; 27 T pnchisq_raw(T)(T x, T f, T theta /* = ncp */, 28 T errmax, T reltol, int itrmax, 29 int lower_tail, int log_p) 30 { 31 T lam, x2, f2, term, bound, f_x_2n, f_2n; 32 T l_lam = -1., l_x = -1.; /* initialized for -Wall */ 33 int n; 34 //Rboolean 35 int lamSml, tSml, is_r, is_b, is_it; 36 real ans, u, v, t, lt, lu = -1; 37 38 if (x <= 0.) { 39 if(x == 0. && f == 0.) { 40 return lower_tail ? R_D_exp!T(-0.5*theta, log_p) : (log_p ? R_Log1_Exp!T(-0.5*theta) : -expm1(-0.5*theta)); 41 } 42 /* x < 0 or {x==0, f > 0} */ 43 return R_DT_0!T(lower_tail, log_p); 44 } 45 if(!isFinite(x)) 46 return R_DT_1!T(lower_tail, log_p); 47 48 /* This is principally for use from qnchisq */ 49 50 if(theta < 80) { /* use 110 for Inf, as ppois(110, 80/2, lower.tail=FALSE) is 2e-20 */ 51 //real ans; 52 int i; 53 // Have pgamma(x,s) < x^s / Gamma(s+1) (< and ~= for small x) 54 // ==> pchisq(x, f) = pgamma(x, f/2, 2) = pgamma(x/2, f/2) 55 // < (x/2)^(f/2) / Gamma(f/2+1) < eps 56 // <==> f/2 * log(x/2) - log(Gamma(f/2+1)) < log(eps) ( ~= -708.3964 ) 57 // <==> log(x/2) < 2/f*(log(Gamma(f/2+1)) + log(eps)) 58 // <==> log(x) < log(2) + 2/f*(log(Gamma(f/2+1)) + log(eps)) 59 if(lower_tail && f > 0. && log(x) < M_LN2 + 2/f*(lgamma!T(f/2. + 1) + _dbl_min_exp)) { 60 // all pchisq(x, f+2*i, lower_tail, FALSE), i=0,...,110 would underflow to 0. 61 // ==> work in log scale 62 T lambda = 0.5 * theta; 63 T sum, sum2, pr = -lambda; 64 sum = sum2 = -T.infinity; 65 /* we need to renormalize here: the result could be very close to 1 */ 66 for(i = 0; i < 110; pr += log(lambda) - log(++i)) { 67 sum2 = logspace_add!T(sum2, pr); 68 sum = logspace_add!T(sum, pr + pchisq!T(x, f + 2*i, lower_tail, 1)); 69 if (sum2 >= -1e-15) /*<=> EXP(sum2) >= 1-1e-15 */ 70 break; 71 } 72 ans = sum - sum2; 73 return cast(T) (log_p ? ans : exp(ans)); 74 } 75 else { 76 real lambda = 0.5 * theta; 77 real sum = 0, sum2 = 0, pr = exp(-lambda); // does this need a feature test? 78 /* we need to renormalize here: the result could be very close to 1 */ 79 for(i = 0; i < 110; pr *= lambda / ++i) { 80 // pr == exp(-lambda) lambda^i / i! == dpois(i, lambda) 81 sum2 += pr; 82 // pchisq(*, i, *) is strictly decreasing to 0 for lower_tail=TRUE 83 // and strictly increasing to 1 for lower_tail=FALSE 84 sum += pr * pchisq!T(x, f + 2*i, lower_tail, 0); 85 if (sum2 >= 1-1e-15) 86 break; 87 } 88 ans = sum/sum2; 89 return cast(T) (log_p ? log(ans) : ans); 90 } 91 } // if(theta < 80) 92 93 // else: theta == ncp >= 80 -------------------------------------------- 94 // Series expansion ------- FIXME: log_p=TRUE, lower_tail=FALSE only applied at end 95 96 lam = .5 * theta; 97 lamSml = (-lam < _dbl_min_exp); 98 if(lamSml) { 99 /* MATHLIB_ERROR( 100 "non centrality parameter (= %g) too large for current algorithm", 101 theta) */ 102 u = 0; 103 lu = -lam;/* == ln(u) */ 104 l_lam = log(lam); 105 } else { 106 u = exp(-lam); 107 } 108 109 /* evaluate the first term */ 110 v = u; 111 x2 = .5 * x; 112 f2 = .5 * f; 113 f_x_2n = f - x; 114 115 if(f2 * DBL_EPSILON > 0.125 && /* very large f and x ~= f: probably needs */ 116 fabs(t = x2 - f2) < /* another algorithm anyway */ 117 sqrt(DBL_EPSILON) * f2) { 118 /* evade cancellation error */ 119 /* t = exp((1 - t)*(2 - t/(f2 + 1))) / sqrt(2*M_PI*(f2 + 1));*/ 120 lt = (1 - t)*(2 - t/(f2 + 1)) - M_LN_SQRT_2PI - 0.5 * log(f2 + 1); 121 } 122 else { 123 /* Usual case 2: careful not to overflow .. : */ 124 lt = f2*log(x2) -x2 - lgammafn!T(f2 + 1); 125 } 126 127 tSml = (lt < _dbl_min_exp); 128 if(tSml) { 129 if (x > f + theta + 5* sqrt( 2*(f + 2*theta))) { 130 /* x > E[X] + 5* sigma(X) */ 131 return R_DT_1!T(lower_tail, log_p); /* FIXME: could be more accurate than 0. */ 132 } /* else */ 133 l_x = log(x); 134 ans = term = 0.; t = 0; 135 } 136 else { 137 t = exp(lt); 138 ans = term = cast(T) (v * t); 139 } 140 141 for (n = 1, f_2n = f + 2., f_x_2n += 2.; ; n++, f_2n += 2, f_x_2n += 2) { 142 /* f_2n === f + 2*n 143 * f_x_2n === f - x + 2*n > 0 <==> (f+2n) > x */ 144 if (f_x_2n > 0) { 145 /* find the error bound and check for convergence */ 146 bound = cast(T) (t*x / f_x_2n); 147 is_r = is_it = 0; 148 /* convergence only if BOTH absolute and relative error < 'bnd' */ 149 is_b = (bound <= errmax); is_r = (term <= reltol * ans); is_it = (n > itrmax); 150 if((is_b && is_r) || is_it) 151 { 152 break; /* out completely */ 153 } 154 } 155 156 /* evaluate the next term of the */ 157 /* expansion and then the partial sum */ 158 159 if(lamSml) { 160 lu += l_lam - log(n); /* u = u* lam / n */ 161 if(lu >= _dbl_min_exp) { 162 /* no underflow anymore ==> change regime */ 163 164 v = u = exp(lu); /* the first non-0 'u' */ 165 lamSml = 0; 166 } 167 } else { 168 u *= lam / n; 169 v += u; 170 } 171 if(tSml) { 172 lt += l_x - log(f_2n);/* t <- t * (x / f2n) */ 173 if(lt >= _dbl_min_exp) { 174 /* no underflow anymore ==> change regime */ 175 t = exp(lt); /* the first non-0 't' */ 176 tSml = 0; 177 } 178 } else { 179 t *= x / f_2n; 180 } 181 if(!lamSml && !tSml) { 182 term = cast(T) (v * t); 183 ans += term; 184 } 185 186 } /* for(n ...) */ 187 188 if (is_it) { 189 doNothing(); 190 } 191 T dans = cast(T) ans; 192 return R_DT_val!T(dans, lower_tail, log_p); 193 } 194 195 196 T pnchisq(T: double)(T x, T df, T ncp, int lower_tail, int log_p) 197 { 198 T ans; 199 if (isNaN(x) || isNaN(df) || isNaN(ncp)) 200 return x + df + ncp; 201 if (!isFinite(df) || !isFinite(ncp)) 202 return T.nan; 203 204 if (df < 0. || ncp < 0.) 205 return T.nan; 206 207 mixin R_D__1!log_p; 208 ans = pnchisq_raw!T(x, df, ncp, 1e-12, 8*DBL_EPSILON, 1000000, lower_tail, log_p); 209 if(ncp >= 80) { 210 if(lower_tail) { 211 ans = fmin2!T(ans, R_D__1); /* e.g., pchisq(555, 1.01, ncp = 80) */ 212 } else { /* !lower_tail */ 213 /* since we computed the other tail cancellation is likely */ 214 if(ans < (log_p ? (-10. * M_LN10) : 1e-10)){ 215 doNothing(); 216 } 217 if(!log_p) ans = fmax2!T(ans, 0.0); /* Precaution PR#7099 */ 218 } 219 } 220 if (!log_p || ans < -1e-8) 221 return ans; 222 else { // log_p && ans > -1e-8 223 // prob. = exp(ans) is near one: we can do better using the other tail 224 // FIXME: (sum,sum2) will be the same (=> return them as well and reuse here ?) 225 ans = pnchisq_raw!T(x, df, ncp, 1e-12, 8*DBL_EPSILON, 1000000, !lower_tail, 0); 226 return log1p(-ans); 227 } 228 } 229 230 231 T dnchisq(T: double)(T x, T df, T ncp, int give_log) 232 { 233 const static T eps = 5e-15; 234 235 T i, ncp2, q, mid, dfmid, imax; 236 real sum, term; 237 238 if (isNaN(x) || isNaN(df) || isNaN(ncp)) 239 return x + df + ncp; 240 241 if (!isFinite(df) || !isFinite(ncp) || ncp < 0 || df < 0) 242 return T.nan; 243 244 mixin R_D__0!give_log; 245 246 if(x < 0) 247 return R_D__0; 248 if(x == 0 && df < 2.) 249 return T.infinity; 250 if(ncp == 0) 251 return (df > 0) ? dchisq!T(x, df, give_log) : R_D__0; 252 if(x == T.infinity) 253 return R_D__0; 254 255 ncp2 = 0.5 * ncp; 256 257 /* find max element of sum */ 258 imax = ceil((-(2 + df) + sqrt((2 - df) * (2 - df) + 4*ncp*x))/4); 259 if (imax < 0) 260 imax = 0; 261 if(isFinite(imax)) { 262 dfmid = df + 2 * imax; 263 mid = dpois_raw!T(imax, ncp2, 0) * dchisq!T(x, dfmid, 0); 264 } else /* imax = Inf */ 265 mid = 0; 266 267 if(mid == 0) { 268 /* underflow to 0 -- maybe numerically correct; maybe can be more accurate, 269 * particularly when give_log = TRUE */ 270 /* Use central-chisq approximation formula when appropriate; 271 * ((FIXME: the optimal cutoff also depends on (x,df); use always here? )) */ 272 if(give_log || ncp > 1000.) { 273 T nl = df + ncp, ic = nl/(nl + ncp);/* = "1/(1+b)" Abramowitz & St.*/ 274 return dchisq!T(x*ic, nl*ic, give_log); 275 } else 276 return R_D__0; 277 } 278 279 sum = mid; 280 281 /* errorbound := term * q / (1-q) now subsumed in while() / if() below: */ 282 283 /* upper tail */ 284 term = mid; df = dfmid; i = imax; 285 T x2 = x * ncp2; 286 do { 287 i++; 288 q = x2 / i / df; 289 df += 2; 290 term *= q; 291 sum += term; 292 } while (q >= 1 || term * q > (1 - q)*eps || term > 1e-10*sum); 293 /* lower tail */ 294 term = mid; df = dfmid; i = imax; 295 while (i != 0) { 296 df -= 2; 297 q = i * df / x2; 298 i--; 299 term *= q; 300 sum += term; 301 if (q < 1 && term * q <= (1 - q)*eps) 302 break; 303 } 304 return R_D_val!T(cast(T)sum, give_log); 305 } 306 307 308 T qnchisq(T: double)(T p, T df, T ncp, int lower_tail, int log_p) 309 { 310 const static T accu = 1e-13; 311 const static T racc = 4*DBL_EPSILON; 312 /* these two are for the "search" loops, can have less accuracy: */ 313 const static T Eps = 1e-11; /* must be > accu */ 314 const static T rEps= 1e-10; /* relative tolerance ... */ 315 316 T ux, lx, ux0, nx, pp; 317 318 319 if (isNaN(p) || isNaN(df) || isNaN(ncp)) 320 return p + df + ncp; 321 322 if (!isFinite(df)) 323 return T.nan; 324 325 /* Was 326 * df = floor(df + 0.5); 327 * if (df < 1 || ncp < 0) ML_ERR_return_NAN; 328 */ 329 if (df < 0 || ncp < 0) 330 return T.nan; 331 332 immutable T INF = T.infinity; 333 mixin (R_Q_P01_boundaries!(p, 0, INF)()); 334 335 pp = R_D_qIv!T(p, log_p); 336 if(pp > 1 - DBL_EPSILON) 337 return lower_tail ? T.infinity : 0.0; 338 339 /* Invert pnchisq(.) : 340 * 1. finding an upper and lower bound */ 341 { 342 /* This is Pearson's (1959) approximation, 343 which is usually good to 4 figs or so. */ 344 T b, c, ff; 345 b = (ncp*ncp)/(df + 3*ncp); 346 c = (df + 3*ncp)/(df + 2*ncp); 347 ff = (df + 2 * ncp)/(c*c); 348 ux = b + c * qchisq!T(p, ff, lower_tail, log_p); 349 if(ux < 0) ux = 1; 350 ux0 = ux; 351 } 352 353 if(!lower_tail && ncp >= 80) { 354 /* in this case, pnchisq() works via lower_tail = TRUE */ 355 if(pp < 1e-10){ 356 doNothing(); 357 } 358 p = /* R_DT_qIv(p)*/ log_p ? -expm1(p) : (0.5 - (p) + 0.5); 359 lower_tail = 1; 360 } else { 361 p = pp; 362 } 363 364 pp = fmin2!T(1 - DBL_EPSILON, p * (1 + Eps)); 365 if(lower_tail) { 366 for(; ux < DBL_MAX && 367 pnchisq_raw!T(ux, df, ncp, Eps, rEps, 10000, 1, 0) < pp; 368 ux *= 2){} 369 pp = p * (1 - Eps); 370 for(lx = fmin2!T(ux0, DBL_MAX); 371 lx > DBL_MIN && 372 pnchisq_raw!T(lx, df, ncp, Eps, rEps, 10000, 1, 0) > pp; 373 lx *= 0.5){} 374 } 375 else { 376 for(; ux < DBL_MAX && 377 pnchisq_raw!T(ux, df, ncp, Eps, rEps, 10000, 0, 0) > pp; 378 ux *= 2){} 379 pp = p * (1 - Eps); 380 for(lx = fmin2!T(ux0, DBL_MAX); 381 lx > DBL_MIN && pnchisq_raw!T(lx, df, ncp, Eps, rEps, 10000, 0, 0) < pp; 382 lx *= 0.5){} 383 } 384 385 /* 2. interval (lx,ux) halving : */ 386 if(lower_tail) { 387 do { 388 nx = 0.5 * (lx + ux); 389 if (pnchisq_raw!T(nx, df, ncp, accu, racc, 100000, 1, 0) > p) 390 ux = nx; 391 else 392 lx = nx; 393 } 394 while ((ux - lx) / nx > accu); 395 } else { 396 do { 397 nx = 0.5 * (lx + ux); 398 if (pnchisq_raw!T(nx, df, ncp, accu, racc, 100000, 0, 0) < p) 399 ux = nx; 400 else 401 lx = nx; 402 } 403 while ((ux - lx) / nx > accu); 404 } 405 return 0.5 * (ux + lx); 406 } 407 408 409 T rnchisq(T: double)(T df, T lambda) 410 { 411 if (!isFinite(df) || !isFinite(lambda) || df < 0. || lambda < 0.) 412 return T.nan; 413 414 if(lambda == 0.) { 415 return (df == 0.) ? 0. : rgamma!T(df / 2., 2.); 416 } else { 417 T r = rpois!T(lambda / 2.); 418 if(r > 0.) r = rchisq!T(2. * r); 419 if(df > 0.) r += rgamma!T(df / 2., 2.); 420 return r; 421 } 422 } 423 424 425 426 void test_nchisq() 427 { 428 import std.stdio: writeln; 429 writeln("dnchisq: ", dnchisq(2., 3., 7., 0)); 430 writeln("pnchisq: ", pnchisq(10., 3., 7., 1, 0)); 431 writeln("qnchisq: ", qnchisq(.6, 3., 7., 1, 0)); 432 writeln("rnchisq: ", rnchisq(3., 7.), ", rnchisq: ", rnchisq(3., 7.) , ", rnchisq: ", rnchisq(3., 7.)); 433 } 434