1 module tukey;
2 
3 import rmathd.common;
4 import rmathd.normal;
5 
6 
7 /* 
8 ** t.d beta.d chisq.d
9 ** dmd tukey.d common.d normal.d && ./tukey
10 */
11 
12 
13 static T wprob(T)(T w, T rr, T cc)
14 {
15 /*  wprob() :
16 
17 	This function calculates probability integral of Hartley's
18 	form of the range.
19 
20 	w     = value of range
21 	rr    = no. of rows or groups
22 	cc    = no. of columns or treatments
23 	ir    = error flag = 1 if pr_w probability > 1
24 	pr_w = returned probability integral from (0, w)
25 
26 	program will not terminate if ir is raised.
27 
28 	bb = upper limit of legendre integration
29 	iMax = maximum acceptable value of integral
30 	nleg = order of legendre quadrature
31 	ihalf = int ((nleg + 1) / 2)
32 	wlar = value of range above which wincr1 intervals are used to
33 	       calculate second part of integral,
34 	       else wincr2 intervals are used.
35 	C1, C2, C3 = values which are used as cutoffs for terminating
36 	or modifying a calculation.
37 
38 	M_1_SQRT_2PI = 1 / sqrt(2 * pi);  from abramowitz & stegun, p. 3.
39 	M_SQRT2 = sqrt(2)
40 	xleg = legendre 12-point nodes
41 	aleg = legendre 12-point coefficients
42  */
43     enum nleg  = 12;
44     enum ihalf = 6;
45 
46     /* looks like this is suboptimal for double precision.
47        (see how C1-C3 are used) <MM>
48     */
49     /* const double iMax  = 1.; not used if = 1*/
50     const static T C1 = -30.;
51     const static T C2 = -50.;
52     const static T C3 = 60.;
53     const static T bb   = 8.;
54     const static T wlar = 3.;
55     const static T wincr1 = 2.;
56     const static T wincr2 = 3.;
57     const static T[ihalf] xleg = [
58 	0.981560634246719250690549090149,
59 	0.904117256370474856678465866119,
60 	0.769902674194304687036893833213,
61 	0.587317954286617447296702418941,
62 	0.367831498998180193752691536644,
63 	0.125233408511468915472441369464
64     ];
65     const static T[ihalf] aleg = [
66 	0.047175336386511827194615961485,
67 	0.106939325995318430960254718194,
68 	0.160078328543346226334652529543,
69 	0.203167426723065921749064455810,
70 	0.233492536538354808760849898925,
71 	0.249147045813402785000562436043
72     ];
73     T a, ac, pr_w, b, binc, c, cc1,
74 	pminus, pplus, qexpo, qsqz, rinsum, wi, wincr, xx;
75     real blb, bub, einsum, elsum;
76     int j, jj;
77 
78 
79     qsqz = w * 0.5;
80 
81     /* if w >= 16 then the integral lower bound (occurs for c=20) */
82     /* is 0.99999999999995 so return a value of 1. */
83 
84     if (qsqz >= bb)
85 	    return 1.0;
86 
87     /* find (f(w/2) - 1) ^ cc */
88     /* (first term in integral of hartley's form). */
89 
90     pr_w = 2 * pnorm!T(qsqz, 0.,1., 1,0) - 1.; /* erf(qsqz / M_SQRT2) */
91     /* if pr_w ^ cc < 2e-22 then set pr_w = 0 */
92     if (pr_w >= exp(C2 / cc))
93 	    pr_w = pow(pr_w, cc);
94     else
95 	    pr_w = 0.0;
96 
97     /* if w is large then the second component of the */
98     /* integral is small, so fewer intervals are needed. */
99 
100     if (w > wlar)
101 	    wincr = wincr1;
102     else
103 	    wincr = wincr2;
104 
105     /* find the integral of second term of hartley's form */
106     /* for the integral of the range for equal-length */
107     /* intervals using legendre quadrature.  limits of */
108     /* integration are from (w/2, 8).  two or three */
109     /* equal-length intervals are used. */
110 
111     /* blb and bub are lower and upper limits of integration. */
112 
113     blb = qsqz;
114     binc = (bb - qsqz) / wincr;
115     bub = blb + binc;
116     einsum = 0.0;
117 
118     /* integrate over each interval */
119 
120     cc1 = cc - 1.0;
121     for (wi = 1; wi <= wincr; wi++) {
122 	elsum = 0.0;
123 	a = cast(T)(0.5 * (bub + blb));
124 
125 	/* legendre quadrature with order = nleg */
126 
127 	b = cast(T)(0.5 * (bub - blb));
128 
129 	for (jj = 1; jj <= nleg; jj++) {
130 	    if (ihalf < jj) {
131 		j = (nleg - jj) + 1;
132 		xx = xleg[j-1];
133 	    } else {
134 		j = jj;
135 		xx = -xleg[j-1];
136 	    }
137 	    c = b * xx;
138 	    ac = a + c;
139 
140 	    /* if exp(-qexpo/2) < 9e-14, */
141 	    /* then doesn't contribute to integral */
142 
143 	    qexpo = ac * ac;
144 	    if (qexpo > C3)
145 		break;
146 
147 	    pplus = 2 * pnorm!T(ac, 0., 1., 1,0);
148 	    pminus= 2 * pnorm!T(ac, w,  1., 1,0);
149 
150 	    /* if rinsum ^ (cc-1) < 9e-14, */
151 	    /* then doesn't contribute to integral */
152 
153 	    rinsum = (pplus * 0.5) - (pminus * 0.5);
154 	    if (rinsum >= exp(C1 / cc1)) {
155 		rinsum = (aleg[j-1] * exp(-(0.5 * qexpo))) * pow(rinsum, cc1);
156 		elsum += rinsum;
157 	    }
158 	}
159 	elsum *= (((2.0 * b) * cc) * M_1_SQRT_2PI);
160 	einsum += elsum;
161 	blb = bub;
162 	bub += binc;
163     }
164 
165     /* if pr_w ^ rr < 9e-14, then return 0 */
166     pr_w += cast(T) einsum;
167     if (pr_w <= exp(C1 / rr))
168 	return 0.;
169 
170     pr_w = pow(pr_w, rr);
171     if (pr_w >= 1.)/* 1 was iMax was eps */
172 	return 1.;
173     return pr_w;
174 } /* wprob() */
175 
176 
177 /* Returns incorrect answers! */
178 T ptukey(T)(T q, T rr, T cc, T df, int lower_tail, int log_p)
179 {
180 /*  function ptukey() [was qprob() ]:
181 
182 	q = value of studentized range
183 	rr = no. of rows or groups
184 	cc = no. of columns or treatments
185 	df = degrees of freedom of error term
186 	ir[0] = error flag = 1 if wprob probability > 1
187 	ir[1] = error flag = 1 if qprob probability > 1
188 
189 	qprob = returned probability integral over [0, q]
190 
191 	The program will not terminate if ir[0] or ir[1] are raised.
192 
193 	All references in wprob to Abramowitz and Stegun
194 	are from the following reference:
195 
196 	Abramowitz, Milton and Stegun, Irene A.
197 	Handbook of Mathematical Functions.
198 	New York:  Dover publications, Inc. (1970).
199 
200 	All constants taken from this text are
201 	given to 25 significant digits.
202 
203 	nlegq = order of legendre quadrature
204 	ihalfq = int ((nlegq + 1) / 2)
205 	eps = max. allowable value of integral
206 	eps1 & eps2 = values below which there is
207 		      no contribution to integral.
208 
209 	d.f. <= dhaf:	integral is divided into ulen1 length intervals.  else
210 	d.f. <= dquar:	integral is divided into ulen2 length intervals.  else
211 	d.f. <= deigh:	integral is divided into ulen3 length intervals.  else
212 	d.f. <= dlarg:	integral is divided into ulen4 length intervals.
213 
214 	d.f. > dlarg:	the range is used to calculate integral.
215 
216 	M_LN2 = log(2)
217 
218 	xlegq = legendre 16-point nodes
219 	alegq = legendre 16-point coefficients
220 
221 	The coefficients and nodes for the legendre quadrature used in
222 	qprob and wprob were calculated using the algorithms found in:
223 
224 	Stroud, A. H. and Secrest, D.
225 	Gaussian Quadrature Formulas.
226 	Englewood Cliffs,
227 	New Jersey:  Prentice-Hall, Inc, 1966.
228 
229 	All values matched the tables (provided in same reference)
230 	to 30 significant digits.
231 
232 	f(x) = .5 + erf(x / sqrt(2)) / 2      for x > 0
233 
234 	f(x) = erfc( -x / sqrt(2)) / 2	      for x < 0
235 
236 	where f(x) is standard normal c. d. f.
237 
238 	if degrees of freedom large, approximate integral
239 	with range distribution.
240  */
241     enum nlegq  = 16;
242     enum ihalfq = 8;
243 
244 /*  const double eps = 1.0; not used if = 1 */
245     const static T eps1 = -30.0;
246     const static T eps2 = 1.0e-14;
247     const static T dhaf  = 100.0;
248     const static T dquar = 800.0;
249     const static T deigh = 5000.0;
250     const static T dlarg = 25000.0;
251     const static T ulen1 = 1.0;
252     const static T ulen2 = 0.5;
253     const static T ulen3 = 0.25;
254     const static T ulen4 = 0.125;
255     const static T[ihalfq] xlegq = [
256 	0.989400934991649932596154173450,
257 	0.944575023073232576077988415535,
258 	0.865631202387831743880467897712,
259 	0.755404408355003033895101194847,
260 	0.617876244402643748446671764049,
261 	0.458016777657227386342419442984,
262 	0.281603550779258913230460501460,
263 	0.950125098376374401853193354250e-1
264     ];
265     const static T[ihalfq] alegq = [
266 	0.271524594117540948517805724560e-1,
267 	0.622535239386478928628438369944e-1,
268 	0.951585116824927848099251076022e-1,
269 	0.124628971255533872052476282192,
270 	0.149595988816576732081501730547,
271 	0.169156519395002538189312079030,
272 	0.182603415044923588866763667969,
273 	0.189450610455068496285396723208
274     ];
275     T ans, f2, f21, f2lf, ff4, otsum, qsqz, rotsum, t1, twa1, ulen, wprb;
276     int i, j, jj;
277 
278     if (isNaN(q) || isNaN(rr) || isNaN(cc) || isNaN(df))
279 	    return T.nan;
280 
281     if (q <= 0)
282 	    return R_DT_0!T(lower_tail, log_p);
283 
284     /* df must be > 1 */
285     /* there must be at least two values */
286 
287     if (df < 2 || rr < 1 || cc < 2)
288     	return T.nan;
289 
290     if(!isFinite(q))
291 	    return R_DT_1!T(lower_tail, log_p);
292 
293     if (df > dlarg)
294 	    return R_DT_val!T(wprob!T(q, rr, cc), lower_tail, log_p);
295 
296     /* calculate leading constant */
297 
298     f2 = df * 0.5;
299     /* lgammafn(u) = log(gamma(u)) */
300     f2lf = ((f2 * log(df)) - (df * M_LN2)) - lgammafn!T(f2);
301     f21 = f2 - 1.0;
302 
303     /* integral is divided into unit, half-unit, quarter-unit, or */
304     /* eighth-unit length intervals depending on the value of the */
305     /* degrees of freedom. */
306 
307     ff4 = df * 0.25;
308     if(df <= dhaf)
309     	ulen = ulen1;
310     else if(df <= dquar)
311     	ulen = ulen2;
312     else if(df <= deigh)
313     	ulen = ulen3;
314     else
315     	ulen = ulen4;
316 
317     f2lf += log(ulen);
318 
319     /* integrate over each subinterval */
320 
321     ans = 0.0;
322 
323     for (i = 1; i <= 50; i++) {
324 	otsum = 0.0;
325 
326 	/* legendre quadrature with order = nlegq */
327 	/* nodes (stored in xlegq) are symmetric around zero. */
328 
329 	twa1 = (2 * i - 1) * ulen;
330 
331 	for (jj = 1; jj <= nlegq; jj++) {
332 	    if (ihalfq < jj) {
333 		j = jj - ihalfq - 1;
334 		t1 = (f2lf + (f21 * log(twa1 + (xlegq[j] * ulen))))
335 		    - (((xlegq[j] * ulen) + twa1) * ff4);
336 	    } else {
337 		j = jj - 1;
338 		t1 = (f2lf + (f21 * log(twa1 - (xlegq[j] * ulen))))
339 		    + (((xlegq[j] * ulen) - twa1) * ff4);
340 
341 	    }
342 
343 	    /* if exp(t1) < 9e-14, then doesn't contribute to integral */
344 	    if (t1 >= eps1) {
345 		if (ihalfq < jj) {
346 		    qsqz = q * sqrt(((xlegq[j] * ulen) + twa1) * 0.5);
347 		} else {
348 		    qsqz = q * sqrt(((-(xlegq[j] * ulen)) + twa1) * 0.5);
349 		}
350 
351 		/* call wprob to find integral of range portion */
352 
353 		wprb = wprob!T(qsqz, rr, cc);
354 		rotsum = (wprb * alegq[j]) * exp(t1);
355 		otsum += rotsum;
356 	    }
357 	    /* end legendre integral for interval i */
358 	    /* L200: */
359 	}
360 
361 	/* if integral for interval i < 1e-14, then stop.
362 	 * However, in order to avoid small area under left tail,
363 	 * at least  1 / ulen  intervals are calculated.
364 	 */
365 	if (i * ulen >= 1.0 && otsum <= eps2)
366 	    break;
367 
368 	/* end of interval i */
369 	/* L330: */
370 
371 	ans += otsum;
372     }
373 
374     if(otsum > eps2) { /* not converged */
375 	    //ML_ERROR(ME_PRECISION, "ptukey");
376 	    assert(0, "Precision ptukey");
377     }
378     if (ans > 1.)
379 	    ans = 1.;
380     return R_DT_val!T(ans, lower_tail, log_p);
381 }
382 
383 
384 /* qinv() :
385  *	this function finds percentage point of the studentized range
386  *	which is used as initial estimate for the secant method.
387  *	function is adapted from portion of algorithm as 70
388  *	from applied statistics (1974) ,vol. 23, no. 1
389  *	by odeh, r. e. and evans, j. o.
390  *
391  *	  p = percentage point
392  *	  c = no. of columns or treatments
393  *	  v = degrees of freedom
394  *	  qinv = returned initial estimate
395  *
396  *	vmax is cutoff above which degrees of freedom
397  *	is treated as infinity.
398  */
399 
400 static T qinv(T)(T p, T c, T v)
401 {
402     const static T p0 = 0.322232421088;
403     const static T q0 = 0.993484626060e-01;
404     const static T p1 = -1.0;
405     const static T q1 = 0.588581570495;
406     const static T p2 = -0.342242088547;
407     const static T q2 = 0.531103462366;
408     const static T p3 = -0.204231210125;
409     const static T q3 = 0.103537752850;
410     const static T p4 = -0.453642210148e-04;
411     const static T q4 = 0.38560700634e-02;
412     const static T c1 = 0.8832;
413     const static T c2 = 0.2368;
414     const static T c3 = 1.214;
415     const static T c4 = 1.208;
416     const static T c5 = 1.4142;
417     const static T vmax = 120.0;
418 
419     T ps, q, t, yi;
420 
421     ps = 0.5 - 0.5 * p;
422     yi = sqrt (log (1.0 / (ps * ps)));
423     t = yi + (((( yi * p4 + p3) * yi + p2) * yi + p1) * yi + p0)
424 	   / (((( yi * q4 + q3) * yi + q2) * yi + q1) * yi + q0);
425     if (v < vmax) t += (t * t * t + t) / v / 4.0;
426     q = c1 - c2 * t;
427     if (v < vmax) q += -c3 / v + c4 * t / v;
428     return t * (q * log (c - 1.0) + c5);
429 }
430 
431 /*
432  *  Copenhaver, Margaret Diponzio & Holland, Burt S.
433  *  Multiple comparisons of simple effects in
434  *  the two-way analysis of variance with fixed effects.
435  *  Journal of Statistical Computation and Simulation,
436  *  Vol.30, pp.1-15, 1988.
437  *
438  *  Uses the secant method to find critical values.
439  *
440  *  p = confidence level (1 - alpha)
441  *  rr = no. of rows or groups
442  *  cc = no. of columns or treatments
443  *  df = degrees of freedom of error term
444  *
445  *  ir(1) = error flag = 1 if wprob probability > 1
446  *  ir(2) = error flag = 1 if ptukey probability > 1
447  *  ir(3) = error flag = 1 if convergence not reached in 50 iterations
448  *		       = 2 if df < 2
449  *
450  *  qtukey = returned critical value
451  *
452  *  If the difference between successive iterates is less than eps,
453  *  the search is terminated
454  */
455 
456 import std.stdio: writeln;
457 T qtukey(T)(T p, T rr, T cc, T df, int lower_tail, int log_p)
458 {
459     const static T eps = 0.0001;
460     const int maxiter = 50;
461 
462     T ans = 0.0, valx0, valx1, x0, x1, xabs;
463     int iter;
464 
465     if (isNaN(p) || isNaN(rr) || isNaN(cc) || isNaN(df)) {
466 	    //ML_ERROR(ME_DOMAIN, "qtukey");
467 	    return p + rr + cc + df;
468     }
469 
470     /* df must be > 1 ; there must be at least two values */
471     if (df < 2 || rr < 1 || cc < 2)
472     	return T.nan;
473 
474     immutable(T) INF = T.infinity;
475     mixin (R_Q_P01_boundaries!(p, 0, INF));
476 
477     mixin R_DT_qIv!p;
478     p = R_DT_qIv; /* lower_tail,non-log "p" */
479 
480     /* Initial value */
481 
482     x0 = qinv!T(p, cc, df);
483 
484     /* Find prob(value < x0) */
485 
486     valx0 = ptukey!T(x0, rr, cc, df, /*LOWER*/1, /*LOG_P*/0) - p;
487 
488     /* Find the second iterate and prob(value < x1). */
489     /* If the first iterate has probability value */
490     /* exceeding p then second iterate is 1 less than */
491     /* first iterate; otherwise it is 1 greater. */
492 
493     if (valx0 > 0.0)
494 	    x1 = fmax2!T(0.0, x0 - 1.0);
495     else
496 	    x1 = x0 + 1.0;
497     valx1 = ptukey!T(x1, rr, cc, df, /*LOWER*/1, /*LOG_P*/0) - p;
498 
499     /* Find new iterate */
500 
501     for(iter = 1 ; iter < maxiter ; iter++) {
502 	ans = x1 - ((valx1 * (x1 - x0)) / (valx1 - valx0));
503 	valx0 = valx1;
504 
505 	/* New iterate must be >= 0 */
506 
507 	x0 = x1;
508 	if (ans < 0.0) {
509 	    ans = 0.0;
510 	    valx1 = -p;
511 	}
512 	/* Find prob(value < new iterate) */
513 
514 	valx1 = ptukey!T(ans, rr, cc, df, /*LOWER*/1, /*LOG_P*/0) - p;
515 	x1 = ans;
516 
517 	/* If the difference between two successive */
518 	/* iterates is less than eps, stop */
519 
520 	xabs = fabs(x1 - x0);
521 	if (xabs < eps)
522 	    return ans;
523     }
524 
525     /* The process did not converge in 'maxiter' iterations */
526     //ML_ERROR(ME_NOCONV, "qtukey");
527     return ans;
528 }
529 
530 
531 
532 
533 void test_tukey()
534 {
535 	import std.stdio: writeln;
536 	writeln("wprob: ", wprob(0.5, 2., 3.));
537 	writeln("ptukey: ", ptukey(8., 2., 3., 5., 1, 0));
538 	writeln("qtukey: ", qtukey(0.7, 2., 3., 5., 1, 0));
539 }
540