/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.stat.regression;

import java.util.Arrays;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.Precision;
import org.apache.commons.math3.util.MathArrays;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
 *
 * @version $Id: MillerUpdatingRegression.java 1392358 2012-10-01 14:41:55Z psteitz $
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression */
    private final int nvars;
    /** diagonals of cross products matrix */
    private final double[] d;
    /** the elements of R'y */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix */
    private final double[] r;
    /** the tolerance for each of the variables */
    private final double[] tol;
    /** residual sum of squares for all nested regressions */
    private final double[] rss;
    /** order of the regressors */
    private final int[] vorder;
    /** scratch space for tolerance calc */
    private final double[] work_tolset;
    /** number of observations entered */
    private long nobs = 0;
    /** sum of squared errors of largest regression */
    private double sserr = 0.0;
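    /*
     * Implementation note (added for clarity; not part of the AS 274 text):
     * the class maintains the factorization X'X = R'DR, with R unit upper
     * triangular. The array d holds the diagonal of D, while r holds the
     * off-diagonal elements of R packed by rows, e.g. for nvars = 3
     *     r = { r(0,1), r(0,2),
     *                   r(1,2) }
     * so element (i, j), with j greater than i, lives at index
     * i * (2 * nvars - i - 1) / 2 + (j - i - 1).
     */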
    /** has the residual sum of squares (rss) been computed? */
    private boolean rss_set = false;
    /** has the tolerance setting method been called */
    private boolean tol_set = false;
    /** flags for variables with linear dependency problems */
    private final boolean[] lindep;
    /** singular x values */
    private final double[] x_sing;
    /** workspace for singularity method */
    private final double[] work_sing;
    /** summation of Y variable */
    private double sumy = 0.0;
    /** summation of squared Y values */
    private double sumsqy = 0.0;
    /** boolean flag whether a regression constant is added */
    private boolean hasIntercept;
    /** zero tolerance */
    private final double epsilon;

    /**
     * The default constructor is private
     * to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
            throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.x_sing = new double[this.nvars];
        this.work_sing = new double[this.nvars];
        this.work_tolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
            throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
     */
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
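     *
     * <p>A minimal usage sketch (the values are purely illustrative): a model
     * declared with two regressors and an automatically added constant is
     * updated one observation at a time and then fit.</p>
     * <pre>
     * MillerUpdatingRegression reg = new MillerUpdatingRegression(2, true);
     * reg.addObservation(new double[]{1.0, 2.0}, 5.0); // y = 5.0 at x = (1.0, 2.0)
     * reg.addObservation(new double[]{2.0, 0.5}, 3.0);
     * reg.addObservation(new double[]{3.0, 1.5}, 8.0);
     * reg.addObservation(new double[]{4.0, 2.5}, 9.0);
     * RegressionResults fit = reg.regress(); // requires more observations than variables
     * </pre>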
     * @param x the array with regressor values
     * @param y the value of dependent variable given these regressors
     * @exception ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    public void addObservation(final double[] x, final double y)
            throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
                (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(MathArrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;
    }

    /**
     * Adds multiple observations to the model.
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if ((x == null) || (y == null) || (x.length != y.length)) {
            throw new ModelSpecificationException(
                    LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                    (x == null) ? 0 : x.length,
                    (y == null) ? 0 : y.length);
        }
        if (x.length == 0) { // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This method forms all
     * intermediate data which will be used for all derivative measures.
     * As noted in Miller's paper, the original implementation overwrites the x
     * vector; in this implementation, the include method is passed a copy of the
     * original data vector so that there is no contamination of the data.
     * Additionally, this method differs slightly from Gentleman's method in that
     * it assumes dense design matrices; there is some advantage in using the
     * original Gentleman algorithm on sparse matrices.
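     *
     * <p>In outline: for each nonzero regressor value x_i, with current weight w
     * and diagonal multiplier d, the scaled rotation applied in the loop below is
     * (a sketch of the code that follows, not a quotation from the paper)</p>
     * <pre>
     * d'    = d + w * x_i * x_i            new diagonal multiplier
     * w'    = d * w / d'                   weight carried to the next row
     * x_k'  = x_k - x_i * r_ik             residual carried down, using the old r_ik
     * r_ik' = (d * r_ik + w * x_i * x_k) / d'
     * </pre>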
224 * 225 * @param x observations on the regressors 226 * @param wi weight of the this observation (-1,1) 227 * @param yi observation on the regressand 228 */ 229 private void include(final double[] x, final double wi, final double yi) { 230 int nextr = 0; 231 double w = wi; 232 double y = yi; 233 double xi; 234 double di; 235 double wxi; 236 double dpi; 237 double xk; 238 double _w; 239 this.rss_set = false; 240 sumy = smartAdd(yi, sumy); 241 sumsqy = smartAdd(sumsqy, yi * yi); 242 for (int i = 0; i < x.length; i++) { 243 if (w == 0.0) { 244 return; 245 } 246 xi = x[i]; 247 248 if (xi == 0.0) { 249 nextr += nvars - i - 1; 250 continue; 251 } 252 di = d[i]; 253 wxi = w * xi; 254 _w = w; 255 if (di != 0.0) { 256 dpi = smartAdd(di, wxi * xi); 257 final double tmp = wxi * xi / di; 258 if (FastMath.abs(tmp) > Precision.EPSILON) { 259 w = (di * w) / dpi; 260 } 261 } else { 262 dpi = wxi * xi; 263 w = 0.0; 264 } 265 d[i] = dpi; 266 for (int k = i + 1; k < nvars; k++) { 267 xk = x[k]; 268 x[k] = smartAdd(xk, -xi * r[nextr]); 269 if (di != 0.0) { 270 r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi; 271 } else { 272 r[nextr] = xk / xi; 273 } 274 ++nextr; 275 } 276 xk = y; 277 y = smartAdd(xk, -xi * rhs[i]); 278 if (di != 0.0) { 279 rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi; 280 } else { 281 rhs[i] = xk / xi; 282 } 283 } 284 sserr = smartAdd(sserr, w * y * y); 285 } 286 287 /** 288 * Adds to number a and b such that the contamination due to 289 * numerical smallness of one addend does not corrupt the sum. 290 * @param a - an addend 291 * @param b - an addend 292 * @return the sum of the a and b 293 */ 294 private double smartAdd(double a, double b) { 295 final double _a = FastMath.abs(a); 296 final double _b = FastMath.abs(b); 297 if (_a > _b) { 298 final double eps = _a * Precision.EPSILON; 299 if (_b > eps) { 300 return a + b; 301 } 302 return a; 303 } else { 304 final double eps = _b * Precision.EPSILON; 305 if (_a > eps) { 306 return a + b; 307 } 308 return b; 309 } 310 } 311 312 /** 313 * As the name suggests, clear wipes the internals and reorders everything in the 314 * canonical order. 315 */ 316 public void clear() { 317 Arrays.fill(this.d, 0.0); 318 Arrays.fill(this.rhs, 0.0); 319 Arrays.fill(this.r, 0.0); 320 Arrays.fill(this.tol, 0.0); 321 Arrays.fill(this.rss, 0.0); 322 Arrays.fill(this.work_tolset, 0.0); 323 Arrays.fill(this.work_sing, 0.0); 324 Arrays.fill(this.x_sing, 0.0); 325 Arrays.fill(this.lindep, false); 326 for (int i = 0; i < nvars; i++) { 327 this.vorder[i] = i; 328 } 329 this.nobs = 0; 330 this.sserr = 0.0; 331 this.sumy = 0.0; 332 this.sumsqy = 0.0; 333 this.rss_set = false; 334 this.tol_set = false; 335 } 336 337 /** 338 * This sets up tolerances for singularity testing. 339 */ 340 private void tolset() { 341 int pos; 342 double total; 343 final double eps = this.epsilon; 344 for (int i = 0; i < nvars; i++) { 345 this.work_tolset[i] = Math.sqrt(d[i]); 346 } 347 tol[0] = eps * this.work_tolset[0]; 348 for (int col = 1; col < nvars; col++) { 349 pos = col - 1; 350 total = work_tolset[col]; 351 for (int row = 0; row < col; row++) { 352 total += Math.abs(r[pos]) * work_tolset[row]; 353 pos += nvars - row - 2; 354 } 355 tol[col] = eps * total; 356 } 357 tol_set = true; 358 } 359 360 /** 361 * The regcf method conducts the linear regression and extracts the 362 * parameter vector. Notice that the algorithm can do subset regression 363 * with no alteration. 
364 * 365 * @param nreq how many of the regressors to include (either in canonical 366 * order, or in the current reordered state) 367 * @return an array with the estimated slope coefficients 368 * @throws ModelSpecificationException if {@code nreq} is less than 1 369 * or greater than the number of independent variables 370 */ 371 private double[] regcf(int nreq) throws ModelSpecificationException { 372 int nextr; 373 if (nreq < 1) { 374 throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS); 375 } 376 if (nreq > this.nvars) { 377 throw new ModelSpecificationException( 378 LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars); 379 } 380 if (!this.tol_set) { 381 tolset(); 382 } 383 final double[] ret = new double[nreq]; 384 boolean rankProblem = false; 385 for (int i = nreq - 1; i > -1; i--) { 386 if (Math.sqrt(d[i]) < tol[i]) { 387 ret[i] = 0.0; 388 d[i] = 0.0; 389 rankProblem = true; 390 } else { 391 ret[i] = rhs[i]; 392 nextr = i * (nvars + nvars - i - 1) / 2; 393 for (int j = i + 1; j < nreq; j++) { 394 ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]); 395 ++nextr; 396 } 397 } 398 } 399 if (rankProblem) { 400 for (int i = 0; i < nreq; i++) { 401 if (this.lindep[i]) { 402 ret[i] = Double.NaN; 403 } 404 } 405 } 406 return ret; 407 } 408 409 /** 410 * The method which checks for singularities and then eliminates the offending 411 * columns. 412 */ 413 private void singcheck() { 414 int pos; 415 for (int i = 0; i < nvars; i++) { 416 work_sing[i] = Math.sqrt(d[i]); 417 } 418 for (int col = 0; col < nvars; col++) { 419 // Set elements within R to zero if they are less than tol(col) in 420 // absolute value after being scaled by the square root of their row 421 // multiplier 422 final double temp = tol[col]; 423 pos = col - 1; 424 for (int row = 0; row < col - 1; row++) { 425 if (Math.abs(r[pos]) * work_sing[row] < temp) { 426 r[pos] = 0.0; 427 } 428 pos += nvars - row - 2; 429 } 430 // If diagonal element is near zero, set it to zero, set appropriate 431 // element of LINDEP, and use INCLUD to augment the projections in 432 // the lower rows of the orthogonalization. 433 lindep[col] = false; 434 if (work_sing[col] < temp) { 435 lindep[col] = true; 436 if (col < nvars - 1) { 437 Arrays.fill(x_sing, 0.0); 438 int _pi = col * (nvars + nvars - col - 1) / 2; 439 for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) { 440 x_sing[_xi] = r[_pi]; 441 r[_pi] = 0.0; 442 } 443 final double y = rhs[col]; 444 final double weight = d[col]; 445 d[col] = 0.0; 446 rhs[col] = 0.0; 447 this.include(x_sing, weight, y); 448 } else { 449 sserr += d[col] * rhs[col] * rhs[col]; 450 } 451 } 452 } 453 } 454 455 /** 456 * Calculates the sum of squared errors for the full regression 457 * and all subsets in the following manner: <pre> 458 * rss[] ={ 459 * ResidualSumOfSquares_allNvars, 460 * ResidualSumOfSquares_FirstNvars-1, 461 * ResidualSumOfSquares_FirstNvars-2, 462 * ..., ResidualSumOfSquares_FirstVariable} </pre> 463 */ 464 private void ss() { 465 double total = sserr; 466 rss[nvars - 1] = sserr; 467 for (int i = nvars - 1; i > 0; i--) { 468 total += d[i] * rhs[i] * rhs[i]; 469 rss[i - 1] = total; 470 } 471 rss_set = true; 472 } 473 474 /** 475 * Calculates the cov matrix assuming only the first nreq variables are 476 * included in the calculation. The returned array contains a symmetric 477 * matrix stored in lower triangular form. The matrix will have 478 * ( nreq + 1 ) * nreq / 2 elements. 
     * For illustration: <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov_22,
     *  ...
     * } </pre>
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi and Xj is: <pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1).</p>
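     *
     * <p>In this implementation both blocks are packed into the single returned
     * array: for regressors i and j (i less than j, both at least IN, 0-based)
     * the cross correlation is at index (j - 1 - in)*(j - in)/2 + (i - in), and
     * the correlation of regressor i with Y is at
     * (nvars - in)*(nvars - in - 1)/2 + (i - in). This is a reading of the
     * indexing used below.</p>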
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        if (in < -1 || in >= nvars) {
            return null;
        }
        final int nvm = nvars - 1;
        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / Math.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = base_pos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / Math.sqrt(sumxx);
            } else {
                rms[col + rms_off] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / Math.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = base_pos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrk_off] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
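     *
     * <p>The move is performed as a sequence of swaps of adjacent variables,
     * each applying the planar rotation sketched here from the non-degenerate
     * branch of the loop below (X is the off-diagonal element being eliminated):</p>
     * <pre>
     * d1'  = d2 + d1 * X * X
     * cbar = d2 / d1'
     * sbar = X * d1 / d1'
     * d2'  = d1 * cbar
     * </pre>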
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double X;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double Y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        boolean bSkipTo40 = false;
        if (from == to) {
            return;
        }
        if (!this.rss_set) {
            ss();
        }
        int count = 0;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;

            d1 = d[m];
            d2 = d[mp1];
            // Special cases.
            if (d1 > this.epsilon || d2 > this.epsilon) {
                X = r[m1];
                if (Math.abs(X) * Math.sqrt(d1) < tol[mp1]) {
                    X = 0.0;
                }
                if (d1 < this.epsilon || Math.abs(X) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        X = r[m1];
                        r[m1] = r[m2];
                        r[m2] = X;
                        ++m2;
                    }
                    X = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = X;
                    bSkipTo40 = true;
                    //break;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * X * X;
                    r[m1] = 1.0 / X;
                    for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
                        r[_i] /= X;
                    }
                    rhs[m] = rhs[m] / X;
                    bSkipTo40 = true;
                    //break;
                }
                if (!bSkipTo40) {
                    d1new = d2 + d1 * X * X;
                    cbar = d2 / d1new;
                    sbar = X * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        Y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * Y;
                        r[m2] = Y - X * r[m2];
                        ++m2;
                    }
                    Y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * Y;
                    rhs[mp1] = Y - X * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    X = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = X;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            X = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = X;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            // Reset the flag that emulates Fortran's "GO TO 40" so it only
            // affects the current adjacent swap, not subsequent iterations.
            bSkipTo40 = false;

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE.</p>
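     *
     * <p>For example (illustrative): if VORDER is {0, 1, 2, 3}, then
     * {@code reorderRegressors(new int[] {2, 3}, 0)} moves variables 2 and 3
     * into the first two positions by repeated calls to VMOVE.</p>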
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 error, 0 everything ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
     *
     * @param row_data the regressor values for one observation
     * @return the diagonal element of the hat matrix for this observation
     */
    public double getDiagonalOfHatMatrix(double[] row_data) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (row_data.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[row_data.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(row_data, 0, xrow, 1, row_data.length);
        } else {
            xrow = row_data;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (Math.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling {@link #regress(int[])} will trigger
     * a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return MathArrays.copyOf(vorder);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations
     * is not greater than the number of variables
     */
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
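     *
     * <p>For example (illustrative): in a model built with an intercept and three
     * regressors, {@code regress(2)} fits using only the first two columns in the
     * current variable order, i.e. the intercept and the first regressor when no
     * reordering has taken place.</p>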
     *
     * @param numberOfRegressors how many of the regressors to include (either in
     * canonical order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations is
     * not greater than the number of regressors requested, or if the number of
     * regressors requested is greater than the number of regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
        if (this.nobs <= numberOfRegressors) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors
     * provided in an array. Calling this method will change the internal order
     * of the regressors, and care is required in interpreting the hat matrix.
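     *
     * <p>A usage sketch (illustrative; the receiver {@code regression} is any
     * instance built with an intercept, at least three regressors, and enough
     * observations):</p>
     * <pre>
     * // fit using only the constant (variable 0) and the third regressor (variable 3)
     * RegressionResults subset = regression.regress(new int[] {0, 3});
     * </pre>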
     *
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations
     * is not greater than the number of variables, the number of regressors
     * requested is greater than the number of regressors in the model, or a
     * regressor index in the regressor array does not exist
     */
    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
        if (variablesToInclude.length > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            // validate the requested regressor index, not the loop counter
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        // compare only the requested positions to avoid reading past series
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}