001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math3.stat.regression; 018 019 import java.io.Serializable; 020 import java.util.Arrays; 021 import org.apache.commons.math3.util.FastMath; 022 import org.apache.commons.math3.util.MathArrays; 023 import org.apache.commons.math3.exception.OutOfRangeException; 024 025 /** 026 * Results of a Multiple Linear Regression model fit. 027 * 028 * @version $Id: RegressionResults.java 1392342 2012-10-01 14:08:52Z psteitz $ 029 * @since 3.0 030 */ 031 public class RegressionResults implements Serializable { 032 033 /** INDEX of Sum of Squared Errors */ 034 private static final int SSE_IDX = 0; 035 /** INDEX of Sum of Squares of Model */ 036 private static final int SST_IDX = 1; 037 /** INDEX of R-Squared of regression */ 038 private static final int RSQ_IDX = 2; 039 /** INDEX of Mean Squared Error */ 040 private static final int MSE_IDX = 3; 041 /** INDEX of Adjusted R Squared */ 042 private static final int ADJRSQ_IDX = 4; 043 /** UID */ 044 private static final long serialVersionUID = 1l; 045 /** regression slope parameters */ 046 private final double[] parameters; 047 /** variance covariance matrix of parameters */ 048 private final double[][] varCovData; 049 /** boolean flag for variance covariance matrix in symm compressed storage */ 050 private final boolean isSymmetricVCD; 051 /** rank of the solution */ 052 @SuppressWarnings("unused") 053 private final int rank; 054 /** number of observations on which results are based */ 055 private final long nobs; 056 /** boolean flag indicator of whether a constant was included*/ 057 private final boolean containsConstant; 058 /** array storing global results, SSE, MSE, RSQ, adjRSQ */ 059 private final double[] globalFitInfo; 060 061 /** 062 * Set the default constructor to private access 063 * to prevent inadvertent instantiation 064 */ 065 @SuppressWarnings("unused") 066 private RegressionResults() { 067 this.parameters = null; 068 this.varCovData = null; 069 this.rank = -1; 070 this.nobs = -1; 071 this.containsConstant = false; 072 this.isSymmetricVCD = false; 073 this.globalFitInfo = null; 074 } 075 076 /** 077 * Constructor for Regression Results. 078 * 079 * @param parameters a double array with the regression slope estimates 080 * @param varcov the variance covariance matrix, stored either in a square matrix 081 * or as a compressed 082 * @param isSymmetricCompressed a flag which denotes that the variance covariance 083 * matrix is in symmetric compressed format 084 * @param nobs the number of observations of the regression estimation 085 * @param rank the number of independent variables in the regression 086 * @param sumy the sum of the independent variable 087 * @param sumysq the sum of the squared independent variable 088 * @param sse sum of squared errors 089 * @param containsConstant true model has constant, false model does not have constant 090 * @param copyData if true a deep copy of all input data is made, if false only references 091 * are copied and the RegressionResults become mutable 092 */ 093 public RegressionResults( 094 final double[] parameters, final double[][] varcov, 095 final boolean isSymmetricCompressed, 096 final long nobs, final int rank, 097 final double sumy, final double sumysq, final double sse, 098 final boolean containsConstant, 099 final boolean copyData) { 100 if (copyData) { 101 this.parameters = MathArrays.copyOf(parameters); 102 this.varCovData = new double[varcov.length][]; 103 for (int i = 0; i < varcov.length; i++) { 104 this.varCovData[i] = MathArrays.copyOf(varcov[i]); 105 } 106 } else { 107 this.parameters = parameters; 108 this.varCovData = varcov; 109 } 110 this.isSymmetricVCD = isSymmetricCompressed; 111 this.nobs = nobs; 112 this.rank = rank; 113 this.containsConstant = containsConstant; 114 this.globalFitInfo = new double[5]; 115 Arrays.fill(this.globalFitInfo, Double.NaN); 116 117 if (rank > 0) { 118 this.globalFitInfo[SST_IDX] = containsConstant ? 119 (sumysq - sumy * sumy / nobs) : sumysq; 120 } 121 122 this.globalFitInfo[SSE_IDX] = sse; 123 this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] / 124 (nobs - rank); 125 this.globalFitInfo[RSQ_IDX] = 1.0 - 126 this.globalFitInfo[SSE_IDX] / 127 this.globalFitInfo[SST_IDX]; 128 129 if (!containsConstant) { 130 this.globalFitInfo[ADJRSQ_IDX] = 1.0- 131 (1.0 - this.globalFitInfo[RSQ_IDX]) * 132 ( (double) nobs / ( (double) (nobs - rank))); 133 } else { 134 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) / 135 (globalFitInfo[SST_IDX] * (nobs - rank)); 136 } 137 } 138 139 /** 140 * <p>Returns the parameter estimate for the regressor at the given index.</p> 141 * 142 * <p>A redundant regressor will have its redundancy flag set, as well as 143 * a parameters estimated equal to {@code Double.NaN}</p> 144 * 145 * @param index Index. 146 * @return the parameters estimated for regressor at index. 147 * @throws OutOfRangeException if {@code index} is not in the interval 148 * {@code [0, number of parameters)}. 149 */ 150 public double getParameterEstimate(int index) throws OutOfRangeException { 151 if (parameters == null) { 152 return Double.NaN; 153 } 154 if (index < 0 || index >= this.parameters.length) { 155 throw new OutOfRangeException(index, 0, this.parameters.length - 1); 156 } 157 return this.parameters[index]; 158 } 159 160 /** 161 * <p>Returns a copy of the regression parameters estimates.</p> 162 * 163 * <p>The parameter estimates are returned in the natural order of the data.</p> 164 * 165 * <p>A redundant regressor will have its redundancy flag set, as will 166 * a parameter estimate equal to {@code Double.NaN}.</p> 167 * 168 * @return array of parameter estimates, null if no estimation occurred 169 */ 170 public double[] getParameterEstimates() { 171 if (this.parameters == null) { 172 return null; 173 } 174 return MathArrays.copyOf(parameters); 175 } 176 177 /** 178 * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 179 * error of the parameter estimate at index</a>, 180 * usually denoted s(b<sub>index</sub>). 181 * 182 * @param index Index. 183 * @return the standard errors associated with parameters estimated at index. 184 * @throws OutOfRangeException if {@code index} is not in the interval 185 * {@code [0, number of parameters)}. 186 */ 187 public double getStdErrorOfEstimate(int index) throws OutOfRangeException { 188 if (parameters == null) { 189 return Double.NaN; 190 } 191 if (index < 0 || index >= this.parameters.length) { 192 throw new OutOfRangeException(index, 0, this.parameters.length - 1); 193 } 194 double var = this.getVcvElement(index, index); 195 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 196 return FastMath.sqrt(var); 197 } 198 return Double.NaN; 199 } 200 201 /** 202 * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 203 * error of the parameter estimates</a>, 204 * usually denoted s(b<sub>i</sub>).</p> 205 * 206 * <p>If there are problems with an ill conditioned design matrix then the regressor 207 * which is redundant will be assigned <code>Double.NaN</code>. </p> 208 * 209 * @return an array standard errors associated with parameters estimates, 210 * null if no estimation occurred 211 */ 212 public double[] getStdErrorOfEstimates() { 213 if (parameters == null) { 214 return null; 215 } 216 double[] se = new double[this.parameters.length]; 217 for (int i = 0; i < this.parameters.length; i++) { 218 double var = this.getVcvElement(i, i); 219 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 220 se[i] = FastMath.sqrt(var); 221 continue; 222 } 223 se[i] = Double.NaN; 224 } 225 return se; 226 } 227 228 /** 229 * <p>Returns the covariance between regression parameters i and j.</p> 230 * 231 * <p>If there are problems with an ill conditioned design matrix then the covariance 232 * which involves redundant columns will be assigned {@code Double.NaN}. </p> 233 * 234 * @param i {@code i}th regression parameter. 235 * @param j {@code j}th regression parameter. 236 * @return the covariance of the parameter estimates. 237 * @throws OutOfRangeException if {@code i} or {@code j} is not in the 238 * interval {@code [0, number of parameters)}. 239 */ 240 public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException { 241 if (parameters == null) { 242 return Double.NaN; 243 } 244 if (i < 0 || i >= this.parameters.length) { 245 throw new OutOfRangeException(i, 0, this.parameters.length - 1); 246 } 247 if (j < 0 || j >= this.parameters.length) { 248 throw new OutOfRangeException(j, 0, this.parameters.length - 1); 249 } 250 return this.getVcvElement(i, j); 251 } 252 253 /** 254 * <p>Returns the number of parameters estimated in the model.</p> 255 * 256 * <p>This is the maximum number of regressors, some techniques may drop 257 * redundant parameters</p> 258 * 259 * @return number of regressors, -1 if not estimated 260 */ 261 public int getNumberOfParameters() { 262 if (this.parameters == null) { 263 return -1; 264 } 265 return this.parameters.length; 266 } 267 268 /** 269 * Returns the number of observations added to the regression model. 270 * 271 * @return Number of observations, -1 if an error condition prevents estimation 272 */ 273 public long getN() { 274 return this.nobs; 275 } 276 277 /** 278 * <p>Returns the sum of squared deviations of the y values about their mean.</p> 279 * 280 * <p>This is defined as SSTO 281 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p> 282 * 283 * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p> 284 * 285 * @return sum of squared deviations of y values 286 */ 287 public double getTotalSumSquares() { 288 return this.globalFitInfo[SST_IDX]; 289 } 290 291 /** 292 * <p>Returns the sum of squared deviations of the predicted y values about 293 * their mean (which equals the mean of y).</p> 294 * 295 * <p>This is usually abbreviated SSR or SSM. It is defined as SSM 296 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p> 297 * 298 * <p><strong>Preconditions</strong>: <ul> 299 * <li>At least two observations (with at least two different x values) 300 * must have been added before invoking this method. If this method is 301 * invoked before a model can be estimated, <code>Double.NaN</code> is 302 * returned. 303 * </li></ul></p> 304 * 305 * @return sum of squared deviations of predicted y values 306 */ 307 public double getRegressionSumSquares() { 308 return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX]; 309 } 310 311 /** 312 * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm"> 313 * sum of squared errors</a> (SSE) associated with the regression 314 * model.</p> 315 * 316 * <p>The return value is constrained to be non-negative - i.e., if due to 317 * rounding errors the computational formula returns a negative result, 318 * 0 is returned.</p> 319 * 320 * <p><strong>Preconditions</strong>: <ul> 321 * <li>numberOfParameters data pairs 322 * must have been added before invoking this method. If this method is 323 * invoked before a model can be estimated, <code>Double,NaN</code> is 324 * returned. 325 * </li></ul></p> 326 * 327 * @return sum of squared errors associated with the regression model 328 */ 329 public double getErrorSumSquares() { 330 return this.globalFitInfo[ SSE_IDX]; 331 } 332 333 /** 334 * <p>Returns the sum of squared errors divided by the degrees of freedom, 335 * usually abbreviated MSE.</p> 336 * 337 * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model, 338 * or if there is no variation in <code>x</code>, this returns 339 * <code>Double.NaN</code>.</p> 340 * 341 * @return sum of squared deviations of y values 342 */ 343 public double getMeanSquareError() { 344 return this.globalFitInfo[ MSE_IDX]; 345 } 346 347 /** 348 * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm"> 349 * coefficient of multiple determination</a>, 350 * usually denoted r-square.</p> 351 * 352 * <p><strong>Preconditions</strong>: <ul> 353 * <li>At least numberOfParameters observations (with at least numberOfParameters different x values) 354 * must have been added before invoking this method. If this method is 355 * invoked before a model can be estimated, {@code Double,NaN} is 356 * returned. 357 * </li></ul></p> 358 * 359 * @return r-square, a double in the interval [0, 1] 360 */ 361 public double getRSquared() { 362 return this.globalFitInfo[ RSQ_IDX]; 363 } 364 365 /** 366 * <p>Returns the adjusted R-squared statistic, defined by the formula <pre> 367 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)] 368 * </pre> 369 * where SSR is the sum of squared residuals}, 370 * SSTO is the total sum of squares}, n is the number 371 * of observations and p is the number of parameters estimated (including the intercept).</p> 372 * 373 * <p>If the regression is estimated without an intercept term, what is returned is <pre> 374 * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code> 375 * </pre></p> 376 * 377 * @return adjusted R-Squared statistic 378 */ 379 public double getAdjustedRSquared() { 380 return this.globalFitInfo[ ADJRSQ_IDX]; 381 } 382 383 /** 384 * Returns true if the regression model has been computed including an intercept. 385 * In this case, the coefficient of the intercept is the first element of the 386 * {@link #getParameterEstimates() parameter estimates}. 387 * @return true if the model has an intercept term 388 */ 389 public boolean hasIntercept() { 390 return this.containsConstant; 391 } 392 393 /** 394 * Gets the i-jth element of the variance-covariance matrix. 395 * 396 * @param i first variable index 397 * @param j second variable index 398 * @return the requested variance-covariance matrix entry 399 */ 400 private double getVcvElement(int i, int j) { 401 if (this.isSymmetricVCD) { 402 if (this.varCovData.length > 1) { 403 //could be stored in upper or lower triangular 404 if (i == j) { 405 return varCovData[i][i]; 406 } else if (i >= varCovData[j].length) { 407 return varCovData[i][j]; 408 } else { 409 return varCovData[j][i]; 410 } 411 } else {//could be in single array 412 if (i > j) { 413 return varCovData[0][(i + 1) * i / 2 + j]; 414 } else { 415 return varCovData[0][(j + 1) * j / 2 + i]; 416 } 417 } 418 } else { 419 return this.varCovData[i][j]; 420 } 421 } 422 }