001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math3.analysis.interpolation; 018 019 import java.io.Serializable; 020 import java.util.Arrays; 021 022 import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction; 023 import org.apache.commons.math3.exception.NotPositiveException; 024 import org.apache.commons.math3.exception.OutOfRangeException; 025 import org.apache.commons.math3.exception.DimensionMismatchException; 026 import org.apache.commons.math3.exception.NoDataException; 027 import org.apache.commons.math3.exception.NumberIsTooSmallException; 028 import org.apache.commons.math3.exception.NonMonotonicSequenceException; 029 import org.apache.commons.math3.exception.NotFiniteNumberException; 030 import org.apache.commons.math3.exception.util.LocalizedFormats; 031 import org.apache.commons.math3.util.FastMath; 032 import org.apache.commons.math3.util.MathUtils; 033 import org.apache.commons.math3.util.MathArrays; 034 035 /** 036 * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression"> 037 * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of 038 * real univariate functions. 039 * <p/> 040 * For reference, see 041 * <a href="http://www.math.tau.ac.il/~yekutiel/MA seminar/Cleveland 1979.pdf"> 042 * William S. Cleveland - Robust Locally Weighted Regression and Smoothing 043 * Scatterplots</a> 044 * <p/> 045 * This class implements both the loess method and serves as an interpolation 046 * adapter to it, allowing one to build a spline on the obtained loess fit. 047 * 048 * @version $Id: LoessInterpolator.java 1379904 2012-09-01 23:54:52Z erans $ 049 * @since 2.0 050 */ 051 public class LoessInterpolator 052 implements UnivariateInterpolator, Serializable { 053 /** Default value of the bandwidth parameter. */ 054 public static final double DEFAULT_BANDWIDTH = 0.3; 055 /** Default value of the number of robustness iterations. */ 056 public static final int DEFAULT_ROBUSTNESS_ITERS = 2; 057 /** 058 * Default value for accuracy. 059 * @since 2.1 060 */ 061 public static final double DEFAULT_ACCURACY = 1e-12; 062 /** serializable version identifier. */ 063 private static final long serialVersionUID = 5204927143605193821L; 064 /** 065 * The bandwidth parameter: when computing the loess fit at 066 * a particular point, this fraction of source points closest 067 * to the current point is taken into account for computing 068 * a least-squares regression. 069 * <p/> 070 * A sensible value is usually 0.25 to 0.5. 071 */ 072 private final double bandwidth; 073 /** 074 * The number of robustness iterations parameter: this many 075 * robustness iterations are done. 076 * <p/> 077 * A sensible value is usually 0 (just the initial fit without any 078 * robustness iterations) to 4. 079 */ 080 private final int robustnessIters; 081 /** 082 * If the median residual at a certain robustness iteration 083 * is less than this amount, no more iterations are done. 084 */ 085 private final double accuracy; 086 087 /** 088 * Constructs a new {@link LoessInterpolator} 089 * with a bandwidth of {@link #DEFAULT_BANDWIDTH}, 090 * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations 091 * and an accuracy of {#link #DEFAULT_ACCURACY}. 092 * See {@link #LoessInterpolator(double, int, double)} for an explanation of 093 * the parameters. 094 */ 095 public LoessInterpolator() { 096 this.bandwidth = DEFAULT_BANDWIDTH; 097 this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS; 098 this.accuracy = DEFAULT_ACCURACY; 099 } 100 101 /** 102 * Construct a new {@link LoessInterpolator} 103 * with given bandwidth and number of robustness iterations. 104 * <p> 105 * Calling this constructor is equivalent to calling {link {@link 106 * #LoessInterpolator(double, int, double) LoessInterpolator(bandwidth, 107 * robustnessIters, LoessInterpolator.DEFAULT_ACCURACY)} 108 * </p> 109 * 110 * @param bandwidth when computing the loess fit at 111 * a particular point, this fraction of source points closest 112 * to the current point is taken into account for computing 113 * a least-squares regression.</br> 114 * A sensible value is usually 0.25 to 0.5, the default value is 115 * {@link #DEFAULT_BANDWIDTH}. 116 * @param robustnessIters This many robustness iterations are done.</br> 117 * A sensible value is usually 0 (just the initial fit without any 118 * robustness iterations) to 4, the default value is 119 * {@link #DEFAULT_ROBUSTNESS_ITERS}. 120 121 * @see #LoessInterpolator(double, int, double) 122 */ 123 public LoessInterpolator(double bandwidth, int robustnessIters) { 124 this(bandwidth, robustnessIters, DEFAULT_ACCURACY); 125 } 126 127 /** 128 * Construct a new {@link LoessInterpolator} 129 * with given bandwidth, number of robustness iterations and accuracy. 130 * 131 * @param bandwidth when computing the loess fit at 132 * a particular point, this fraction of source points closest 133 * to the current point is taken into account for computing 134 * a least-squares regression.</br> 135 * A sensible value is usually 0.25 to 0.5, the default value is 136 * {@link #DEFAULT_BANDWIDTH}. 137 * @param robustnessIters This many robustness iterations are done.</br> 138 * A sensible value is usually 0 (just the initial fit without any 139 * robustness iterations) to 4, the default value is 140 * {@link #DEFAULT_ROBUSTNESS_ITERS}. 141 * @param accuracy If the median residual at a certain robustness iteration 142 * is less than this amount, no more iterations are done. 143 * @throws OutOfRangeException if bandwidth does not lie in the interval [0,1]. 144 * @throws NotPositiveException if {@code robustnessIters} is negative. 145 * @see #LoessInterpolator(double, int) 146 * @since 2.1 147 */ 148 public LoessInterpolator(double bandwidth, int robustnessIters, double accuracy) 149 throws OutOfRangeException, 150 NotPositiveException { 151 if (bandwidth < 0 || 152 bandwidth > 1) { 153 throw new OutOfRangeException(LocalizedFormats.BANDWIDTH, bandwidth, 0, 1); 154 } 155 this.bandwidth = bandwidth; 156 if (robustnessIters < 0) { 157 throw new NotPositiveException(LocalizedFormats.ROBUSTNESS_ITERATIONS, robustnessIters); 158 } 159 this.robustnessIters = robustnessIters; 160 this.accuracy = accuracy; 161 } 162 163 /** 164 * Compute an interpolating function by performing a loess fit 165 * on the data at the original abscissae and then building a cubic spline 166 * with a 167 * {@link org.apache.commons.math3.analysis.interpolation.SplineInterpolator} 168 * on the resulting fit. 169 * 170 * @param xval the arguments for the interpolation points 171 * @param yval the values for the interpolation points 172 * @return A cubic spline built upon a loess fit to the data at the original abscissae 173 * @throws NonMonotonicSequenceException if {@code xval} not sorted in 174 * strictly increasing order. 175 * @throws DimensionMismatchException if {@code xval} and {@code yval} have 176 * different sizes. 177 * @throws NoDataException if {@code xval} or {@code yval} has zero size. 178 * @throws NotFiniteNumberException if any of the arguments and values are 179 * not finite real numbers. 180 * @throws NumberIsTooSmallException if the bandwidth is too small to 181 * accomodate the size of the input data (i.e. the bandwidth must be 182 * larger than 2/n). 183 */ 184 public final PolynomialSplineFunction interpolate(final double[] xval, 185 final double[] yval) 186 throws NonMonotonicSequenceException, 187 DimensionMismatchException, 188 NoDataException, 189 NotFiniteNumberException, 190 NumberIsTooSmallException { 191 return new SplineInterpolator().interpolate(xval, smooth(xval, yval)); 192 } 193 194 /** 195 * Compute a weighted loess fit on the data at the original abscissae. 196 * 197 * @param xval Arguments for the interpolation points. 198 * @param yval Values for the interpolation points. 199 * @param weights point weights: coefficients by which the robustness weight 200 * of a point is multiplied. 201 * @return the values of the loess fit at corresponding original abscissae. 202 * @throws NonMonotonicSequenceException if {@code xval} not sorted in 203 * strictly increasing order. 204 * @throws DimensionMismatchException if {@code xval} and {@code yval} have 205 * different sizes. 206 * @throws NoDataException if {@code xval} or {@code yval} has zero size. 207 * @throws NotFiniteNumberException if any of the arguments and values are 208 not finite real numbers. 209 * @throws NumberIsTooSmallException if the bandwidth is too small to 210 * accomodate the size of the input data (i.e. the bandwidth must be 211 * larger than 2/n). 212 * @since 2.1 213 */ 214 public final double[] smooth(final double[] xval, final double[] yval, 215 final double[] weights) 216 throws NonMonotonicSequenceException, 217 DimensionMismatchException, 218 NoDataException, 219 NotFiniteNumberException, 220 NumberIsTooSmallException { 221 if (xval.length != yval.length) { 222 throw new DimensionMismatchException(xval.length, yval.length); 223 } 224 225 final int n = xval.length; 226 227 if (n == 0) { 228 throw new NoDataException(); 229 } 230 231 checkAllFiniteReal(xval); 232 checkAllFiniteReal(yval); 233 checkAllFiniteReal(weights); 234 235 MathArrays.checkOrder(xval); 236 237 if (n == 1) { 238 return new double[]{yval[0]}; 239 } 240 241 if (n == 2) { 242 return new double[]{yval[0], yval[1]}; 243 } 244 245 int bandwidthInPoints = (int) (bandwidth * n); 246 247 if (bandwidthInPoints < 2) { 248 throw new NumberIsTooSmallException(LocalizedFormats.BANDWIDTH, 249 bandwidthInPoints, 2, true); 250 } 251 252 final double[] res = new double[n]; 253 254 final double[] residuals = new double[n]; 255 final double[] sortedResiduals = new double[n]; 256 257 final double[] robustnessWeights = new double[n]; 258 259 // Do an initial fit and 'robustnessIters' robustness iterations. 260 // This is equivalent to doing 'robustnessIters+1' robustness iterations 261 // starting with all robustness weights set to 1. 262 Arrays.fill(robustnessWeights, 1); 263 264 for (int iter = 0; iter <= robustnessIters; ++iter) { 265 final int[] bandwidthInterval = {0, bandwidthInPoints - 1}; 266 // At each x, compute a local weighted linear regression 267 for (int i = 0; i < n; ++i) { 268 final double x = xval[i]; 269 270 // Find out the interval of source points on which 271 // a regression is to be made. 272 if (i > 0) { 273 updateBandwidthInterval(xval, weights, i, bandwidthInterval); 274 } 275 276 final int ileft = bandwidthInterval[0]; 277 final int iright = bandwidthInterval[1]; 278 279 // Compute the point of the bandwidth interval that is 280 // farthest from x 281 final int edge; 282 if (xval[i] - xval[ileft] > xval[iright] - xval[i]) { 283 edge = ileft; 284 } else { 285 edge = iright; 286 } 287 288 // Compute a least-squares linear fit weighted by 289 // the product of robustness weights and the tricube 290 // weight function. 291 // See http://en.wikipedia.org/wiki/Linear_regression 292 // (section "Univariate linear case") 293 // and http://en.wikipedia.org/wiki/Weighted_least_squares 294 // (section "Weighted least squares") 295 double sumWeights = 0; 296 double sumX = 0; 297 double sumXSquared = 0; 298 double sumY = 0; 299 double sumXY = 0; 300 double denom = FastMath.abs(1.0 / (xval[edge] - x)); 301 for (int k = ileft; k <= iright; ++k) { 302 final double xk = xval[k]; 303 final double yk = yval[k]; 304 final double dist = (k < i) ? x - xk : xk - x; 305 final double w = tricube(dist * denom) * robustnessWeights[k] * weights[k]; 306 final double xkw = xk * w; 307 sumWeights += w; 308 sumX += xkw; 309 sumXSquared += xk * xkw; 310 sumY += yk * w; 311 sumXY += yk * xkw; 312 } 313 314 final double meanX = sumX / sumWeights; 315 final double meanY = sumY / sumWeights; 316 final double meanXY = sumXY / sumWeights; 317 final double meanXSquared = sumXSquared / sumWeights; 318 319 final double beta; 320 if (FastMath.sqrt(FastMath.abs(meanXSquared - meanX * meanX)) < accuracy) { 321 beta = 0; 322 } else { 323 beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX); 324 } 325 326 final double alpha = meanY - beta * meanX; 327 328 res[i] = beta * x + alpha; 329 residuals[i] = FastMath.abs(yval[i] - res[i]); 330 } 331 332 // No need to recompute the robustness weights at the last 333 // iteration, they won't be needed anymore 334 if (iter == robustnessIters) { 335 break; 336 } 337 338 // Recompute the robustness weights. 339 340 // Find the median residual. 341 // An arraycopy and a sort are completely tractable here, 342 // because the preceding loop is a lot more expensive 343 System.arraycopy(residuals, 0, sortedResiduals, 0, n); 344 Arrays.sort(sortedResiduals); 345 final double medianResidual = sortedResiduals[n / 2]; 346 347 if (FastMath.abs(medianResidual) < accuracy) { 348 break; 349 } 350 351 for (int i = 0; i < n; ++i) { 352 final double arg = residuals[i] / (6 * medianResidual); 353 if (arg >= 1) { 354 robustnessWeights[i] = 0; 355 } else { 356 final double w = 1 - arg * arg; 357 robustnessWeights[i] = w * w; 358 } 359 } 360 } 361 362 return res; 363 } 364 365 /** 366 * Compute a loess fit on the data at the original abscissae. 367 * 368 * @param xval the arguments for the interpolation points 369 * @param yval the values for the interpolation points 370 * @return values of the loess fit at corresponding original abscissae 371 * @throws NonMonotonicSequenceException if {@code xval} not sorted in 372 * strictly increasing order. 373 * @throws DimensionMismatchException if {@code xval} and {@code yval} have 374 * different sizes. 375 * @throws NoDataException if {@code xval} or {@code yval} has zero size. 376 * @throws NotFiniteNumberException if any of the arguments and values are 377 * not finite real numbers. 378 * @throws NumberIsTooSmallException if the bandwidth is too small to 379 * accomodate the size of the input data (i.e. the bandwidth must be 380 * larger than 2/n). 381 */ 382 public final double[] smooth(final double[] xval, final double[] yval) 383 throws NonMonotonicSequenceException, 384 DimensionMismatchException, 385 NoDataException, 386 NotFiniteNumberException, 387 NumberIsTooSmallException { 388 if (xval.length != yval.length) { 389 throw new DimensionMismatchException(xval.length, yval.length); 390 } 391 392 final double[] unitWeights = new double[xval.length]; 393 Arrays.fill(unitWeights, 1.0); 394 395 return smooth(xval, yval, unitWeights); 396 } 397 398 /** 399 * Given an index interval into xval that embraces a certain number of 400 * points closest to {@code xval[i-1]}, update the interval so that it 401 * embraces the same number of points closest to {@code xval[i]}, 402 * ignoring zero weights. 403 * 404 * @param xval Arguments array. 405 * @param weights Weights array. 406 * @param i Index around which the new interval should be computed. 407 * @param bandwidthInterval a two-element array {left, right} such that: 408 * {@code (left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])} 409 * and 410 * {@code (right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])}. 411 * The array will be updated. 412 */ 413 private static void updateBandwidthInterval(final double[] xval, final double[] weights, 414 final int i, 415 final int[] bandwidthInterval) { 416 final int left = bandwidthInterval[0]; 417 final int right = bandwidthInterval[1]; 418 419 // The right edge should be adjusted if the next point to the right 420 // is closer to xval[i] than the leftmost point of the current interval 421 int nextRight = nextNonzero(weights, right); 422 if (nextRight < xval.length && xval[nextRight] - xval[i] < xval[i] - xval[left]) { 423 int nextLeft = nextNonzero(weights, bandwidthInterval[0]); 424 bandwidthInterval[0] = nextLeft; 425 bandwidthInterval[1] = nextRight; 426 } 427 } 428 429 /** 430 * Return the smallest index {@code j} such that 431 * {@code j > i && (j == weights.length || weights[j] != 0)}. 432 * 433 * @param weights Weights array. 434 * @param i Index from which to start search. 435 * @return the smallest compliant index. 436 */ 437 private static int nextNonzero(final double[] weights, final int i) { 438 int j = i + 1; 439 while(j < weights.length && weights[j] == 0) { 440 ++j; 441 } 442 return j; 443 } 444 445 /** 446 * Compute the 447 * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a> 448 * weight function 449 * 450 * @param x Argument. 451 * @return <code>(1 - |x|<sup>3</sup>)<sup>3</sup></code> for |x| < 1, 0 otherwise. 452 */ 453 private static double tricube(final double x) { 454 final double absX = FastMath.abs(x); 455 if (absX >= 1.0) { 456 return 0.0; 457 } 458 final double tmp = 1 - absX * absX * absX; 459 return tmp * tmp * tmp; 460 } 461 462 /** 463 * Check that all elements of an array are finite real numbers. 464 * 465 * @param values Values array. 466 * @throws org.apache.commons.math3.exception.NotFiniteNumberException 467 * if one of the values is not a finite real number. 468 */ 469 private static void checkAllFiniteReal(final double[] values) { 470 for (int i = 0; i < values.length; i++) { 471 MathUtils.checkFinite(values[i]); 472 } 473 } 474 }