001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.optimization.fitting;
019    
020    import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer;
021    import org.apache.commons.math3.analysis.function.HarmonicOscillator;
022    import org.apache.commons.math3.exception.ZeroException;
023    import org.apache.commons.math3.exception.NumberIsTooSmallException;
024    import org.apache.commons.math3.exception.MathIllegalStateException;
025    import org.apache.commons.math3.exception.util.LocalizedFormats;
026    import org.apache.commons.math3.util.FastMath;
027    
028    /**
029     * Class that implements a curve fitting specialized for sinusoids.
030     *
031     * Harmonic fitting is a very simple case of curve fitting. The
032     * estimated coefficients are the amplitude a, the pulsation ω and
033     * the phase &phi;: <code>f (t) = a cos (&omega; t + &phi;)</code>. They are
034     * searched by a least square estimator initialized with a rough guess
035     * based on integrals.
036     *
037     * @version $Id: HarmonicFitter.java 1422230 2012-12-15 12:11:13Z erans $
038     * @deprecated As of 3.1 (to be removed in 4.0).
039     * @since 2.0
040     */
041    @Deprecated
042    public class HarmonicFitter extends CurveFitter<HarmonicOscillator.Parametric> {
043        /**
044         * Simple constructor.
045         * @param optimizer Optimizer to use for the fitting.
046         */
047        public HarmonicFitter(final DifferentiableMultivariateVectorOptimizer optimizer) {
048            super(optimizer);
049        }
050    
051        /**
052         * Fit an harmonic function to the observed points.
053         *
054         * @param initialGuess First guess values in the following order:
055         * <ul>
056         *  <li>Amplitude</li>
057         *  <li>Angular frequency</li>
058         *  <li>Phase</li>
059         * </ul>
060         * @return the parameters of the harmonic function that best fits the
061         * observed points (in the same order as above).
062         */
063        public double[] fit(double[] initialGuess) {
064            return fit(new HarmonicOscillator.Parametric(), initialGuess);
065        }
066    
067        /**
068         * Fit an harmonic function to the observed points.
069         * An initial guess will be automatically computed.
070         *
071         * @return the parameters of the harmonic function that best fits the
072         * observed points (see the other {@link #fit(double[]) fit} method.
073         * @throws NumberIsTooSmallException if the sample is too short for the
074         * the first guess to be computed.
075         * @throws ZeroException if the first guess cannot be computed because
076         * the abscissa range is zero.
077         */
078        public double[] fit() {
079            return fit((new ParameterGuesser(getObservations())).guess());
080        }
081    
082        /**
083         * This class guesses harmonic coefficients from a sample.
084         * <p>The algorithm used to guess the coefficients is as follows:</p>
085         *
086         * <p>We know f (t) at some sampling points t<sub>i</sub> and want to find a,
087         * &omega; and &phi; such that f (t) = a cos (&omega; t + &phi;).
088         * </p>
089         *
090         * <p>From the analytical expression, we can compute two primitives :
091         * <pre>
092         *     If2  (t) = &int; f<sup>2</sup>  = a<sup>2</sup> &times; [t + S (t)] / 2
093         *     If'2 (t) = &int; f'<sup>2</sup> = a<sup>2</sup> &omega;<sup>2</sup> &times; [t - S (t)] / 2
094         *     where S (t) = sin (2 (&omega; t + &phi;)) / (2 &omega;)
095         * </pre>
096         * </p>
097         *
098         * <p>We can remove S between these expressions :
099         * <pre>
100         *     If'2 (t) = a<sup>2</sup> &omega;<sup>2</sup> t - &omega;<sup>2</sup> If2 (t)
101         * </pre>
102         * </p>
103         *
104         * <p>The preceding expression shows that If'2 (t) is a linear
105         * combination of both t and If2 (t): If'2 (t) = A &times; t + B &times; If2 (t)
106         * </p>
107         *
108         * <p>From the primitive, we can deduce the same form for definite
109         * integrals between t<sub>1</sub> and t<sub>i</sub> for each t<sub>i</sub> :
110         * <pre>
111         *   If2 (t<sub>i</sub>) - If2 (t<sub>1</sub>) = A &times; (t<sub>i</sub> - t<sub>1</sub>) + B &times; (If2 (t<sub>i</sub>) - If2 (t<sub>1</sub>))
112         * </pre>
113         * </p>
114         *
115         * <p>We can find the coefficients A and B that best fit the sample
116         * to this linear expression by computing the definite integrals for
117         * each sample points.
118         * </p>
119         *
120         * <p>For a bilinear expression z (x<sub>i</sub>, y<sub>i</sub>) = A &times; x<sub>i</sub> + B &times; y<sub>i</sub>, the
121         * coefficients A and B that minimize a least square criterion
122         * &sum; (z<sub>i</sub> - z (x<sub>i</sub>, y<sub>i</sub>))<sup>2</sup> are given by these expressions:</p>
123         * <pre>
124         *
125         *         &sum;y<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub>
126         *     A = ------------------------
127         *         &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>y<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>y<sub>i</sub>
128         *
129         *         &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub>
130         *     B = ------------------------
131         *         &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>y<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>y<sub>i</sub>
132         * </pre>
133         * </p>
134         *
135         *
136         * <p>In fact, we can assume both a and &omega; are positive and
137         * compute them directly, knowing that A = a<sup>2</sup> &omega;<sup>2</sup> and that
138         * B = - &omega;<sup>2</sup>. The complete algorithm is therefore:</p>
139         * <pre>
140         *
141         * for each t<sub>i</sub> from t<sub>1</sub> to t<sub>n-1</sub>, compute:
142         *   f  (t<sub>i</sub>)
143         *   f' (t<sub>i</sub>) = (f (t<sub>i+1</sub>) - f(t<sub>i-1</sub>)) / (t<sub>i+1</sub> - t<sub>i-1</sub>)
144         *   x<sub>i</sub> = t<sub>i</sub> - t<sub>1</sub>
145         *   y<sub>i</sub> = &int; f<sup>2</sup> from t<sub>1</sub> to t<sub>i</sub>
146         *   z<sub>i</sub> = &int; f'<sup>2</sup> from t<sub>1</sub> to t<sub>i</sub>
147         *   update the sums &sum;x<sub>i</sub>x<sub>i</sub>, &sum;y<sub>i</sub>y<sub>i</sub>, &sum;x<sub>i</sub>y<sub>i</sub>, &sum;x<sub>i</sub>z<sub>i</sub> and &sum;y<sub>i</sub>z<sub>i</sub>
148         * end for
149         *
150         *            |--------------------------
151         *         \  | &sum;y<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub>
152         * a     =  \ | ------------------------
153         *           \| &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub>
154         *
155         *
156         *            |--------------------------
157         *         \  | &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub>
158         * &omega;     =  \ | ------------------------
159         *           \| &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>y<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>y<sub>i</sub>
160         *
161         * </pre>
162         * </p>
163         *
164         * <p>Once we know &omega;, we can compute:
165         * <pre>
166         *    fc = &omega; f (t) cos (&omega; t) - f' (t) sin (&omega; t)
167         *    fs = &omega; f (t) sin (&omega; t) + f' (t) cos (&omega; t)
168         * </pre>
169         * </p>
170         *
171         * <p>It appears that <code>fc = a &omega; cos (&phi;)</code> and
172         * <code>fs = -a &omega; sin (&phi;)</code>, so we can use these
173         * expressions to compute &phi;. The best estimate over the sample is
174         * given by averaging these expressions.
175         * </p>
176         *
177         * <p>Since integrals and means are involved in the preceding
178         * estimations, these operations run in O(n) time, where n is the
179         * number of measurements.</p>
180         */
181        public static class ParameterGuesser {
182            /** Amplitude. */
183            private final double a;
184            /** Angular frequency. */
185            private final double omega;
186            /** Phase. */
187            private final double phi;
188    
189            /**
190             * Simple constructor.
191             *
192             * @param observations Sampled observations.
193             * @throws NumberIsTooSmallException if the sample is too short.
194             * @throws ZeroException if the abscissa range is zero.
195             * @throws MathIllegalStateException when the guessing procedure cannot
196             * produce sensible results.
197             */
198            public ParameterGuesser(WeightedObservedPoint[] observations) {
199                if (observations.length < 4) {
200                    throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE,
201                                                        observations.length, 4, true);
202                }
203    
204                final WeightedObservedPoint[] sorted = sortObservations(observations);
205    
206                final double aOmega[] = guessAOmega(sorted);
207                a = aOmega[0];
208                omega = aOmega[1];
209    
210                phi = guessPhi(sorted);
211            }
212    
213            /**
214             * Gets an estimation of the parameters.
215             *
216             * @return the guessed parameters, in the following order:
217             * <ul>
218             *  <li>Amplitude</li>
219             *  <li>Angular frequency</li>
220             *  <li>Phase</li>
221             * </ul>
222             */
223            public double[] guess() {
224                return new double[] { a, omega, phi };
225            }
226    
227            /**
228             * Sort the observations with respect to the abscissa.
229             *
230             * @param unsorted Input observations.
231             * @return the input observations, sorted.
232             */
233            private WeightedObservedPoint[] sortObservations(WeightedObservedPoint[] unsorted) {
234                final WeightedObservedPoint[] observations = unsorted.clone();
235    
236                // Since the samples are almost always already sorted, this
237                // method is implemented as an insertion sort that reorders the
238                // elements in place. Insertion sort is very efficient in this case.
239                WeightedObservedPoint curr = observations[0];
240                for (int j = 1; j < observations.length; ++j) {
241                    WeightedObservedPoint prec = curr;
242                    curr = observations[j];
243                    if (curr.getX() < prec.getX()) {
244                        // the current element should be inserted closer to the beginning
245                        int i = j - 1;
246                        WeightedObservedPoint mI = observations[i];
247                        while ((i >= 0) && (curr.getX() < mI.getX())) {
248                            observations[i + 1] = mI;
249                            if (i-- != 0) {
250                                mI = observations[i];
251                            }
252                        }
253                        observations[i + 1] = curr;
254                        curr = observations[j];
255                    }
256                }
257    
258                return observations;
259            }
260    
261            /**
262             * Estimate a first guess of the amplitude and angular frequency.
263             * This method assumes that the {@link #sortObservations()} method
264             * has been called previously.
265             *
266             * @param observations Observations, sorted w.r.t. abscissa.
267             * @throws ZeroException if the abscissa range is zero.
268             * @throws MathIllegalStateException when the guessing procedure cannot
269             * produce sensible results.
270             * @return the guessed amplitude (at index 0) and circular frequency
271             * (at index 1).
272             */
273            private double[] guessAOmega(WeightedObservedPoint[] observations) {
274                final double[] aOmega = new double[2];
275    
276                // initialize the sums for the linear model between the two integrals
277                double sx2 = 0;
278                double sy2 = 0;
279                double sxy = 0;
280                double sxz = 0;
281                double syz = 0;
282    
283                double currentX = observations[0].getX();
284                double currentY = observations[0].getY();
285                double f2Integral = 0;
286                double fPrime2Integral = 0;
287                final double startX = currentX;
288                for (int i = 1; i < observations.length; ++i) {
289                    // one step forward
290                    final double previousX = currentX;
291                    final double previousY = currentY;
292                    currentX = observations[i].getX();
293                    currentY = observations[i].getY();
294    
295                    // update the integrals of f<sup>2</sup> and f'<sup>2</sup>
296                    // considering a linear model for f (and therefore constant f')
297                    final double dx = currentX - previousX;
298                    final double dy = currentY - previousY;
299                    final double f2StepIntegral =
300                        dx * (previousY * previousY + previousY * currentY + currentY * currentY) / 3;
301                    final double fPrime2StepIntegral = dy * dy / dx;
302    
303                    final double x = currentX - startX;
304                    f2Integral += f2StepIntegral;
305                    fPrime2Integral += fPrime2StepIntegral;
306    
307                    sx2 += x * x;
308                    sy2 += f2Integral * f2Integral;
309                    sxy += x * f2Integral;
310                    sxz += x * fPrime2Integral;
311                    syz += f2Integral * fPrime2Integral;
312                }
313    
314                // compute the amplitude and pulsation coefficients
315                double c1 = sy2 * sxz - sxy * syz;
316                double c2 = sxy * sxz - sx2 * syz;
317                double c3 = sx2 * sy2 - sxy * sxy;
318                if ((c1 / c2 < 0) || (c2 / c3 < 0)) {
319                    final int last = observations.length - 1;
320                    // Range of the observations, assuming that the
321                    // observations are sorted.
322                    final double xRange = observations[last].getX() - observations[0].getX();
323                    if (xRange == 0) {
324                        throw new ZeroException();
325                    }
326                    aOmega[1] = 2 * Math.PI / xRange;
327    
328                    double yMin = Double.POSITIVE_INFINITY;
329                    double yMax = Double.NEGATIVE_INFINITY;
330                    for (int i = 1; i < observations.length; ++i) {
331                        final double y = observations[i].getY();
332                        if (y < yMin) {
333                            yMin = y;
334                        }
335                        if (y > yMax) {
336                            yMax = y;
337                        }
338                    }
339                    aOmega[0] = 0.5 * (yMax - yMin);
340                } else {
341                    if (c2 == 0) {
342                        // In some ill-conditioned cases (cf. MATH-844), the guesser
343                        // procedure cannot produce sensible results.
344                        throw new MathIllegalStateException(LocalizedFormats.ZERO_DENOMINATOR);
345                    }
346    
347                    aOmega[0] = FastMath.sqrt(c1 / c2);
348                    aOmega[1] = FastMath.sqrt(c2 / c3);
349                }
350    
351                return aOmega;
352            }
353    
354            /**
355             * Estimate a first guess of the phase.
356             *
357             * @param observations Observations, sorted w.r.t. abscissa.
358             * @return the guessed phase.
359             */
360            private double guessPhi(WeightedObservedPoint[] observations) {
361                // initialize the means
362                double fcMean = 0;
363                double fsMean = 0;
364    
365                double currentX = observations[0].getX();
366                double currentY = observations[0].getY();
367                for (int i = 1; i < observations.length; ++i) {
368                    // one step forward
369                    final double previousX = currentX;
370                    final double previousY = currentY;
371                    currentX = observations[i].getX();
372                    currentY = observations[i].getY();
373                    final double currentYPrime = (currentY - previousY) / (currentX - previousX);
374    
375                    double omegaX = omega * currentX;
376                    double cosine = FastMath.cos(omegaX);
377                    double sine = FastMath.sin(omegaX);
378                    fcMean += omega * currentY * cosine - currentYPrime * sine;
379                    fsMean += omega * currentY * sine + currentYPrime * cosine;
380                }
381    
382                return FastMath.atan2(-fsMean, fcMean);
383            }
384        }
385    }