001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.stat.descriptive;
019    
020    import java.io.Serializable;
021    import java.util.Collection;
022    import java.util.Iterator;
023    
024    import org.apache.commons.math3.exception.NullArgumentException;
025    
026    /**
027     * <p>
028     * An aggregator for {@code SummaryStatistics} from several data sets or
029     * data set partitions.  In its simplest usage mode, the client creates an
030     * instance via the zero-argument constructor, then uses
031     * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
032     * for each individual data set / partition.  The per-set statistics objects
033     * are used as normal, and at any time the aggregate statistics for all the
034     * contributors can be obtained from this object.
035     * </p><p>
036     * Clients with specialized requirements can use alternative constructors to
037     * control the statistics implementations and initial values used by the
038     * contributing and the internal aggregate {@code SummaryStatistics} objects.
039     * </p><p>
040     * A static {@link #aggregate(Collection)} method is also included that computes
041     * aggregate statistics directly from a Collection of SummaryStatistics instances.
042     * </p><p>
043     * When {@link #createContributingStatistics()} is used to create SummaryStatistics
044     * instances to be aggregated concurrently, the created instances'
045     * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
046     * instance maintained by this class.  In multithreaded environments, if the functionality
047     * provided by {@link #aggregate(Collection)} is adequate, that method should be used
048     * to avoid unnecessary computation and synchronization delays.</p>
049     *
050     * @since 2.0
051     * @version $Id: AggregateSummaryStatistics.java 1416643 2012-12-03 19:37:14Z tn $
052     *
053     */
054    public class AggregateSummaryStatistics implements StatisticalSummary,
055            Serializable {
056    
057    
058        /** Serializable version identifier */
059        private static final long serialVersionUID = -8207112444016386906L;
060    
061        /**
062         * A SummaryStatistics serving as a prototype for creating SummaryStatistics
063         * contributing to this aggregate
064         */
065        private final SummaryStatistics statisticsPrototype;
066    
067        /**
068         * The SummaryStatistics in which aggregate statistics are accumulated.
069         */
070        private final SummaryStatistics statistics;
071    
072        /**
073         * Initializes a new AggregateSummaryStatistics with default statistics
074         * implementations.
075         *
076         */
077        public AggregateSummaryStatistics() {
078            // No try-catch or throws NAE because arg is guaranteed non-null
079            this(new SummaryStatistics());
080        }
081    
082        /**
083         * Initializes a new AggregateSummaryStatistics with the specified statistics
084         * object as a prototype for contributing statistics and for the internal
085         * aggregate statistics.  This provides for customized statistics implementations
086         * to be used by contributing and aggregate statistics.
087         *
088         * @param prototypeStatistics a {@code SummaryStatistics} serving as a
089         *      prototype both for the internal aggregate statistics and for
090         *      contributing statistics obtained via the
091         *      {@code createContributingStatistics()} method.  Being a prototype
092         *      means that other objects are initialized by copying this object's state.
093         *      If {@code null}, a new, default statistics object is used.  Any statistic
094         *      values in the prototype are propagated to contributing statistics
095         *      objects and (once) into these aggregate statistics.
096         * @throws NullArgumentException if prototypeStatistics is null
097         * @see #createContributingStatistics()
098         */
099        public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
100            this(prototypeStatistics,
101                 prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
102        }
103    
104        /**
105         * Initializes a new AggregateSummaryStatistics with the specified statistics
106         * object as a prototype for contributing statistics and for the internal
107         * aggregate statistics.  This provides for different statistics implementations
108         * to be used by contributing and aggregate statistics and for an initial
109         * state to be supplied for the aggregate statistics.
110         *
111         * @param prototypeStatistics a {@code SummaryStatistics} serving as a
112         *      prototype both for the internal aggregate statistics and for
113         *      contributing statistics obtained via the
114         *      {@code createContributingStatistics()} method.  Being a prototype
115         *      means that other objects are initialized by copying this object's state.
116         *      If {@code null}, a new, default statistics object is used.  Any statistic
117         *      values in the prototype are propagated to contributing statistics
118         *      objects, but not into these aggregate statistics.
119         * @param initialStatistics a {@code SummaryStatistics} to serve as the
120         *      internal aggregate statistics object.  If {@code null}, a new, default
121         *      statistics object is used.
122         * @see #createContributingStatistics()
123         */
124        public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
125                                          SummaryStatistics initialStatistics) {
126            this.statisticsPrototype =
127                (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
128            this.statistics =
129                (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
130        }
131    
132        /**
133         * {@inheritDoc}.  This version returns the maximum over all the aggregated
134         * data.
135         *
136         * @see StatisticalSummary#getMax()
137         */
138        public double getMax() {
139            synchronized (statistics) {
140                return statistics.getMax();
141            }
142        }
143    
144        /**
145         * {@inheritDoc}.  This version returns the mean of all the aggregated data.
146         *
147         * @see StatisticalSummary#getMean()
148         */
149        public double getMean() {
150            synchronized (statistics) {
151                return statistics.getMean();
152            }
153        }
154    
155        /**
156         * {@inheritDoc}.  This version returns the minimum over all the aggregated
157         * data.
158         *
159         * @see StatisticalSummary#getMin()
160         */
161        public double getMin() {
162            synchronized (statistics) {
163                return statistics.getMin();
164            }
165        }
166    
167        /**
168         * {@inheritDoc}.  This version returns a count of all the aggregated data.
169         *
170         * @see StatisticalSummary#getN()
171         */
172        public long getN() {
173            synchronized (statistics) {
174                return statistics.getN();
175            }
176        }
177    
178        /**
179         * {@inheritDoc}.  This version returns the standard deviation of all the
180         * aggregated data.
181         *
182         * @see StatisticalSummary#getStandardDeviation()
183         */
184        public double getStandardDeviation() {
185            synchronized (statistics) {
186                return statistics.getStandardDeviation();
187            }
188        }
189    
190        /**
191         * {@inheritDoc}.  This version returns a sum of all the aggregated data.
192         *
193         * @see StatisticalSummary#getSum()
194         */
195        public double getSum() {
196            synchronized (statistics) {
197                return statistics.getSum();
198            }
199        }
200    
201        /**
202         * {@inheritDoc}.  This version returns the variance of all the aggregated
203         * data.
204         *
205         * @see StatisticalSummary#getVariance()
206         */
207        public double getVariance() {
208            synchronized (statistics) {
209                return statistics.getVariance();
210            }
211        }
212    
213        /**
214         * Returns the sum of the logs of all the aggregated data.
215         *
216         * @return the sum of logs
217         * @see SummaryStatistics#getSumOfLogs()
218         */
219        public double getSumOfLogs() {
220            synchronized (statistics) {
221                return statistics.getSumOfLogs();
222            }
223        }
224    
225        /**
226         * Returns the geometric mean of all the aggregated data.
227         *
228         * @return the geometric mean
229         * @see SummaryStatistics#getGeometricMean()
230         */
231        public double getGeometricMean() {
232            synchronized (statistics) {
233                return statistics.getGeometricMean();
234            }
235        }
236    
237        /**
238         * Returns the sum of the squares of all the aggregated data.
239         *
240         * @return The sum of squares
241         * @see SummaryStatistics#getSumsq()
242         */
243        public double getSumsq() {
244            synchronized (statistics) {
245                return statistics.getSumsq();
246            }
247        }
248    
249        /**
250         * Returns a statistic related to the Second Central Moment.  Specifically,
251         * what is returned is the sum of squared deviations from the sample mean
252         * among the all of the aggregated data.
253         *
254         * @return second central moment statistic
255         * @see SummaryStatistics#getSecondMoment()
256         */
257        public double getSecondMoment() {
258            synchronized (statistics) {
259                return statistics.getSecondMoment();
260            }
261        }
262    
263        /**
264         * Return a {@link StatisticalSummaryValues} instance reporting current
265         * aggregate statistics.
266         *
267         * @return Current values of aggregate statistics
268         */
269        public StatisticalSummary getSummary() {
270            synchronized (statistics) {
271                return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
272                        getMax(), getMin(), getSum());
273            }
274        }
275    
276        /**
277         * Creates and returns a {@code SummaryStatistics} whose data will be
278         * aggregated with those of this {@code AggregateSummaryStatistics}.
279         *
280         * @return a {@code SummaryStatistics} whose data will be aggregated with
281         *      those of this {@code AggregateSummaryStatistics}.  The initial state
282         *      is a copy of the configured prototype statistics.
283         */
284        public SummaryStatistics createContributingStatistics() {
285            SummaryStatistics contributingStatistics
286                    = new AggregatingSummaryStatistics(statistics);
287    
288            // No try - catch or advertising NAE because neither argument will ever be null
289            SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
290    
291            return contributingStatistics;
292        }
293    
294        /**
295         * Computes aggregate summary statistics. This method can be used to combine statistics
296         * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
297         * should contain the same values that would have been obtained by computing a single
298         * StatisticalSummary over the combined dataset.
299         * <p>
300         * Returns null if the collection is empty or null.
301         * </p>
302         *
303         * @param statistics collection of SummaryStatistics to aggregate
304         * @return summary statistics for the combined dataset
305         */
306        public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
307            if (statistics == null) {
308                return null;
309            }
310            Iterator<SummaryStatistics> iterator = statistics.iterator();
311            if (!iterator.hasNext()) {
312                return null;
313            }
314            SummaryStatistics current = iterator.next();
315            long n = current.getN();
316            double min = current.getMin();
317            double sum = current.getSum();
318            double max = current.getMax();
319            double m2 = current.getSecondMoment();
320            double mean = current.getMean();
321            while (iterator.hasNext()) {
322                current = iterator.next();
323                if (current.getMin() < min || Double.isNaN(min)) {
324                    min = current.getMin();
325                }
326                if (current.getMax() > max || Double.isNaN(max)) {
327                    max = current.getMax();
328                }
329                sum += current.getSum();
330                final double oldN = n;
331                final double curN = current.getN();
332                n += curN;
333                final double meanDiff = current.getMean() - mean;
334                mean = sum / n;
335                m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
336            }
337            final double variance;
338            if (n == 0) {
339                variance = Double.NaN;
340            } else if (n == 1) {
341                variance = 0d;
342            } else {
343                variance = m2 / (n - 1);
344            }
345            return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
346        }
347    
348        /**
349         * A SummaryStatistics that also forwards all values added to it to a second
350         * {@code SummaryStatistics} for aggregation.
351         *
352         * @since 2.0
353         */
354        private static class AggregatingSummaryStatistics extends SummaryStatistics {
355    
356            /**
357             * The serialization version of this class
358             */
359            private static final long serialVersionUID = 1L;
360    
361            /**
362             * An additional SummaryStatistics into which values added to these
363             * statistics (and possibly others) are aggregated
364             */
365            private final SummaryStatistics aggregateStatistics;
366    
367            /**
368             * Initializes a new AggregatingSummaryStatistics with the specified
369             * aggregate statistics object
370             *
371             * @param aggregateStatistics a {@code SummaryStatistics} into which
372             *      values added to this statistics object should be aggregated
373             */
374            public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
375                this.aggregateStatistics = aggregateStatistics;
376            }
377    
378            /**
379             * {@inheritDoc}.  This version adds the provided value to the configured
380             * aggregate after adding it to these statistics.
381             *
382             * @see SummaryStatistics#addValue(double)
383             */
384            @Override
385            public void addValue(double value) {
386                super.addValue(value);
387                synchronized (aggregateStatistics) {
388                    aggregateStatistics.addValue(value);
389                }
390            }
391    
392            /**
393             * Returns true iff <code>object</code> is a
394             * <code>SummaryStatistics</code> instance and all statistics have the
395             * same values as this.
396             * @param object the object to test equality against.
397             * @return true if object equals this
398             */
399            @Override
400            public boolean equals(Object object) {
401                if (object == this) {
402                    return true;
403                }
404                if (object instanceof AggregatingSummaryStatistics == false) {
405                    return false;
406                }
407                AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
408                return super.equals(stat) &&
409                       aggregateStatistics.equals(stat.aggregateStatistics);
410            }
411    
412            /**
413             * Returns hash code based on values of statistics
414             * @return hash code
415             */
416            @Override
417            public int hashCode() {
418                return 123 + super.hashCode() + aggregateStatistics.hashCode();
419            }
420        }
421    }