001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.distribution;
019    
020    import org.apache.commons.math3.exception.NotPositiveException;
021    import org.apache.commons.math3.exception.NotStrictlyPositiveException;
022    import org.apache.commons.math3.exception.NumberIsTooLargeException;
023    import org.apache.commons.math3.exception.util.LocalizedFormats;
024    import org.apache.commons.math3.util.FastMath;
025    import org.apache.commons.math3.random.RandomGenerator;
026    import org.apache.commons.math3.random.Well19937c;
027    
028    /**
029     * Implementation of the hypergeometric distribution.
030     *
031     * @see <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Hypergeometric distribution (Wikipedia)</a>
032     * @see <a href="http://mathworld.wolfram.com/HypergeometricDistribution.html">Hypergeometric distribution (MathWorld)</a>
033     * @version $Id: HypergeometricDistribution.java 1416643 2012-12-03 19:37:14Z tn $
034     */
035    public class HypergeometricDistribution extends AbstractIntegerDistribution {
036        /** Serializable version identifier. */
037        private static final long serialVersionUID = -436928820673516179L;
038        /** The number of successes in the population. */
039        private final int numberOfSuccesses;
040        /** The population size. */
041        private final int populationSize;
042        /** The sample size. */
043        private final int sampleSize;
044        /** Cached numerical variance */
045        private double numericalVariance = Double.NaN;
046        /** Whether or not the numerical variance has been calculated */
047        private boolean numericalVarianceIsCalculated = false;
048    
049        /**
050         * Construct a new hypergeometric distribution with the specified population
051         * size, number of successes in the population, and sample size.
052         *
053         * @param populationSize Population size.
054         * @param numberOfSuccesses Number of successes in the population.
055         * @param sampleSize Sample size.
056         * @throws NotPositiveException if {@code numberOfSuccesses < 0}.
057         * @throws NotStrictlyPositiveException if {@code populationSize <= 0}.
058         * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize},
059         * or {@code sampleSize > populationSize}.
060         */
061        public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize)
062        throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException {
063            this(new Well19937c(), populationSize, numberOfSuccesses, sampleSize);
064        }
065    
066        /**
067         * Creates a new hypergeometric distribution.
068         *
069         * @param rng Random number generator.
070         * @param populationSize Population size.
071         * @param numberOfSuccesses Number of successes in the population.
072         * @param sampleSize Sample size.
073         * @throws NotPositiveException if {@code numberOfSuccesses < 0}.
074         * @throws NotStrictlyPositiveException if {@code populationSize <= 0}.
075         * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize},
076         * or {@code sampleSize > populationSize}.
077         * @since 3.1
078         */
079        public HypergeometricDistribution(RandomGenerator rng,
080                                          int populationSize,
081                                          int numberOfSuccesses,
082                                          int sampleSize)
083        throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException {
084            super(rng);
085    
086            if (populationSize <= 0) {
087                throw new NotStrictlyPositiveException(LocalizedFormats.POPULATION_SIZE,
088                                                       populationSize);
089            }
090            if (numberOfSuccesses < 0) {
091                throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SUCCESSES,
092                                               numberOfSuccesses);
093            }
094            if (sampleSize < 0) {
095                throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES,
096                                               sampleSize);
097            }
098    
099            if (numberOfSuccesses > populationSize) {
100                throw new NumberIsTooLargeException(LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
101                                                    numberOfSuccesses, populationSize, true);
102            }
103            if (sampleSize > populationSize) {
104                throw new NumberIsTooLargeException(LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
105                                                    sampleSize, populationSize, true);
106            }
107    
108            this.numberOfSuccesses = numberOfSuccesses;
109            this.populationSize = populationSize;
110            this.sampleSize = sampleSize;
111        }
112    
113        /** {@inheritDoc} */
114        public double cumulativeProbability(int x) {
115            double ret;
116    
117            int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
118            if (x < domain[0]) {
119                ret = 0.0;
120            } else if (x >= domain[1]) {
121                ret = 1.0;
122            } else {
123                ret = innerCumulativeProbability(domain[0], x, 1);
124            }
125    
126            return ret;
127        }
128    
129        /**
130         * Return the domain for the given hypergeometric distribution parameters.
131         *
132         * @param n Population size.
133         * @param m Number of successes in the population.
134         * @param k Sample size.
135         * @return a two element array containing the lower and upper bounds of the
136         * hypergeometric distribution.
137         */
138        private int[] getDomain(int n, int m, int k) {
139            return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
140        }
141    
142        /**
143         * Return the lowest domain value for the given hypergeometric distribution
144         * parameters.
145         *
146         * @param n Population size.
147         * @param m Number of successes in the population.
148         * @param k Sample size.
149         * @return the lowest domain value of the hypergeometric distribution.
150         */
151        private int getLowerDomain(int n, int m, int k) {
152            return FastMath.max(0, m - (n - k));
153        }
154    
155        /**
156         * Access the number of successes.
157         *
158         * @return the number of successes.
159         */
160        public int getNumberOfSuccesses() {
161            return numberOfSuccesses;
162        }
163    
164        /**
165         * Access the population size.
166         *
167         * @return the population size.
168         */
169        public int getPopulationSize() {
170            return populationSize;
171        }
172    
173        /**
174         * Access the sample size.
175         *
176         * @return the sample size.
177         */
178        public int getSampleSize() {
179            return sampleSize;
180        }
181    
182        /**
183         * Return the highest domain value for the given hypergeometric distribution
184         * parameters.
185         *
186         * @param m Number of successes in the population.
187         * @param k Sample size.
188         * @return the highest domain value of the hypergeometric distribution.
189         */
190        private int getUpperDomain(int m, int k) {
191            return FastMath.min(k, m);
192        }
193    
194        /** {@inheritDoc} */
195        public double probability(int x) {
196            double ret;
197    
198            int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
199            if (x < domain[0] || x > domain[1]) {
200                ret = 0.0;
201            } else {
202                double p = (double) sampleSize / (double) populationSize;
203                double q = (double) (populationSize - sampleSize) / (double) populationSize;
204                double p1 = SaddlePointExpansion.logBinomialProbability(x,
205                        numberOfSuccesses, p, q);
206                double p2 =
207                    SaddlePointExpansion.logBinomialProbability(sampleSize - x,
208                        populationSize - numberOfSuccesses, p, q);
209                double p3 =
210                    SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
211                ret = FastMath.exp(p1 + p2 - p3);
212            }
213    
214            return ret;
215        }
216    
217        /**
218         * For this distribution, {@code X}, this method returns {@code P(X >= x)}.
219         *
220         * @param x Value at which the CDF is evaluated.
221         * @return the upper tail CDF for this distribution.
222         * @since 1.1
223         */
224        public double upperCumulativeProbability(int x) {
225            double ret;
226    
227            final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
228            if (x <= domain[0]) {
229                ret = 1.0;
230            } else if (x > domain[1]) {
231                ret = 0.0;
232            } else {
233                ret = innerCumulativeProbability(domain[1], x, -1);
234            }
235    
236            return ret;
237        }
238    
239        /**
240         * For this distribution, {@code X}, this method returns
241         * {@code P(x0 <= X <= x1)}.
242         * This probability is computed by summing the point probabilities for the
243         * values {@code x0, x0 + 1, x0 + 2, ..., x1}, in the order directed by
244         * {@code dx}.
245         *
246         * @param x0 Inclusive lower bound.
247         * @param x1 Inclusive upper bound.
248         * @param dx Direction of summation (1 indicates summing from x0 to x1, and
249         * 0 indicates summing from x1 to x0).
250         * @return {@code P(x0 <= X <= x1)}.
251         */
252        private double innerCumulativeProbability(int x0, int x1, int dx) {
253            double ret = probability(x0);
254            while (x0 != x1) {
255                x0 += dx;
256                ret += probability(x0);
257            }
258            return ret;
259        }
260    
261        /**
262         * {@inheritDoc}
263         *
264         * For population size {@code N}, number of successes {@code m}, and sample
265         * size {@code n}, the mean is {@code n * m / N}.
266         */
267        public double getNumericalMean() {
268            return (double) (getSampleSize() * getNumberOfSuccesses()) / (double) getPopulationSize();
269        }
270    
271        /**
272         * {@inheritDoc}
273         *
274         * For population size {@code N}, number of successes {@code m}, and sample
275         * size {@code n}, the variance is
276         * {@code [n * m * (N - n) * (N - m)] / [N^2 * (N - 1)]}.
277         */
278        public double getNumericalVariance() {
279            if (!numericalVarianceIsCalculated) {
280                numericalVariance = calculateNumericalVariance();
281                numericalVarianceIsCalculated = true;
282            }
283            return numericalVariance;
284        }
285    
286        /**
287         * Used by {@link #getNumericalVariance()}.
288         *
289         * @return the variance of this distribution
290         */
291        protected double calculateNumericalVariance() {
292            final double N = getPopulationSize();
293            final double m = getNumberOfSuccesses();
294            final double n = getSampleSize();
295            return (n * m * (N - n) * (N - m)) / (N * N * (N - 1));
296        }
297    
298        /**
299         * {@inheritDoc}
300         *
301         * For population size {@code N}, number of successes {@code m}, and sample
302         * size {@code n}, the lower bound of the support is
303         * {@code max(0, n + m - N)}.
304         *
305         * @return lower bound of the support
306         */
307        public int getSupportLowerBound() {
308            return FastMath.max(0,
309                                getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
310        }
311    
312        /**
313         * {@inheritDoc}
314         *
315         * For number of successes {@code m} and sample size {@code n}, the upper
316         * bound of the support is {@code min(m, n)}.
317         *
318         * @return upper bound of the support
319         */
320        public int getSupportUpperBound() {
321            return FastMath.min(getNumberOfSuccesses(), getSampleSize());
322        }
323    
324        /**
325         * {@inheritDoc}
326         *
327         * The support of this distribution is connected.
328         *
329         * @return {@code true}
330         */
331        public boolean isSupportConnected() {
332            return true;
333        }
334    }