001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math3.stat.correlation; 018 019 import org.apache.commons.math3.exception.DimensionMismatchException; 020 import org.apache.commons.math3.exception.MathUnsupportedOperationException; 021 import org.apache.commons.math3.exception.NumberIsTooSmallException; 022 import org.apache.commons.math3.linear.MatrixUtils; 023 import org.apache.commons.math3.linear.RealMatrix; 024 025 /** 026 * Covariance implementation that does not require input data to be 027 * stored in memory. The size of the covariance matrix is specified in the 028 * constructor. Specific elements of the matrix are incrementally updated with 029 * calls to incrementRow() or increment Covariance(). 030 * 031 * <p>This class is based on a paper written by Philippe Pébay: 032 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf"> 033 * Formulas for Robust, One-Pass Parallel Computation of Covariances and 034 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212, 035 * Sandia National Laboratories.</p> 036 * 037 * <p>Note: the underlying covariance matrix is symmetric, thus only the 038 * upper triangular part of the matrix is stored and updated each increment.</p> 039 * 040 * @version $Id: StorelessCovariance.java 1410238 2012-11-16 07:58:49Z luc $ 041 * @since 3.0 042 */ 043 public class StorelessCovariance extends Covariance { 044 045 /** the square covariance matrix (upper triangular part) */ 046 private StorelessBivariateCovariance[] covMatrix; 047 048 /** dimension of the square covariance matrix */ 049 private int dimension; 050 051 /** 052 * Create a bias corrected covariance matrix with a given dimension. 053 * 054 * @param dim the dimension of the square covariance matrix 055 */ 056 public StorelessCovariance(final int dim) { 057 this(dim, true); 058 } 059 060 /** 061 * Create a covariance matrix with a given number of rows and columns and the 062 * indicated bias correction. 063 * 064 * @param dim the dimension of the covariance matrix 065 * @param biasCorrected if <code>true</code> the covariance estimate is corrected 066 * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction, 067 * i.e. n in the denominator. 068 */ 069 public StorelessCovariance(final int dim, final boolean biasCorrected) { 070 dimension = dim; 071 covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2]; 072 initializeMatrix(biasCorrected); 073 } 074 075 /** 076 * Initialize the internal two-dimensional array of 077 * {@link StorelessBivariateCovariance} instances. 078 * 079 * @param biasCorrected if the covariance estimate shall be corrected for bias 080 */ 081 private void initializeMatrix(final boolean biasCorrected) { 082 for(int i = 0; i < dimension; i++){ 083 for(int j = 0; j < dimension; j++){ 084 setElement(i, j, new StorelessBivariateCovariance(biasCorrected)); 085 } 086 } 087 } 088 089 /** 090 * Returns the index (i, j) translated into the one-dimensional 091 * array used to store the upper triangular part of the symmetric 092 * covariance matrix. 093 * 094 * @param i the row index 095 * @param j the column index 096 * @return the corresponding index in the matrix array 097 */ 098 private int indexOf(final int i, final int j) { 099 return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i; 100 } 101 102 /** 103 * Gets the element at index (i, j) from the covariance matrix 104 * @param i the row index 105 * @param j the column index 106 * @return the {@link StorelessBivariateCovariance} element at the given index 107 */ 108 private StorelessBivariateCovariance getElement(final int i, final int j) { 109 return covMatrix[indexOf(i, j)]; 110 } 111 112 /** 113 * Sets the covariance element at index (i, j) in the covariance matrix 114 * @param i the row index 115 * @param j the column index 116 * @param cov the {@link StorelessBivariateCovariance} element to be set 117 */ 118 private void setElement(final int i, final int j, 119 final StorelessBivariateCovariance cov) { 120 covMatrix[indexOf(i, j)] = cov; 121 } 122 123 /** 124 * Get the covariance for an individual element of the covariance matrix. 125 * 126 * @param xIndex row index in the covariance matrix 127 * @param yIndex column index in the covariance matrix 128 * @return the covariance of the given element 129 * @throws NumberIsTooSmallException if the number of observations 130 * in the cell is < 2 131 */ 132 public double getCovariance(final int xIndex, 133 final int yIndex) 134 throws NumberIsTooSmallException { 135 136 return getElement(xIndex, yIndex).getResult(); 137 138 } 139 140 /** 141 * Increment the covariance matrix with one row of data. 142 * 143 * @param data array representing one row of data. 144 * @throws DimensionMismatchException if the length of <code>rowData</code> 145 * does not match with the covariance matrix 146 */ 147 public void increment(final double[] data) 148 throws DimensionMismatchException { 149 150 int length = data.length; 151 if (length != dimension) { 152 throw new DimensionMismatchException(length, dimension); 153 } 154 155 // only update the upper triangular part of the covariance matrix 156 // as only these parts are actually stored 157 for (int i = 0; i < length; i++){ 158 for (int j = i; j < length; j++){ 159 getElement(i, j).increment(data[i], data[j]); 160 } 161 } 162 163 } 164 165 /** 166 * {@inheritDoc} 167 * @throws NumberIsTooSmallException if the number of observations 168 * in a cell is < 2 169 */ 170 @Override 171 public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException { 172 return MatrixUtils.createRealMatrix(getData()); 173 } 174 175 /** 176 * Return the covariance matrix as two-dimensional array. 177 * 178 * @return a two-dimensional double array of covariance values 179 * @throws NumberIsTooSmallException if the number of observations 180 * for a cell is < 2 181 */ 182 public double[][] getData() throws NumberIsTooSmallException { 183 final double[][] data = new double[dimension][dimension]; 184 for (int i = 0; i < dimension; i++) { 185 for (int j = 0; j < dimension; j++) { 186 data[i][j] = getElement(i, j).getResult(); 187 } 188 } 189 return data; 190 } 191 192 /** 193 * This {@link Covariance} method is not supported by a {@link StorelessCovariance}, 194 * since the number of bivariate observations does not have to be the same for different 195 * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined. 196 * 197 * @return nothing as this implementation always throws a 198 * {@link MathUnsupportedOperationException} 199 * @throws MathUnsupportedOperationException in all cases 200 */ 201 @Override 202 public int getN() 203 throws MathUnsupportedOperationException { 204 throw new MathUnsupportedOperationException(); 205 } 206 }