Statismo  0.10.1
 All Classes Namespaces Functions Typedefs
PCAModelBuilder.hxx
1 /*
2  * This file is part of the statismo library.
3  *
4  * Author: Marcel Luethi (marcel.luethi@unibas.ch)
5  *
6  * Copyright (c) 2011 University of Basel
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  *
16  * Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  *
20  * Neither the name of the project's author nor the names of its
21  * contributors may be used to endorse or promote products derived from
22  * this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
30  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #ifndef __PCAModelBuilder_TXX
39 #define __PCAModelBuilder_TXX
40 
41 #include "PCAModelBuilder.h"
42 
43 #include <iostream>
44 
45 #include <Eigen/SVD>
46 
47 #include "CommonTypes.h"
48 #include "Exceptions.h"
49 
50 namespace statismo {
51 
52 template <typename T>
53 PCAModelBuilder<T>::PCAModelBuilder()
54  : Superclass() {
55 }
56 
57 
58 template <typename T>
59 typename PCAModelBuilder<T>::StatisticalModelType*
60 PCAModelBuilder<T>::BuildNewModel(const DataItemListType& sampleDataList, double noiseVariance, bool computeScores) const {
61 
62  unsigned n = sampleDataList.size();
63  if (n <= 0) {
64  throw StatisticalModelException("Provided empty sample set. Cannot build the sample matrix");
65  }
66 
67  unsigned p = sampleDataList.front()->GetSampleVector().rows();
68  const Representer<T>* representer = sampleDataList.front()->GetRepresenter();
69 
70  // Build the sample matrix X
71  MatrixType X(n, p);
72 
73  unsigned i = 0;
74  for (typename DataItemListType::const_iterator it = sampleDataList.begin();
75  it != sampleDataList.end();
76  ++it) {
77  assert ((*it)->GetSampleVector().rows() == p); // all samples must have same number of rows
78  assert ((*it)->GetRepresenter() == representer); // all samples have the same representer
79  X.row(i++) = (*it)->GetSampleVector();
80  }
81 
82 
83  // build the model
84  StatisticalModelType* model = BuildNewModelInternal(representer, X, noiseVariance);
85  MatrixType scores;
86  if (computeScores) {
87  scores = this->ComputeScores(X, model);
88  }
89 
90 
91  typename BuilderInfo::ParameterInfoList bi;
92  bi.push_back(BuilderInfo::KeyValuePair("NoiseVariance ", Utils::toString(noiseVariance)));
93 
94  typename BuilderInfo::DataInfoList dataInfo;
95  i = 0;
96  for (typename DataItemListType::const_iterator it = sampleDataList.begin();
97  it != sampleDataList.end();
98  ++it, i++) {
99  std::ostringstream os;
100  os << "URI_" << i;
101  dataInfo.push_back(BuilderInfo::KeyValuePair(os.str().c_str(),(*it)->GetDatasetURI()));
102  }
103 
104 
105  // finally add meta data to the model info
106  BuilderInfo builderInfo("PCAModelBuilder", dataInfo, bi);
107 
108  ModelInfo::BuilderInfoList biList;
109  biList.push_back(builderInfo);
110 
111  ModelInfo info(scores, biList);
112  model->SetModelInfo(info);
113 
114  return model;
115 }
116 
117 
118 template <typename T>
120 PCAModelBuilder<T>::BuildNewModelInternal(const Representer<T>* representer, const MatrixType& X, double noiseVariance) const {
121 
122  typedef Eigen::JacobiSVD<MatrixType> SVDType;
123  typedef Eigen::JacobiSVD<MatrixTypeDoublePrecision> SVDDoublePrecisionType;
124 
125  unsigned n = X.rows();
126  unsigned p = X.cols();
127 
128  RowVectorType mu = X.colwise().mean(); // needs to be row vector
129  MatrixType X0 = X.rowwise() - mu;
130 
131  // We destinguish the case where we have more variables than samples and
132  // the case where we have more samples than variable.
133  // In the first case we compute the (smaller) inner product matrix instead of the full covariance matrix.
134  // It is known that this has the same non-zero singular values as the covariance matrix.
135  // Furthermore, it is possible to compute the corresponding eigenvectors of the covariance matrix from the
136  // decomposition.
137 
138  if (n < p) {
139  // we compute the eigenvectors of the covariance matrix by computing an SVD of the
140  // n x n inner product matrix 1/(n-1) X0X0^T
141  MatrixType Cov = X0 * X0.transpose() * 1.0/(n-1);
142  SVDDoublePrecisionType SVD(Cov.cast<double>(), Eigen::ComputeThinV);
143  VectorType singularValues = SVD.singularValues().cast<ScalarType>();
144  MatrixType V = SVD.matrixV().cast<ScalarType>();
145 
146  unsigned numComponentsAboveTolerance = ((singularValues.array() - noiseVariance - Superclass::TOLERANCE) > 0).count();
147 
148  // there can be at most n-1 nonzero singular values in this case. Everything else must be due to numerical inaccuracies
149  unsigned numComponentsToKeep = std::min(numComponentsAboveTolerance, n - 1);
150  // compute the pseudo inverse of the square root of the singular values
151  // which is then needed to recompute the PCA basis
152  VectorType singSqrt = singularValues.array().sqrt();
153  VectorType singSqrtInv = VectorType::Zero(singSqrt.rows());
154  for (unsigned i = 0; i < numComponentsToKeep; i++) {
155  assert(singSqrt(i) > Superclass::TOLERANCE);
156  singSqrtInv(i) = 1.0 / singSqrt(i);
157  }
158 
159  // we recover the eigenvectors U of the full covariance matrix from the eigenvectors V of the inner product matrix.
160  // We use the fact that if we decompose X as X=UDV^T, then we get X^TX = UD^2U^T and XX^T = VD^2V^T (exploiting the orthogonormality
161  // of the matrix U and V from the SVD). The additional factor sqrt(n-1) is to compensate for the 1/sqrt(n-1) in the formula
162  // for the covariance matrix.
163  MatrixType pcaBasis = (X0.transpose() * V * singSqrtInv.asDiagonal() / sqrt(n-1.0)).topLeftCorner(p, numComponentsToKeep);;
164 
165  if (numComponentsToKeep == 0) {
166  throw StatisticalModelException("All the eigenvalues are below the given tolerance. Model cannot be built.");
167  }
168 
169  VectorType sampleVarianceVector = singularValues.topRows(numComponentsToKeep);
170  VectorType pcaVariance = (sampleVarianceVector - VectorType::Ones(numComponentsToKeep) * noiseVariance);
171 
172  StatisticalModelType* model = StatisticalModelType::Create(representer, mu, pcaBasis, pcaVariance, noiseVariance);
173 
174  return model;
175  } else {
176  // we compute an SVD of the full p x p covariance matrix 1/(n-1) X0^TX0 directly
177  SVDType SVD(X0.transpose() * X0 * 1.0/(n-1), Eigen::ComputeThinU);
178  VectorType singularValues = SVD.singularValues();
179  unsigned numComponentsToKeep = ((singularValues.array() - noiseVariance - Superclass::TOLERANCE) > 0).count();
180  MatrixType pcaBasis = SVD.matrixU().topLeftCorner(p, numComponentsToKeep);
181 
182  if (numComponentsToKeep == 0) {
183  throw StatisticalModelException("All the eigenvalues are below the given tolerance. Model cannot be built.");
184  }
185 
186  VectorType sampleVarianceVector = singularValues.topRows(numComponentsToKeep);
187  VectorType pcaVariance = (sampleVarianceVector - VectorType::Ones(numComponentsToKeep) * noiseVariance);
188  StatisticalModelType* model = StatisticalModelType::Create(representer, mu, pcaBasis, pcaVariance, noiseVariance);
189  return model;
190  }
191 }
192 
193 
194 } // namespace statismo
195 
196 #endif
Generic Exception class for the statismo Library.
Definition: Exceptions.h:68
Holds information about the data and the parameters used by a specific modelbuilder.
Definition: ModelInfo.h:125
void SetModelInfo(const ModelInfo &modelInfo)
Definition: StatisticalModel.hxx:486
stores meta information about the model, such as e.g. the name (uri) of the datasets used to build th...
Definition: ModelInfo.h:61
StatisticalModelType * BuildNewModel(const DataItemListType &samples, double noiseVariance, bool computeScores=true) const
Definition: PCAModelBuilder.hxx:60
float ScalarType
the type that is used for all vector and matrices throughout the library.
Definition: CommonTypes.h:60
Creates StatisticalModel using Principal Component Analysis.
Definition: PCAModelBuilder.h:60
A Point/Value pair that is used to specify a value at a given point.
Definition: StatisticalModel.h:100
ITK Wrapper for the statismo::StatisticalModel class.
Definition: itkStatisticalModel.h:62