Statismo  0.10.1
 All Classes Namespaces Functions Typedefs
ConditionalModelBuilder.hxx
1 /*
2  * Representer.hxx
3  *
4  * Created by Remi Blanc, Marcel Luethi
5  *
6  * Copyright (c) 2011 ETH Zurich
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  *
16  * Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  *
20  * Neither the name of the project's author nor the names of its
21  * contributors may be used to endorse or promote products derived from
22  * this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
30  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #ifndef __ConditionalModelBuilder_hxx
39 #define __ConditionalModelBuilder_hxx
40 
41 #include "ConditionalModelBuilder.h"
42 
43 #include <iostream>
44 
45 #include <Eigen/SVD>
46 
47 #include "Exceptions.h"
48 #include "PCAModelBuilder.h"
49 
50 namespace statismo {
51 
52 //
53 // ConditionalModelBuilder
54 //
55 //
56 
57 
58 template <typename T>
59 unsigned
60 ConditionalModelBuilder<T>::PrepareData(const DataItemListType& sampleDataList,
61  const SurrogateTypeInfoType& surrogateTypesInfo,
62  const CondVariableValueVectorType& conditioningInfo,
63  DataItemListType *acceptedSamples,
64  MatrixType *surrogateMatrix,
65  VectorType *conditions) const {
66  bool acceptSample;
67  unsigned nbAcceptedSamples = 0;
68  unsigned nbContinuousSurrogatesInUse = 0, nbCategoricalSurrogatesInUse = 0;
69  std::vector<unsigned> indicesContinuousSurrogatesInUse;
70  std::vector<unsigned> indicesCategoricalSurrogatesInUse;
71 
72  //first: identify the continuous and categorical variables, which are used for conditioning and which are not
73  for (unsigned i=0 ; i<conditioningInfo.size() ; i++) {
74  if (conditioningInfo[i].first) { //only variables that are used for conditioning are of interest here
75  if (surrogateTypesInfo.types[i] == DataItemWithSurrogatesType::Continuous) {
76  nbContinuousSurrogatesInUse++;
77  indicesContinuousSurrogatesInUse.push_back(i);
78  } else {
79  nbCategoricalSurrogatesInUse++;
80  indicesCategoricalSurrogatesInUse.push_back(i);
81  }
82  }
83  }
84  conditions->resize(nbContinuousSurrogatesInUse);
85  for (unsigned i=0 ; i<nbContinuousSurrogatesInUse ; i++) (*conditions)(i) = conditioningInfo[i].second;
86  surrogateMatrix->resize(nbContinuousSurrogatesInUse, sampleDataList.size()); //number of variables is now known: nbContinuousSurrogatesInUse ; the number of samples is yet unknown...
87 
88  //now, browse all samples to select the ones which fall into the requested categories
89  for (typename DataItemListType::const_iterator it = sampleDataList.begin(); it != sampleDataList.end(); ++it) {
90  const DataItemWithSurrogatesType* sampleData = dynamic_cast<const DataItemWithSurrogatesType*>(*it);
91  if (sampleData == 0) {
92  // this is a normal sample without surrogate information.
93  // we simply discard it
94  std::cout<<"WARNING: ConditionalModelBuilder, sample data "<< (*it)->GetDatasetURI()<<" has no surrogate data associated, and is ignored"<<std::endl;
95  continue;
96  }
97 
98  VectorType surrogateData = sampleData->GetSurrogateVector();
99  acceptSample = true;
100  for (unsigned i=0 ; i<nbCategoricalSurrogatesInUse ; i++) { //check that this sample respect the requested categories
101  if ( conditioningInfo[indicesCategoricalSurrogatesInUse[i]].second !=
102  surrogateData[indicesCategoricalSurrogatesInUse[i]] ) {
103  //if one of the categories does not fit the requested one, then the sample is discarded
104  acceptSample = false;
105  continue;
106  }
107  }
108 
109  if (acceptSample) { //if the sample is of the right category
110  acceptedSamples->push_back(*it);
111  //and fill in the matrix of continuous variables
112  for (unsigned j=0 ; j<nbContinuousSurrogatesInUse ; j++) {
113  (*surrogateMatrix)(j,nbAcceptedSamples) = surrogateData[indicesContinuousSurrogatesInUse[j]];
114  }
115  nbAcceptedSamples++;
116  }
117  }
118  //resize the matrix of surrogate data to the effective number of accepted samples
119  surrogateMatrix->conservativeResize(Eigen::NoChange_t(), nbAcceptedSamples);
120 
121  return nbAcceptedSamples;
122 }
123 
124 template <typename T>
125 typename ConditionalModelBuilder<T>::StatisticalModelType*
126 ConditionalModelBuilder<T>::BuildNewModel(const DataItemListType& sampleDataList,
127  const SurrogateTypeInfoType& surrogateTypesInfo,
128  const CondVariableValueVectorType& conditioningInfo,
129  float noiseVariance,
130  double modelVarianceRetained) const {
131  DataItemListType acceptedSamples;
132  MatrixType X;
133  VectorType x0;
134  unsigned nSamples = PrepareData(sampleDataList, surrogateTypesInfo, conditioningInfo, &acceptedSamples, &X, &x0);
135  assert(nSamples == acceptedSamples.size());
136 
137  unsigned nCondVariables = X.rows();
138 
139  // build a normal PCA model
140  typedef PCAModelBuilder<T> PCAModelBuilderType;
141  PCAModelBuilderType* modelBuilder = PCAModelBuilderType::Create();
142  StatisticalModelType* pcaModel = modelBuilder->BuildNewModel(acceptedSamples, noiseVariance);
143 
144  unsigned nPCAComponents = pcaModel->GetNumberOfPrincipalComponents();
145 
146  if ( X.cols() == 0 || X.rows() == 0) {
147  return pcaModel;
148  } else {
149  // the scores in the pca model correspond to the parameters of each sample in the model.
150  MatrixType B = pcaModel->GetModelInfo().GetScoresMatrix().transpose();
151  assert(B.rows() == nSamples);
152  assert(B.cols() == nPCAComponents);
153 
154  // A is the joint data matrix B, X, where X contains the conditional information for each sample
155  // Thus the i-th row of A contains the PCA parameters b of the i-th sample,
156  // together with the conditional information for each sample
157  MatrixType A(nSamples, nPCAComponents+nCondVariables);
158  A << B,X.transpose();
159 
160  // Compute the mean and the covariance of the joint data matrix
161  VectorType mu = A.colwise().mean().transpose(); // colwise returns a row vector
162  assert(mu.rows() == nPCAComponents + nCondVariables);
163 
164  MatrixType A0 = A.rowwise() - mu.transpose(); //
165  MatrixType cov = 1.0 / (nSamples-1) * A0.transpose() * A0;
166 
167  assert(cov.rows() == cov.cols());
168  assert(cov.rows() == pcaModel->GetNumberOfPrincipalComponents() + nCondVariables);
169 
170  // extract the submatrices involving the conditionals x
171  // note that since the matrix is symmetric, Sbx = Sxb.transpose(), hence we only store one
172  MatrixType Sbx = cov.topRightCorner(nPCAComponents, nCondVariables);
173  MatrixType Sxx = cov.bottomRightCorner(nCondVariables, nCondVariables);
174  MatrixType Sbb = cov.topLeftCorner(nPCAComponents, nPCAComponents);
175 
176  // compute the conditional mean
177  VectorType condMean = mu.topRows(nPCAComponents) + Sbx * Sxx.inverse() * (x0 - mu.bottomRows(nCondVariables));
178 
179  // compute the conditional covariance
180  MatrixType condCov = Sbb - Sbx * Sxx.inverse() * Sbx.transpose();
181 
182  // get the sample mean corresponding the the conditional given mean of the parameter vectors
183  VectorType condMeanSample = pcaModel->GetRepresenter()->SampleToSampleVector(pcaModel->DrawSample(condMean));
184 
185 
186  // so far all the computation have been done in parameter (latent) space. Go back to sample space.
187  // (see PartiallyFixedModelBuilder for a detailed documentation)
188  // TODO we should factor this out into the base class, as it is the same code as it is used in
189  // the partially fixed model builder
190  const VectorType& pcaVariance = pcaModel->GetPCAVarianceVector();
191  VectorTypeDoublePrecision pcaSdev = pcaVariance.cast<double>().array().sqrt();
192 
193  typedef Eigen::JacobiSVD<MatrixTypeDoublePrecision> SVDType;
194  MatrixTypeDoublePrecision innerMatrix = pcaSdev.asDiagonal() * condCov.cast<double>() * pcaSdev.asDiagonal();
195  SVDType svd(innerMatrix, Eigen::ComputeThinU);
196  VectorType singularValues = svd.singularValues().cast<ScalarType>();
197 
198  // keep only the necessary number of modes, wrt modelVarianceRetained...
199  double totalRemainingVariance = singularValues.sum(); //
200  //and count the number of modes required for the model
201  double cumulatedVariance = singularValues(0);
202  unsigned numComponentsToReachPrescribedVariance = 1;
203  while ( cumulatedVariance/totalRemainingVariance < modelVarianceRetained ) {
204  numComponentsToReachPrescribedVariance++;
205  if (numComponentsToReachPrescribedVariance==singularValues.size()) break;
206  cumulatedVariance += singularValues(numComponentsToReachPrescribedVariance-1);
207  }
208 
209  unsigned numComponentsToKeep = std::min<unsigned>( numComponentsToReachPrescribedVariance, singularValues.size() );
210 
211  VectorType newPCAVariance = singularValues.topRows(numComponentsToKeep);
212  MatrixType newPCABasisMatrix = (pcaModel->GetOrthonormalPCABasisMatrix() * svd.matrixU().cast<ScalarType>()).leftCols(numComponentsToKeep);
213 
214  StatisticalModelType* model = StatisticalModelType::Create(pcaModel->GetRepresenter(), condMeanSample, newPCABasisMatrix, newPCAVariance, noiseVariance);
215 
216  // add builder info and data info to the info list
217  MatrixType scores(0,0);
218  BuilderInfo::ParameterInfoList bi;
219 
220  bi.push_back(BuilderInfo::KeyValuePair("NoiseVariance ", Utils::toString(noiseVariance)));
221 
222  //generate a matrix ; first column = boolean (yes/no, this variable is used) ; second: conditioning value.
223  MatrixType conditioningInfoMatrix(conditioningInfo.size(), 2);
224  for (unsigned i=0 ; i<conditioningInfo.size() ; i++) {
225  conditioningInfoMatrix(i,0) = conditioningInfo[i].first;
226  conditioningInfoMatrix(i,1) = conditioningInfo[i].second;
227  }
228  bi.push_back(BuilderInfo::KeyValuePair("ConditioningInfo ", Utils::toString(conditioningInfoMatrix)));
229 
230  typename BuilderInfo::DataInfoList di;
231 
232  unsigned i = 0;
233  for (typename DataItemListType::const_iterator it = sampleDataList.begin();
234  it != sampleDataList.end();
235  ++it, i++) {
236  const DataItemWithSurrogatesType* sampleData = dynamic_cast<const DataItemWithSurrogatesType*>(*it);
237  std::ostringstream os;
238  os << "URI_" << i;
239  di.push_back(BuilderInfo::KeyValuePair(os.str().c_str(),sampleData->GetDatasetURI()));
240 
241  os << "_surrogates";
242  di.push_back(BuilderInfo::KeyValuePair(os.str().c_str(),sampleData->GetSurrogateFilename()));
243  }
244 
245  std::ostringstream os;
246  os << "surrogates_types";
247  di.push_back(BuilderInfo::KeyValuePair(os.str().c_str(),surrogateTypesInfo.typeFilename));
248 
249 
250  BuilderInfo builderInfo("ConditionalModelBuilder", di, bi);
251 
252  ModelInfo::BuilderInfoList biList;
253  biList.push_back(builderInfo);
254 
255  ModelInfo info(scores, biList);
256  model->SetModelInfo(info);
257 
258  delete pcaModel;
259 
260  return model;
261  }
262 
263 }
264 
265 } // namespace statismo
266 
267 #endif
StatisticalModelType * BuildNewModel(const DataItemListType &sampleSet, const SurrogateTypeInfoType &surrogateTypesInfo, const CondVariableValueVectorType &conditioningInfo, float noiseVariance, double modelVarianceRetained=1) const
Definition: ConditionalModelBuilder.hxx:126
const ModelInfo & GetModelInfo() const
Definition: StatisticalModel.hxx:493
MatrixType GetOrthonormalPCABasisMatrix() const
Definition: StatisticalModel.hxx:473
const MatrixType & GetScoresMatrix() const
Definition: ModelInfo.cxx:74
const VectorType & GetPCAVarianceVector() const
Definition: StatisticalModel.hxx:460
Holds information about the data and the parameters used by a specific modelbuilder.
Definition: ModelInfo.h:125
void SetModelInfo(const ModelInfo &modelInfo)
Definition: StatisticalModel.hxx:486
stores meta information about the model, such as e.g. the name (uri) of the datasets used to build th...
Definition: ModelInfo.h:61
const RepresenterType * GetRepresenter() const
Definition: StatisticalModel.h:564
unsigned int GetNumberOfPrincipalComponents() const
Definition: StatisticalModel.hxx:501
float ScalarType
the type that is used for all vector and matrices throughout the library.
Definition: CommonTypes.h:60
Creates StatisticalModel using Principal Component Analysis.
Definition: PCAModelBuilder.h:60
DatasetPointerType DrawSample(const VectorType &coefficients, bool addNoise=false) const
Definition: StatisticalModel.hxx:150
A Point/Value pair that is used to specify a value at a given point.
Definition: StatisticalModel.h:100