38 #ifndef __ConditionalModelBuilder_hxx
39 #define __ConditionalModelBuilder_hxx
41 #include "ConditionalModelBuilder.h"
47 #include "Exceptions.h"
48 #include "PCAModelBuilder.h"
// PrepareData: walks sampleDataList, compares each sample's surrogate data with
// the requested conditioning, and gathers the continuous surrogate values.
//
// Parameters (as used in the visible code):
//   sampleDataList     - input samples; each is dynamic_cast to
//                        DataItemWithSurrogatesType.
//   surrogateTypesInfo - its `types[i]` says whether surrogate i is Continuous.
//   conditioningInfo   - per surrogate: `.first` = "use this variable",
//                        `.second` = requested value.
//   acceptedSamples    - out: receives sample pointers via push_back.
//   surrogateMatrix    - out: rows = in-use continuous surrogates, columns =
//                        samples; shrunk to nbAcceptedSamples columns at the end.
//   conditions         - out: resized to the number of in-use continuous
//                        surrogates and filled from conditioningInfo.
// Returns nbAcceptedSamples.
//
// NOTE(review): this text is a numbered source listing with gaps (e.g. listing
// lines 58-59, 66, 78, 81-83, 95-97, 105-109, 114-118 are absent). The template
// header, the return type, the declaration of `acceptSample`, any `else` /
// `continue` statements and several closing braces are therefore not visible
// here; the comments below describe only the statements that are shown.
60 ConditionalModelBuilder<T>::PrepareData(
const DataItemListType& sampleDataList,
61 const SurrogateTypeInfoType& surrogateTypesInfo,
62 const CondVariableValueVectorType& conditioningInfo,
63 DataItemListType *acceptedSamples,
64 MatrixType *surrogateMatrix,
65 VectorType *conditions)
const {
// Value returned at the end; also used as the column index when filling surrogateMatrix.
67 unsigned nbAcceptedSamples = 0;
68 unsigned nbContinuousSurrogatesInUse = 0, nbCategoricalSurrogatesInUse = 0;
// Positions (indices into conditioningInfo / the surrogate vector) of the
// variables that are actually used for conditioning, split by kind.
69 std::vector<unsigned> indicesContinuousSurrogatesInUse;
70 std::vector<unsigned> indicesCategoricalSurrogatesInUse;
// Pass 1: classify every conditioning variable whose `.first` flag is set.
73 for (
unsigned i=0 ; i<conditioningInfo.size() ; i++) {
74 if (conditioningInfo[i].first) {
75 if (surrogateTypesInfo.types[i] == DataItemWithSurrogatesType::Continuous) {
76 nbContinuousSurrogatesInUse++;
77 indicesContinuousSurrogatesInUse.push_back(i);
// Listing line 78 is absent; the two statements below are presumably the
// non-continuous (categorical) branch of the test above - confirm.
79 nbCategoricalSurrogatesInUse++;
80 indicesCategoricalSurrogatesInUse.push_back(i);
84 conditions->resize(nbContinuousSurrogatesInUse);
// NOTE(review): this reads conditioningInfo[i] for i in
// [0, nbContinuousSurrogatesInUse), not
// conditioningInfo[indicesContinuousSurrogatesInUse[i]]. The two only agree
// when the in-use continuous variables occupy the first positions of
// conditioningInfo, whereas the matrix fill further down does go through
// indicesContinuousSurrogatesInUse. Looks like an indexing bug - confirm.
85 for (
unsigned i=0 ; i<nbContinuousSurrogatesInUse ; i++) (*conditions)(i) = conditioningInfo[i].second;
// Allocate for the worst case (every sample accepted); trimmed after the loop.
86 surrogateMatrix->resize(nbContinuousSurrogatesInUse, sampleDataList.size());
// Pass 2: examine every sample.
89 for (
typename DataItemListType::const_iterator it = sampleDataList.begin(); it != sampleDataList.end(); ++it) {
90 const DataItemWithSurrogatesType* sampleData =
dynamic_cast<const DataItemWithSurrogatesType*
>(*it);
// A failed cast means the item is not a DataItemWithSurrogatesType; a warning
// is printed. What follows the warning (listing lines 95-97) is not visible;
// the message text says the sample "is ignored".
91 if (sampleData == 0) {
94 std::cout<<
"WARNING: ConditionalModelBuilder, sample data "<< (*it)->GetDatasetURI()<<
" has no surrogate data associated, and is ignored"<<std::endl;
98 VectorType surrogateData = sampleData->GetSurrogateVector();
// Categorical check: the sample's value must equal the requested value for
// every in-use categorical surrogate, otherwise acceptSample is cleared.
// (The initialisation of acceptSample, listing line 99, is not visible.)
100 for (
unsigned i=0 ; i<nbCategoricalSurrogatesInUse ; i++) {
101 if ( conditioningInfo[indicesCategoricalSurrogatesInUse[i]].second !=
102 surrogateData[indicesCategoricalSurrogatesInUse[i]] ) {
104 acceptSample =
false;
// Record the sample and copy its in-use continuous surrogates into column
// nbAcceptedSamples. The guard on acceptSample and the increment of
// nbAcceptedSamples are presumably in the absent listing lines 105-109 and
// 114-118 - confirm against the full source.
110 acceptedSamples->push_back(*it);
112 for (
unsigned j=0 ; j<nbContinuousSurrogatesInUse ; j++) {
113 (*surrogateMatrix)(j,nbAcceptedSamples) = surrogateData[indicesContinuousSurrogatesInUse[j]];
// Keep the row count, drop the unused trailing columns while preserving the
// values already written.
119 surrogateMatrix->conservativeResize(Eigen::NoChange_t(), nbAcceptedSamples);
121 return nbAcceptedSamples;
// Model-building member of ConditionalModelBuilder<T>; returns a
// StatisticalModelType*. Visible steps:
//   1. PrepareData selects samples and yields the surrogate matrix X and the
//      conditioning vector x0.
//   2. A joint data matrix A = [B | X^T] is formed, its column mean and sample
//      covariance are computed, and the covariance is split into blocks.
//   3. condMean / condCov are derived from those blocks.
//   4. An SVD of a variance-scaled condCov gives the new variances, truncated
//      according to modelVarianceRetained.
//   5. Builder/parameter/data information is assembled.
//
// NOTE(review): this text is a numbered source listing with gaps. The line
// carrying the function name and first parameter (listing line 126), the
// noiseVariance parameter (129), the declarations of X, x0, B, nPCAComponents
// and pcaVariance, the body of the `if` at listing line 146, several closing
// braces and the end of the function are not visible here. Comments describe
// only the statements that are shown.
124 template <
typename T>
125 typename ConditionalModelBuilder<T>::StatisticalModelType*
127 const SurrogateTypeInfoType& surrogateTypesInfo,
128 const CondVariableValueVectorType& conditioningInfo,
130 double modelVarianceRetained)
const {
131 DataItemListType acceptedSamples;
// X receives the continuous surrogate values (one column per accepted sample),
// x0 the requested conditioning values.
134 unsigned nSamples = PrepareData(sampleDataList, surrogateTypesInfo, conditioningInfo, &acceptedSamples, &X, &x0);
135 assert(nSamples == acceptedSamples.size());
137 unsigned nCondVariables = X.rows();
// How modelBuilder is used (and released) is in listing lines not shown here.
141 PCAModelBuilderType* modelBuilder = PCAModelBuilderType::Create();
// Special case: no accepted sample or no continuous conditioning variable.
// The body of this branch is not visible in this listing.
146 if ( X.cols() == 0 || X.rows() == 0) {
// B is expected to hold one row per sample and one column per PCA component.
151 assert(B.rows() == nSamples);
152 assert(B.cols() == nPCAComponents);
// Joint matrix: each row is [B row | surrogate values] for one sample.
157 MatrixType A(nSamples, nPCAComponents+nCondVariables);
158 A << B,X.transpose();
// Column-wise mean of A, stored as a column vector.
161 VectorType mu = A.colwise().mean().transpose();
162 assert(mu.rows() == nPCAComponents + nCondVariables);
// Centre the data and form the sample covariance.
// NOTE(review): nSamples is unsigned, so nSamples-1 is 0 for a single sample
// and wraps for zero samples; whether the branch at listing line 146 rules
// these cases out is not visible - confirm.
164 MatrixType A0 = A.rowwise() - mu.transpose();
165 MatrixType cov = 1.0 / (nSamples-1) * A0.transpose() * A0;
167 assert(cov.rows() == cov.cols());
// Block view of cov, ordered as [B-part ; X-part]:
//   Sbb = top-left, Sbx = top-right, Sxx = bottom-right.
172 MatrixType Sbx = cov.topRightCorner(nPCAComponents, nCondVariables);
173 MatrixType Sxx = cov.bottomRightCorner(nCondVariables, nCondVariables);
174 MatrixType Sbb = cov.topLeftCorner(nPCAComponents, nPCAComponents);
// condMean = mu_b + Sbx * Sxx^-1 * (x0 - mu_x)
// condCov  = Sbb  - Sbx * Sxx^-1 * Sbx^T
// NOTE(review): Sxx.inverse() is evaluated in both statements; nothing visible
// checks that Sxx is invertible.
177 VectorType condMean = mu.topRows(nPCAComponents) + Sbx * Sxx.inverse() * (x0 - mu.bottomRows(nCondVariables));
180 MatrixType condCov = Sbb - Sbx * Sxx.inverse() * Sbx.transpose();
// Element-wise square root of pcaVariance, computed in double precision.
191 VectorTypeDoublePrecision pcaSdev = pcaVariance.cast<
double>().array().sqrt();
193 typedef Eigen::JacobiSVD<MatrixTypeDoublePrecision> SVDType;
// Scale condCov on both sides by diag(pcaSdev) before decomposing it.
194 MatrixTypeDoublePrecision innerMatrix = pcaSdev.asDiagonal() * condCov.cast<
double>() * pcaSdev.asDiagonal();
195 SVDType svd(innerMatrix, Eigen::ComputeThinU);
196 VectorType singularValues = svd.singularValues().cast<
ScalarType>();
// Choose the smallest leading set of singular values whose share of the total
// reaches modelVarianceRetained.
199 double totalRemainingVariance = singularValues.sum();
// NOTE(review): singularValues(0) is read unconditionally in the visible code.
201 double cumulatedVariance = singularValues(0);
202 unsigned numComponentsToReachPrescribedVariance = 1;
203 while ( cumulatedVariance/totalRemainingVariance < modelVarianceRetained ) {
204 numComponentsToReachPrescribedVariance++;
// Stop once every component is included. NOTE(review): the test is `==`, so it
// only fires if the counter starts below singularValues.size().
205 if (numComponentsToReachPrescribedVariance==singularValues.size())
break;
206 cumulatedVariance += singularValues(numComponentsToReachPrescribedVariance-1);
209 unsigned numComponentsToKeep = std::min<unsigned>( numComponentsToReachPrescribedVariance, singularValues.size() );
// The retained singular values become the new PCA variances.
211 VectorType newPCAVariance = singularValues.topRows(numComponentsToKeep);
// Empty (0x0) scores matrix.
217 MatrixType scores(0,0);
// Parameter list recorded in the builder info. The key strings are kept
// exactly as written (including their trailing space).
218 BuilderInfo::ParameterInfoList bi;
220 bi.push_back(BuilderInfo::KeyValuePair(
"NoiseVariance ", Utils::toString(noiseVariance)));
// conditioningInfo flattened to an N x 2 matrix: column 0 = in-use flag,
// column 1 = requested value.
223 MatrixType conditioningInfoMatrix(conditioningInfo.size(), 2);
224 for (
unsigned i=0 ; i<conditioningInfo.size() ; i++) {
225 conditioningInfoMatrix(i,0) = conditioningInfo[i].first;
226 conditioningInfoMatrix(i,1) = conditioningInfo[i].second;
228 bi.push_back(BuilderInfo::KeyValuePair(
"ConditioningInfo ", Utils::toString(conditioningInfoMatrix)));
// Data info: for each entry of sampleDataList, its dataset URI and surrogate
// filename; the keys come from `os`, whose contents are written in listing
// lines not shown here.
230 typename BuilderInfo::DataInfoList di;
233 for (
typename DataItemListType::const_iterator it = sampleDataList.begin();
234 it != sampleDataList.end();
// NOTE(review): this iterates sampleDataList (not acceptedSamples) and no null
// check on the cast result is visible before sampleData is dereferenced below,
// although PrepareData treats a failed cast as possible - confirm.
236 const DataItemWithSurrogatesType* sampleData =
dynamic_cast<const DataItemWithSurrogatesType*
>(*it);
237 std::ostringstream os;
239 di.push_back(BuilderInfo::KeyValuePair(os.str().c_str(),sampleData->GetDatasetURI()));
242 di.push_back(BuilderInfo::KeyValuePair(os.str().c_str(),sampleData->GetSurrogateFilename()));
// Finally record the file describing the surrogate types.
245 std::ostringstream os;
246 os <<
"surrogates_types";
247 di.push_back(BuilderInfo::KeyValuePair(os.str().c_str(),surrogateTypesInfo.typeFilename));
// Bundle data info and parameter info under this builder's name.
250 BuilderInfo builderInfo(
"ConditionalModelBuilder", di, bi);
252 ModelInfo::BuilderInfoList biList;
253 biList.push_back(builderInfo);
StatisticalModelType * BuildNewModel(const DataItemListType &sampleSet, const SurrogateTypeInfoType &surrogateTypesInfo, const CondVariableValueVectorType &conditioningInfo, float noiseVariance, double modelVarianceRetained=1) const
Definition: ConditionalModelBuilder.hxx:126
const ModelInfo & GetModelInfo() const
Definition: StatisticalModel.hxx:493
MatrixType GetOrthonormalPCABasisMatrix() const
Definition: StatisticalModel.hxx:473
const MatrixType & GetScoresMatrix() const
Definition: ModelInfo.cxx:74
const VectorType & GetPCAVarianceVector() const
Definition: StatisticalModel.hxx:460
Holds information about the data and the parameters used by a specific modelbuilder.
Definition: ModelInfo.h:125
void SetModelInfo(const ModelInfo &modelInfo)
Definition: StatisticalModel.hxx:486
Stores meta information about the model, such as the name (URI) of the datasets used to build the model...
Definition: ModelInfo.h:61
const RepresenterType * GetRepresenter() const
Definition: StatisticalModel.h:564
unsigned int GetNumberOfPrincipalComponents() const
Definition: StatisticalModel.hxx:501
float ScalarType
The type that is used for all vectors and matrices throughout the library.
Definition: CommonTypes.h:60
Creates StatisticalModel using Principal Component Analysis.
Definition: PCAModelBuilder.h:60
DatasetPointerType DrawSample(const VectorType &coefficients, bool addNoise=false) const
Definition: StatisticalModel.hxx:150
A Point/Value pair that is used to specify a value at a given point.
Definition: StatisticalModel.h:100