12 assert(
f->numOutputs==1);
18 for(
auto metain : metains ) {
19 auto it = a.
meta.find(metain);
20 assert(it!=a.
meta.end());
22 if( it->second.type()==
typeid(Eigen::Vector2d) ) {
23 ins.push_back(boost::any_cast<Eigen::Vector2d const&>(it->second));
26 if( it->second.type()==
typeid(Eigen::Vector3d) ) {
27 ins.push_back(boost::any_cast<Eigen::Vector3d const&>(it->second));
30 if( it->second.type()==
typeid(Eigen::Vector4d) ) {
31 ins.push_back(boost::any_cast<Eigen::Vector4d const&>(it->second));
34 if( it->second.type()==
typeid(Eigen::VectorXd) ) {
35 ins.push_back(boost::any_cast<Eigen::VectorXd const&>(it->second));
38 if( it->second.type()==
typeid(
double) ) {
39 ins.push_back(Eigen::VectorXd::Constant(1, boost::any_cast<double const>(it->second)));
42 if( it->second.type()==
typeid(
float) ) {
43 ins.push_back(Eigen::VectorXd::Constant(1, boost::any_cast<float const>(it->second)));
46 if( it->second.type()==
typeid(
int) ) {
47 ins.push_back(Eigen::VectorXd::Constant(1, boost::any_cast<int const>(it->second)));
50 if( it->second.type()==
typeid(
unsigned int) ) {
51 ins.push_back(Eigen::VectorXd::Constant(1, boost::any_cast<unsigned int const>(it->second)));
59 for(
unsigned int i=0; i<a.
state.size(); ++i ) { assert(a.
state[i].size()==
f->inputSizes(i)); }
61 std::vector<Eigen::VectorXd> ins;
64 return f->Evaluate(ins) [0];
71 const int numBlocks = a.
state.size();
75 for(
int i=0; i<numBlocks; ++i){
77 currInd += a.
state.at(i).size();
91 const int numBlocks = a.
state.size();
95 for(
int i=0; i<numBlocks; ++i){
97 currInd += a.
state.at(i).size();
124 assert(startInd<
size());
125 assert(startInd+length<=
size());
127 std::shared_ptr<SampleCollection> output = std::make_shared<SampleCollection>();
128 for(
unsigned int i=startInd; i<startInd+length; i+=skipBy)
142 Eigen::VectorXd stateSum;
146 return (stateSum / weightSum).eval();
153 Eigen::VectorXd stateSum;
157 return (stateSum / weightSum).eval();
162 const int numSamps =
samples.size();
163 Eigen::MatrixXd samps;
164 Eigen::VectorXd weights(numSamps);
168 const int totalSize =
samples.at(0)->TotalDim();
169 const int numBlocks =
samples.at(0)->state.size();
171 samps.resize(totalSize, numSamps);
173 for(
int i=0; i<numSamps; ++i){
174 weights(i) =
samples.at(i)->weight;
177 for(
int block = 0; block<numBlocks; ++block){
178 samps.col(i).segment(currInd,
samples.at(i)->state.at(block).size()) =
samples.at(i)->state.at(block);
179 currInd +=
samples.at(i)->state.at(block).size();
186 const int blockSize =
samples.at(0)->state.at(blockInd).size();
188 samps.resize(blockSize, numSamps);
190 for(
int i=0; i<numSamps; ++i){
191 weights(i) =
samples.at(i)->weight;
192 samps.col(i) =
samples.at(i)->state.at(blockInd);
196 return (samps.colwise() - mean) * weights.asDiagonal() * (samps.colwise()-mean).transpose() / weights.sum();
204 const int numSamps =
size();
205 std::vector<Eigen::MatrixXd> runCov(numSamps);
208 std::shared_ptr<SamplingState> s =
at(0);
210 const int totalSize = s->TotalDim();
211 const int numBlocks = s->state.size();
212 double totWeight = s->weight;
214 Eigen::VectorXd samp(totalSize);
217 for(
int block = 0; block<numBlocks; ++block){
218 samp.segment(currInd, s->state.at(block).size()) = s->state.at(block);
219 currInd += s->state.at(block).size();
223 runCov[0] = totWeight*samp*samp.transpose();
225 for(
int i=1; i<numSamps; ++i){
229 for(
int block = 0; block<numBlocks; ++block){
230 samp.segment(currInd, s->state.at(block).size()) = s->state.at(block);
231 currInd += s->state.at(block).size();
234 totWeight += s->weight;
236 runCov[i] = ((totWeight-s->weight)*runCov[i-1] + s->weight*samp*samp.transpose())/totWeight;
240 std::shared_ptr<SamplingState> s =
at(0);
242 const int blockSize = s->state.at(blockInd).size();
243 double totWeight = s->weight;
245 Eigen::VectorXd samp = s->state.at(blockInd)-mean;
246 runCov[0] = totWeight*samp*samp.transpose();
248 for(
int i=1; i<numSamps; ++i){
250 totWeight += s->weight;
251 samp = s->state.at(blockInd);
252 runCov[i] = ((totWeight-s->weight)*runCov[i-1] + s->weight*samp*samp.transpose())/totWeight;
260 std::vector<std::shared_ptr<SamplingState>>::const_iterator end)
262 int numSamps = std::distance(start,end);
263 const int maxSamps = 20;
266 if(numSamps<maxSamps){
268 double weightSquareSum = (*start)->weight * (*start)->weight;
269 double weightSum = (*start)->weight;
271 for(
auto it=start+1; it!=end; ++it){
272 weightSquareSum += (*it)->weight * (*it)->weight;
273 weightSum += (*it)->weight;
275 return std::make_pair(weightSum, weightSquareSum);
278 int halfDist = std::floor(0.5*numSamps);
279 double weight1, weight2, squaredSum1, squaredSum2;
283 return std::make_pair(weight1+weight2, squaredSum1+squaredSum2);
292 }
else if(method==
"MultiBatch"){
295 std::stringstream msg;
296 msg <<
"Invalid method (" << method <<
") passed to SampleCollection::ESS. Valid options are \"Batch\" and \"MultiBatch\".";
297 throw std::runtime_error(msg.str());
298 return Eigen::VectorXd();
305 double numSamps =
size();
309 batchSize = std::min( std::floor(numSamps/(
BlockSize(blockInd)+1)), std::round(std::pow(numSamps, 1.0/2.0)));
313 overlap = std::floor(0.75*batchSize);
316 unsigned int numBatches = std::floor( (numSamps-batchSize)/(batchSize-overlap) );
319 unsigned int numNonOverlapBatches = std::floor( (numSamps / batchSize) - 1 );
322 if(numNonOverlapBatches <
BlockSize(blockInd)){
324 std::stringstream msg;
325 msg <<
"ERROR in SampleCollection::MultiBatchError. Batch size of " << batchSize;
326 msg <<
" results in " << numNonOverlapBatches <<
" non-overlapping batches, which";
327 msg <<
" is less than the parameter dimension " <<
BlockSize(blockInd);
328 msg <<
" and will result in a singular estimator covariance matrix.";
329 throw std::runtime_error(msg.str());
333 unsigned int numEstimators = std::floor( (numSamps - numNonOverlapBatches*batchSize)/
double(batchSize-overlap) );
336 Eigen::VectorXd mean =
Mean(blockInd);
339 Eigen::MatrixXd estCov = Eigen::MatrixXd::Zero(
BlockSize(blockInd),
BlockSize(blockInd));
342 for(
unsigned int estInd=0; estInd<numEstimators; ++estInd){
343 for(
unsigned int batchInd=0; batchInd<numNonOverlapBatches; ++batchInd){
344 Eigen::VectorXd diff =
segment(estInd*(batchSize-overlap) + batchInd*batchSize, batchSize)->Mean(blockInd) - mean;
345 estCov += diff * diff.transpose();
348 estCov /= (numNonOverlapBatches-1.0)*numEstimators;
349 estCov *= double(batchSize);
351 double covDet =
Covariance(mean).determinant();
353 return std::min(numSamps, numSamps*std::pow(covDet / estCov.determinant(), 1.0/
BlockSize(blockInd)) );
358 double numSamps =
size();
360 Eigen::VectorXd avgEstVar =
BatchError(blockInd, batchSize, overlap).array().square();
362 Eigen::VectorXd ess =
Variance(blockInd).array() / avgEstVar.array();
363 for(
int i=0; i<ess.size(); ++i)
364 ess(i) = std::min(ess(i), numSamps);
370 std::string
const& method)
const
374 }
else if(method==
"MultiBatch"){
377 std::stringstream msg;
378 msg <<
"Invalid method (" << method <<
") passed to SampleCollection::StandardError. Valid options are \"Batch\" and \"MultiBatch\".";
379 throw std::runtime_error(msg.str());
380 return Eigen::VectorXd();
386 double numSamps =
size();
390 batchSize = std::round(std::pow(numSamps, 1.0/2.0));
394 overlap = std::floor(0.75*batchSize);
397 unsigned int numBatches = std::floor( (numSamps-batchSize)/(batchSize-overlap) );
400 unsigned int numNonOverlapBatches = std::floor( (numSamps / batchSize) - 1 );
403 unsigned int numEstimators = std::floor( (numSamps - numNonOverlapBatches*batchSize)/
double(batchSize-overlap) );
405 Eigen::VectorXd mean =
Mean(blockInd);
408 Eigen::VectorXd avgEstVar = Eigen::VectorXd::Zero(
BlockSize(blockInd));
409 for(
unsigned int estInd=0; estInd<numEstimators; ++estInd){
410 for(
unsigned int batchInd=0; batchInd<numNonOverlapBatches; ++batchInd){
411 avgEstVar += (1.0/(numNonOverlapBatches-1.0)) * (
segment(estInd*(batchSize-overlap) + batchInd*batchSize, batchSize)->Mean(blockInd) - mean).array().square().matrix();
414 avgEstVar /= numEstimators;
415 avgEstVar *= (double(batchSize)/double(
size()));
417 return avgEstVar.cwiseSqrt();
423 return (
Variance() / ess).array().sqrt();
429 const unsigned int numSamps = samps.cols();
431 std::shared_ptr<SampleCollection> output = std::make_shared<SampleCollection>();
432 for(
unsigned int col=0; col<numSamps; ++col)
433 output->Add( std::make_shared<SamplingState>(samps.col(col)));
441 return Eigen::MatrixXd();
445 Eigen::MatrixXd output(
samples.at(0)->TotalDim(),
samples.size());
446 for(
int i=0; i<
samples.size(); ++i){
449 for(
int block=0; block<
samples.at(0)->state.size(); ++block){
450 int blockSize =
samples.at(0)->state.at(block).size();
451 output.col(i).segment(startInd, blockSize) =
samples.at(i)->state.at(block);
452 startInd += blockSize;
459 Eigen::MatrixXd output(
samples.at(0)->state.at(blockDim).size(),
samples.size());
460 for(
int i=0; i<
samples.size(); ++i)
461 output.col(i) =
samples.at(0)->state.at(blockDim);
468 Eigen::VectorXd output(
samples.size());
469 for(
int i=0; i<
samples.size(); ++i)
470 output(i) =
samples.at(i)->weight;
475 if( !hdf5file->IsDataSet(dataname) ) {
return true; }
477 Eigen::VectorXi
size = hdf5file->GetDataSetSize(dataname);
478 if(
size(0)!=dataSize ||
size(1)!=totSamps ) {
return true; }
483 void SampleCollection::WriteToFile(std::string
const& filename, std::string
const& dataset)
const {
484 if(
size()==0 ) {
return; }
487 auto hdf5file = std::make_shared<HDF5File>(filename);
490 hdf5file->WriteMatrix(dataset+
"/samples",
AsMatrix());
491 hdf5file->WriteMatrix(dataset+
"/weights", (Eigen::RowVectorXd)
Weights());
494 const std::unordered_map<std::string, Eigen::MatrixXd>& meta =
GetMeta();
497 for(
const auto& data : meta ) { hdf5file->WriteMatrix(dataset+
"/"+data.first, data.second); }
505 std::set<std::string> SampleCollection::ListMeta(
bool requireAll)
const{
507 std::set<std::string> output;
510 for(
auto& metaPair :
at(0)->meta)
511 output.insert(metaPair.first);
514 for(
unsigned int i=1; i<
size(); ++i) {
518 std::vector<std::string> eraseElements;
519 for(
auto& key : output){
520 if( !
at(i)->HasMeta(key) ){
521 eraseElements.push_back(key);
526 for(
auto& key : eraseElements){
531 for(
auto& metaPair :
at(i)->meta)
532 output.insert(metaPair.first);
542 Eigen::MatrixXd meta;
545 for(
unsigned int i=0; i<
size(); ++i ) {
547 auto it =
at(i)->meta.find(name);
548 if( it==
at(i)->meta.end() ) {
continue; }
550 if( it->second.type()==
typeid(Eigen::Vector2d) ) {
552 if( meta.size()==0 ) {
553 meta = Eigen::MatrixXd::Constant(2,
size(), std::numeric_limits<double>::quiet_NaN());
555 meta.col(i) = boost::any_cast<Eigen::Vector2d const&>(it->second);
560 if( it->second.type()==
typeid(Eigen::Vector3d) ) {
562 if( meta.size()==0 ) {
563 meta = Eigen::MatrixXd::Constant(3,
size(), std::numeric_limits<double>::quiet_NaN());
565 meta.col(i) = boost::any_cast<Eigen::Vector3d const&>(it->second);
570 if( it->second.type()==
typeid(Eigen::Vector4d) ) {
572 if( meta.size()==0 ) {
573 meta = Eigen::MatrixXd::Constant(4,
size(), std::numeric_limits<double>::quiet_NaN());
575 meta.col(i) = boost::any_cast<Eigen::Vector4d const&>(it->second);
580 if( it->second.type()==
typeid(Eigen::VectorXd) ) {
582 if( meta.size()==0 ) {
583 meta = Eigen::MatrixXd::Constant(boost::any_cast<Eigen::VectorXd const&>(it->second).size(),
size(), std::numeric_limits<double>::quiet_NaN());
586 meta.col(i) = boost::any_cast<Eigen::VectorXd const&>(it->second);
592 if( meta.size()==0 ) {
593 meta = Eigen::MatrixXd::Constant(1,
size(), std::numeric_limits<double>::quiet_NaN());
596 if( it->second.type()==
typeid(
double) ) {
597 meta(i) = boost::any_cast<double const>(it->second);
601 if( it->second.type()==
typeid(
float) ) {
602 meta(i) = boost::any_cast<float const>(it->second);
606 if( it->second.type()==
typeid(
int) ) {
607 meta(i) = boost::any_cast<int const>(it->second);
611 if( it->second.type()==
typeid(
unsigned int) ) {
612 meta(i) = boost::any_cast<unsigned int const>(it->second);
621 std::unordered_map<std::string, Eigen::MatrixXd> meta;
622 for(
unsigned int i=0; i<
size(); ++i ) {
624 for(
auto it =
at(i)->meta.begin(); it!=
at(i)->meta.end(); ++it ) {
626 auto mat = meta.find(it->first);
627 if( mat==meta.end() ) {
628 meta[it->first] =
GetMeta(it->first);
643 return (val / weight).eval();
646 std::vector<Eigen::VectorXd> SampleCollection::RunningExpectedValue(std::shared_ptr<muq::Modeling::ModPiece>
const& f, std::vector<std::string>
const& metains)
const {
647 const unsigned int numSamps =
size();
648 std::vector<Eigen::VectorXd> runningExpected(numSamps);
650 std::shared_ptr<SamplingState> s =
at(0);
651 double totWeight = s->weight;
653 std::vector<Eigen::VectorXd> ins;
656 runningExpected[0] = totWeight*f->Evaluate(ins) [0];
657 for(
unsigned int i=1; i<numSamps; ++i ) {
660 totWeight += s->weight;
662 runningExpected[i] = ((totWeight-s->weight)*runningExpected[i-1] + s->weight*f->Evaluate(ins) [0])/totWeight;
665 return runningExpected;
672 return samples.at(0)->TotalDim();
674 return samples.at(0)->state.at(blockInd).size();
680 return samples.at(0)->state.size();
685 unsigned int numSegments,
686 boost::property_tree::ptree options)
const
688 std::vector<std::shared_ptr<const SampleCollection>> chains;
689 chains.push_back( shared_from_this());
691 return Diagnostics::Rhat(Diagnostics::SplitChains(chains, numSegments), options);
void ExpectedValueInputs(SamplingState const &a, std::vector< std::string > const &metains, std::vector< Eigen::VectorXd > &ins)
Eigen::VectorXd const & operator()(SamplingState const &a)
ExpectedModPieceValue(std::shared_ptr< muq::Modeling::ModPiece > const &f, std::vector< std::string > const &metains)
const std::vector< std::string > metains
std::shared_ptr< muq::Modeling::ModPiece > f
virtual std::shared_ptr< SamplingState > at(unsigned i)
std::unordered_map< std::string, Eigen::MatrixXd > GetMeta() const
double MultiBatchESS(int blockInd=-1, int batchSize=-1, int overlap=-1) const
virtual Eigen::VectorXd ESS(std::string const &method="Batch") const override
Returns the effective sample size of the samples.
virtual Eigen::VectorXd MultiBatchError(int blockInd=-1, int batchSize=-1, int overlap=-1) const
virtual std::vector< Eigen::MatrixXd > RunningCovariance(int blockInd=-1) const
std::vector< std::shared_ptr< SamplingState > > samples
virtual Eigen::VectorXd CentralMoment(unsigned order, Eigen::VectorXd const &mean, int blockNum=-1) const override
Computes the componentwise central moments (e.g., variance, skewness, kurtosis, etc....
virtual std::shared_ptr< SampleCollection > segment(unsigned int startInd, unsigned int length, unsigned int skipBy=1) const
virtual void Add(std::shared_ptr< SamplingState > newSamp)
virtual Eigen::MatrixXd AsMatrix(int blockDim=-1) const
static std::pair< double, double > RecursiveWeightSum(std::vector< std::shared_ptr< SamplingState >>::const_iterator start, std::vector< std::shared_ptr< SamplingState >>::const_iterator end)
bool CreateDataset(std::shared_ptr< muq::Utilities::HDF5File > hdf5file, std::string const &dataname, int const dataSize, int const totSamps) const
virtual Eigen::VectorXd Rhat(int blockDim, unsigned int numSegments=4, boost::property_tree::ptree options=boost::property_tree::ptree()) const
Eigen::VectorXd BatchESS(int blockInd=-1, int batchSize=-1, int overlap=-1) const
virtual Eigen::VectorXd Mean(int blockInd=-1) const override
virtual Eigen::VectorXd Weights() const
virtual Eigen::VectorXd StandardError(int blockInd, std::string const &method) const override
virtual Eigen::MatrixXd Covariance(int blockInd=-1) const override
static std::shared_ptr< SampleCollection > FromMatrix(Eigen::MatrixXd const &samps)
static std::pair< double, Eigen::VectorXd > RecursiveSum(std::vector< std::shared_ptr< SamplingState >>::const_iterator start, std::vector< std::shared_ptr< SamplingState >>::const_iterator end, FuncType &f)
virtual const std::shared_ptr< SamplingState > back() const
virtual unsigned int size() const
virtual Eigen::VectorXd BatchError(int blockInd=-1, int batchSize=-1, int overlap=-1) const
virtual unsigned int BlockSize(int blockInd) const =0
virtual Eigen::VectorXd ExpectedValue(std::shared_ptr< muq::Modeling::ModPiece > const &f, std::vector< std::string > const &metains=std::vector< std::string >()) const =0
virtual Eigen::VectorXd Variance(int blockInd=-1) const
virtual unsigned int NumBlocks() const =0
Eigen::VectorXd const & operator()(SamplingState const &a)
Eigen::VectorXd const & operator()(SamplingState const &a)
const Eigen::VectorXd & mu
Each state is one sample generated by a sampling algorithm.
int TotalDim() const
The total number of parameters in the state, i.e., the sum of state[i].size()
std::unordered_map< std::string, boost::any > meta
A map containing extra information like the target density, run time, forward model output,...
std::vector< Eigen::VectorXd > state
The state variables.