MUQ  0.4.3
InvariantSampling.cpp
/***
## Background

A canonical inverse problem is to infer the spatially varying parameter field $K(x)$ in the following elliptic PDE
$$
-\nabla\cdot\left[ K \nabla h\right] = f,
$$
where $h(x)$ is the solution of the PDE and $f(x)$ is a source term. We consider this PDE in a one dimensional (i.e., $x\in\mathbb{R}$) setting with boundary conditions given by
$$
\begin{aligned}
h(0) &= 0\\
\left. \frac{\partial h}{\partial x}\right|_{x=1} &= 0.
\end{aligned}
$$

This equation can be used to describe many different diffusive processes, such as heat conduction, groundwater flow, and contaminant diffusion. This example interprets the PDE in the context of groundwater flow, meaning that $h(x)$ represents the hydraulic head in a confined aquifer, $f(x)$ represents recharge of water entering the aquifer, and $K(x)$ is the hydraulic conductivity, which describes how easily water can pass through the aquifer.

The hydraulic head $h(x)$ can be measured at individual points $x_1,x_2,\ldots,x_M$ by drilling boreholes into an aquifer and measuring the pressure in the borehole. The conductivity $K(x)$, however, cannot be observed directly. We will therefore consider estimating the hydraulic conductivity $K(x)$ from noisy measurements of $h(x_1), h(x_2),\ldots, h(x_M)$.

**The goal of this example is to demonstrate the use of MUQ for solving Bayesian inverse problems with PDE models.** The implementation of the model itself will not be discussed in detail. The implementation of the PDE solver, adjoint gradients, and Hessian information is described in detail in the [FlowEquation](../../../../Modeling/FlowEquation/python/DarcyFlow.ipynb) modeling example.

### Includes
This example will use many different components of MUQ as well as the `FlowEquation` class defined in [FlowEquation.h](FlowEquation.h).
*/

#include "FlowEquation.h"

// Additional MUQ headers used by this example.  The exact paths are assumed to follow MUQ's
// standard module layout; adjust them to match your installation if necessary.  Headers for the
// IdentityOperator, ExpOperator, and SliceOperator ModPieces from the Modeling module are also needed.
#include "MUQ/Modeling/ModPiece.h"
#include "MUQ/Modeling/WorkGraph.h"
#include "MUQ/Modeling/Distributions/Gaussian.h"
#include "MUQ/Modeling/Distributions/DensityProduct.h"

#include "MUQ/Approximation/GaussianProcesses/GaussianProcess.h"
#include "MUQ/Approximation/GaussianProcesses/MaternKernel.h"

#include "MUQ/Optimization/Optimizer.h"
#include "MUQ/Optimization/ModPieceCostFunction.h"

#include "MUQ/SamplingAlgorithms/SamplingProblem.h"
#include "MUQ/SamplingAlgorithms/SingleChainMCMC.h"
#include "MUQ/SamplingAlgorithms/DILIKernel.h"
#include "MUQ/SamplingAlgorithms/Diagnostics.h"

#include "MUQ/Utilities/RandomGenerator.h"

#include <boost/property_tree/ptree.hpp>

using namespace muq::Modeling;
using namespace muq::SamplingAlgorithms;
using namespace muq::Approximation;
using namespace muq::Utilities;
using namespace muq::Optimization;

/***
## Inverse Problem Formulation

For notational simplicity, let $\theta = \log K$. In this example, we will define and subsequently sample a posterior density
$$
\pi(\theta | y=y_{obs}) \propto \pi(y=y_{obs} | \theta) \pi(\theta),
$$
where $y_{obs}$ is a vector of observed hydraulic heads at the points $x_1,\ldots, x_M$. The observable variable $y$ is defined in terms of the PDE solution $h(x)$ evaluated at the observation locations:
$$
y = \left[ \begin{array}{c} h(x_1) + \epsilon_1\\ h(x_2)+ \epsilon_2\\ \vdots\\ h(x_M)+ \epsilon_M \end{array}\right],
$$
where each $\epsilon_i\sim N(0,\sigma_\epsilon^2)$ is an independent Gaussian random variable with variance $\sigma_\epsilon^2$. This Gaussian assumption is common in practice, but may not hold in real-world situations where more complicated modeling errors and observation noise are present. We ignore these issues in this example.

For the prior, we will assume that $K(x)$ is a lognormal process and use MUQ's Gaussian process utilities to define $\pi(\theta)$.

#### Discretization
We assume that $x\in\Omega=[0,1]$ and consider a uniform division of the domain $\Omega$ into $N$ equally sized cells, giving $N+1$ nodes in the discretization. To solve the PDE, we use a linear finite element discretization: the hydraulic head $h(x)$ is piecewise linear and defined in terms of $N+1$ coefficients, while the hydraulic conductivity $K(x)$ is piecewise constant and can be represented with $N$ degrees of freedom.

The `Discretization` class defined below holds information about the spatial discretization.
*/
struct Discretization
{
  Discretization(unsigned int numCellsIn)
  {
    numCells = numCellsIn;
    numNodes = numCells+1;

    nodeLocs = Eigen::VectorXd::LinSpaced(numCells+1,0,1);

    cellLocs.resize(1,numCells);
    cellLocs.row(0) = 0.5*(nodeLocs.tail(numCells) + nodeLocs.head(numCells));
  };

  unsigned int numCells;
  unsigned int numNodes;

  // Node locations
  Eigen::VectorXd nodeLocs;

  // Cell locations in a row vector
  Eigen::MatrixXd cellLocs;
};
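
/***
For example, with `numCells = 4` the node locations are $x = 0,\, 0.25,\, 0.5,\, 0.75,\, 1$ and `cellLocs` holds the cell centroids $x = 0.125,\, 0.375,\, 0.625,\, 0.875$.
*/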

/***
#### Prior Distribution
By definition, the conductivity field $K(x)$ must be positive. We will therefore define a prior distribution over $\log K(x)$ to ensure that $K(x)>0$. In particular, we will model the log conductivity field as a zero mean Gaussian process with covariance kernel $k(x,x^\prime)$:

$$
\log K(x) \sim GP(0,k(x,x^\prime))
$$

Here, we employ a Matern kernel with parameter $\nu=3/2$. This kernel takes the form

$$
k(x,x^\prime) = \sigma^2 \left(1+\frac{\sqrt{3}\|x-x^\prime\|}{L}\right)\exp\left[-\frac{\sqrt{3}\|x-x^\prime\|}{L}\right],
$$

where $\sigma^2$ is the marginal variance of the prior distribution and $L$ is the correlation lengthscale of the kernel. Note that this kernel results in random fields that have continuous first derivatives, but discontinuous second derivatives. More generally, Matern kernels with $\nu=p+1/2$ result in fields with $p$ continuous derivatives.

The MUQ `GaussianProcess` class is constructed from a mean function and a covariance kernel. The `GaussianProcess::Discretize` function can then be used to discretize the continuous Gaussian process. `Discretize` takes a vector of points and constructs a finite dimensional Gaussian distribution by evaluating the mean function at each point and the covariance kernel at each pair of points.

Note that this is not always the best way to discretize a Gaussian process. For example, if the field $K(x)$ is represented with a finite number of basis functions $\phi_i(x)$, then it would be preferable to project the Gaussian process onto the span of the basis functions. This approach is skipped here for simplicity, but can be important, especially when the $\phi_i(x)$ are high order or spectral basis functions.

<img src=Logk.svg width=600></img>

Our discretization of the PDE (see [DarcyFlow.ipynb](../../../../Modeling/FlowEquation/python/DarcyFlow.ipynb)) assumes that $K(x)$ is piecewise constant over each grid cell (see the figure above). Formally setting $\log K_i$ to the average of $\log K(x)$ over the cell $[x_i,x_{i+1})$ would result in a prior covariance matrix of the form

$$
\text{Cov}\left[\log K_i, \log K_j\right] = \frac{1}{(x_{i+1}-x_{i})(x_{j+1}-x_{j})}\int_{x_i}^{x_{i+1}} \int_{x_j}^{x_{j+1}} k(x_1,x_2) \,\,dx_2dx_1,
$$

where $x_i$ is the location of the node on the left edge of cell $i$. Here we approximate this covariance by simply evaluating the covariance kernel at the centroids of the cells:

$$
\text{Cov}\left[\log K_i, \log K_j\right] \approx k\left(\frac{1}{2}(x_i + x_{i+1}),\,\, \frac{1}{2}(x_j + x_{j+1}) \right).
$$

Note that this is equivalent to approximating the integrals with a midpoint rule and a single interval. A small stand-alone sketch of this centroid-based construction is given after this comment block.

**The vector $\log K = \left[\log K_1, \ldots, \log K_N\right]$ will be used to denote the collection of log conductivities on each cell.**
*/
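/***
To make the centroid approximation above concrete, the short helper below assembles the same covariance matrix directly from the Matern $\nu=3/2$ formula. It is only an illustrative sketch: the name `MidpointCovarianceSketch` and its arguments are introduced here purely for illustration, it is not used elsewhere in this example, and it assumes the usual `<cmath>` functions are available through the existing includes. The `CreatePrior` function that follows relies on MUQ's `MaternKernel` and `GaussianProcess::Discretize` to do the same job.
*/
Eigen::MatrixXd MidpointCovarianceSketch(Discretization const& mesh, double sigma2, double length)
{
  Eigen::MatrixXd cov(mesh.numCells, mesh.numCells);
  for(unsigned int i=0; i<mesh.numCells; ++i){
    for(unsigned int j=0; j<mesh.numCells; ++j){
      // Evaluate the Matern(nu=3/2) kernel at the centroids of cells i and j
      double dist = std::abs(mesh.cellLocs(0,i) - mesh.cellLocs(0,j));
      double scaled = std::sqrt(3.0)*dist/length;
      cov(i,j) = sigma2*(1.0 + scaled)*std::exp(-scaled);
    }
  }
  return cov;
}
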
std::shared_ptr<Gaussian> CreatePrior(Discretization const& mesh)
{
  // Define the prior distribution
  double priorVar = 1.0;
  double priorLength = 0.2;
  double priorNu = 3.0/2.0;

  auto covKernel = std::make_shared<MaternKernel>(1, priorVar, priorLength, priorNu); // The first argument "1" specifies that we are working in 1d

  auto meanFunc = std::make_shared<ZeroMean>(1,1); // dimension of x, and the number of components in K(x) if it were vector-valued (1 here)

  auto priorGP = std::make_shared<GaussianProcess>(meanFunc,covKernel);

  return priorGP->Discretize(mesh.cellLocs);
}

/***
#### True data
After discretizing the PDE, the hydraulic head $h(x)$ is represented as a piecewise linear function characterized by values at the $N+1$ nodes in the discretization. We will assume that every $P$-th node is observed, where $P=\lceil (N+1)/M\rceil$, resulting in a total of $M$ noisy hydraulic head observations. Let $y_{obs}$ denote these observations.

For this example, we will synthetically generate the data $y_{obs}$ using a "true" log conductivity field $\log K(x) = \cos(10x)$ and a mesh with $2N$ cells. Noise with variance $\sigma_\epsilon^2$ is added to the model output to simulate $y_{obs}$.
*/

Eigen::VectorXd GetTrueLogConductivity(Discretization const& mesh)
{
  return (10.0*mesh.cellLocs.row(0)).array().cos();
}

/***
The `SliceOperator` class in MUQ is used to downsample the model output to the observation locations. Using numpy notation, the output of `SliceOperator` for an input vector $x$ is `x[startInd:endInd:skip]`. The arguments to the `SliceOperator` constructor are `x.shape[0]`, `startInd`, `endInd`, and `skip`, as illustrated below.
*/
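/***
The stand-alone helper below is only a sketch of this slicing convention; it is not used elsewhere in this example, the name `SliceSketch` is introduced purely for illustration, and it assumes `<vector>` is available through the existing includes.
*/
Eigen::VectorXd SliceSketch(Eigen::VectorXd const& x, int startInd, int endInd, int skip)
{
  std::vector<double> vals;
  for(int i=startInd; i<endInd; i+=skip) // endInd is exclusive, matching numpy's slicing convention
    vals.push_back(x(i));

  return Eigen::Map<Eigen::VectorXd>(vals.data(), vals.size());
}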

Eigen::VectorXd GenerateData(Discretization const& mesh, unsigned int obsThin, double obsVar)
{
  // Generate the data
  unsigned numRefine = 2;
  Discretization fineMesh(numRefine*mesh.numCells);
  Eigen::VectorXd trueCond = GetTrueLogConductivity(fineMesh).array().exp();

  // Create the model with twice the mesh resolution
  Eigen::VectorXd recharge = Eigen::VectorXd::Ones(fineMesh.numCells);
  auto mod = std::make_shared<FlowEquation>(recharge);

  // Solve the forward problem with the true conductivity
  Eigen::VectorXd trueSol = mod->Evaluate( trueCond ).at(0);

  // Extract every (numRefine*obsThin)-th node of the fine solution as an "observation" and add noise
  auto slicer = std::make_shared<SliceOperator>(fineMesh.numNodes,0,fineMesh.numCells,numRefine*obsThin);

  return slicer->Evaluate(trueSol).at(0) + std::sqrt(obsVar)*RandomGenerator::GetNormal(slicer->outputSizes(0));
}

/***
#### Likelihood Function

To define a likelihood function $\pi(y|\theta)$, we need to construct a statistical model relating the PDE output $[h(x_0), h(x_P),\ldots, h(x_M)]$ to the observable quantity $y$. Here, we assume that $y$ is related to the hydraulic heads through an additive Gaussian error model

$$
y = \left[ \begin{array}{l} h(x_{0}; \theta)\\ h(x_{P}; \theta)\\ h(x_{2P}; \theta)\\ \vdots\\ h(x_M; \theta) \end{array} \right] + \epsilon,
$$

where $\epsilon\sim N(0,\sigma_{\epsilon}^2I)$ is an $M$-dimensional normal random variable with covariance $\sigma_{\epsilon}^2 I$. With this noise model, the distribution of the observable quantity $y$ given the log conductivities $\theta$ is simply a normal distribution centered at the model output:

$$
\pi(y | \theta) = N\left([h(x_0), h(x_P),\ldots, h(x_M)], \,\, \sigma_{\epsilon}^2I\right).
$$

The `ConstructPosterior` function uses a MUQ `WorkGraph` to compose the modeling components that, together, compute the likelihood function, prior density, and posterior density. In particular, it adds operations for $\theta\rightarrow K$, $K\rightarrow h$, $h\rightarrow [h(x_0),\ldots, h(x_M)]$, and finally $[h(x_0), \ldots, h(x_M)]\rightarrow \pi(y | \theta)$.

The `DensityProduct` ModPiece is used to combine the prior and likelihood. It takes 2 or more inputs representing log densities and returns the log of the density product (i.e., the sum of the log densities).

If [graphviz](https://graphviz.org/) is installed on your computer, the `graph->Visualize()` call below will produce a file called "WorkGraph.pdf" with a visualization of the components making up the posterior density.
*/

std::shared_ptr<ModPiece> ConstructPosterior(Discretization const& mesh,
                                             std::shared_ptr<Gaussian> const& priorDist,
                                             Eigen::VectorXd const& data,
                                             unsigned int obsThin,
                                             double obsVar)
{
  // Define the forward model
  Eigen::VectorXd recharge = Eigen::VectorXd::Ones(mesh.numCells);
  auto forwardMod = std::make_shared<FlowEquation>(recharge);

  auto graph = std::make_shared<WorkGraph>();

  // /////////////////////////////////////////////
  // LIKELIHOOD FUNCTION
  // Create a node for the parameter \theta and an exponential operator for \theta -> K
  graph->AddNode(std::make_shared<IdentityOperator>(mesh.numCells), "Log Conductivity");
  graph->AddNode(std::make_shared<ExpOperator>(mesh.numCells), "Conductivity");
  graph->AddEdge("Log Conductivity", 0, "Conductivity", 0);

  // Add the forward model, which evaluates K -> h
  graph->AddNode(forwardMod, "Forward Model");
  graph->AddEdge("Conductivity", 0, "Forward Model", 0);

  // Thin the model output to coincide with the observation locations
  graph->AddNode(std::make_shared<SliceOperator>(mesh.numNodes,0,mesh.numCells,obsThin), "Observables");
  graph->AddEdge("Forward Model", 0, "Observables", 0);

  // Create the Gaussian likelihood that evaluates [h(x_0),\ldots, h(x_M)] -> \pi(y | \theta)
  auto likelihood = std::make_shared<Gaussian>(data, obsVar*Eigen::VectorXd::Ones(data.size()));
  graph->AddNode(likelihood->AsDensity(), "Likelihood");
  graph->AddEdge("Observables", 0, "Likelihood", 0);

  // /////////////////////////////////////////////
  // PRIOR DENSITY
  graph->AddNode(priorDist->AsDensity(), "Prior");
  graph->AddEdge("Log Conductivity", 0, "Prior", 0);

  // /////////////////////////////////////////////
  // POSTERIOR DENSITY
  graph->AddNode(std::make_shared<DensityProduct>(2), "Posterior");
  graph->AddEdge("Prior",0,"Posterior",0);
  graph->AddEdge("Likelihood",0,"Posterior",1);

  // Create a file "WorkGraph.pdf" that visualizes the sequence of operations
  graph->Visualize("WorkGraph.pdf");

  return graph->CreateModPiece("Posterior");
}

/***
## Compute MAP Point
Now that we've defined the posterior, we can start trying to estimate the parameters $\theta = \log K$. The following function computes the maximum a posteriori (MAP) point using MUQ's Newton-Steihaug trust region optimizer. The optimizer assumes it is given a minimization problem, while we want to maximize the log posterior density. The `ModPieceCostFunction` class allows us to apply a negative scaling to the log posterior, which turns the maximization problem into a minimization problem that the optimizer can handle.

In addition to the trust region solver used here, MUQ can also leverage any method implemented in [NLOPT](https://nlopt.readthedocs.io/en/latest/). Try changing the `'NewtonTrust'` string to `'LBFGS'` to use the NLOPT limited memory BFGS implementation.
*/
Eigen::VectorXd ComputeMAP(std::shared_ptr<ModPiece> const& logPosterior,
                           Eigen::VectorXd const& startPt)
{
  std::cout << "\n======================================" << std::endl;
  std::cout << "Computing MAP Point" << std::endl;
  std::cout << "--------------------------------------" << std::endl;

  boost::property_tree::ptree opts;
  opts.put("Algorithm","NewtonTrust");
  opts.put("Ftol.AbsoluteTolerance", 1e-3);
  opts.put("PrintLevel", 1);

  // Create an objective function we can minimize -- the negative log posterior
  auto objective = std::make_shared<ModPieceCostFunction>(logPosterior, -1.0);
  auto solver = Optimizer::Construct(objective, opts);

  Eigen::VectorXd xopt;
  double fopt;
  std::tie(xopt,fopt) = solver->Solve({startPt});

  std::cout << "\nMAP Point: " << std::endl;
  std::cout << xopt.transpose() << std::endl;
  return xopt;
}

/***
## Sample Posterior with DILI

To sample the posterior distribution, we will employ MUQ's implementation of the dimension independent likelihood informed (DILI) method introduced in [Cui et al. 2015](https://arxiv.org/abs/1411.3688). The idea is to decompose the parameter space into two subspaces: a "likelihood informed" subspace where the posterior distribution is significantly different from the prior, and a complementary subspace where the posterior distribution is approximately the same as the prior distribution. Different MCMC kernels can then be applied to each subspace. If the MCMC sampler used in the complementary space is discretization invariant (e.g., the preconditioned Crank-Nicolson method), then the entire DILI sampling scheme is discretization invariant.

The subspaces are constructed by comparing the relative "strengths" of the likelihood and prior through the generalized eigenvalue problem

$$
Hv = \lambda \Gamma^{-1}v,
$$

where $\Gamma^{-1}$ is the prior precision matrix and $H$ is the Hessian of the negative log likelihood. In this example, we will use the exact Hessian at the MAP point; the `HessianType` option is therefore set to `Exact` in the `pt` property tree below. Gauss-Newton approximations of the Hessian are also commonly employed in practice (see Eq. (19) of [Cui et al. 2015](https://arxiv.org/pdf/1411.3688.pdf)). This can be accomplished in MUQ by changing the `HessianType` option to `GaussNewton`. Note that the `Exact` Hessian is not always positive definite across the entire parameter space. In this example, we only use the Hessian at the MAP point, so this is not an issue. When the `Adapt Interval` option is nonzero, however, the likelihood informed subspace is formed from an averaged Hessian and, in that case, it is generally advisable to use the `GaussNewton` Hessian. A small dense-matrix illustration of this generalized eigenvalue problem is sketched after this comment block.


#### Choosing Starting Points
Metrics from MUQ's `Diagnostics` class, like the multivariate potential scale reduction factor (MPSRF), can be used to evaluate whether our DILI sampler has converged. To compute these metrics, however, we need to run multiple chains from a diffuse set of initial points. Generating these points in high dimensions can be challenging. In this example, we use a Gaussian approximation of the posterior (i.e., the Laplace approximation) constructed using the Hessian of the log posterior at the MAP point. Let $\theta^\ast$ denote the MAP point and $H$ denote the Hessian of the negative log posterior $-\log\pi(\theta | y=y_{obs})$ evaluated at $\theta^\ast$. The posterior can then be approximated as

$$
\pi(\theta | y=y_{obs}) \approx \tilde{\pi}(\theta|y=y_{obs}) = N(\theta^\ast, H^{-1}).
$$

The DILI implementation in MUQ uses a low rank approximation of $H^{-1}$ to define the likelihood-informed and complementary subspaces. We can use the same low rank structure to generate samples of the approximate posterior. Let $\theta_{pr}\sim \pi(\theta)$ be a prior random variable and let $z_{lis}$ be a standard normal random variable in $\mathbb{R}^{N_{LIS}}$. From these random variables we can construct a random variable $\theta_{app}\sim \tilde{\pi}(\theta|y=y_{obs})$ through

$$
\theta_{app} = \theta^\ast + Vz_{lis} + P(\theta_{pr}-\mu_{pr}),
$$

where the columns of $V$ span the likelihood-informed subspace while also accounting for posterior correlations, and the matrix $P$ is an oblique projector onto the complementary space. Internally, MUQ constructs the matrix $V$ by projecting $H^{-1}$ onto the likelihood-informed subspace and then using a matrix square root to decorrelate the projected random variable.

Fortunately, in this example we do not need to consider the details of constructing $V$ and $P$. Both are constructed in MUQ's `DILIKernel` class and can be applied using the `ToCS`, `ToLIS`, and `FromLIS` functions. The `ToCS` function takes a vector in the full $N$-dimensional parameter space, projects it onto the prior-dominated complementary space (i.e., it computes $P\theta_{pr}$), and returns another $N$-dimensional vector. The `ToLIS` function has a similar purpose: it takes an $N$-dimensional vector and projects it onto the likelihood-informed subspace. However, `ToLIS` returns a vector with $N_{LIS}<N$ components. The `FromLIS` function can then be used to map a point on the $N_{LIS}$-dimensional likelihood-informed subspace to the full $N$-dimensional space.

To generate diffuse initial starting points for the chains, we can "inflate" the variance of $z_{lis}$ and $\theta_{pr}$. Below, the `SampleInflatedLaplace` function accomplishes this by multiplying $z_{lis}$ and $\theta_{pr}-\mu_{pr}$ by an inflation factor of $1.2$.
*/
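/***
As a concrete illustration of the generalized eigenvalue problem above, the helper below assembles it with dense matrices and counts the likelihood-dominated directions. This is only a sketch: MUQ's `DILIKernel` performs the analogous computation matrix-free using Hessian-vector products, and the name `CountLISDirectionsSketch`, its arguments, and the cutoff value are introduced here purely for illustration and are not used elsewhere in this example.
*/
unsigned int CountLISDirectionsSketch(Eigen::MatrixXd const& hess,      // Hessian of the negative log likelihood, H
                                      Eigen::MatrixXd const& priorPrec, // prior precision matrix, \Gamma^{-1}
                                      double cutoff = 0.1)
{
  // Solve H v = \lambda \Gamma^{-1} v.  (Assumes <Eigen/Eigenvalues> is available, e.g., through <Eigen/Dense>.)
  Eigen::GeneralizedSelfAdjointEigenSolver<Eigen::MatrixXd> solver(hess, priorPrec);

  // Directions with large eigenvalues are dominated by the likelihood and span the likelihood-informed subspace
  return static_cast<unsigned int>( (solver.eigenvalues().array() > cutoff).count() );
}
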
Eigen::VectorXd SampleInflatedLaplace(std::shared_ptr<DILIKernel> const& diliKernel,
                                      std::shared_ptr<Gaussian> const& priorDist,
                                      Eigen::VectorXd const& mapPt)
{
  double inflation = 1.2;

  diliKernel->CreateLIS(std::vector<Eigen::VectorXd>{mapPt});

  Eigen::VectorXd csSamp = diliKernel->ToCS( mapPt + inflation*(priorDist->Sample() - priorDist->GetMean()) );
  Eigen::VectorXd lisSamp = diliKernel->FromLIS( diliKernel->ToLIS(mapPt) + inflation*RandomGenerator::GetNormal(diliKernel->LISDim()) );

  return csSamp + lisSamp;
}

/***
#### Running the Sampler
We are now ready to define and run the DILI sampler. The `SampleDILI` function defines options for the MCMC samplers in both the likelihood-informed subspace and the complementary space. It then constructs the DILI sampler, samples the inflated Gaussian posterior approximation to obtain an initial point, and runs the chain.

Note that the pCN proposal (`CrankNicolsonProposal`) used in the complementary space has a stepsize parameter of $\beta=1$. This causes the pCN algorithm to use independent draws from the prior distribution as MCMC proposals. Because the complementary space is approximately the same as the prior distribution, this still results in large acceptance rates.
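
For reference, with a Gaussian prior $N(\mu_{pr},\Gamma)$ the pCN proposal takes the standard form

$$
\theta^\prime = \mu_{pr} + \sqrt{1-\beta^2}\,(\theta - \mu_{pr}) + \beta\,\xi, \qquad \xi\sim N(0,\Gamma),
$$

so setting $\beta=1$ yields $\theta^\prime = \mu_{pr} + \xi$, an independent draw from the prior. (This is the usual textbook form of the proposal, stated here for intuition; see the `CrankNicolsonProposal` documentation for the exact implementation.)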
*/
std::shared_ptr<SampleCollection> SampleDILI(std::shared_ptr<ModPiece> const& posterior,
                                             Eigen::VectorXd const& mapPt,
                                             std::shared_ptr<Gaussian> const& priorDist,
                                             unsigned int numSamps)
{
  boost::property_tree::ptree pt;
  pt.put("NumSamples",numSamps);
  //pt.put("BurnIn", 0);
  pt.put("PrintLevel",3);
  pt.put("HessianType","Exact");
  pt.put("Adapt Interval", 0);
  pt.put("Initial Weight", 100);
  pt.put("Prior Node", "Prior");
  pt.put("Likelihood Node", "Likelihood");

  pt.put("LIS Block", "LIS");
  pt.put("LIS.Method", "MHKernel");
  pt.put("LIS.Proposal","MyProposal");
  pt.put("LIS.MyProposal.Method","MALAProposal");
  pt.put("LIS.MyProposal.StepSize", 0.4);

  pt.put("CS Block", "CS");
  pt.put("CS.Method", "MHKernel");
  pt.put("CS.Proposal","MyProposal");
  pt.put("CS.MyProposal.Method", "CrankNicolsonProposal");
  pt.put("CS.MyProposal.Beta",1.0);
  pt.put("CS.MyProposal.PriorNode","Prior");

  // create a sampling problem
  auto problem = std::make_shared<SamplingProblem>(posterior);

  auto diliKernel = std::make_shared<DILIKernel>(pt, problem);
  std::vector<std::shared_ptr<TransitionKernel>> kernels(1);
  kernels.at(0) = diliKernel;

  auto sampler = std::make_shared<SingleChainMCMC>(pt, kernels);

  Eigen::VectorXd startPt = SampleInflatedLaplace(diliKernel, priorDist, mapPt);

  return sampler->Run(startPt);
}

/***
## Sample Posterior with pCN
For comparison with the DILI results, we will use a standard preconditioned Crank-Nicolson proposal. This method is discretization invariant, but does not leverage the same structure as DILI and has much poorer performance. Other geometry-aware enhancements of the pCN proposal are also available in MUQ (e.g., [$\infty$-MALA](https://mituq.bitbucket.io/source/_site/latest/classmuq_1_1SamplingAlgorithms_1_1InfMALAProposal.html)), but are not shown in this example.

*/
std::shared_ptr<SampleCollection> SamplePCN(std::shared_ptr<ModPiece> const& posterior,
                                            Eigen::VectorXd const& startPt,
                                            unsigned int numSamps)
{
  boost::property_tree::ptree pt;
  pt.put("NumSamples", numSamps); // number of Monte Carlo samples
  pt.put("BurnIn",0);
  pt.put("PrintLevel",3);
  pt.put("KernelList", "Kernel1"); // the transition kernel
  pt.put("Kernel1.Method","MHKernel");
  pt.put("Kernel1.Proposal", "MyProposal"); // the proposal
  pt.put("Kernel1.MyProposal.Method", "CrankNicolsonProposal");
  pt.put("Kernel1.MyProposal.Beta", 0.05);
  pt.put("Kernel1.MyProposal.PriorNode", "Prior"); // The node in the WorkGraph containing the prior density

  // create a sampling problem
  auto problem = std::make_shared<SamplingProblem>(posterior);

  auto sampler = std::make_shared<SingleChainMCMC>(pt,problem);

  // Run the chain from the given starting point (an approximate posterior sample, which reduces burn-in)
  return sampler->Run(startPt);
}


/***
## Put it all together
*/
int main(){

  // Define the mesh
  unsigned int numCells = 50;
  Discretization mesh(numCells);

  // Generate synthetic "truth" data
  unsigned int obsThin = 4;
  double obsVar = 0.01*0.01;
  auto data = GenerateData(mesh, obsThin, obsVar);

  // Create the prior distribution
  std::shared_ptr<Gaussian> prior = CreatePrior(mesh);

  // Construct the posterior
  std::shared_ptr<ModPiece> posterior = ConstructPosterior(mesh, prior, data, obsThin, obsVar);

  // Compute the MAP point starting from the prior mean
  Eigen::VectorXd mapPt = ComputeMAP(posterior, prior->GetMean());

  // Run DILI multiple times
  unsigned int numSamps = 30000;
  unsigned int numChains = 4;
  std::vector<std::shared_ptr<SampleCollection>> chains(numChains);

  for(unsigned int i=0; i<numChains; ++i){
    std::cout << "\n===============================" << std::endl;
    std::cout << "Running DILI Chain " << i+1 << " / " << numChains << std::endl;
    std::cout << "-------------------------------" << std::endl;

    chains.at(i) = SampleDILI(posterior, mapPt, prior, numSamps);
  }

  /***
  #### Assess Convergence
  We now use the multiple DILI chains to assess convergence with the MPSRF diagnostic described in [[Brooks and Gelman, 1998](https://www.tandfonline.com/doi/abs/10.1080/10618600.1998.10474787)] and the multivariate effective sample size of [[Vats et al., 2019](https://doi.org/10.1093/biomet/asz002)]. Note that when computing the MPSRF, we also set the `Split` option to `true`, which follows the suggestions of [[Vehtari et al., 2021](https://arxiv.org/ct?url=https%3A%2F%2Fdx.doi.org%2F10.1214%2F20-BA1221&v=8607c76e)] and splits each chain in half.
  */
  double diliESS = 0;
  for(auto& chain : chains)
    diliESS += chain->ESS("MultiBatch")(0);

  boost::property_tree::ptree opts;
  opts.put("Split", true);
  opts.put("Transform", false);
  opts.put("Multivariate",true);

  Eigen::VectorXd diliMPSRF = Diagnostics::Rhat(chains, opts);

  std::cout << "\n\nDILI Diagnostics:\n Multivariate ESS: " << diliESS << "\n MPSRF: " << diliMPSRF(0) << std::endl;

  if(diliMPSRF(0)>1.01){
    std::cout << "\nDILI HAS NOT CONVERGED!" << std::endl;
  }else{
    std::cout << "\nDILI CONVERGED!" << std::endl;
  }

  /***
  #### Run pCN for comparison
  */
  std::vector<std::shared_ptr<SampleCollection>> pcnChains(numChains);

  for(unsigned int i=0; i<numChains; ++i){
    std::cout << "\n===============================" << std::endl;
    std::cout << "Running pCN Chain " << i+1 << " / " << numChains << std::endl;
    std::cout << "-------------------------------" << std::endl;

    pcnChains.at(i) = SamplePCN(posterior, chains.at(i)->at(0)->ToVector(), numSamps);
  }

  double pcnESS = 0;
  for(auto& chain : pcnChains)
    pcnESS += chain->ESS("MultiBatch")(0);

  Eigen::VectorXd pcnMPSRF = Diagnostics::Rhat(pcnChains, opts);

  std::cout << "\n\npCN Diagnostics:\n Multivariate ESS: " << pcnESS << "\n MPSRF: " << pcnMPSRF(0) << std::endl;

  if(pcnMPSRF(0)>1.01){
    std::cout << "\npCN HAS NOT CONVERGED!" << std::endl;
  }else{
    std::cout << "\npCN CONVERGED!" << std::endl;
  }

  return 0;
}