diff --git a/notebooks/mnist.qmd b/notebooks/mnist.qmd
index 3ca0ba0d1753be4d56de5da65c9567ba92163bb1..7bc3795d2f30392209fcc4caf4ef1bcee557d069 100644
--- a/notebooks/mnist.qmd
+++ b/notebooks/mnist.qmd
@@ -171,7 +171,7 @@ _finaliser = x -> x                         # finaliser function
 sampler = ConditionalSampler(
     𝒟x, 𝒟y, 
     input_size=(input_dim,), 
-    batch_size=1
+    batch_size=1,
 )
 α = [1.0,1.0,1e-2]      # penalty strengths
 ```
diff --git a/paper/paper.pdf b/paper/paper.pdf
index d59b8d6eb51bd67c8b0070adfbd3bf6244bdcd2f..5fbfae48270587a178731d95257dff7b8c5b6c19 100644
Binary files a/paper/paper.pdf and b/paper/paper.pdf differ
diff --git a/paper/paper.tex b/paper/paper.tex
index f4bd6b8ad15a1921a1426800f8b795fbf0afd73c..b8414ce512df0df28a563fb676797ad7f05e82ca 100644
--- a/paper/paper.tex
+++ b/paper/paper.tex
@@ -45,7 +45,7 @@
 \newtheorem{definition}{Definition}[section]
 
 
-\title{ECCoEs from the Black Box: Letting Models speak for Themselves}
+\title{ECCos from the Black Box: Letting Models Speak for Themselves}
 
 
 % The \author macro works with any number of authors. There are two commands
@@ -96,12 +96,12 @@
 
 
 \begin{abstract}
-  We propose Eccoe: an effortless and rigorous way to produce plausible and conformal Counterfactual Explanations for Black Box Models using Conformal Prediction. To address the need for plausible explanations, existing work has primarily relied on surrogate models to learn the data-generating process. This effectively reallocates the task of learning realistic representations of the data from the model itself to the surrogate. Consequently, the generated explanations may look plausible to humans but not necessarily conform with the behaviour of the Black Box Model. We formalise this notion through the introduction of new evaluation measures. In order to still address the need for plausibility, we build on a recent approach that works by minimizing predictive model uncertainty. Using differentiable Conformal Prediction, we relax the previous assumption that the Black Box Model can produce predictive uncertainty estimates.
+  Counterfactual Explanations offer an intuitive and straightforward way to explain Black Box Models, but they are not unique. To address the need for plausible explanations, existing work has primarily relied on surrogate models to learn how the input data is distributed. This effectively reallocates the task of learning realistic representations of the data from the model itself to the surrogate. Consequently, the generated explanations may look plausible to humans but not necessarily conform with the behaviour of the Black Box Model. We formalise this notion of model conformity through the introduction of tailored evaluation measures and propose a novel algorithmic framework for generating \textbf{E}nergy-Constrained \textbf{C}onformal \textbf{Co}unterfactuals that are only as plausible as the model permits. To do so, \textbf{ECCo} leverages recent advances in energy-based modelling and predictive uncertainty quantification through conformal inference. Through illustrative examples and extensive empirical studies, we demonstrate that ECCos reconcile the need for plausibility with the need for model conformity. 
 \end{abstract}
 
 \section{Introduction}\label{intro}
 
-Counterfactual Explanations are a powerful, flexible and intuitive way to not only explain Black Box Models but also enable affected individuals to challenge them through the means of Algorithmic Recourse. Instead of opening the black box, Counterfactual Explanations work under the premise of strategically perturbing model inputs to understand model behaviour \citep{wachter2017counterfactual}. Intuitively speaking, we generate explanations in this context by asking simple what-if questions of the following nature: `Our credit risk model currently predicts that this individual's credit profile is too risky to offer them a loan. What if they reduced their monthly expenditures by 10\%? Will our model then predict that the individual is credit-worthy'? 
+Counterfactual Explanations provide a powerful, flexible and intuitive way to not only explain Black Box Models but also enable affected individuals to challenge them through the means of Algorithmic Recourse. Instead of opening the black box, Counterfactual Explanations work under the premise of strategically perturbing model inputs to understand model behaviour \citep{wachter2017counterfactual}. Intuitively speaking, we generate explanations in this context by asking simple what-if questions of the following nature: `Our credit risk model currently predicts that this individual's credit profile is too risky to offer them a loan. What if they reduced their monthly expenditures by 10\%? Will our model then predict that the individual is credit-worthy'? 
 
 This is typically implemented by defining a target outcome $\mathbf{y}^* \in \mathcal{Y}$ for some individual $\mathbf{x} \in \mathcal{X}=\mathbb{R}^D$ described by $D$ attributes, for which the model $M_{\theta}:\mathcal{X}\mapsto\mathcal{Y}$ initially predicts a different outcome: $M_{\theta}(\mathbf{x})\ne \mathbf{y}^*$. Counterfactuals are then searched by minimizing a loss function that compares the predicted model output to the target outcome: $\text{yloss}(M_{\theta}(\mathbf{x}),\mathbf{y}^*)$. Since Counterfactual Explanations (CE) work directly with the Black Box Model, valid counterfactuals always have full local fidelity by construction \citep{mothilal2020explaining}. Fidelity is defined as the degree to which explanations approximate the predictions of the Black Box Model. This is arguably one of the most important evaluation metrics for model explanations, since any explanation that explains a prediction not actually made by the model is useless \citep{molnar2020interpretable}. 
 
@@ -116,10 +116,12 @@ When people talk about Black Box Models, this is usually the type of model they
 
 In the context of CE, the idea that no two explanations are the same arises almost naturally. Even the baseline approach proposed by \citet{wachter2017counterfactual} can yield a diverse set of explanations if counterfactuals are initialised randomly. This multiplicity of explanations has not only been acknowledged in the literature but positively embraced: since individuals seeking Algorithmic Recourse (AR) have unique preferences,~\citet{mothilal2020explaining}, for example, have prescribed \textit{diversity} as an explicit goal for counterfactuals. More generally, the literature on CE and AR has brought forward a myriad of desiderata for explanations, which we will discuss in more detail in the following section.
 
-\section{From Adversarial Examples to Plausible Explanations}\label{background}
+\section{Background and Related Work}\label{background}
 
 In this section, we provide some background on Counterfactual Explanations and our motivation for this work. To start off, we briefly introduce the methodology underlying most state-of-the-art (SOTA) counterfactual generators.
 
+\subsection{Gradient-Based Counterfactual Search}\label{gradient}
+
 While Counterfactual Explanations can be generated for arbitrary regression models \citep{spooner2021counterfactual}, existing work has primarily focused on classification problems. Let $\mathcal{Y}=(0,1)^K$ denote the one-hot-encoded output domain with $K$ classes. Then most SOTA counterfactual generators rely on gradient descent to optimize different flavours of the following counterfactual search objective:
 
 \begin{equation} \label{eq:general}
@@ -132,10 +134,14 @@ Here $\text{yloss}$ denotes the primary loss function already introduced above a
 
 Solutions to Equation~\ref{eq:general} are considered valid as soon as the predicted label matches the target label. A stripped-down counterfactual explanation is therefore little different from an adversarial example. In Figure~\ref{fig:adv}, for example, we have applied the baseline approach proposed in \citet{wachter2017counterfactual} to MNIST data (centre panel). This approach solves Equation~\ref{eq:general} through gradient descent in the feature space with a penalty for the distance between the factual $\mathbf{x}$ and the counterfactual $\mathbf{x}^{\prime}$. The underlying classifier $M_{\theta}$ is a simple Multi-Layer Perceptron (MLP) with good test accuracy. For the generated counterfactual $\mathbf{x}^{\prime}$, the model predicts the target label with high confidence (centre panel in Figure~\ref{fig:adv}). The explanation is valid by definition, even though it looks a lot like an Adversarial Example \citep{goodfellow2014explaining}. \citet{schut2021generating} make the connection between Adversarial Examples and Counterfactual Explanations explicit and propose using a Jacobian-Based Saliency Map Attack (JSMA) to solve Equation~\ref{eq:general}. They demonstrate that this approach yields realistic and sparse counterfactuals for Bayesian, adversarially robust classifiers. Applying their approach to our simple MNIST classifier does not yield a realistic counterfactual, but this one, too, is valid (right panel in Figure~\ref{fig:adv}). 
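+
+To make this recipe concrete, the following minimal sketch (in Julia, the language of our accompanying notebooks) illustrates how Equation~\ref{eq:general} can be solved through plain gradient descent with a distance penalty. The classifier \texttt{M}, the loss \texttt{yloss} and the hyperparameters are placeholders rather than the exact choices used in our experiments.
+
+\begin{verbatim}
+# Minimal sketch of gradient-based counterfactual search (Wachter-style).
+# `M` is any differentiable classifier and `yloss` compares its prediction
+# to the one-hot target; both are supplied by the user.
+using Zygote: gradient
+
+function generate_counterfactual(M, yloss, x, y_target; lambda=0.1, eta=0.05, T=100)
+    x_cf = copy(x)                                  # initialise at the factual
+    loss(z) = yloss(M(z), y_target) + lambda * sum(abs2, z - x)
+    for _ in 1:T
+        g = gradient(loss, x_cf)[1]                 # d loss / d x_cf
+        x_cf -= eta * g                             # descend in feature space
+    end
+    return x_cf
+end
+\end{verbatim}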
 
+\subsection{From Adversarial Examples to Plausible Explanations}
+
 The crucial difference between Adversarial Examples (AE) and Counterfactual Explanations is one of intent. While an AE is intended to go unnoticed, a CE should have certain desirable properties. The literature has made this explicit by introducing various so-called \textit{desiderata}. To properly serve both AI practitioners and individuals affected by AI decision-making systems, counterfactuals should be sparse, proximate~\citep{wachter2017counterfactual}, actionable~\citep{ustun2019actionable}, diverse~\citep{mothilal2020explaining}, plausible~\citep{joshi2019realistic,poyiadzi2020face,schut2021generating}, robust~\citep{upadhyay2021robust,pawelczyk2022probabilistically,altmeyer2023endogenous} and causal~\citep{karimi2021algorithmic} among other things. 
 
 Researchers have come up with various ways to meet these desiderata, which have been extensively surveyed and evaluated in various studies~\citep{verma2020counterfactual,karimi2020survey,pawelczyk2021carla,artelt2021evaluating,guidotti2022counterfactual}. Perhaps unsurprisingly, the different desiderata are often positively correlated. For example, \citet{artelt2021evaluating} find that plausibility typically also leads to improved robustness. Similarly, plausibility has also been connected to causality in the sense that plausible counterfactuals respect causal relationships \citep{mahajan2020preserving}. 
 
+\subsubsection{Plausibility through Surrogates}
+
 Arguably, the plausibility of counterfactuals has been among the primary concerns and some have focused explicitly on this goal. \citet{joshi2019realistic}, for example, were among the first to suggest that instead of searching counterfactuals in the feature space $\mathcal{X}$, we can instead traverse a latent embedding $\mathcal{Z}$ that implicitly codifies the data generating process (DGP) of $\mathbf{x}\sim\mathcal{X}$. To learn the latent embedding, they introduce a surrogate model. In particular, they propose to use the latent embedding of a Variational Autoencoder (VAE) trained to generate samples $\mathbf{x}^* \leftarrow \mathcal{G}(\mathbf{z})$ where $\mathcal{G}$ denotes the decoder part of the VAE. Provided the surrogate model is well-trained, their proposed approach ---REVISE--- can yield compelling counterfactual explanations like the one in the centre panel of Figure~\ref{fig:vae}. 
 
 Others have proposed similar approaches. \citet{dombrowski2021diffeomorphic} traverse the base space of a normalizing flow to solve Equation~\ref{eq:general}, essentially relying on a different surrogate model for the generative task. \citet{poyiadzi2020face} use density estimators ($\hat{p}: \mathcal{X} \mapsto [0,1]$) to constrain the counterfactual paths. \citet{karimi2021algorithmic} argue that counterfactuals should comply with the causal model that generates the data. All of these different approaches share a common goal: ensuring that the generated counterfactuals comply with the true and unobserved DGP. To summarize this broad objective, we propose the following definition:
@@ -163,24 +169,22 @@ Surrogate models offer an obvious solution to achieve this objective. Unfortunat
   \end{minipage}
 \end{figure}
 
-\section{Evaluating the Faithfulness of Counterfactuals}\label{conformity}
+\subsubsection{Plausibility through Minimal Predictive Uncertainty}
 
-In Section~\ref{background} we explained that Counterfactual Explanations work directly with Black Box Model, so fidelity is not a concern. This may explain why research has primarily focused on other desiderata, most notably plausibility (Definition~\ref{def:plausible}). Enquiring about the plausibility of a counterfactual essentially boils down to the following question: `Is this counterfactual consistent with the underlying data'? To introduce this section, we posit a related, slightly more nuanced question: `Is this counterfactual consistent with what the model has learned about the underlying data'? We will argue that fidelity is not a sufficient evaluation measure to answer this question and propose a novel way to assess if explanations conform with model behaviour. Finally, we will introduce a framework for Conformal Counterfactual Explanations, that reconciles the notions of plausibility and model conformity. 
+\citet{schut2021generating} show that to meet the plausibility objective we need not explicitly model the input distribution. Pointing to the undesirable engineering overhead induced by surrogate models, they propose that we rely on the implicit minimisation of predictive uncertainty instead. Their proposed methodology solves Equation~\ref{eq:general} by greedily applying JSMA in the feature space with standard cross-entropy loss and no penalty at all. They demonstrate theoretically and empirically that their approach yields counterfactuals for which the model $M_{\theta}$ predicts the target label $\mathbf{y}^*$ with high confidence. Provided the model is well-specified, these counterfactuals are plausible.  
 
-\subsection{From Fidelity to Model Conformity}
+Unfortunately, this idea hinges on the assumption that the Black Box Model provides well-calibrated predictive uncertainty estimates. The authors argue that in light of rapid advances in Bayesian Deep Learning (DL), this assumption is overall less costly than the engineering overhead induced by using surrogate models. This is even more true today, as recent work has put Laplace Approximation back on the map for truly effortless Bayesian DL \citep{immer2020improving,daxberger2021laplace,antoran2023sampling}. Nonetheless, the need for Bayesian methods may be too restrictive in some cases. Our proposed methodology, which we will turn to next, relaxes this restriction.
 
-The word \textit{fidelity} stems from the Latin word `fidelis', which means `faithful, loyal, trustworthy' \citep{mw2023fidelity}. As we explained in Section~\ref{background}, model explanations are considered faithful if their corresponding predictions coincide with the predictions made by the model itself. Since this definition of faithfulness is not useful in the context of Counterfactual Explanations, we propose an adapted version: 
+\section{Energy-Constrained Conformal Counterfactuals}\label{ecco}
 
-\begin{definition}[Conformal Counterfactuals]
-  \label{def:conformal}
-  Let $\mathcal{X}_{\theta}|\mathbf{y}^* = p_{\theta}(x|\mathbf{y}^*)$ denote the conditional distribution of $\mathbf{x}$ in the target class $\mathbf{y}^*$, where $\theta$ denotes the parameters of model $M_{\theta}$. Then for $\mathbf{x}^{\prime}$ to be considered a conformal counterfactual, we need: $\mathbf{x}^{\prime} \sim \mathcal{X}_{\theta}|\mathbf{y}^*$.
-\end{definition}
+The primary objective of this work is to generate maximally plausible counterfactuals under minimal intervention. Our proposed framework is based on the premise that explanations should be plausible, but not plausible at all costs. Energy-Constrained Conformal Counterfactuals (ECCo) achieve this goal in two ways: firstly, they rely on the Black Box itself for the generative task; secondly, they quantify the model's predictive uncertainty in a model-agnostic way.
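+
+Purely as an illustrative sketch of how these two components can enter the counterfactual search (rather than the exact objective we optimise), one may think of augmenting Equation~\ref{eq:general} with two additional penalties,
+
+\[
+\min_{\mathbf{x}^{\prime} \in \mathcal{X}} \left\{ \text{yloss}(M_{\theta}(\mathbf{x}^{\prime}),\mathbf{y}^*) + \lambda_1 \text{dist}(\mathbf{x}^{\prime},\mathbf{x}) + \lambda_2 \mathcal{E}_{\theta}(\mathbf{x}^{\prime}|\mathbf{y}^*) + \lambda_3 \Omega(C_{\theta}(\mathbf{x}^{\prime};\alpha)) \right\} ,
+\]
+
+where $\mathcal{E}_{\theta}(\mathbf{x}^{\prime}|\mathbf{y}^*)$ denotes a model-based energy of the counterfactual in the target class and $\Omega(C_{\theta}(\cdot;\alpha))$ measures the size of a conformal prediction set. Both ingredients are introduced in the remainder of this section.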
+
+\subsection{Quantifying the Model's Generative Property}
 
-In words, conformal counterfactuals conform with what the predictive model has learned about the input data $\mathbf{x}$. Since this definition works with distributional properties, it explicitly accounts for the multiplicity of explanations we discussed earlier. Except for the posterior conditional distribution $p_{\theta}(\mathbf{x}|\mathbf{y}^*)$, we already have access to all the ingredients in Definition~\ref{def:conformal}.
 
 How can we quantify $p_{\theta}(\mathbf{x}|\mathbf{y}^*)$? After all, the predictive model $M_{\theta}$ was trained to discriminate outputs conditional on inputs, which is a different conditional distribution: $p_{\theta}(\mathbf{y}|\mathbf{x})$. Learning the distribution over inputs $p_{\theta}(\mathbf{x}|\mathbf{y}^*)$ is a generative task that $M_{\theta}$ was not explicitly trained for. In the context of Counterfactual Explanations, it is the task that existing approaches have reallocated from the model itself to a surrogate. 
 
-Fortunately, recent work by \citet{grathwohl2020your} on Energy Based Models (EBM) has pointed out that there is a `generative model hidden within every standard discriminative model'. The authors show that we can draw samples from the posterior conditional distribution $p_{\theta}(\mathbf{x}|\mathbf{y})$ using Stochastic Gradient Langevin Dynamics (SGLD). In doing so, it is possible to train classifiers jointly for the discriminative task using standard cross-entropy and the generative task using SGLD. They demonstrate empirically that among other things this improves predictive uncertainty quantification for discriminative models. 
+Recent work by \citet{grathwohl2020your} on Energy-Based Models (EBM) has pointed out that there is a `generative model hidden within every standard discriminative model'. The authors show that we can draw samples from the posterior conditional distribution $p_{\theta}(\mathbf{x}|\mathbf{y})$ using Stochastic Gradient Langevin Dynamics (SGLD). In doing so, it is possible to train classifiers jointly for the discriminative task using standard cross-entropy and for the generative task using SGLD. They demonstrate empirically that, among other things, this improves predictive uncertainty quantification for discriminative models. 
 
 To see how their proposed conditional sampling strategy can be applied in our context, note that if we fix $\mathbf{y}$ to our target value $\mathbf{y}^*$, we can sample from $p_{\theta}(\mathbf{x}|\mathbf{y}^*)$ using SGLD as follows, 
 
@@ -194,47 +198,7 @@ where $\mathbf{r}_j \sim \mathcal{N}(\mathbf{0},\mathbf{I})$ is the stochastic t
 
 While $\mathbf{x}_J$ is only guaranteed to distribute as $p_{\theta}(\mathbf{x}|\mathbf{y}^*)$ if $\epsilon \rightarrow 0$ and $J \rightarrow \infty$, the bias introduced for a small finite $\epsilon$ is negligible in practice \citep{murphy2023probabilistic,grathwohl2020your}. While \citet{grathwohl2020your} use Equation~\ref{eq:sgld} during training, we are interested in applying the conditional sampling procedure in a post hoc fashion to any standard discriminative model. Generating multiple samples in this manner yields an empirical distribution $\hat{\mathcal{X}}_{\theta}|\mathbf{y}^*$, which we can use to assess if a given counterfactual $\mathbf{x}^{\prime}$ conforms with the model $M_{\theta}$ (Definition~\ref{def:conformal}). 
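+
+As a minimal sketch, the post hoc sampler can be implemented as follows, assuming only that the trained classifier exposes its logits; following \citet{grathwohl2020your}, the energy in the target class is taken to be the negative logit, and the step size and number of steps are purely illustrative.
+
+\begin{verbatim}
+# Post hoc conditional sampling via SGLD (cf. the update rule above).
+# `logits(x)` stands for the unnormalised outputs of the trained classifier;
+# the energy in the target class y* is the negative logit, E(x|y*) = -f(x)[y*].
+using Zygote: gradient
+
+function sample_conditional(logits, y_star, x0; epsilon=0.01, J=1000)
+    x = copy(x0)
+    energy(z) = -logits(z)[y_star]
+    for _ in 1:J
+        g = gradient(energy, x)[1]
+        x -= epsilon^2 / 2 * g                  # drift towards low energy
+        x += epsilon * randn(size(x)...)        # stochastic term r_j
+    end
+    return x
+end
+\end{verbatim}
+
+Collecting the draws from repeated calls gives the empirical distribution $\hat{\mathcal{X}}_{\theta}|\mathbf{y}^*$ referred to above.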
 
-\textbf{TBD}
-
-\begin{itemize}
-  \item What exact sampler do we use? ImproperSGLD as in \citet{grathwohl2020your} seems to work best.
-\end{itemize}
-
-\subsection{Evaluation Measures}\label{evaluation}
-
-Above we have defined plausibility (\ref{def:plausible}) and conformity (\ref{def:conformal}) for Counterfactual Explanations. In this subsection, we introduce evaluation measures that facilitate a quantitative evaluation of counterfactuals for these objectives. 
-
-Firstly, in order to assess the plausibility of counterfactuals we adapt the implausibility metric proposed in \citet{guidotti2022counterfactual}. The authors propose to evaluate plausibility in terms of the distance of the counterfactual $\mathbf{x}^{\prime}$ from its nearest neighbour in the target class $\mathbf{y}^*$: the smaller this distance, the more plausible the counterfactual. Instead of focusing only on the nearest neighbour of $\mathbf{x}^{\prime}$, we suggest computing the average over distances from multiple (possibly all) observed instances in the target class. Formally, for a single counterfactual, we have:
-
-\begin{equation}\label{eq:impl}
-  \begin{aligned}
-    \text{impl} = \frac{1}{\lvert\mathbf{x} \in \mathcal{X}|\mathbf{y}^*\rvert} \sum_{\mathbf{x} \in \mathcal{X}|\mathbf{y}^*} \text{dist}(\mathbf{x}^{\prime},\mathbf{x})
-  \end{aligned}
-\end{equation}
-
-This measure is straightforward to compute and should be less sensitive to outliers in the target class than the one based on the nearest neighbour. It also gives rise to a very similar evaluation measure for conformity. We merely swap out the subsample of individuals in the target class for the empirical distribution of generated conditional samples:
-
-\begin{equation}\label{eq:conf}
-  \begin{aligned}
-    \text{conf} = \frac{1}{\lvert\mathbf{x} \in \mathcal{X}_{\theta}|\mathbf{y}^*\rvert} \sum_{\mathbf{x} \in \mathcal{X}_{\theta}|\mathbf{y}^*} \text{dist}(\mathbf{x}^{\prime},\mathbf{x})
-  \end{aligned}
-\end{equation}
-
-As noted by \citet{guidotti2022counterfactual}, these distance-based measures are simplistic and more complex alternative measures may ultimately be more appropriate for the task. For example, we considered using statistical divergence measures instead. This would involve generating not one but many counterfactuals and comparing the generated empirical distribution to the target distributions in Definitions~\ref{def:plausible} and~\ref{def:conformal}. While this approach is potentially more rigorous, generating enough counterfactuals is not always practical. 
-
-\section{A Framework for Conformal Counterfactual Explanations}\label{cce}
-
-Now that we have a framework for evaluating Counterfactual Explanations in terms of their plausibility and conformity, we are interested in finding a way to generate counterfactuals that are as plausible and conformal as possible. We hypothesize that a narrow focus on plausibility may come at the cost of reduced conformity. Using a surrogate model for the generative task, for example, may improve plausibility but inadvertently yield counterfactuals that are more consistent with the surrogate than the Black Box Model itself. We suggest that one way to ensure model conformity is to rely strictly on the model itself. In this section, we introduce a novel framework that meets this requirement, works under minimal assumptions and does not impede the plausibility objective: Conformal Counterfactual Explanations.
-
-\subsection{Plausible Counterfactuals through Minimal Uncertainty}
-
-Our proposed methodology is built on the findings presented in~\citet{schut2021generating}. The authors demonstrate that it is not only possible but remarkably easy to generate plausible counterfactuals for Black Box Models that provide predictive uncertainty estimates. Their proposed algorithm solves Equation~\ref{eq:general} by greedily applying JSMA in the feature space with standard cross-entropy loss and no penalty at all. They show that this is equivalent to minimizing predictive uncertainty and hence yields counterfactuals for which the model $M_{\theta}$ predicts the target label $\mathbf{y}^*$ with high confidence. Provided the model is well-calibrated, these counterfactuals are plausible which the authors demonstrate empirically through benchmarks \citep{schut2021generating}.
-
-Unfortunately, this idea hinges on the crucial assumption that the Black Box Model provides predictive uncertainty estimates. The authors argue that in light of rapid advances in Bayesian Deep Learning (DL), this assumption is overall less costly than the engineering overhead induced by using surrogate models. This is even more true today, as recent work has put Laplace Approximation back on the map for truly effortless Bayesian DL \citep{immer2020improving,daxberger2021laplace,antoran2023sampling}. Nonetheless, the need for Bayesian methods may be too restrictive in some cases. 
-
-In looking for ways to lift that restriction, we found a promising alternative candidate for predictive uncertainty quantification (UQ) that we will briefly introduce next: Conformal Prediction. 
-
-\subsection{Conformal Prediction}
+\subsection{Quantifying the Model's Predictive Uncertainty}
 
 Conformal Prediction (CP) is a scalable and statistically rigorous approach to predictive UQ that works under minimal distributional assumptions \citep{angelopoulos2021gentle}. It has recently gained popularity in the Machine Learning community \citep{angelopoulos2021gentle,manokhin2022awesome}. Crucially for our intended application, CP is model-agnostic and can be applied at test time. This allows us to relax the assumption that the Black Box Model needs to learn to generate predictive uncertainty estimates during training. In other words, CP promises to provide a way to generate plausible counterfactuals for any standard discriminative model without the need for surrogate models. 
 
@@ -278,6 +242,49 @@ Since we can still retrieve unperturbed softmax outputs from our conformal class
 
 In order to generate prediction sets $C_{\theta}(f(\mathbf{Z}^\prime);\alpha)$ for any Black Box Model, we merely need to perform a single calibration pass through a holdout set $\mathcal{D}_{\text{cal}}$. Arguably, data is typically abundant and in most applications practitioners tend to hold out a test data set anyway. Our proposed approach for ECCo therefore removes the restriction on the family of predictive models, at the small cost of reserving a subset of the available data for calibration. 
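+
+The following minimal sketch illustrates this calibration step for split Conformal Prediction, with \texttt{probs} standing in for the classifier's softmax output. The softmax-based non-conformity score is one common choice and serves here only as an illustration; it is not necessarily the score we use in our experiments.
+
+\begin{verbatim}
+# Single calibration pass for split Conformal Prediction (illustrative score).
+# `probs(x)` stands for the classifier's softmax output.
+using Statistics: quantile
+
+function calibrate(probs, X_cal, y_cal; alpha=0.1)
+    scores = [1 - probs(x)[y] for (x, y) in zip(X_cal, y_cal)]
+    n = length(scores)
+    q_level = ceil((n + 1) * (1 - alpha)) / n   # finite-sample correction
+    return quantile(scores, min(q_level, 1.0))  # calibrated threshold
+end
+
+# Prediction set: all labels whose non-conformity score stays below the threshold.
+function prediction_set(probs, x, q)
+    p = probs(x)
+    return [y for y in eachindex(p) if 1 - p[y] <= q]
+end
+\end{verbatim}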
 
+\section{Evaluation Framework}\label{conformity}
+
+In Section~\ref{background} we explained that Counterfactual Explanations work directly with the Black Box Model, so fidelity is not a concern. This may explain why research has primarily focused on other desiderata, most notably plausibility (Definition~\ref{def:plausible}). Enquiring about the plausibility of a counterfactual essentially boils down to the following question: `Is this counterfactual consistent with the underlying data'? To introduce this section, we posit a related, slightly more nuanced question: `Is this counterfactual consistent with what the model has learned about the underlying data'? We will argue that fidelity is not a sufficient evaluation measure to answer this question and propose a novel way to assess if explanations conform with model behaviour. Finally, we will introduce evaluation measures that jointly quantify the plausibility and model conformity of explanations. 
+
+\subsection{From Fidelity to Model Conformity}
+
+The word \textit{fidelity} stems from the Latin word `fidelis', which means `faithful, loyal, trustworthy' \citep{mw2023fidelity}. As we explained in Section~\ref{background}, model explanations are considered faithful if their corresponding predictions coincide with the predictions made by the model itself. Since this definition of faithfulness is not useful in the context of Counterfactual Explanations, we propose an adapted version: 
+
+\begin{definition}[Conformal Counterfactuals]
+  \label{def:conformal}
+  Let $\mathcal{X}_{\theta}|\mathbf{y}^* = p_{\theta}(\mathbf{x}|\mathbf{y}^*)$ denote the conditional distribution of $\mathbf{x}$ in the target class $\mathbf{y}^*$, where $\theta$ denotes the parameters of model $M_{\theta}$. Then for $\mathbf{x}^{\prime}$ to be considered a conformal counterfactual, we need: $\mathbf{x}^{\prime} \sim \mathcal{X}_{\theta}|\mathbf{y}^*$.
+\end{definition}
+
+In words, conformal counterfactuals conform with what the predictive model has learned about the input data $\mathbf{x}$. Since this definition works with distributional properties, it explicitly accounts for the multiplicity of explanations we discussed earlier. Except for the posterior conditional distribution $p_{\theta}(\mathbf{x}|\mathbf{y}^*)$, we already have access to all the ingredients in Definition~\ref{def:conformal}.
+
+\textbf{TBD}
+
+\begin{itemize}
+  \item What exact sampler do we use? ImproperSGLD as in \citet{grathwohl2020your} seems to work best.
+\end{itemize}
+
+\subsection{Evaluation Measures}\label{evaluation}
+
+Above we have defined plausibility (\ref{def:plausible}) and conformity (\ref{def:conformal}) for Counterfactual Explanations. In this subsection, we introduce evaluation measures that facilitate a quantitative evaluation of counterfactuals for these objectives. 
+
+Firstly, in order to assess the plausibility of counterfactuals we adapt the implausibility metric proposed in \citet{guidotti2022counterfactual}. The authors propose to evaluate plausibility in terms of the distance of the counterfactual $\mathbf{x}^{\prime}$ from its nearest neighbour in the target class $\mathbf{y}^*$: the smaller this distance, the more plausible the counterfactual. Instead of focusing only on the nearest neighbour of $\mathbf{x}^{\prime}$, we suggest computing the average over distances from multiple (possibly all) observed instances in the target class. Formally, for a single counterfactual, we have:
+
+\begin{equation}\label{eq:impl}
+  \begin{aligned}
+    \text{impl} = \frac{1}{\lvert\mathbf{x} \in \mathcal{X}|\mathbf{y}^*\rvert} \sum_{\mathbf{x} \in \mathcal{X}|\mathbf{y}^*} \text{dist}(\mathbf{x}^{\prime},\mathbf{x})
+  \end{aligned}
+\end{equation}
+
+This measure is straightforward to compute and should be less sensitive to outliers in the target class than the one based on the nearest neighbour. It also gives rise to a very similar evaluation measure for conformity. We merely swap out the subsample of individuals in the target class for the empirical distribution of generated conditional samples:
+
+\begin{equation}\label{eq:conf}
+  \begin{aligned}
+    \text{conf} = \frac{1}{\lvert\mathbf{x} \in \mathcal{X}_{\theta}|\mathbf{y}^*\rvert} \sum_{\mathbf{x} \in \mathcal{X}_{\theta}|\mathbf{y}^*} \text{dist}(\mathbf{x}^{\prime},\mathbf{x})
+  \end{aligned}
+\end{equation}
+
+As noted by \citet{guidotti2022counterfactual}, these distance-based measures are simplistic and more complex alternative measures may ultimately be more appropriate for the task. For example, we considered using statistical divergence measures instead. This would involve generating not one but many counterfactuals and comparing the generated empirical distribution to the target distributions in Definitions~\ref{def:plausible} and~\ref{def:conformal}. While this approach is potentially more rigorous, generating enough counterfactuals is not always practical. 
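+
+Computationally, both measures reduce to the same average-distance routine and differ only in the reference sample: observed instances in the target class for Equation~\ref{eq:impl}, conditional samples generated via SGLD for Equation~\ref{eq:conf}. A minimal sketch, with the Euclidean distance as an illustrative choice for $\text{dist}$:
+
+\begin{verbatim}
+# Average distance of a counterfactual to a reference sample (illustrative).
+# For implausibility, `X_ref` holds observed instances in the target class;
+# for conformity, it holds the conditional samples drawn via SGLD.
+using Statistics: mean
+using LinearAlgebra: norm
+
+dist(a, b) = norm(a - b)                         # Euclidean distance
+avg_distance(x_cf, X_ref) = mean(dist(x_cf, x) for x in X_ref)
+\end{verbatim}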
+
 \section{Experiments}
 
 \begin{itemize}