using ChainRules: ignore_derivatives
using ConformalPrediction
using CounterfactualExplanations.Models
using Flux
using MLJBase
using MLJEnsembles
using MLJFlux
using MLUtils
using Statistics

"Atomic model types that `ConformalPrediction.jl` wrappers may contain and that this module supports."
const CompatibleAtomicModel = Union{
    <:MLJFlux.MLJFluxProbabilistic,
    MLJEnsembles.ProbabilisticEnsembleModel{<:MLJFlux.MLJFluxProbabilistic},
}

"""
    ConformalModel <: Models.AbstractDifferentiableModel

Constructor for models trained in `Flux.jl`.
"""
struct ConformalModel <: Models.AbstractDifferentiableModel
    model::ConformalPrediction.ConformalProbabilisticSet
    fitresult::Any
    likelihood::Union{Nothing,Symbol}
    # Inner constructor: validates that `likelihood` is either `nothing` or one
    # of the two supported classification likelihoods.
    function ConformalModel(model, fitresult, likelihood)
        if likelihood ∈ [:classification_binary, :classification_multi] ||
           isnothing(likelihood)
            new(model, fitresult, likelihood)
        else
            throw(
                ArgumentError(
                    "`likelihood` should either be `nothing` or in `[:classification_binary,:classification_multi]`",
                ),
            )
        end
    end
end

"""
    _get_chains(fitresult)

Private function that extracts the chains from a fitted model. For an ensemble,
one chain per ensemble member is returned; otherwise a single chain is returned.
Wrapped in `ignore_derivatives` so this bookkeeping is not traced by AD.
"""
function _get_chains(fitresult)
    chains = []
    ignore_derivatives() do
        if fitresult isa MLJEnsembles.WrappedEnsemble
            _chains = map(res -> res[1], fitresult.ensemble)
        else
            _chains = [fitresult[1]]
        end
        push!(chains, _chains...)
    end
    return chains
end

"""
    _outdim(fitresult)

Private function that extracts the output dimension from a fitted model.
"""
function _outdim(fitresult)
    if fitresult isa MLJEnsembles.WrappedEnsemble
        outdim = length(fitresult.ensemble[1][2])
    else
        outdim = length(fitresult[2])
    end
    return outdim
end

"""
    _get_sampler(model::AbstractFittedModel)

Private helper function that extracts the sampler from a fitted model. Returns
`false` when the (possibly ensemble-wrapped) atomic model is not a
`JointEnergyClassifier` or exposes no nested `model` field.
"""
function _get_sampler(model::AbstractFittedModel)
    _mod = model.model
    if hasfield(typeof(_mod), :model)
        # Unwrap one level of ensemble wrapping, if present:
        if _mod.model isa MLJEnsembles.EitherEnsembleModel
            _mod = _mod.model
        end
        if _mod.model isa JointEnergyClassifier
            sampler = _mod.model.sampler
        else
            sampler = false
        end
    else
        sampler = false
    end
    return sampler
end

"""
    _has_sampler(model::AbstractFittedModel)

Private helper function that checks if a fitted model has a sampler.
"""
function _has_sampler(model::AbstractFittedModel)
    return !(_get_sampler(model) isa Bool)
end

"""
    ConformalModel(model, fitresult=nothing; likelihood::Union{Nothing,Symbol}=nothing)

Outer constructor for `ConformalModel`. If `fitresult` is not specified, the
model is treated as not (yet) fitted. If `likelihood` is not specified, it
defaults to `:classification_multi`.
"""
function ConformalModel(model, fitresult=nothing; likelihood::Union{Nothing,Symbol}=nothing)
    # Check if model is fitted:
    if isnothing(fitresult)
        @info "Conformal Model is not fitted."
    end
    # Default to multi-class classification, if not specified:
    if isnothing(likelihood)
        likelihood = :classification_multi
        @info "Likelihood not specified. Defaulting to $likelihood."
    end
    # Put chains in test mode (e.g. freeze dropout/batchnorm statistics), but
    # only when a fitresult exists: `_get_chains(nothing)` would throw when
    # indexing into `nothing`.
    if !isnothing(fitresult)
        testmode!.(_get_chains(fitresult))
    end
    M = ConformalModel(model, fitresult, likelihood)
    return M
end

"""
    get_logits(f::Flux.Chain, x)

Helper function to return logits in case final layer is an activation function.
"""
get_logits(f::Flux.Chain, x) = f[end] isa Function ? f[1:end-1](x) : f(x)

# Methods
@doc raw"""
    Models.logits(M::ConformalModel, X::AbstractArray)

To keep things consistent with the architecture of `CounterfactualExplanations.jl`, this method computes logits $\beta_i x_i$ (i.e. the linear predictions) for a Conformal Classifier. By default, `MLJ.jl` and `ConformalPrediction.jl` return probabilistic predictions. To get the underlying logits, we invert the softmax function.

Let $\hat{p}_i$ denote the estimated softmax output for feature $i$. Then in the multi-class case the following formula can be applied:

```math
\beta_i x_i = \log (\hat{p}_i) + \log (\sum_i \exp(\hat{p}_i))
```

For a short derivation, see here: https://math.stackexchange.com/questions/2786600/invert-the-softmax-function.

In the binary case logits are fed through the sigmoid function instead of softmax, so we need to further adjust as follows,

```math
\beta x = \beta_1 x_1 - \beta_0 x_0
```

which follows from the derivation here: https://stats.stackexchange.com/questions/233658/softmax-vs-sigmoid-function-in-logistic-classifier
"""
function Models.logits(M::ConformalModel, X::AbstractArray)
    fitresult = M.fitresult
    function predict_logits(fitresult, x)
        # Stack per-chain logits along a new trailing dimension, average over
        # the ensemble, then drop the trailing dimension again:
        stacked = MLUtils.stack(map(chain -> get_logits(chain, x), _get_chains(fitresult)))
        averaged = mean(stacked, dims=ndims(stacked))
        ŷ = MLUtils.unstack(averaged, dims=ndims(averaged))[1]
        if ndims(ŷ) == 2
            ŷ = [ŷ]
        end
        ŷ = reduce(hcat, ŷ)
        if M.likelihood == :classification_binary
            # Collapse the two-class logits to a single binary logit:
            ŷ = reduce(hcat, (map(y -> y[2] - y[1], eachcol(ŷ))))
        end
        # Ensure the output is always a matrix (features × samples):
        ŷ = ndims(ŷ) > 1 ? ŷ : permutedims([ŷ])
        return ŷ
    end
    yhat = predict_logits(fitresult, X)
    return yhat
end

"""
    Models.probs(M::ConformalModel, X::AbstractArray)

Returns the estimated probabilities for a Conformal Classifier.

Throws an `ArgumentError` if the model's likelihood is neither
`:classification_binary` nor `:classification_multi` (e.g. `nothing`).
"""
function Models.probs(M::ConformalModel, X::AbstractArray)
    if M.likelihood == :classification_binary
        output = σ.(Models.logits(M, X))
    elseif M.likelihood == :classification_multi
        output = softmax(Models.logits(M, X))
    else
        # Previously this fell through and died with an opaque `UndefVarError`:
        throw(
            ArgumentError(
                "Cannot compute probabilities for `likelihood=$(M.likelihood)`.",
            ),
        )
    end
    return output
end

"""
    train(M::ConformalModel, data::CounterfactualData; kwrgs...)

Trains a Conformal Classifier `M` on `data`. Returns a new, fitted
`ConformalModel` whose likelihood is guessed from the training labels.
"""
function Models.train(M::ConformalModel, data::CounterfactualData; kwrgs...)
    X, y = data.X, data.output_encoder.labels
    # MLJ expects observations as rows:
    X = table(permutedims(X))
    conf_model = M.model
    mach = machine(conf_model, X, y)
    fit!(mach; kwrgs...)
    likelihood, _ = CounterfactualExplanations.guess_likelihood(data.output_encoder.y)
    return ConformalModel(mach.model, mach.fitresult, likelihood)
end