```{r} library(data.table) library(kableExtra) ``` ```{r} res_path <- "results/" files <- list.files(res_path) dt <- lapply(files[grepl("_bmk.csv", files)], function(x) { fread(file.path(res_path, x)) }) dt <- Reduce(function(x,y) {rbind(x,y, fill=TRUE)}, dt) dt[,ce:=NULL] synth <- c("Moons", "Circles", "Linearly Separable") tabular <- c("GMSC", "German Credit", "California Housing") dt[,source:=ifelse(dataname %in% synth, "synthetic", ifelse(dataname %in% tabular, "tabular", "vision"))] ``` ```{r} # Generator names dt[,generator:=factor(generator)] levels(dt$generator)[match("ECCCo",levels(dt$generator))] <- "ECCCo-L1" levels(dt$generator)[match("ECCCo (no CP)",levels(dt$generator))] <- "ECCCo-L1 (no CP)" levels(dt$generator)[match("ECCCo (no EBM)",levels(dt$generator))] <- "ECCCo-L1 (no EBM)" levels(dt$generator)[match("ECCCo-Δ",levels(dt$generator))] <- "ECCCo" levels(dt$generator)[match("ECCCo-Δ (latent)",levels(dt$generator))] <- "ECCCo+" levels(dt$generator)[match("ECCCo-Δ (no CP)",levels(dt$generator))] <- "ECCCo (no CP)" levels(dt$generator)[match("ECCCo-Δ (no EBM)",levels(dt$generator))] <- "ECCCo (no EBM)" ``` ```{r} # Adjust measure names dt[source=="vision" & variable=="distance_from_targets_ssim", variable:="implausibility"] dt[source=="vision" & variable=="distance_from_energy_ssim", variable:="unfaithfulness"] dt[source!="vision" & variable=="distance_from_targets_l2", variable:="implausibility"] dt[source!="vision" & variable=="distance_from_energy_l2", variable:="unfaithfulness"] ``` ```{r} dt[,non_valid:=variable=="validity" & value==0.0,.(sample,dataname,generator,model,target,factual,source)] dt[,non_valid:=any(non_valid==TRUE),.(sample,dataname,generator,model,target,factual,source)] dt_valid <- dt[non_valid==FALSE] ``` ```{r} generators <- unique(dt$generator)[sapply(unique(dt$generator), function(x) {!grepl("L1",x)})] tab <- dt[ generator %in% generators, .( value=sprintf("%1.2f ± %1.2f", mean(value), sd(value)), val = mean(value), std = sd(value) ), .(dataname, generator, model, variable, source) ] tab$top_val = F tab$one_std_wachter = F tab$two_std_wachter = F # Measures to be minimized: min_measures <- c( "distance", "implausibility", "unfaithfulness", "distance_from_energy", "distance_from_energy_l2", "distance_from_targets", "distance_from_targets_l2", "set_size_penalty" ) tab[variable %in% min_measures,top_val:=val==min(val),.(model, dataname, variable)] tab[variable %in% min_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)] tab[variable %in% min_measures,two_std_wachter:=val+2*std<val[generator=="Wachter"],.(model, dataname, variable)] tab[variable %in% min_measures,one_std_wachter:=val+1*std<val[generator=="Wachter"],.(model, dataname, variable)] # Measures to be maximized: max_measures <- c( "validity", "redundancy" ) tab[variable %in% max_measures,top_val:=val==max(val),.(model, dataname, variable)] tab[variable %in% max_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)] tab[variable %in% max_measures,two_std_wachter:=val-2*std>val[generator=="Wachter"],.(model, dataname, variable)] tab[variable %in% max_measures,one_std_wachter:=val-1*std>val[generator=="Wachter"],.(model, dataname, variable)] # Add conditional formatting: tab$value <- cell_spec(tab$value, "latex", bold=tab$top_val) tab[one_std_wachter==T,value:=paste0(value,"*")] tab[one_std_wachter==F,value:=paste0(value,"\\hphantom{*}")] tab[two_std_wachter==T,value:=paste0(value,"*")] tab[two_std_wachter==F,value:=paste0(value,"\\hphantom{*}")] # Remove redundant columns: tab[,val:=NULL] tab[,std:=NULL] tab[,top_val:=NULL] tab[,two_std_wachter:=NULL] tab[,one_std_wachter:=NULL] ``` ```{r} generators <- unique(dt$generator)[sapply(unique(dt$generator), function(x) {!grepl("L1",x)})] tab_valid <- dt_valid[ generator %in% generators, .( value=sprintf("%1.2f ± %1.2f", mean(value), sd(value)), val = mean(value), std = sd(value) ), .(dataname, generator, model, variable, source) ] tab_valid$top_val = F tab_valid$one_std_wachter = F tab_valid$two_std_wachter = F # Measures to be minimized: min_measures <- c( "distance", "implausibility", "unfaithfulness", "distance_from_energy", "distance_from_energy_l2", "distance_from_targets", "distance_from_targets_l2", "set_size_penalty" ) tab_valid[variable %in% min_measures,top_val:=val==min(val),.(model, dataname, variable)] tab_valid[variable %in% min_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)] tab_valid[variable %in% min_measures,two_std_wachter:=val+2*std<val[generator=="Wachter"],.(model, dataname, variable)] tab_valid[variable %in% min_measures,one_std_wachter:=val+1*std<val[generator=="Wachter"],.(model, dataname, variable)] # Measures to be maximized: max_measures <- c( "validity", "redundancy" ) tab_valid[variable %in% max_measures,top_val:=val==max(val),.(model, dataname, variable)] tab_valid[variable %in% max_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)] tab_valid[variable %in% max_measures,two_std_wachter:=val-2*std>val[generator=="Wachter"],.(model, dataname, variable)] tab_valid[variable %in% max_measures,one_std_wachter:=val-1*std>val[generator=="Wachter"],.(model, dataname, variable)] # Add conditional formatting: tab_valid$value <- cell_spec(tab_valid$value, "latex", bold=tab_valid$top_val) tab_valid[one_std_wachter==T,value:=paste0(value,"*")] tab_valid[one_std_wachter==F,value:=paste0(value,"\\hphantom{*}")] tab_valid[two_std_wachter==T,value:=paste0(value,"*")] tab_valid[two_std_wachter==F,value:=paste0(value,"\\hphantom{*}")] # Remove redundant columns: tab_valid[,val:=NULL] tab_valid[,std:=NULL] tab_valid[,top_val:=NULL] tab_valid[,two_std_wachter:=NULL] tab_valid[,one_std_wachter:=NULL] ``` ## Main tables ```{r} # Choices: measures <- c( "unfaithfulness", "implausibility", "set_size_penalty" ) measure_names <- c( "Unfaithfulness ↓", "Implausibility ↓", "Uncertainty ↓" ) chosen_source <- "tabular" # Order: chosen_data <- c( "California Housing", "GMSC" ) chosen_model <- c( "MLP", "JEM Ensemble" ) tab_i <- tab # Logic: tab_i <- tab_i[variable %in% measures] tab_i[,variable:=factor(variable, levels=measures)] tab_i <- tab_i[dataname %in% chosen_data] tab_i <- tab_i[model %in% chosen_model] tab_i[,model:=factor(model,levels=chosen_model)] tab_i[,dataname:=factor(dataname,levels=chosen_data)] tab_i <- dcast(tab_i, model + generator ~ dataname + variable) col_names <- c( "Model", "Generator", rep(measure_names,length(chosen_data)) ) caption <- sprintf( "Results for %s datasets: sample averages +/- one standard deviation across counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (\\textit{Wachter}). \\label{tab:results-%s} \\newline", chosen_source, chosen_source ) file_name <- sprintf( "paper/contents/table-%s.tex", chosen_source ) sub_header <- rep(length(measures), length(chosen_data)) names(sub_header) <- chosen_data header <- c( " " = 2, sub_header ) line_sep <- c(rep("",length(measures)-1),"\\addlinespace") algin_cols <- c(rep('l',2),rep('c',ncol(tab_i)-2)) kbl( tab_i, caption = caption, align = algin_cols, col.names=col_names, booktabs = T, escape=F, format="latex", linesep = line_sep, table.env="table*" ) %>% kable_styling( latex_options = c("scale_down") ) %>% kable_paper(full_width = F) %>% add_header_above(header) %>% collapse_rows(columns = 1:2, latex_hline = "major", valign = "middle") %>% save_kable(file_name) ``` ```{r} # Choices: measures <- c( "unfaithfulness", "implausibility" ) measure_names <- c( "Unfaithfulness ↓", "Implausibility ↓" ) chosen_source <- "vision" # Order: chosen_data <- c( "MNIST" ) chosen_model <- c( "MLP", "LeNet-5" ) tab_i <- tab # Logic: tab_i <- tab_i[variable %in% measures] tab_i[,variable:=factor(variable, levels=measures)] tab_i <- tab_i[dataname %in% chosen_data] tab_i <- tab_i[model %in% chosen_model] tab_i[,model:=factor(model,levels=chosen_model)] tab_i[,dataname:=factor(dataname,levels=chosen_data)] tab_i <- dcast(tab_i, model + generator ~ dataname + variable) col_names <- c( "Model", "Generator", rep(measure_names,length(chosen_data)) ) caption <- sprintf( "Results for %s dataset. Formatting details are the same as in Table~\\ref{tab:results-tabular}. \\label{tab:results-%s} \\newline", chosen_source, chosen_source ) file_name <- sprintf( "paper/contents/table-%s.tex", chosen_source ) sub_header <- rep(length(measures), length(chosen_data)) names(sub_header) <- chosen_data header <- c( " " = 2, sub_header ) line_sep <- c(rep("",length(measures)-1),"\\addlinespace") algin_cols <- c(rep('l',2),rep('c',ncol(tab_i)-2)) kbl( tab_i, caption = caption, align = algin_cols, col.names=col_names, booktabs = T, escape=F, format="latex", linesep = line_sep ) %>% kable_styling( latex_options = c("scale_down") ) %>% kable_paper(full_width = F) %>% add_header_above(header) %>% collapse_rows(columns = 1:2, latex_hline = "major", valign = "middle") %>% save_kable(file_name) ``` ## Full tables ```{r} tab <- dt[ , .( value=sprintf("%1.2f ± %1.2f", mean(value), sd(value)), val = mean(value), std = sd(value) ), .(dataname, generator, model, variable, source) ] tab$top_val = F tab$one_std_wachter = F tab$two_std_wachter = F # Measures to be minimized: min_measures <- c( "distance", "implausibility", "unfaithfulness", "distance_from_energy", "distance_from_energy_l2", "distance_from_targets", "distance_from_targets_l2", "set_size_penalty" ) tab[variable %in% min_measures,top_val:=val==min(val),.(model, dataname, variable)] tab[variable %in% min_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)] tab[variable %in% min_measures,two_std_wachter:=val+2*std<val[generator=="Wachter"],.(model, dataname, variable)] tab[variable %in% min_measures,one_std_wachter:=val+1*std<val[generator=="Wachter"],.(model, dataname, variable)] # Measures to be maximized: max_measures <- c( "validity", "redundancy" ) tab[variable %in% max_measures,top_val:=val==max(val),.(model, dataname, variable)] tab[variable %in% max_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)] tab[variable %in% max_measures,two_std_wachter:=val-2*std>val[generator=="Wachter"],.(model, dataname, variable)] tab[variable %in% max_measures,one_std_wachter:=val-1*std>val[generator=="Wachter"],.(model, dataname, variable)] # Add conditional formatting: tab$value <- cell_spec(tab$value, "latex", bold=tab$top_val) tab[one_std_wachter==T,value:=paste0(value,"*")] tab[one_std_wachter==F,value:=paste0(value,"\\hphantom{*}")] tab[two_std_wachter==T,value:=paste0(value,"*")] tab[two_std_wachter==F,value:=paste0(value,"\\hphantom{*}")] # Remove redundant columns: tab[,val:=NULL] tab[,std:=NULL] tab[,top_val:=NULL] tab[,two_std_wachter:=NULL] tab[,one_std_wachter:=NULL] # Measures: measures <- c( "unfaithfulness", "implausibility", "set_size_penalty", "distance", "redundancy", "validity" ) measure_names <- c( "Unfaithfulness ↓", "Implausibility ↓", "Uncertainty ↓", "Cost ↓", "Redundancy ↑", "Validity ↑" ) tab <- tab[variable %in% measures] tab[,variable:=factor(variable, levels=measures)] ``` ```{r} tab_valid <- dt_valid[ , .( value=sprintf("%1.2f ± %1.2f", mean(value), sd(value)), val = mean(value), std = sd(value) ), .(dataname, generator, model, variable, source) ] tab_valid$top_val = F tab_valid$one_std_wachter = F tab_valid$two_std_wachter = F # Measures to be minimized: min_measures <- c( "distance", "implausibility", "unfaithfulness", "distance_from_energy", "distance_from_energy_l2", "distance_from_targets", "distance_from_targets_l2", "set_size_penalty" ) tab_valid[variable %in% min_measures,top_val:=val==min(val),.(model, dataname, variable)] tab_valid[variable %in% min_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)] tab_valid[variable %in% min_measures,two_std_wachter:=val+2*std<val[generator=="Wachter"],.(model, dataname, variable)] tab_valid[variable %in% min_measures,one_std_wachter:=val+1*std<val[generator=="Wachter"],.(model, dataname, variable)] # Measures to be maximized: max_measures <- c( "validity", "redundancy" ) tab_valid[variable %in% max_measures,top_val:=val==max(val),.(model, dataname, variable)] tab_valid[variable %in% max_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)] tab_valid[variable %in% max_measures,two_std_wachter:=val-2*std>val[generator=="Wachter"],.(model, dataname, variable)] tab_valid[variable %in% max_measures,one_std_wachter:=val-1*std>val[generator=="Wachter"],.(model, dataname, variable)] # Add conditional formatting: tab_valid$value <- cell_spec(tab_valid$value, "latex", bold=tab_valid$top_val) tab_valid[one_std_wachter==T,value:=paste0(value,"*")] tab_valid[one_std_wachter==F,value:=paste0(value,"\\hphantom{*}")] tab_valid[two_std_wachter==T,value:=paste0(value,"*")] tab_valid[two_std_wachter==F,value:=paste0(value,"\\hphantom{*}")] # Remove redundant columns: tab_valid[,val:=NULL] tab_valid[,std:=NULL] tab_valid[,top_val:=NULL] tab_valid[,two_std_wachter:=NULL] tab_valid[,one_std_wachter:=NULL] # Measures: measures <- c( "unfaithfulness", "implausibility", "set_size_penalty", "distance", "redundancy", "validity" ) measure_names <- c( "Unfaithfulness ↓", "Implausibility ↓", "Uncertainty ↓", "Cost ↓", "Redundancy ↑", "Validity ↑" ) tab_valid <- tab_valid[variable %in% measures] tab_valid[,variable:=factor(variable, levels=measures)] ``` ### Tables (all) ```{r} for (name in unique(tab$dataname)) { data_indicator <- gsub(" ", "-", tolower(unique(name))) # Choices: tab_full <- dcast(tab[dataname==name], model + generator ~ variable) col_names <- c( "Model", "Generator", measure_names ) algin_cols <- c(rep('l',3),rep('c',ncol(tab_full)-3)) file_name <- sprintf( "paper/contents/table-%s.tex", data_indicator ) cap <- sprintf( "All results for %s dataset: sample averages +/- one standard deviation over all counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (\\textit{Wachter}). \\label{tab:results-%s} \\newline", name, data_indicator ) kbl( tab_full, caption = cap, align = "c", col.names=col_names, booktabs = T, escape=F, format="latex" ) %>% kable_styling(latex_options = c("scale_down")) %>% kable_paper(full_width = F) %>% collapse_rows(columns = 1:2, latex_hline = "custom", valign = "top") %>% save_kable(file_name) } ``` ## Full table (valid only) ```{r} for (name in unique(tab_valid$dataname)) { data_indicator <- gsub(" ", "-", tolower(unique(name))) # Choices: tab_full <- dcast(tab_valid[dataname==name], model + generator ~ variable) col_names <- c( "Model", "Generator", measure_names ) algin_cols <- c(rep('l',3),rep('c',ncol(tab_full)-3)) file_name <- sprintf( "paper/contents/table-%s-valid.tex", data_indicator ) cap <- sprintf( "All results for %s dataset: sample averages +/- one standard deviation over all valid counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (\\textit{Wachter}). \\label{tab:results-%s-valid} \\newline", name, data_indicator ) kbl( tab_full, caption = cap, align = "c", col.names=col_names, booktabs = T, escape=F, format="latex" ) %>% kable_styling(latex_options = c("scale_down")) %>% kable_paper(full_width = F) %>% collapse_rows(columns = 1:2, latex_hline = "custom", valign = "top") %>% save_kable(file_name) } ``` ## EBM ```{r} files <- list.files(paste0(res_path,"params")) dt <- lapply(files[sapply(files, function(x) grepl("model_params.csv",x))], function(x) { fread(file.path(paste0(res_path,"params"), x)) }) dt <- Reduce(function(x,y) {rbind(x,y, fill=TRUE)}, dt) setcolorder( dt, c( "dataname", "n_obs", "n_hidden", "n_layers", "activation", "n_ens", "epochs", "batch_size", "jem_sampling_steps", "sgld_batch_size", "lambda" ) ) dataset_order = c( "Linearly Separable", "Moons", "Circles", "California Housing", "GMSC", "German Credit", "MNIST", "Fashion MNIST" ) dt[,dataname:=factor(dataname, levels=dataset_order)] dt <- dt[order(dataname)] dt_ebm <- dt[,.(dataname, jem_sampling_steps, sgld_batch_size, lambda)] col_names <- c( "Dataset", "SGLD Steps", "Batch Size", "$\\lambda$" ) kbl( dt_ebm, caption = "EBM hyperparemeter choices for our experiments. \\label{tab:ebmparams} \\newline", align = "r", col.names=col_names, booktabs = T, escape=F, format="latex", linesep = "" ) %>% kable_styling(font_size = 8) %>% kable_paper(full_width = F) %>% save_kable("paper/contents/table_ebm_params.tex") ``` ## Experimental setup ```{r} dt_exp <- dt[,.(dataname, n_obs, n_hidden, n_layers, activation, n_ens, epochs, batch_size)] col_names <- c( "Dataset", "Sample Size", "Hidden Units", "Hidden Layers", "Activation", "Ensemble Size", "Epochs", "Batch Size" ) header <- c(" " = 2, "Network Architecture" = 4, "Training" = 2) kbl( dt_exp, caption = "Paremeter choices for our experiments. \\label{tab:params} \\newline", align = "r", col.names=col_names, booktabs = T, escape=F, format="latex", linesep = "" ) %>% kable_styling(latex_options = c("scale_down")) %>% kable_paper(full_width = F) %>% add_header_above(header) %>% save_kable("paper/contents/table_params.tex") ``` ```{r} dt <- lapply(files[sapply(files, function(x) grepl("generator_params.csv",x))], function(x) { fread(file.path(paste0(res_path,"params"), x)) }) dt <- Reduce(function(x,y) {rbind(x,y, fill=TRUE)}, dt) dt[is.na(reg_strength),reg_strength:=0] dt <- dt[,.(dataname,eta,lambda_1_Δ,lambda_2_Δ,lambda_3_Δ,reg_strength)] dt[,dataname:=factor(dataname, levels=dataset_order)] dt <- dt[order(dataname)] col_names <- c( "Dataset", "$\\eta$", "$\\lambda_1$", "$\\lambda_2$", "$\\lambda_3$", "Ridge penalty" ) kbl( dt, caption = "Generator hyperparameters: the optimiser step size ($\\eta$); penalty strengths where $\\lambda_1$ applies to all generators but \\textit{Schut} and the other parameter are specific to \\textit{ECCCo}; finally, the strength for the Ridge penalty on energy for \\textit{ECCCo}.\\label{tab:genparams} \\newline", align = "r", col.names=col_names, booktabs = T, escape=F, format="latex", linesep = "" ) %>% kable_styling(font_size = 8) %>% kable_paper(full_width = F) %>% save_kable("paper/contents/table_gen_params.tex") ``` ```{r} dt <- lapply(files[grepl("_model_performance.csv", files)], function(x) { fread(file.path(paste0(res_path,"params"), x)) }) dt <- Reduce(function(x,y) {rbind(x,y, fill=TRUE)}, dt) dt[,dataname:=factor(dataname, levels=dataset_order)] dt <- dt[order(dataname,mod_name)] setcolorder( dt, c( "dataname", "mod_name", "acc", "precision", "f1score" ) ) col_names <- c("Dataset", "Model", "Accuracy", "Precision", "F1-Score") kbl( dt, caption = "Various standard performance metrics for our different models grouped by dataset. \\label{tab:perf} \\newline", align = "r", col.names=col_names, booktabs = T, escape=F, format="latex", digits=2, linesep = "" ) %>% kable_styling(font_size = 8) %>% kable_paper(full_width = F) %>% add_header_above(c(" "=2, "Performance Metrics" = 3)) %>% collapse_rows(columns = 1, latex_hline = "custom", valign = "top", custom_latex_hline = 1) %>% save_kable("paper/contents/table_perf.tex") ```