diff --git a/experiments/california_housing.jl b/experiments/california_housing.jl index 755b33dfbbc708160f7519c03002be9dc8aee6e0..b4cd94078de759903d10c363a678423ebcfe3b1d 100644 --- a/experiments/california_housing.jl +++ b/experiments/california_housing.jl @@ -1,32 +1,39 @@ +# Data: +dataname = "California Housing" counterfactual_data, test_data = train_test_split(load_california_housing(nothing); test_size=TEST_SIZE) -nobs = size(counterfactual_data.X, 2) -# Default builder: -n_hidden = 32 -activation = Flux.relu -builder = MLJFlux.@builder Flux.Chain( - Dense(n_in, n_hidden, activation), - Dense(n_hidden, n_hidden, activation), - Dense(n_hidden, n_out), -) +# Model tuning: +model_tuning_params = DEFAULT_MODEL_TUNING_LARGE + +# Tuning parameters: +tuning_params = DEFAULT_GENERATOR_TUNING -# Number of individuals: -n_ind = N_IND_SPECIFIED ? N_IND : 100 +# Parameter choices: +params = ( + n_hidden=32, + activation=Flux.relu, + builder=MLJFlux.@builder(Flux.Chain( # parenthesised so the macro ends here; sizes inlined because n_hidden/activation are tuple fields, not variables + Dense(n_in, 32, Flux.relu), + Dense(32, 32, Flux.relu), + Dense(32, n_out), + )), + α = [1.0, 1.0, 1e-1], + sampling_batch_size = 10, + sampling_steps = 30, + use_ensembling = true, + opt = Flux.Optimise.Descent(0.05) +) -run_experiment( - counterfactual_data, test_data; - dataname="California Housing", - epochs=100, - builder=builder, - α=[1.0, 1.0, 1e-1], - sampling_batch_size=10, - sampling_steps=30, - use_ensembling=true, - opt=Flux.Optimise.Descent(0.05), - n_individuals=n_ind, - min_batch_size=250, - use_variants=true, - Λ=[0.1, 0.2, 0.2], - nsamples=100, - niter_eccco=100 -) \ No newline at end of file +if !GRID_SEARCH + run_experiment( + counterfactual_data, test_data; + dataname=dataname, + params... 
+ ) +else + grid_search( + counterfactual_data, test_data; + dataname=dataname, + tuning_params=tuning_params + ) +end \ No newline at end of file diff --git a/experiments/credit_default.jl b/experiments/credit_default.jl index 3f76823091a7d930fbd05d9dba9564b8f4c0a2b3..059c29c5ed616297fd0ad695e866b99588c48e15 100644 --- a/experiments/credit_default.jl +++ b/experiments/credit_default.jl @@ -1,29 +1,39 @@ +# Data: +dataname = "Credit Default" counterfactual_data, test_data = train_test_split(load_credit_default(nothing); test_size=TEST_SIZE) -# Default builder: -n_hidden = 32 -activation = Flux.relu -builder = MLJFlux.@builder Flux.Chain( - Dense(n_in, n_hidden, activation), - Dense(n_hidden, n_hidden, activation), - Dense(n_hidden, n_out), -) +# Model tuning: +model_tuning_params = DEFAULT_MODEL_TUNING_LARGE -# Number of individuals: -n_ind = N_IND_SPECIFIED ? N_IND : 100 +# Tuning parameters: +tuning_params = DEFAULT_GENERATOR_TUNING -run_experiment( - counterfactual_data, test_data; - dataname="Credit Default", - builder=builder, +# Parameter choices: +params = ( + n_hidden = 32, + activation = Flux.relu, + builder = MLJFlux.@builder(Flux.Chain( # parenthesised so the macro ends here; sizes inlined because n_hidden/activation are tuple fields, not variables + Dense(n_in, 32, Flux.relu), + Dense(32, 32, Flux.relu), + Dense(32, n_out), + )), α=[1.0, 1.0, 1e-1], sampling_batch_size=10, sampling_steps=30, use_ensembling=true, - opt=Flux.Optimise.Descent(0.05), - n_individuals=n_ind, - use_variants=true, - Λ=[0.1, 0.2, 0.2], - nsamples=100, - niter_eccco=100 -) \ No newline at end of file + opt=Flux.Optimise.Descent(0.05) +) + +if !GRID_SEARCH + run_experiment( + counterfactual_data, test_data; + dataname=dataname, + params... 
+ ) +else + grid_search( + counterfactual_data, test_data; + dataname=dataname, + tuning_params=tuning_params + ) +end \ No newline at end of file diff --git a/experiments/german_credit.jl b/experiments/german_credit.jl index f75670f0bc4ba5e03a7f124a19297f5de18be4cf..e315b97eb7322a6863aceb508c4e5da3facb660e 100644 --- a/experiments/german_credit.jl +++ b/experiments/german_credit.jl @@ -1,29 +1,39 @@ +# Data: +dataname = "German Credit" counterfactual_data, test_data = train_test_split(load_german_credit(nothing); test_size=TEST_SIZE) -# Default builder: -n_hidden = 32 -activation = Flux.relu -builder = MLJFlux.@builder Flux.Chain( - Dense(n_in, n_hidden, activation), - Dense(n_hidden, n_hidden, activation), - Dense(n_hidden, n_out), -) +# Model tuning: +model_tuning_params = DEFAULT_MODEL_TUNING_LARGE + +# Tuning parameters: +tuning_params = DEFAULT_GENERATOR_TUNING -# Number of individuals: -n_ind = N_IND_SPECIFIED ? N_IND : 100 +# Parameter choices: +params = ( + n_hidden=32, + activation=Flux.relu, + builder=MLJFlux.@builder(Flux.Chain( # parenthesised so the macro ends here; sizes inlined because n_hidden/activation are tuple fields, not variables + Dense(n_in, 32, Flux.relu), + Dense(32, 32, Flux.relu), + Dense(32, n_out), + )), + α = [1.0, 1.0, 1e-1], + sampling_batch_size = 10, + sampling_steps = 30, + use_ensembling = true, + opt = Flux.Optimise.Descent(0.05) +) -run_experiment( - counterfactual_data, test_data; - dataname="German Credit", - builder=builder, - α=[1.0, 1.0, 1e-1], - sampling_batch_size=10, - sampling_steps=30, - use_ensembling=true, - opt=Flux.Optimise.Descent(0.05), - n_individuals=n_ind, - use_variants=true, - Λ=[0.1, 0.2, 0.2], - nsamples=100, - niter_eccco=100 -) \ No newline at end of file +if !GRID_SEARCH + run_experiment( + counterfactual_data, test_data; + dataname=dataname, + params... 
+ ) +else + grid_search( + counterfactual_data, test_data; + dataname=dataname, + tuning_params=tuning_params + ) +end \ No newline at end of file diff --git a/experiments/gmsc.jl b/experiments/gmsc.jl index c84ff07b6cfdf509adbf0b90123051b55bdf8713..f41b003e77b7104a68b4868555a8f9253ca4825d 100644 --- a/experiments/gmsc.jl +++ b/experiments/gmsc.jl @@ -1,32 +1,40 @@ +# Data: +dataname = "GMSC" counterfactual_data, test_data = train_test_split(load_gmsc(nothing); test_size=TEST_SIZE) nobs = size(counterfactual_data.X, 2) -# Default builder: -n_hidden = 32 -activation = Flux.relu -builder = MLJFlux.@builder Flux.Chain( - Dense(n_in, n_hidden, activation), - Dense(n_hidden, n_hidden, activation), - Dense(n_hidden, n_out), -) +# Model tuning: +model_tuning_params = DEFAULT_MODEL_TUNING_LARGE -# Number of individuals: -n_ind = N_IND_SPECIFIED ? N_IND : 100 +# Tuning parameters: +tuning_params = DEFAULT_GENERATOR_TUNING -run_experiment( - counterfactual_data, test_data; - dataname="GMSC", - epochs=100, - builder = builder, - α=[1.0, 1.0, 1e-1], - sampling_batch_size=10, +# Parameter choices: +params = ( + n_hidden=32, + activation=Flux.relu, + builder=MLJFlux.@builder(Flux.Chain( # parenthesised so the macro ends here; sizes inlined because n_hidden/activation are tuple fields, not variables + Dense(n_in, 32, Flux.relu), + Dense(32, 32, Flux.relu), + Dense(32, n_out), + )), + α = [1.0, 1.0, 1e-1], + sampling_batch_size = 10, sampling_steps = 30, use_ensembling = true, - opt = Flux.Optimise.Descent(0.05), - n_individuals = n_ind, - min_batch_size = 250, - use_variants=true, - Λ=[0.1, 0.2, 0.2], - nsamples = 100, - niter_eccco = 100, -) \ No newline at end of file + opt = Flux.Optimise.Descent(0.05) +) + +if !GRID_SEARCH + run_experiment( + counterfactual_data, test_data; + dataname=dataname, + params... 
+ ) +else + grid_search( + counterfactual_data, test_data; + dataname=dataname, + tuning_params=tuning_params + ) +end diff --git a/experiments/jobscripts/counterfactuals/california_housing.sh b/experiments/jobscripts/generators/california_housing.sh similarity index 100% rename from experiments/jobscripts/counterfactuals/california_housing.sh rename to experiments/jobscripts/generators/california_housing.sh diff --git a/experiments/jobscripts/counterfactuals/credit_default.sh b/experiments/jobscripts/generators/credit_default.sh similarity index 100% rename from experiments/jobscripts/counterfactuals/credit_default.sh rename to experiments/jobscripts/generators/credit_default.sh diff --git a/experiments/jobscripts/counterfactuals/fmnist.sh b/experiments/jobscripts/generators/fmnist.sh similarity index 100% rename from experiments/jobscripts/counterfactuals/fmnist.sh rename to experiments/jobscripts/generators/fmnist.sh diff --git a/experiments/jobscripts/counterfactuals/german_credit.sh b/experiments/jobscripts/generators/german_credit.sh similarity index 100% rename from experiments/jobscripts/counterfactuals/german_credit.sh rename to experiments/jobscripts/generators/german_credit.sh diff --git a/experiments/jobscripts/counterfactuals/gmsc.sh b/experiments/jobscripts/generators/gmsc.sh similarity index 100% rename from experiments/jobscripts/counterfactuals/gmsc.sh rename to experiments/jobscripts/generators/gmsc.sh diff --git a/experiments/jobscripts/counterfactuals/mnist.sh b/experiments/jobscripts/generators/mnist.sh similarity index 100% rename from experiments/jobscripts/counterfactuals/mnist.sh rename to experiments/jobscripts/generators/mnist.sh diff --git a/experiments/jobscripts/counterfactuals/synthetic.sh b/experiments/jobscripts/generators/synthetic.sh similarity index 100% rename from experiments/jobscripts/counterfactuals/synthetic.sh rename to experiments/jobscripts/generators/synthetic.sh diff --git 
a/experiments/jobscripts/counterfactuals/tabular.sh b/experiments/jobscripts/generators/tabular.sh similarity index 100% rename from experiments/jobscripts/counterfactuals/tabular.sh rename to experiments/jobscripts/generators/tabular.sh diff --git a/experiments/jobscripts/tuning/synthetic.sh b/experiments/jobscripts/tuning/generators/synthetic.sh similarity index 100% rename from experiments/jobscripts/tuning/synthetic.sh rename to experiments/jobscripts/tuning/generators/synthetic.sh diff --git a/experiments/jobscripts/tuning/generators/tabular.sh b/experiments/jobscripts/tuning/generators/tabular.sh new file mode 100644 index 0000000000000000000000000000000000000000..556d97ac636096b73d5b94153217f88b1f6a8b18 --- /dev/null +++ b/experiments/jobscripts/tuning/generators/tabular.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +#SBATCH --job-name="Grid-search Tabular (ECCCo)" +#SBATCH --time=06:00:00 +#SBATCH --ntasks=100 +#SBATCH --cpus-per-task=1 +#SBATCH --partition=compute +#SBATCH --mem-per-cpu=4GB +#SBATCH --account=research-eemcs-insy +#SBATCH --mail-type=END # Set mail type to 'END' to receive a mail when the job finishes. 
+ +module load 2023r1 openmpi + +srun julia --project=experiments experiments/run_experiments.jl -- data=gmsc,german_credit,credit_default,california_housing output_path=results mpi grid_search > experiments/tabular.log \ No newline at end of file diff --git a/experiments/jobscripts/tuning/models/synthetic.sh b/experiments/jobscripts/tuning/models/synthetic.sh new file mode 100644 index 0000000000000000000000000000000000000000..f904b4b84c578c0106daaf2dddeaf342644a983e --- /dev/null +++ b/experiments/jobscripts/tuning/models/synthetic.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +#SBATCH --job-name="Tune Synthetic Model (ECCCo)" +#SBATCH --time=03:00:00 +#SBATCH --ntasks=1 +#SBATCH --gpus-per-task=1 +#SBATCH --cpus-per-task=1 +#SBATCH --partition=gpu +#SBATCH --mem-per-cpu=8GB +#SBATCH --account=research-eemcs-insy +#SBATCH --mail-type=END # Set mail type to 'END' to receive a mail when the job finishes. + +srun julia --project=experiments experiments/run_experiments.jl -- data=linearly_separable,moons,circles output_path=results tune_model \ No newline at end of file diff --git a/experiments/jobscripts/tuning/models/tabular.sh b/experiments/jobscripts/tuning/models/tabular.sh new file mode 100644 index 0000000000000000000000000000000000000000..5d56815349b600443b7b3c1187ed167c8eb30ed6 --- /dev/null +++ b/experiments/jobscripts/tuning/models/tabular.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +#SBATCH --job-name="Tune Tabular Model (ECCCo)" +#SBATCH --time=03:00:00 +#SBATCH --ntasks=1 +#SBATCH --gpus-per-task=1 +#SBATCH --cpus-per-task=1 +#SBATCH --partition=gpu +#SBATCH --mem-per-cpu=8GB +#SBATCH --account=research-eemcs-insy +#SBATCH --mail-type=END # Set mail type to 'END' to receive a mail when the job finishes. + +srun julia --project=experiments experiments/run_experiments.jl -- data=gmsc,german_credit,credit_default,california_housing output_path=results tune_model \ No newline at end of file