diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7a6f1506f3f6664098dd3eb140b9b25f33d8fea5..4d1cf8758cb67b1b70d7500e81309a91dd5863e1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,24 +26,12 @@ test-build: rules: - if: $CI_PIPELINE_SOURCE == "merge_request_event" - if: $CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push" + - if: $CI_COMMIT_BRANCH == "publish" && $CI_PIPELINE_SOURCE == "push" script: - pip install -r requirements.txt - jupyter-book clean book/ - jupyter-book build book/ -# This job builds the HTML when it gets pushed to the publish branch -build-book: - stage: build - only: - - publish - - main - script: - - pip install -r requirements.txt - - bash ./build-lite.sh false - artifacts: - paths: - - book/_build/html/ - # This job commits the built HTML to the publish-html branch # push-html: # stage: deploy diff --git a/README.md b/README.md index 0c2fcb7c8b91f30633f583e126e4d17bcea1d533..460f1bb96e5dde90a06b51f589613428802d2b7c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ # MUDE Jupyter Book 2023/2024 -**UPDATE FOR THOSE BUILDING THE BOOK:** _we've been adding packages to the build, for example, Tom has added some examples of the quiz features we can use in the Cookbook, which requires a special package from our CS colleagues. If your book build is breaking due to package issues, simply run the following:_ +**NOTES:** +- use 5 M's (`MMMMM`) anywhere in MUDE repositories to mark to-do's for teachers and TA's. The idea is that you will use text search in VS Code or another platform to find them. Feel free to add your own letters or initials afterwards to make it easier to find certain things yourself or with a sub-team (e.g., `MMMMM-RL` for Robert). +- unless you want a notebook to execute when the jupyter book is built, you should include "_dont_execute" at the end of the file name. This means cell output in the notebook will be rendered in the JB HTML as-is once the document is saved. +- to see interactive stuff locally, run `build-book.sh` and follow instructions for the (local) Python server to use it +- these notes will be incorporated in JB documentation sometime in the future + +**UPDATE FOR THOSE BUILDING THE BOOK:** _we've been adding a lot of functionality related to interactive features, for example, Python code that runs in your browser, and Tom has added some examples of the quiz features we can use in the Cookbook. You can still use the "normal" approach of building the book with_ `jupyter book build book` _but if you want to use the interactive features, see the instructions in the Live Code chapter of the Cookbook. 
If your book build with the "normal" approach is breaking due to package issues, simply run the following:_ ``` pip install -r requirements.txt diff --git a/book/_toc.yml b/book/_toc.yml index 32719929f6bc5cb501bcb8db98ac25a470df7c52..cb5feb7d601e51411e0afe526e72f6343363a6f3 100644 --- a/book/_toc.yml +++ b/book/_toc.yml @@ -2,9 +2,20 @@ format: jb-book root: intro parts: - - caption: New Material + - caption: Programming chapters: - - file: new/blank + - file: programming/overview + title: Overview + - file: programming/golden_rules_dont_execute.ipynb + title: The Golden Rules + - file: programming/computers/overview + title: Computers + - file: programming/code/overview + title: Code + - file: programming/programs/overview + title: Programs + - file: programming/communication/overview + title: Communication - caption: Sandbox chapters: - file: sandbox/blank @@ -23,8 +34,15 @@ parts: - file: sandbox/continuous/empirical - file: sandbox/continuous/Param_distr.md - file: sandbox/continuous/other_distr.md + - file: sandbox/continuous/Uniform.md + - file: sandbox/continuous/Exponential.md + - file: sandbox/continuous/Exponential_quiz.md + - file: sandbox/continuous/Gumbel.md + - file: sandbox/continuous/Gumbel_quiz.md - file: sandbox/continuous/Parameterization.md - file: sandbox/continuous/fitting.md + - file: sandbox/continuous/moments.md + - file: sandbox/continuous/moments_quiz.md - file: sandbox/continuous/GOF - file: sandbox/SanPart/ObservationTheory/01_Introduction.md title: Sensing and Observation Theory @@ -36,6 +54,7 @@ parts: - file: sandbox/SanPart/ObservationTheory/06_MLE.md - file: sandbox/SanPart/ObservationTheory/07_NLSQ.md - file: sandbox/SanPart/ObservationTheory/08_Testing.md + - file: sandbox/SanPart/ObservationTheory/09_TestingSandM.md - file: sandbox/SanPart/ObservationTheory/99_NotationFormulas.md - file: sandbox/SanPart/premath/00_00_PreMath.md title: Pre-Math diff --git a/book/cookbook/benchmarks/benchmark_aux_dont_execute.ipynb b/book/cookbook/benchmarks/benchmark_aux_dont_execute.ipynb index adbcaa50635466e495beb10fc74d1a86c033691b..fafb72619c1a5ab8881cd2d8c5be3eb83b01ba29 100644 --- a/book/cookbook/benchmarks/benchmark_aux_dont_execute.ipynb +++ b/book/cookbook/benchmarks/benchmark_aux_dont_execute.ipynb @@ -53,6 +53,18 @@ "\n", "my_library.hello()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Local library in nested folders\n", + "\n", + "import nested.nested.nested_library\n", + "nested.nested.nested_library.hello()" + ] } ], "metadata": { diff --git a/book/cookbook/benchmarks/nested/__init__.py b/book/cookbook/benchmarks/nested/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..412c8a8935af19333390692327d990f19bb0ccfe --- /dev/null +++ b/book/cookbook/benchmarks/nested/__init__.py @@ -0,0 +1 @@ +# This can be empty diff --git a/book/cookbook/benchmarks/nested/nested/__init__.py b/book/cookbook/benchmarks/nested/nested/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..412c8a8935af19333390692327d990f19bb0ccfe --- /dev/null +++ b/book/cookbook/benchmarks/nested/nested/__init__.py @@ -0,0 +1 @@ +# This can be empty diff --git a/book/cookbook/benchmarks/nested/nested/nested_library.py b/book/cookbook/benchmarks/nested/nested/nested_library.py new file mode 100644 index 0000000000000000000000000000000000000000..f586cc4beaedbf96beb8679ceb720079aa883a6c --- /dev/null +++ b/book/cookbook/benchmarks/nested/nested/nested_library.py @@ -0,0 +1,2 @@ +def 
hello(): + print("Hello world!") diff --git a/book/cookbook/blank.md b/book/cookbook/blank.md index 53d3da816dfd1b422ba7cbbe27c0a901fb6a9a0c..ec9cf679535d6a389c27cffde70ba6ee46670c4e 100644 --- a/book/cookbook/blank.md +++ b/book/cookbook/blank.md @@ -1,3 +1,11 @@ # Cookbook -Examples. Don't forget to check our [other book](https://interactivetextbooks.citg.tudelft.nl/manual/intro.html), the [repo for the other book](https://gitlab.tudelft.nl/interactivetextbooks-citg/jupyter-book-manual)and the [Jupyter Book website](https://jupyterbook.org/en/stable/). \ No newline at end of file +This Part of the book is a collection point for various instructions and examples. In particular, the following chapters lay out two types of interactive features: +1. non-code examples (e.g., how to make different types of quiz questions) +2. code examples. This is brand new and unique to MUDE; it is now possible to run Python code directly in the book! + +Especially for type 2, there is a lot of potential for providing immediate feedback to students when they run a code cell, but this is not documented extensively yet. **For those developing educational content:** if you know how you want to implement a particular type of question in your book, *send Robert a description as soon as possible!* For example, "I want to have students write a function to implement method A, and when they run the code cell they see if it gives the right answer. It will be checked by confirming a few inputs and outputs of the function, but I don't want the students to see the correct answer (values nor function code)." + +_This book temporarily supersedes the Cookbook in our other (non-MUDE) book (links to: [website](https://interactivetextbooks.citg.tudelft.nl/) and [repo](https://gitlab.tudelft.nl/interactivetextbooks-citg/jupyter-book-manual)). Prior to Q1 starting, the content here will be moved to the other book._ + +Remember to visit the [Jupyter Book website](https://jupyterbook.org/en/stable/) for basic functionality, and create a GitLab issue if you still have problems or questions. \ No newline at end of file diff --git a/book/cookbook/live_code.md b/book/cookbook/live_code.md index 5164c643082cdc2e2ee991064625373d8fbc9b68..fc0721f8690e46e4f068420f67727445832396e1 100644 --- a/book/cookbook/live_code.md +++ b/book/cookbook/live_code.md @@ -1,6 +1,10 @@ # Live Code -Our book has been enable to run Python code live in the browser (thanks Max!). Detailed instructions will be added elsewhere later, but for now, try this: +Our book has been enabled to run Python code live in the browser (thanks Max!). This page contains some installation instructions, and the other sections in this chapter provide some Benchmarks which illustrate the functionality that has been added (note that examples for how this functionality can be used in an educational context are still missing). See also the next chapter in the Cookbook, where Tom has implemented an example with widgets working in the browser! + +Detailed instructions will be added elsewhere later, but for now, try the following steps to use the interactive features on your local computer. If you would like to see them live on the MUDE website, create a Merge Request and tag Robert for review. Note that by the beginning of Q1 these examples will be moved to the generic Manual [here](https://interactivetextbooks.citg.tudelft.nl/intro.html). + +## Instructions: local build 1. Build the book by running the shell script `build-book.sh` (type `./build-book.sh true` in the terminal) 2. 
Open the book by going to [http://127.0.0.1:8000/](http://127.0.0.1:8000/) in your browser, or use the "live" version of the book at [https://mude.citg.tudelft.nl/book-draft/](https://mude.citg.tudelft.nl/book-draft/) diff --git a/book/intro.md b/book/intro.md index 2292ffa80c680d6048859b130de815392f7794cb..5d7d4566f3ed006fddfde8c605908c0d5586ed5b 100644 --- a/book/intro.md +++ b/book/intro.md @@ -1,8 +1,27 @@ -# Home -```{note} +# Welcome to the MUDE Textbook + +Welcome to the MUDE textbook. + +_this is where stuff will be written later_ + +````{admonition} Update for MUDE Teachers +:class: note +I got rid of the README that used to be here, because you should have your workflow set by now, and you should know by now that you can always check it on GitLab [here](https://gitlab.tudelft.nl/mude/book/). **I'll put recent updates here for the 2 weeks leading up to the start of Q1**: +- Draft of Programming Part is ready---the first content for the "new" book to be placed! Note that it is located in `book/book/programming`. +- As you finalize the ToC for your material, move it to the same level directory as the programming part. All Parts should be a sub-directory of `book`. +- By the start of Q1, all material in `cookbook`, `old`, `sandbox` should be removed, moved or hidden (preferably by YOU!) +```` + +````{admonition} Dictatorial Proclamation by the MUDE Manager +:class: warning +The MUDE content formerly known as **coding** shall henceforth be called **programming.** + +Don't like it? Too bad, maybe you can become manager next year and change it back. I have plenty of reasons, but rather than type them I believe they are better discussed over a cold beverage. +```` + +<!-- ```{note} Right now, the home page of the book simply displays `README.md` which is located at `./book/` or you can [read on GitLab here](https://gitlab.tudelft.nl/mude/book/-/blob/main/README.md). ``` ```{include} ../README.md -``` +``` --> diff --git a/book/new/blank.md b/book/new/blank.md deleted file mode 100644 index ac8948e6de7cd9d99bf2499c11bb42a56c368b07..0000000000000000000000000000000000000000 --- a/book/new/blank.md +++ /dev/null @@ -1,3 +0,0 @@ -# blank chapter - -There is no content here yet. This file is a chapter. Use this for unorganized material. If it's organized (e.g., the prob design/risk part) it does not need to go into `book/book/new/` \ No newline at end of file diff --git a/book/programming/code/overview.md b/book/programming/code/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..3ab2e6e44eff7605f6225c1d1a0b07007e475e03 --- /dev/null +++ b/book/programming/code/overview.md @@ -0,0 +1,7 @@ +# Code + +text. Matlab is also high-level and multi-purpose, like Python. + +Things that will definitely go here: +- functions (a brief overview/highlight of features, then link to the object part of programs) +- complexity: where does it come from and why is it important (work backwards to know which concepts need to be included (e.g., bits and flops etc)); can find good reference rather than repeat chapters of numerical analysis textbook material \ No newline at end of file diff --git a/book/programming/communication/overview.md b/book/programming/communication/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..8bd01f8d16276f92b583c26f6647cda08aca7c05 --- /dev/null +++ b/book/programming/communication/overview.md @@ -0,0 +1,3 @@ +# Communication + +text. 
\ No newline at end of file diff --git a/book/programming/computers/overview.md b/book/programming/computers/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..13c659f033f96443eaf7b7c296ac0ffd257689b7 --- /dev/null +++ b/book/programming/computers/overview.md @@ -0,0 +1,4 @@ +# Computers + +text. + diff --git a/book/programming/golden_rules_dont_execute.ipynb b/book/programming/golden_rules_dont_execute.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..c3d9fb8e1bc80eeaddc3d06205ab4399cc58ce66 --- /dev/null +++ b/book/programming/golden_rules_dont_execute.ipynb @@ -0,0 +1,556 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "6c618b60", + "metadata": {}, + "source": [ + "(programming-gr)=\n", + "# The Golden Rules\n", + " \n", + "*This is the first document presenting the MUDE Golden Rules of Code, a series of guidelines and recommendations that are focused on promoting good habits related to coding and programming. We expect to periodically update this document and perhaps share other topics in the 'Golden Rules' series. Whether you are a student, teacher or practitioner, we encourage you to refer back to this document often and feel free to share.*\n", + "\n", + "*We greatly appreciate feedback from anyone, especially students (send to R.C.Lanzafame@tudelft.nl).*\n", + "\n", + "Contributors: Robert Lanzafame, Kiril Vasilev and a list of reviewers, too long to name.\n", + "\n", + "<div class=\"alert alert-block alert-success\">\n", + "<b> Note to MUDE Students </b>\n", + " \n", + "For the September 13 workshop, you should read the entire document, but if short on time focus on Rules 1, 2, 3 and 5 (but only the one-line docstrings for 5). Some topics will be revisited or extended in later workshops. You are not required to install `autopep8`, but we recommend it since it is easy to use. You are expected to incorporate the guidelines in this document progressively throughout the semester, so we suggest you refer to it often.\n", + "</div>" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9c81abf5", + "metadata": {}, + "source": [ + "## Why do the Golden Rules for Documentation and Communication exist?\n", + "\n", + "You will spend a lot of your study hours in the MUDE module writing, reading and debugging Python code, regardless of your prior programming experience. Your time is valuable, especially once you start working after graduation! Yet it is all too common that most code is never re-used because it is poorly *documented*, making it not understandable to others, or even to yourself at a later moment. In addition, as a young practicing engineer or researcher it is critical that you are able to use your code to effectively *communicate* your results, otherwise the effort put into your analysis is wasted. Good code writing, code documentation and presentation of results such as figures, tables and numbers are all essential for effective collaboration. Even when working individually, it is important to preserve your work such that you can easily understand and re-use it in the future. This document will provide and explain a number of recommendations to help you accomplish that.\n", + "\n", + "The following quote summarizes the philosophy of this document concisely:\n", + "\n", + "<div style=\"text-align: right\"><em>Any fool can write code that a computer can understand. 
Good programmers write code that humans can understand.</em></div> \n", + "<div style=\"text-align: right\"><em> ― Martin Fowler </em></div> \n", + "\n", + "### A note of encouragement\n", + "\n", + "This document contains a lot of information that might not appear to be useful to you...*yet*. Keep in mind that the content here was created in collaboration with many colleagues who are working in industry and will be looking to hire you after you graduate from TU Delft: they will appreciate good coding documentation and communication skills! Our primary advice in MUDE, and for the rest of your MSc program, is that you take a look at this material now, but also keep the Golden Rules in mind and refer back to them often. If you can incorporate them into your work in MUDE, you will have made good habits for yourself that we know will serve you well in the future. \n", + "\n", + "We focused on illustrating good coding practice in a format and language that is approachable to engineers with a varied programming background. As such, specific fundamental programming and computer science concepts are avoided. If you would like to learn more about these concepts, specifically related to Python, the textbook by Guttag (2021) is an excellent reference." + ] + }, + { + "cell_type": "markdown", + "id": "e283df9f", + "metadata": {}, + "source": [ + "## Contents\n", + "- [Documentation and Communication](#Documentation-and-Communication)\n", + "- [The Golden Rules](#The-Golden-Rules)\n", + "- [References and resources](#References-and-resources)" + ] + }, + { + "cell_type": "markdown", + "id": "2245c370", + "metadata": {}, + "source": [ + "## Documentation and Communication\n", + "\n", + "The words in the title of this lesson were chosen very carefully. *Documentation* refers to all the work related to a problem you are working on. In MUDE, documentation is not limited to describing the purpose and function of your code only. Clearly documented code is important, but in CEG it is also critical to describe all the steps in your analysis, for example: data and data sources, assumptions, results, discussion and recommendations. Jupyter notebooks provide the ideal platform for combining documentation and analysis in one file, which is why we rely on them heavily in MUDE and the focus of this lesson. However, remember there are many other options available to you for proper documentation of your work, and the topics we cover here always apply. \n", + "\n", + "*Communication* refers to how you use your code, the analysis you run and the format by which you share it. Rarely are you running a piece of code purely for yourself, but often to share with others---perhaps for a homework assignment, to include in a thesis, a scientific publication, a design at a consultancy, et cetera. Why go through all the trouble of writing and using code if no one can understand what you did with it? It may be clear to you right now, but what if someone wants to use it in the future, or ask about your results, when you are not there? Will the audience understand your work if you have unreadable or overly complex tables and figures in a MSc thesis or conference presentation? Good communication is therefore critical, and there are many aspects, for example: visual formatting (visual appearance of code, text output, figures, tables); transparency and understandability (what did you do? is it repeatable? will someone understand it in the future, perhaps out of context?); and, for code explicitly: readability. 
This lesson provides a number of useful guidelines that will help improve your communication.\n", + "\n", + "In summary, the title of this document is a bit simplistic. In reality, it should be: *the Golden Rules: a non-exhaustive list of highly recommended habits to build into your life to make sure you can easily and consistently document your work in a way that ensures effective communication with others, as well as your future self.* But that doesn’t roll off the tongue like our current title does. And it doesn’t fit in 79 characters either 😉." + ] + }, + { + "cell_type": "markdown", + "id": "ac1ccc55", + "metadata": {}, + "source": [ + "## The Golden Rules\n", + "\n", + "These are the Golden Rules:\n", + "1. [Use descriptive names](#Rule-1:-Use-descriptive-names)\n", + "1. [Make readable code](#Rule-2:-Make-readable-code)\n", + "1. [Make readable results](#Rule-3:-Make-readable-results)\n", + "1. [Write simple functions, then use them](#Rule-4:-Write-simple-functions,-then-use-them)\n", + "1. [Document your code](#Rule-5:-Document-your-code)\n", + "1. [Test your code](#Rule-6:-Test-your-code)\n", + "1. [Learn to collaborate](#Rule-7:-Learn-to-collaborate)\n", + "\n", + "The sections below will provide explanations and examples of each Golden Rule, but first we need to introduce something that may seem like it belongs in a Computer Science course. However, we will see that it contains a number of useful guidelines and insight into how one can write readable Python code: the PEP-8 Style Guide." + ] + }, + { + "cell_type": "markdown", + "id": "a98880d3", + "metadata": {}, + "source": [ + "## Style Guide for Python (PEP 8)\n", + "\n", + "PEP stands for Python Enhancement Proposals, which is how the Python community describes new features and guidelines related to the development and (recommended) use of the programming language. Although there are many [PEPs](https://peps.python.org/), the MUDE Golden Rules are in part inspired by one in particular: [PEP 8 - Style Guide for Python Code](https://peps.python.org/pep-0008/). PEP 8 is important because it directly addresses the readability of Python code and is thus directly aligned with the objectives of our Golden Rules. Unfortunately, PEP 8 can also seem boring, tedious and unimportant to engineers who are learning to use Python. The Golden Rules are created in part to help with this, so for now, we ask that you read carefully the [first](https://peps.python.org/pep-0008/#introduction) and [second](https://peps.python.org/pep-0008/#a-foolish-consistency-is-the-hobgoblin-of-little-minds) sections of PEP 8 to help understand the general motivation, then scan through PEP 8 to get an idea of the structure. You may want to use the website [pep8.org](https://pep8.org/), which uses text formatting and is (visually) much easier to read. Don't just open the website; you should devote at least 15 minutes reading the text and example code and consider how this might influence the way you write code. You don't need to read every word, but rather focus on the background, justification and examples for the sections that are most relevant to us: \n", + "- Code lay-out: indentation, line length, line breaks, blank lines\n", + "- Whitespace\n", + "- Comments\n", + "- Naming conventions \n", + "\n", + "After around 15 minutes of reading, you should have a good idea of the general philosophy and specific ways to change the way you write Python code. 
Many of these guidelines are incorporated into the Golden Rules, but sometimes PEP 8 will provide more detail or background, and most importantly: example code. Therefore, you should bookmark the website and refer back to it often.\n", + "\n", + "You may have noticed that checking some of the PEP 8 guidelines could be quite tedious, like counting 4 spaces on indentation or checking to make sure there are no spaces at the end of a line. This is just the type of thing that computers are good at! Fortunately there are a number of tools available to autocheck (and even autocorrect) Python code based on some (but not all!) of the recommendations in PEP 8. The elements of PEP 8 that are most relevant to us are summarized here: \n", + "- Indentation\n", + "- Whitespace and blank lines\n", + "- Line length and line break location\n", + "- Import statements\n", + "\n", + "Compliance with PEP 8 is checked by the package [`pycodestyle`](https://pycodestyle.pycqa.org/), and you can see a detailed list of what is being checked in your code by looking at the [list of error codes](https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes). There are a few other PEP 8 guidelines that are not included in the list above or in our Golden Rules (specifically, issues in the statement and runtime groups in `pycodestyle`). If these come up as warnings or errors when reviewing your code with an autochecker you can read more about what the problem is in the PEP 8 documentation.\n", + "\n", + "The next section covers PEP 8 autocheckers, but you should **always remember: only a subset of the PEP 8 guidelines can be evaluated by automatic PEP 8 checkers and fixers.** For example, a computer cannot tell you whether or not your variable name is descriptive enough, or consider what variable name length is appropriate for the specific piece of code you are writing---this is something that you must determine on your own or with your team; it may be hard at first, but becomes easier with a bit of practice. To convey this perspective, we adapt the quote by Martin Fowler given above:\n", + "\n", + "\n", + "<div style=\"text-align: right\"><em>Any fool can use </em><code>autopep8</code><em> to comply with PEP 8 guidelines. Good engineers use PEP 8 carefully to write code that humans can understand.</em></div> \n", + "<div style=\"text-align: right\"><em> ― MUDE Golden Rules, adapted from Martin Fowler </em></div> " + ] + }, + { + "cell_type": "markdown", + "id": "a85327b9", + "metadata": {}, + "source": [ + "### Checking PEP 8 automatically with `autopep8`\n", + "\n", + "Although PEP 8 may look quite complicated and tedious at first glance, automatic PEP 8 formatters exist. The one we recommend you to use is `autopep8`, which can be applied on both `.py` files and `.ipynb` jupyter notebooks. A full explanation about the capabilities of this tool can be found at the [Python Package Index page](https://pypi.org/project/autopep8/). \n", + "\n", + "*Bonus tip: the Python Package Index, PyPI, is the official website for Python software, where most publicly available packages can be found, and is the default source when you install packages using `pip`.*\n", + "\n", + "#### Installation (Optional, but recommended)\n", + "In order to install `autopep8`, you can simply run the following `pip` command in your terminal:\n", + "```bash\n", + "pip install autopep8\n", + "```\n", + "\n", + "#### Usage\n", + "\n", + "You can run the following command to auto-format python files and jupyter notebooks in-place. 
After the command execution has finished, your files will have been autoformatted. Make sure you replace `<filename>` with the name and extension of the file you wish to auto-format.\n", + "```bash\n", + "autopep8 --in-place <filename>\n", + "```\n", + "\n", + "#### Additional Resources\n", + "\n", + "`autopep8` can be added as an extension to JupyterHub, which allows you to interact with the tool through a Jupyter Notebook rather than the terminal. This can be achieved with the help of a `Jupyter Nbextensions Configurator`, which you may need to install first, before setting up `autopep8` as an extension.\n", + "\n", + "Many of the IDEs such as Spyder and PyCharm contain add-ons that can be installed to automate the PEP 8 checks. Rather than providing a long list of installation instructions here, we'd encourage you to already get started with Rule X: using Google and Stack Overflow to help you identify and install a PEP 8 tool that will work for you. If you are using the JupyterHub, you can always copy and paste text into [PEP8 Online](http://pep8online.com/), as done in the first example above." + ] + }, + { + "cell_type": "markdown", + "id": "909271f9", + "metadata": {}, + "source": [ + "## Rule 1: Use descriptive names\n", + "\n", + "*My colleague/student constantly shares code with me but the variables are acronyms and are overwritten, which makes it difficult to read and use.*\n", + "\n", + "*X1, X2, X3... oh, again X1 (overwrite). Was the previous X1 important?*\n", + "\n", + "Often when you work, you will be sharing your code with other people and you need to make sure that they understand what each variable is doing and how it relates to the other variables. However, this may be difficult or even impossible when the variable names do not reflect their purpose.\n", + "\n", + "For example, variable names such as `a`, `a1`, `x`, `plot`, `plot1` may not be descriptive enough to use in your code. Additionally, `kruidnoten` may not be a good variable name for a weather time series plot.\n", + "\n", + "Coming up with descriptive names requires practice, but when in doubt don't be afraid to use longer names to be specific. It's easy to change names in a script later with find and replace. The only downside if your names are too long is that you have to add a few more line breaks (we give you a few tips for this in ***Golden Rule 2***); however, the downside of names that are too short is that you or a colleague has to spend hours in the future interpreting, or even rewriting your code.\n", + "\n", + "However, there are some names to avoid. Namely, never use the characters `l` (lowercase letter el), `O` (uppercase letter oh), or `I` (uppercase letter eye) as single character variable names. In some fonts, these characters are indistinguishable from the numerals one and zero. When tempted to use `l`, use `L` instead. Better yet: consider a variable name that is longer than one character.\n", + "\n", + "Function names should be lowercase, with words separated by underscores as necessary to improve readability: \n", + "`this_is_a_properly_formatted_function_name_its_too_long_but_very_readable`\n", + "\n", + "Variable names follow the same convention as function names. We will cover some of the other naming guidelines in later tutorials. ***For now, remember: functions and variables should be written in all lower case and underscores.*** That makes it easy to remember! For everything else, you can refer to PEP 8."
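+ ] + }, + { + "cell_type": "markdown", + "id": "0a1b2c3d", + "metadata": {}, + "source": [ + "*Below is a minimal sketch of this rule (the variable names and values are hypothetical, chosen only for illustration): the same calculation written twice, first with names that force the reader to guess, then with names that explain themselves.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d2c1b0a", + "metadata": {}, + "outputs": [], + "source": [ + "# Hard to interpret: what are a, x1 and x2?\n", + "a = 9.81\n", + "x1 = 2.0\n", + "x2 = 0.5 * a * x1**2\n", + "\n", + "# The same calculation with descriptive names\n", + "gravitational_acceleration = 9.81  # m/s^2\n", + "fall_time = 2.0  # s\n", + "fall_distance = 0.5 * gravitational_acceleration * fall_time**2\n", + "\n", + "print(f'fall_distance = {fall_distance:.2f} m')"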
+ ] + }, + { + "cell_type": "markdown", + "id": "a3fbc8c5", + "metadata": {}, + "source": [ + "## Rule 2: Make readable code\n", + "\n", + "*I hate it when a piece of code is passed to me that I cannot understand on its own.*\n", + "\n", + "This rule pretty much speaks for itself, and is also the Golden Rule most closely linked to PEP 8. So rather than rewrite it, we recommend that at the start you focus on the four aspects of PEP 8 listed here. And don't forget to read the document every once in a while to refresh your memory and see examples. \n", + "- blank lines are great for making space, but don't overuse them\n", + "- use indentation to make your line breaks readable\n", + "- space around operators, especially =, +, -, and after function arguments\n", + "- incorporate a maximum line length\n", + "\n", + "The 79 character maximum line length recommendation in PEP 8 is often a contentious one. It's important to limit line length because not all documentation software, word processors, text viewers, etc, know how to present long lines of code. Line breaks may be introduced in a way that makes the code completely unreadable, or worse, large portions of your code may not be visible when printed. However, you will find that in practice it is very difficult to limit your lines to 79 characters, especially if you are using descriptive variable and function names (Golden Rule 1). On the other hand, once you learn a few tricks for making easy line breaks (below), it becomes much easier. So, our advice is to *definitely* incorporate a maximum line length in your code, but start with 79 and feel free to increase it if it doesn't work for a certain project. It's generally not a problem to increase the maximum length to 100 or even 120 characters, and autoformatters can be set up to help enforce this. Whatever you do, just make sure that your lines don't get cut off or broken in a way that makes your code unreadable. To help, we provide a few tips for adding line breaks to your code. Can you find the PEP 8 violations in this example? Try copying this code into [pep8online.com](http://pep8online.com/) to test yourself.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63b5a341", + "metadata": {}, + "outputs": [], + "source": [ + "# You can always break items inside brackets wherever you like\n", + "my_sum = sum([1,\n", + " 2])\n", + "\n", + "bad_bracket_alignment = sum([1,\n", + " 2\n", + " ]) \n", + "\n", + "# When you split function arguments use indentation to make sure\n", + "# the entire function can be easily read with breaks\n", + "OK = sum([1, 2, 3, 4, 5, 6, 7, 8,\n", + " 9, 10, 11, 12])\n", + "\n", + "not_good = sum([1, 2, 3,\n", + " 4, 5, 6,\n", + " 7, 8, 9, 10, 11, 12])\n", + "\n", + "much_better = sum([1, 2, 3, 4,\n", + " 5, 6, 7, 8,\n", + " 9, 10, 11, 12])\n", + "\n", + "# Line breaks with strings\n", + "this_is_one_string_in_two_parts = (\"This string has to be split in two. \"\n", + " \"Notice how this line is aligned above.\\n\")\n", + "\n", + "print(this_is_one_string_in_two_parts) \n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "876df9a5", + "metadata": {}, + "source": [ + "## Rule 3: Make readable results\n", + "\n", + "*Your professors are old and their eyes are bad---why torture them with thin lines and tiny text on your figures?*\n", + "\n", + "There are courses and books devoted to this Golden Rule, not to mention professional editors and typesetters. 
While you should always make sure your results are readable, there are three main categories where it is easy to make significant improvements to readability and communication: figures, tables and numbers.\n", + "\n", + "### Figures\n", + "\n", + "You've gone through all the work to write code, run analyses, collect results and present them in figures. Why not spend a little extra effort to make the figure easy for your reader or audience to interpret? You should always make sure a figure contains enough information for a viewer to understand the main variables even if taken out of context, which can be done easily by including axis labels and a title, both with units, as well as a legend. Use of high contrast colors, symbols and line types is important if a lot of curves or data types are included. Finally, depending on the importance and use, consider: setting good axis limits and increments (e.g., ticks every 5 instead of 7), equal scale axes, grid lines, etc. \n", + "\n", + "Our advice is to get in the habit of always adding a few simple formatting commands, so that it is easy to repeat when it matters. Rather than take up space here with examples, keep an eye out for nicely formatted figures in MUDE notebooks.\n", + "\n", + "### Tables\n", + "\n", + "Similar to figures, make sure key information about the contents such as variable name, type and units are included. Limiting the number of significant figures to 3, adding more only when needed, can significantly improve the readability of a table. There are a number of tools available for creating tables, so we encourage you to use Google to find a few that work for your working method.\n", + "\n", + "### Significant digits \n", + "\n", + "Did you know that computers can store decimals up to 16 digits? If you let it, Python will try to return as many of these digits as possible. When you present results of your calculation with 6 or 8 decimal values, you give a false (and at times silly) impression of precision. For example, is that wind speed *really* 14.4613587 m/s? Especially when the accuracy of the sensor is +/-10% and the wind is gusting significantly? For most engineering purposes, 3 significant digits is plenty and can be considered a default. You can always add more if you have a good reason to do so.\n", + "\n", + "Fortunately Python has a great tool to help you use an appropriate number of significant digits: `f-strings`. A few quick examples are provided below, and you can [read more about f-strings here](https://realpython.com/python-f-strings/), or any of the other good resources online."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "725b7cc3", + "metadata": {}, + "outputs": [], + "source": [ + "from math import pi\n", + "# Easiest f-string:\n", + "print(f'Add f before string, use curly braces to round pi = {pi:.3f}')\n", + "print(f'Different formats are possible, pi = {pi:.3e}\\n')\n", + "\n", + "three_things = [1.21581651, 3.8, 3.43454]\n", + "\n", + "print('Unpack all numbers using the same format:')\n", + "print(*[f'{i:.3f}' for i in three_things])\n", + "\n", + "print('\\nNow list each on a separate line:')\n", + "print(*[f'{i:.3f}' for i in three_things], sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "id": "3bfcb9b1", + "metadata": {}, + "source": [ + "## Rule 4: Write simple functions, then use them\n", + "\n", + "*I hate it when students/colleagues copy-paste their code with small modifications instead of using functions or classes*\n", + "\n", + "Often in our code, it may be necessary to do the same thing multiple times. Without the use of functions, the code may become unreadable and there is also a high risk of making errors. Take as an example the calculation of daily water use for a residential house as a function of time. You would also like to consider how this demand changes throughout the day depending on who is living in the house, specifically, the number and ages of the residents, as well as the time of day. The following example illustrates how this is commonly evaluated when you don't think in advance about your code structure, or the use of functions.\n", + "\n", + "*Note that in reality each line described below may be multiple lines in the 'real' file.*\n", + "```\n", + "1 calculate water use for 1 adult resident\n", + "2 plot result\n", + " \n", + "3 copy of line 1: modified to consider teenager\n", + "4 copy of line 1: modified to consider child\n", + " \n", + "5 copy of line 2: plot redrawn with 3 lines\n", + " \n", + "6 copy lines 1, 4, 5 to sum water use for several users\n", + "```\n", + "Here are a few examples of how this style of code-writing can go wrong (numbers correspond to the line in the example): \n", + "- The code calculating water use as a function of time of day (1) may be wrong. Thus, the mistake will be repeated in multiple places (4, 5, 6). Extracting that code in a separate function and calling it multiple times is beneficial. \n", + "- The code becomes too long. As the calculations of water use (1) and plotting functions (2) in reality span multiple lines, the code can grow significantly. Therefore, using a separate function will reduce code duplication (3, 4, 5, 6). \n", + "- Functions (with descriptive names!) can increase code readability. Suppose you have water use calculations for different types of water users inside your code (e.g., industrial, commercial, residential). Making new functions for each will reduce the complexity of your code because the logic behind them will be placed somewhere else. \n", + "\n", + "```\n", + "1 define function for water use with resident as input\n", + "2 define function to plot water use (uses function on line 1)\n", + " \n", + "3 plot water use for one resident (uses line 2)\n", + "4 plot water use for multiple residents (uses line 2)\n", + "5 sum water use for several users (uses line 1)\n", + "```\n", + "\n", + "It is clear that lines 1 and 2 are used repeatedly, not duplicated. 
If we had written out these steps entirely, rather than outlining with pseudocode, the total number of lines would be significantly smaller because the steps to calculate water use (1) are written once. In addition, the instructions for formatting the figure can be included in the plotting function (2).\n", + "\n", + "This illustrates the concept of **modularity**: using functions to decompose the code into smaller pieces that minimize repetition and the chance of including errors. **Modules** and modular code are important general programming concepts, but also have particular significance for the Python language. This topic will be explained in a later workshop, so for now we encourage you to use functions and modularise your code. Simply take a few moments to think about 1) which part(s) of your analysis can be defined in a function, then 2) write the functions at the top of your notebook or script (after importing your packages) and use them below; hence the name of this rule: **write simple functions, then use them.** It looks like this:\n", + "\n", + "```\n", + "import Python packages\n", + "define your new functions\n", + "use your new functions\n", + "```\n", + "\n", + "Would you like to learn more about this? In more general programming or computer science fields, modularity is a result of applying the concepts of *decomposition* and *abstraction.* See Guttag (2021).\n", + "\n", + "*The next version of this document will also discuss hardcoding, and why it is better to pass fixed values into your function as arguments to keep the functions simple and easy to use.*" + ] + }, + { + "cell_type": "markdown", + "id": "bf647dda", + "metadata": {}, + "source": [ + "## Rule 5: Document your code\n", + "\n", + "*I had to hand over my code (semi-English, semi-Dutch, unreadable and a complete mess) to real programmers about a year ago and I am still embarrassed by it.*\n", + "\n", + "Do you remember what you had for dinner 2 weeks ago? If the answer is **no**, then documenting your code could be useful not only for others, but also for you.\n", + "\n", + "Document what your functions and classes are doing. In addition, if you have many nested for loops or some very complex code, do not hesitate to put some in-line comments to describe what your code is doing. Overall, do not overdo it by commenting every single line, but rather put the effort into the way you structure your code and the names you give your objects. This is why ***Rule 5: Document your code*** is closely tied to ***Rule 1: Use descriptive names***: if you do the latter, the use of the former is much less needed. Here are a few examples to illustrate unnecessary comments or poorly documented code: \n", + "\n", + "```\n", + "1 x = case.one + case.too #sum\n", + "\n", + "2 f = first.second.third.fourth() #gets the data\n", + "\n", + "3 engineeringConcept = 2 * thisAwesomeVariable #multiplication by 2\n", + "\n", + "4 global globalVariable ## define global variable globalVar\n", + "\n", + "5 for i in range(200): \n", + " for j in range(300):\n", + " for k in range(i,j):\n", + " answer = (i + j)**k\n", + "\n", + "6 if a and b or c and d or not b and not c:\n", + " print('here')\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "2c770286", + "metadata": {}, + "source": [ + "Did you notice in example 4 that the variable name in the comment didn't match the code? Clearly the variable name changed after the comment was written. To quote PEP 8: *comments that contradict the code are worse than no comments. 
Always make a priority of keeping the comments up-to-date when the code changes!* Relying too much on comments to document your code can easily result in a lot of extra work; use of descriptive object names can help minimize the number of comments necessary to provide sufficient documentation.\n", + "\n", + "### Comparison of bad and good documentation\n", + "\n", + "The following functions are the same; the only difference is the documentation and object names. Can you tell what the function does from the bad example?" + ] + }, + { + "cell_type": "markdown", + "id": "c0d4b841", + "metadata": {}, + "source": [ + "#### Example with bad documentation " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e57f2a3e", + "metadata": {}, + "outputs": [], + "source": [ + "def HF(h,p):\n", + " return 0.5*p*9.81*h**2\n", + " \n", + "print('HF(10,1) = ',HF(10,1))" + ] + }, + { + "cell_type": "markdown", + "id": "e93a3cd7", + "metadata": {}, + "source": [ + "#### Example with good documentation " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fda75b0f", + "metadata": {}, + "outputs": [], + "source": [ + "def force_on_wall(wall_height, density_water):\n", + " '''Calculate hydrostatic force on a vertical wall.'''\n", + " return 0.5*density_water*9.81*wall_height**2\n", + " \n", + "print(f'force_on_wall(wall_height=10, density_water=1) = {force_on_wall(10,1):.1f} N')" + ] + }, + { + "cell_type": "markdown", + "id": "b062d7e3", + "metadata": {}, + "source": [ + "It seems pretty obvious that the second example is more clear, and even if you understood what was happening in the first function, the second one probably would have brought you to the same conclusion much faster. And it didn't take much longer to write the second function.\n", + "\n", + "The example above includes a string immediately after the function definition line (`def`), except it uses three quote symbols. This is a ***documentation string***, also known as a \"***docstring***\": a key tool for documenting your code. It actually has its own PEP guideline ([PEP 257](https://peps.python.org/pep-0257/) if you are curious), but we will cover this more during a future week in MUDE. ***For now, our advice is to get in the habit of at least using a one-line docstring whenever you create a function.***\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69434da9", + "metadata": {}, + "outputs": [], + "source": [ + "def my_well_documented_function():\n", + " \"\"\"Illustrate a simple one-line docstring---that's it!\"\"\"\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "9cc60029", + "metadata": {}, + "source": [ + "You should use the imperative verb tense, a command, for example: return, integrate or compute. This helps keep the docstring compact. Compare the following one-liners with the non-imperative equivalents: \n", + "- `Compute area of circle` \n", + "- `Computes the area of a circle` \n", + "\n", + "And: \n", + "- `Return int number_of_PSOR_coins` \n", + "- `Returns the number of PSOR coins as an integer` \n", + "\n", + "If you told us that it seems silly to write a one-line docstring for such a simple function, especially with such descriptive object names, we'd actually agree with you, especially if you were working in a small group of CEG colleagues, or by yourself. However, there is another benefit of adding a docstring: Python automatically knows to include the first line of a docstring in its built-in help functionality. 
Try it out here by adding a `?` after the function name (this is a handy trick that works in Jupyter notebooks)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65dbd634", + "metadata": {}, + "outputs": [], + "source": [ + "my_well_documented_function?" + ] + }, + { + "cell_type": "markdown", + "id": "44256b0b", + "metadata": {}, + "source": [ + "You can also use the `help()` function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1a76cfc", + "metadata": {}, + "outputs": [], + "source": [ + "help(my_well_documented_function)" + ] + }, + { + "cell_type": "markdown", + "id": "e95e7e91", + "metadata": {}, + "source": [ + "## Rule 6: Test your code\n", + "\n", + "*A team member once sent me a script that predicts X for the next 5 weeks. Unfortunately, the script gave me the exact same predictions every time I used it.*\n", + "\n", + "Testing that your code works properly is very important to mitigate such problems. Testing can be done in many ways, but the most simple of all is by manually running your code with different values and verifying that the final result is indeed correct.\n", + "\n", + "In one of the next workshops, we are going to teach you how to debug and test your code; however, for now the recommendation is to test if your code is doing its job correctly by trying different variable values.\n", + "\n", + "For example, suppose you have a method that calculates the volume of a pyramid given its height and base width and length. Testing the method can be done by trying different values for each of the 3 inputs and manually checking if the final volume matches your hand calculations. " + ] + }, + { + "cell_type": "markdown", + "id": "7428c43d", + "metadata": {}, + "source": [ + "## Rule 7: Learn to collaborate\n", + "\n", + "*Let me just make a new zip file of the 200MB project and send it to you via Google Drive...*\n", + "\n", + "Exchanging project archives and lack of version control are key problems in project collaborations. First, the changes made are not explicitly visible, unless you go over the entire project and compare new and old versions. Second, such exchanges of files are dangerous because one may confuse which is the final version. This is especially common with file names, for example, \"Document-1\", \"Document-2\", \"Document-2-Draft\", \"Document-2-DraftFinal\", \"Document-2-DraftFinal2\", et cetera. Third, it hardly allows working in parallel, as merging work requires manual copy-pasting of each member's progress.\n", + "\n", + "In one of the coming coding workshops, we will cover version control systems, namely collaborating using `git` and GitLab. Although it may seem complicated at first, we are confident that once you learn about the capabilities of these systems, you will not regret it. 😉" + ] + }, + { + "cell_type": "markdown", + "id": "d8658baf", + "metadata": {}, + "source": [ + "## References and resources\n", + "\n", + "Guttag, J. V. (2021). 
*Introduction to Computation and Programming Using Python: With Application to Computational Modeling and Understanding Data.* MIT Press.\n", + "\n", + "PEP 8 - Style Guide for Python code - https://peps.python.org/pep-0008/\n", + "\n", + "Python 3's f-Strings: An Improved String Formatting Syntax (Guide) - https://realpython.com/python-f-strings/" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "b3d2c42f8ea709076bc5b173c4ba6d021b4bbbddbef7af6142452ecb0f2070bf" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/book/programming/overview.md b/book/programming/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..1558d33ef448f516add608a75facbcfac73ffaaa --- /dev/null +++ b/book/programming/overview.md @@ -0,0 +1,36 @@ +# Programming Overview + +Riccardo's ideas. For now, when in doubt add content to the JB. Can move text to website later as needed. +1. theory + best practices with examples for efficient numerical practices +2. theory + best practices with examples of for loops (and not for loops) +3. Visualization: + 1. how to reduce the size of images (matplotlib plots) when large datasets are used (can use SHIP project as a case study) + 2. advanced figure creation: defining axes as objects and modifying attributes (I have some recent examples from another TA if needed) + 3. saving a figure (vector/raster; file types; optimize file size; automatically regenerate it; add a test?) +4. Memory considerations: how can you tell whether or not your code is working efficiently? (e.g., loading data, copies of objects, recopying or reloading on every nb run) +5. Run time: + 1. may need a bit of theory about computations, processor speeds, cores, comparison of disk, RAM, CPU/GPU etc, but doesn't need to be long, and perhaps we can find existing content with CC license to include directly (embedded video, copy content, direct link all ok...this applies to all generic programming and coding topics) + 2. strategies for evaluating code efficiency and identifying slowest parts to improve: in a notebook and in an IDE (VS Code). + 3. saving results of analyses---would this be pickle? ideally we should teach them how to create a JSON and binary (pickle) format, then they choose what's best (for example, the JSON is readable and may be useful to share with a colleague who cannot run nb's in combination with a PDF printout of the nb). This is a great example where we should have content in the book that then is referred to as needed when students run into issues in submitting reports (i.e., their nb is 10 MB, now what?) +6. IDE's: a brief overview, then a recommendation and case study with Jupyter Lab and VS Code +7. package management: we need some theory on this, but it should be concise. Since Robert is already working with the TA's about environments, maybe it's best to leave this one alone till the end +8. 
Some handy tricks (we can make a special page on the website to quickly illustrate things described above, and provide links to the theory, explanation and case studies in the textbook) + + + +Loosely broken down into 4 Chapters, with some ideas for possible contents (will fine tune location as we create it!): + +1. Golden Rules (part of Communication, but introduced at beginning to set the stage) +2. Computers: essential concepts for Scientific Computation and Numerical Analysis + 1. Machine code, compilers, low- and high-level languages; Bits and bytes, storage, memory, precision; File types (e.g., txt, csv, ipynb, py, md; text and binary); +Data types (e.g., csv, JSON, netcdf, etc); FLOPS, run time; Complexity +3. Code: Instructions. Executed. + 1. Programming language (Python), Environments and package management, Fundamentals of programming, Executing code, Other languages +4. Programs: essentials for scientific computing + 1. Modular programming; Object-oriented programming, Version control, Debugging and error handling +5. Communication (and documentation) + 1. Golden Rules, Visualization, Version control, Sharing your work (Static (*.ipynb, *.pdf, *.html) and Dynamic/Interactive (something with a Python kernel), Open-Source Projects / Collaboration + 2. some of this may be moved to the website; most will be short, except git + + +Software is also considered a programming category, but that will be covered exclusively on the website. \ No newline at end of file diff --git a/book/programming/programs/overview.md b/book/programming/programs/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..364140a33a1512e67774c2755030e13d03d09a37 --- /dev/null +++ b/book/programming/programs/overview.md @@ -0,0 +1,16 @@ +# Programs + +text. Can briefly mention programming paradigms and that we focus on object-oriented and functional. FORTRAN was the old standard in engineering; it is imperative (procedural). + +Things that will definitely go here: +- Objects + - theory: what it is, why it's useful + - Case studies: interactive pages that focus on variables, functions, figures as objects +- Modular programming workshop + - importing py files + - connection to how packages function (explore with interactive features) +- Writing efficient programs for numerical analysis + - add theory missing from Code chapter + - explicit case studies illustrating best practices + - examples that quantitatively show the effect of bad practices + - e.g., sparse matrices, for loops and why they should be avoided \ No newline at end of file diff --git a/book/sandbox/SanPart/ObservationTheory/01_Introduction.md b/book/sandbox/SanPart/ObservationTheory/01_Introduction.md index 67d40762b934b726d05d9df3cc331f94b0362b81..0d99fb301a2f2cf255061a25cce7c9c177f88b99 100644 --- a/book/sandbox/SanPart/ObservationTheory/01_Introduction.md +++ b/book/sandbox/SanPart/ObservationTheory/01_Introduction.md @@ -2,7 +2,7 @@ As in other physical sciences, empirical data are used in Civil and Environmental Engineering and Applied Earth Sciences to make inferences so as to describe physical reality. Many such problems involve the determination of unknown model parameters that bear a functional relationship to the set of observed data or measurements. This determination or parameter estimation can be based on different estimation principles. -From experience we know that various uncertain phenomena can be modeled as a random variable (or a random vector), say $Y$. 
Examples are: the uncertainty in instrument readings due to measurement errors; the variable strength of material of a certain type, the variable lifetime of pieces of equipment of a particular type after they have been put into service; the number of "hits" in a certain time interval on a particular Web page; the variable size, weight, or contents of products of a particular type delivered by a production line, etcetera. We will refer to these random variables (our iput data) $Y$ as the *observables*. +From experience we know that various uncertain phenomena can be modeled as a random variable (or a random vector), say $Y$. Examples are: the uncertainty in instrument readings due to random measurement errors; the variable strength of material of a certain type, the variable lifetime of pieces of equipment of a particular type after they have been put into service; the number of "hits" in a certain time interval on a particular Web page; the variable size, weight, or contents of products of a particular type delivered by a production line, etcetera. We will refer to these random variables (our input data) $Y$ as the *observables*. The unknown model parameters will be denoted as $\mathrm{x}$. The ingredients for this part on sensing and observation theory can then be summarized as follows: * a model to describe relation between observables $Y$ and parameters of interest $\mathrm{x}$ @@ -16,28 +16,37 @@ The *functional model* describes the functional relationship between the observe In case the functional relationship is linear, the functional model will have the form: $$ -\mathbb{E}(Y) = \mathrm{Ax} +\mathbb{E}(Y) = \mathrm{Ax}\quad \text{or} \quad Y=\mathrm{Ax}+\epsilon $$ -where $\mathrm{x}$ is a vector with the unknown model parameters, and $\mathrm{A}$ is referred to as the *design matrix*. $\mathbb{E}(.)$ is the expectation operator. +where $\mathrm{x}$ is a vector with the unknown model parameters, and $\mathrm{A}$ is referred to as the *design matrix*. $\mathbb{E}(.)$ is the expectation operator. The random errors are given by $\epsilon$ and are assumed to have zero mean: $\mathbb{E}(\epsilon)=0$. ``` -Some examples will be shown in [the next subsection](01_funcmodel) +MMM + +EXERCISE: show that the two expressions in the Definition above are indeed the same. + +Some examples of functional models will be shown in [the next subsection](01_funcmodel) Next, we will develop the principles and underlying theory of (1) Least-squares estimation, (2) Best linear unbiased estimation, and (3) Maximum likelihood estimation. Special attention is then given to how the uncertainty in the measurements propagates into parameter estimation results, such that we can assess the precision of the estimated parameters. For that purpose we need the stochastic model. ```{admonition} Definition -The *stochastic model* describes the uncertainty of the observables in the form of the covariance matrix $\Sigma_Y$. +The *stochastic model* describes the uncertainty of the observables in the form of the covariance matrix $\Sigma_Y=\Sigma_{\epsilon}$. ``` -Parameter estimation requires specification of the underlying functional and stochastic models. It may happen, however, that some parts of the models are misspecified, thereby invalidating the results of estimation. Some measurements, for instance, may be corrupted by blunders, or the chosen model my fail to give an adequate description of physical reality. +This covariance matrix is assumed to be known here. 
In practice, it can be determined based on a calibration campaign: taking repeated measurements and calculating the empirical (co-)variances. Note that the uncertainty in the observations is fully attributed to the random errors; therefore, the covariance matrix of the observables is equal to that of the random errors. + +Parameter estimation requires specification of the underlying functional and stochastic models. It may happen, however, that some parts of the models are misspecified, thereby invalidating the results of estimation. Some measurements, for instance, may be corrupted by blunders, or the chosen model may fail to give an adequate description of physical reality. +Testing for such misspecifications is the topic of the last section of this chapter. (01_funcmodel)= -## Functional model: examples +## Functional and stochastic model: examples +MMM Relate to 'modelling' in MUDE. +introduce redundancy + ## Estimation and linear regression diff --git a/book/sandbox/SanPart/ObservationTheory/02_LeastSquares.md b/book/sandbox/SanPart/ObservationTheory/02_LeastSquares.md index 5e144ce36cc0e96f97b1a5a4cb5c2c84ec387a78..c21660d273adc0369186f2e9053a9f22f010d41b 100644 --- a/book/sandbox/SanPart/ObservationTheory/02_LeastSquares.md +++ b/book/sandbox/SanPart/ObservationTheory/02_LeastSquares.md @@ -3,17 +3,19 @@ ## Functional model: dealing with inconsistency Given a set of observations which contain noise, and a model that is assumed to explain these data, the goal is to estimate the unknown parameters of that model. The least-squares principle can be used to solve this task. -Mathematically, we use the ‘system of observation equations’, with the vector of observations $\mathrm{y}$, the vector of unknowns $\mathrm{x}$, and the design matrix $\mathrm{A}$. In a simple form, we can write this system of observation equations as +Mathematically, we use the ‘system of observation equations’, with the $m$-vector of observations $\mathrm{y}$, the $n$-vector of unknowns $\mathrm{x}$, and the $m \times n$ design matrix $\mathrm{A}$. In a simple form, we can write this system of observation equations as $$ \mathrm{y = Ax} $$ -If all the observations would fit perfectly to this model, this system is called [consistent](PM_consistent). This is only possible if the number of observations is equal to (or smaller than) the number of unknowns. +If all the observations fit perfectly to this model, this system is called [consistent](PM_consistent). This is only possible if the number of observations $m$ is equal to (or smaller than) the number of unknowns $n$, i.e. if the system is [determined](determined). If the number of observations is greater than the number of unknowns (and the design matrix $\mathrm{A}$ is of full column rank), it is very unlikely that the system is consistent. Physical reality would need to be perfectly described by the conceptual mathematical model. It is evident that in real life this is never the case, since (i) our observations are always contaminated by some form of noise, and (ii) physical reality is often more complex than can be described with a simplified mathematical model. -Thus, in the case in which there are more observations than unknowns (and design matrix $\mathrm{A}$ is of full column rank) the $\mathrm{y=Ax}$ system of equations has no solution. In other words; every 'solution' would be wrong, since the model would not 'fit' the data. 
+Thus, in the case in which there are more observations than unknowns (and design matrix $\mathrm{A}$ is of full column rank) the $\mathrm{y=Ax}$ system of equations has no solution; this is referred to as an [overdetermined system](determined). In other words, every 'solution' would be wrong, since the model would not perfectly 'fit' the data. + +The *redundancy* of the system is equal to $m-n$, i.e., the 'additional' number of observations compared to the minimum required to solve the system of equations. ### Example: linear trend model Assume we have $m$ observations and we try to fit the linear trend model: @@ -71,7 +73,7 @@ $$ \mathrm{\hat{x}} =\arg \underset{\mathrm{x}}{\min} {\mathrm{(y-Ax)^T (y-Ax)}}. $$ - In other words, we find $\mathrm{\hat{x}}$ by finding the minimum of $\mathrm{(y-Ax)^T (y-Ax)}$. +In other words, we find $\mathrm{\hat{x}}$ by finding the minimum of $\mathrm{(y-Ax)^T (y-Ax)}$. ## Least-squares solution We can find the minimum of a function by taking the first derivative with respect to the 'free variable'. Since the observation vector is not free, and also the design matrix $A$ is fixed, the only variable which we can vary is $x$. The first derivative of our objective function should be equal to zero to reach a minimum (see [Gradient](PM_gradient)): diff --git a/book/sandbox/SanPart/ObservationTheory/03_WeightedLSQ.md b/book/sandbox/SanPart/ObservationTheory/03_WeightedLSQ.md index af66d98d213d75b95686c8686b04022673506031..64ec8c7d828a489ef822ebff34cfaa167d38fc74 100644 --- a/book/sandbox/SanPart/ObservationTheory/03_WeightedLSQ.md +++ b/book/sandbox/SanPart/ObservationTheory/03_WeightedLSQ.md @@ -1,25 +1,35 @@ +(03_wls)= # Weighted least-squares estimation In ordinary least-squares estimation, we assume that all observations are equally important. In many cases this is not realistic, as observations may be obtained by different measurement systems, or under different circumstances. We want our methodology for least-squares estimation to be able to take this into account. -We achieve this goal by introducing a weight matrix in the normal equation. In the unweighted least-squares approach, we arrive at the normal equation by pre-multiplying both sides of $\mathrm{y=Ax}$ with the transpose of the design matrix $\mathrm{A^T}$: +We achieve this goal by introducing a weight matrix in the minimization problem: + +$$ +\underset{\mathrm{x}}{\min} {\mathrm{(y-Ax)^T W(y-Ax)}} +$$ + +In the unweighted least-squares approach, we arrive at the normal equation by pre-multiplying both sides of $\mathrm{y=Ax}$ with the transpose of the design matrix $\mathrm{A^T}$: $$ \mathrm{y=Ax} \; \rightarrow \; \mathrm{A^T\; y = A^T\; A x } $$ -In the weighted least-squares approach, we add weight matrix $W$ to this pre-multiplication factor, i.e., $ \mathrm{A^T W}$, to obtain the normal equation: +In the weighted least-squares approach, we now need to add weight matrix $W$ to this pre-multiplication factor, i.e., $ \mathrm{A^T W}$, to obtain the normal equation: $$ \mathrm{y=Ax} \; \rightarrow \; \mathrm{A^T W \; y = A^TW \; A x} $$ -The normal matrix is now defined as $\mathrm{N=A^T W A}$. 
From this, assuming that the normal matrix $N$ is invertible (non-singular), we find the weighted least-squares estimate $ \mathrm{\hat{x}} $, $$ -\mathrm{\hat{x} = (A^T W A)^{-1} A^T W y} +\begin{align*} +\mathrm{\hat{x}} &= \mathrm{(A^T W A)^{-1} A^T W y} \\ + &= \arg \underset{\mathrm{x}}{\min} {\mathrm{(y-Ax)^T W(y-Ax)}} +\end{align*} $$ -We also find the derived estimate $ \mathrm{\hat{y}} $ and $ \mathrm{\hat{e}} $: +We also find the derived estimates $ \mathrm{\hat{y}} $ and $ \mathrm{\hat{\epsilon}} $: $$ \mathrm{\hat{y} = A \hat{x} = A (A^T W A )^{-1} A^T W y} diff --git a/book/sandbox/SanPart/ObservationTheory/05_Precision.md b/book/sandbox/SanPart/ObservationTheory/05_Precision.md index 4d62003c0d539eef710a6481a35313cf87bc71b8..14ba165f9682345191a2c8f1cd2ac07deb9f1491 100644 --- a/book/sandbox/SanPart/ObservationTheory/05_Precision.md +++ b/book/sandbox/SanPart/ObservationTheory/05_Precision.md @@ -1,3 +1,4 @@ +(05_precision)= # Precision and confidence intervals When evaluating and reporting estimation results, it is important to give a quality assessment. Thereby we should distinguish between: * the precision of the estimated parameters (topic of this part) @@ -126,4 +127,14 @@ name: CI_model Confidence interval of fitted model, including error bars for the individual adjusted observations $\hat{y}_i$. ``` -Note that covariance matrices can be computed without the need for actual observations. Hence, we can also set up a functional model including future epochs. Let's call the corresponding design matrix $\mathrm{A_p}$, then $\Sigma_{\hat{Y}_p}= \mathrm{A_p}\Sigma_{\hat{X}} \mathrm{A_p^T}$. Note that the $\Sigma_{\hat{X}}$ is based on the original model, since the fitted model is only based on the actual observations. In this way, we can visualize the uncertainty of extrapolated values based on the fitted model. \ No newline at end of file +Note that covariance matrices can be computed without the need for actual observations. Hence, we can also set up a functional model including future epochs. Let's call the corresponding design matrix $\mathrm{A_p}$, then $\Sigma_{\hat{Y}_p}= \mathrm{A_p}\Sigma_{\hat{X}} \mathrm{A_p^T}$. Note that the $\Sigma_{\hat{X}}$ is based on the original model, since the fitted model is only based on the actual observations. In this way, we can visualize the uncertainty of extrapolated values based on the fitted model. + +## Factors influencing the precision +As can be seen from the expression $\Sigma_{\hat{X}}=(\mathrm{A^T} \Sigma_Y^{-1} \mathrm{A})^{-1}$, the precision of the estimates depends on the: +* design matrix $\mathrm{A}$: proper design of the measurement set-up can improve the precision +* covariance matrix $\Sigma_Y$: more precise observations will result in more precise estimates + +Not immediately clear from the expression is that the redundancy is also an important factor: more observations will improve the precision, due to the averaging of the random errors (if $m$ is large enough, the mean error will go to zero). + +MMM +NOTEBOOK to show this. \ No newline at end of file diff --git a/book/sandbox/SanPart/ObservationTheory/06_MLE.md b/book/sandbox/SanPart/ObservationTheory/06_MLE.md index 3ebed9846af9c67b0de7371de9341e569fbf2ebb..76da59fff6d23c8c0b930ee5dc4c9964ef748d17 100644 --- a/book/sandbox/SanPart/ObservationTheory/06_MLE.md +++ b/book/sandbox/SanPart/ObservationTheory/06_MLE.md @@ -8,25 +8,25 @@ $$ in order to estimate the unknown parameters $\mathrm{x}$. 
However, the solution $\hat{X}$ does not depend on the underlying distribution of $Y$. ## Maximum likelihood principle -The principle of Maximum Likelihood estimation (MLE) is to find the *most likely* $\mathrm{x}$ given a realization $\mathrm{x}$ of the $Y$. +The principle of Maximum Likelihood estimation (MLE) is to find the *most likely* $\mathrm{x}$ given a realization $\mathrm{y}$ of $Y$. This boils down to estimating the unknown parameters $\alpha$ of the underlying distribution, which means that the probability density function (PDF) is known apart from the $n$ parameters in $\alpha$. We will now distinguish between a PDF and likelihood function. -A *probability density function* $f_Y(\mathrm{y}|\alpha)$ with $\alpha$ known as function of $\mathrm{y}$. +A *probability density function* $f_Y(\mathrm{y}|\alpha)$ is given as a function of $\mathrm{y}$ with $\alpha$ known. -A *likelihood function* $f_Y(\mathrm{y}|\alpha)$ for a given realization $\mathrm{y}$ as function of all possible values of $\alpha$. +A *likelihood function* $f_Y(\mathrm{y}|\alpha)$ is given for a certain realization $\mathrm{y}$ as a function of all possible values of $\alpha$. -The goal is to find the $\alpha$ which maximizes the likelihood function for the given realization $\mathrm{y}$. +With MLE, the goal is to find the $\alpha$ which maximizes the likelihood function for the given realization $\mathrm{y}$. ### Example exponential distribution: -If $Y\sim \text{Exp}(\lambda)$, the goal would be to estimate $\lambda$ based a realization $\mathrm{y_{obs}}$ of $Y$. The {numref}`MLEexp` shows the PDF of $Y$ +If $Y\sim \text{Exp}(\lambda)$, the goal would be to estimate $\lambda$ based on a realization $\mathrm{y_{obs}}$ of $Y$. The {numref}`MLEexp` shows the PDF of $Y$ $$ f_Y(\mathrm{y}|\lambda)=\lambda \exp(-\lambda \mathrm{y}) $$ -for different possible values of $\lambda$. The likelihood function for the given $\mathrm{y_{obs}}$ is shown on the right-hand side. It is shown that for instance the likelihood value for $\lambda_1$ is equal to the corresponding density value in the left panel. The maximum likelihood estimate $\hat{\lambda}$ is the value for which the likelihood function is maximized as shown in the figure. +for different possible values of $\lambda$. The likelihood function for the given $\mathrm{y_{obs}}$ is shown on the right-hand side. It is shown that for instance the likelihood value for $\lambda_1$ is equal to the corresponding density value in the left panel. The maximum likelihood estimate $\hat{\lambda}$ is the value for which the likelihood function is maximized, in this case $\hat{\lambda}=\lambda_2$, as shown in the figure. ```{figure} ../figures/ObservationTheory/06_MLEexp.png --- @@ -84,7 +84,7 @@ You will not be asked to derive the MLE solution as above. If we now replace the realization $\mathrm{y}$ by the random observable vector $Y$ we obtain the *Maximum Likelihood estimator* of $\mathrm{x}$: $$ -\hat{X}= (\mathrm{A^T} \Sigma_Y^{-1} \mathrm{A})^{-1} \mathrm{A^T} \Sigma_Y^{-1} \mathrm{Y} +\hat{X}= (\mathrm{A^T} \Sigma_Y^{-1} \mathrm{A})^{-1} \mathrm{A^T} \Sigma_Y^{-1} Y $$ It follows that the Maximum Likelihood estimator of $\mathrm{x}$ is identical to the BLUE **if** $Y$ is normally distributed. 
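To make the estimator concrete, here is a minimal numpy sketch of the expression $\hat{x} = (\mathrm{A}^T \Sigma_Y^{-1} \mathrm{A})^{-1} \mathrm{A}^T \Sigma_Y^{-1} \mathrm{y}$; the linear trend model, the observation values and the diagonal covariance matrix are illustrative assumptions, not data from the book:

```python
import numpy as np

# Illustrative linear trend model: y_i = x0 + x1 * t_i (made-up observations)
t = np.array([0., 1., 2., 3., 4.])
y = np.array([1.1, 1.9, 3.2, 3.8, 5.1])

A = np.column_stack((np.ones_like(t), t))   # m x n design matrix
Sigma_Y = 0.2**2 * np.eye(len(y))           # assumed covariance matrix of Y

# BLUE / MLE (for normally distributed Y): solve the normal equations
W = np.linalg.inv(Sigma_Y)
xhat = np.linalg.solve(A.T @ W @ A, A.T @ W @ y)
print(xhat)   # estimated intercept and slope, approx. [1.04, 0.99]
```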
diff --git a/book/sandbox/SanPart/ObservationTheory/08_Testing.md b/book/sandbox/SanPart/ObservationTheory/08_Testing.md index effca3093683902d31b261d5a900fa999c0cb545..679ba91069cabe8349740ee7afb06dbe41cf1a44 100644 --- a/book/sandbox/SanPart/ObservationTheory/08_Testing.md +++ b/book/sandbox/SanPart/ObservationTheory/08_Testing.md @@ -1,5 +1,106 @@ (08_testing)= # Model testing -How well does the model fit the data? +In Chapter [Precision and confidence intervals](05_precision) one part of the quality assessment was presented. But evaluating the precision does not tell us how well the model fits the data we collected. It might be that we worked with a wrong functional model (e.g., too simplistic), or a wrong stochastic model. Or it may be that our observations are affected by blunders or systematic biases. -First idea based on plotting observations, fitted model and confidence intervals... \ No newline at end of file +In some cases, a first indication that something may be wrong can be obtained by plotting the observations and the fitted model together with the confidence intervals. If the residuals are large compared to the confidence interval, SEE EXAMPLE, this is obviously an indication that something is wrong. However, this is less straightforward for models with more than two unknown parameters. Moreover, the question remains: is the fit good enough or not? And if not, what is the underlying misspecification/error? + +To answer these questions, we need to apply statistical hypothesis testing. In this chapter, we will introduce the hypothesis testing principle, and then explain how it can be applied in sensing and monitoring to: +* test for blunders or systematic biases in the observations; +* test for misspecifications of the functional model and/or decide between two competing hypotheses regarding the functional model. + +## Statistical hypothesis testing: principle +Statistical hypothesis testing means that we apply a certain test to decide between two (or more) competing hypotheses regarding the underlying model (in our case the functional or stochastic model). Therefore we assume a nominal model, corresponding to the null hypothesis $\mathcal{H}_0$, and another model for the alternative hypothesis $\mathcal{H}_a$. + +An important note to be made here is that the null hypothesis is presumed to be true unless the data provides convincing evidence against it (i.e., there is significant misfit). + +> This is similar to a court of law: a defendant is presumed to be innocent until proven guilty based on convincing evidence. + +### Example laser distometer +We would like to test whether a laser distometer is well-calibrated, implying that the mean error of repeated measurements should be zero (null hypothesis). An alternative hypothesis in this case could be that all measurements are affected by a certain constant offset (i.e., systematic bias). We can also write this as: + +$$ +\begin{align*} +\mathcal{H}_0: \mathbb{E}(\epsilon) &= 0 \\ +\mathcal{H}_a: \mathbb{E}(\epsilon) &= \nabla +\end{align*} +$$ + +*** + +The next step is then to define a so-called *test statistic* $T$ with a known distribution. Based on the actual observations, a realization of this test statistic can be computed and it can be assessed whether this realization is a likely outcome given the distribution. This can be illustrated based on the previous example. 
### Example laser distometer (continued) +Assuming that the random errors of the observed distances with the laser distometer are normally distributed with zero mean and standard deviation $\sigma$, a logical choice for the test statistic would be simply to use the mean error. To test whether the laser distometer is well-calibrated, we measure a known distance $d$ $m$ times, such that we can calculate the errors $\epsilon_i = y_i - d$ ($i=1,\ldots,m$). + +We thus have for the null and alternative hypotheses: + +$$ +\begin{align*} +\mathcal{H}_0: T= \frac{1}{m}\sum_{i=1}^{m} \epsilon_i \sim N(0,\sigma^2/m) \\ +\mathcal{H}_a: T= \frac{1}{m}\sum_{i=1}^{m} \epsilon_i \sim N(\nabla ,\sigma^2/m) +\end{align*} +$$ + +{numref}`H0Ha` shows the PDFs of the test statistic under the null and alternative hypothesis. Based on the distributions, the null hypothesis is obviously more likely if the observed mean error (realization of $T$) is smaller than $k$. Therefore the null hypothesis would be accepted if $T \leq k$ and rejected otherwise. The corresponding *acceptance* and *critical region* are also shown. + +```{figure} ../figures/ObservationTheory/08_H0Ha.png +--- +height: 250px +name: H0Ha +--- +PDFs of test statistic $T$ under null and alternative hypothesis, with threshold value $k$. +``` +*** + +Even though the choice for the threshold value in the example above may seem logical, it does not account for the requirement that there should be *convincing evidence* against the null hypothesis being true. Even though the probability $P(T>k | \mathcal{H}_0)$ is small, it is not zero! A better approach would therefore be to determine the threshold value based on the choice of an acceptable *false alarm probability* $\alpha$, since a false alarm (also referred to as type I error) corresponds to rejecting the null hypothesis while it is true. + +In the example, the bias was assumed to be positive, such that there is a right-side critical region, but in practice the bias may also be negative (left-side critical region), or we may have a two-sided critical region. + +```{admonition} Definition +The false alarm or type I error probability $\alpha$ is defined as: + +$$ +\alpha = P(T \in \mathcal{C}|\mathcal{H}_0) +$$ +where $\mathcal{C}$ is the critical region. +``` + +See {numref}`H0Ha_alpha`, where this principle has been applied for the same example as above. Given that under the null hypothesis $T\sim N(0,\sigma^2/m)$, we can easily determine the threshold value $k_{\alpha}$ for a given $\alpha$ using the inverse CDF. The critical region $\mathcal{C}$ is thus given by $T>k_{\alpha}$. + +```{figure} ../figures/ObservationTheory/08_H0Ha_2.png +--- +height: 250px +name: H0Ha_alpha +--- +PDFs of test statistic $T$ under null and alternative hypothesis, with false alarm probability $\alpha$ and threshold value $k_{\alpha}$. +``` + +Besides the probability of a false alarm, the figure also shows the probability of *false acceptance* of $\mathcal{H}_0$, referred to as the type II error probability or missed detection probability. In this case, the alternative hypothesis is true, but still the null hypothesis is accepted. + +(betagamma)= +```{admonition} Definitions +The missed detection or type II error probability $\beta$ is defined as: + +$$ +\beta= P(T\in \mathcal{C}|\mathcal{H}_a) +$$ +where $\mathcal{C}$ is the critical region. + +The detection power or probability of detection is: + +$$ +\gamma = 1-\beta +$$ +``` + +Ideally, both the type I and type II error probabilities should be small. 
However, as can be seen from the figure, choosing a smaller value for $\alpha$ implies a larger $\beta$ and vice versa. + +The *decision matrix* in {numref}`decision` summarizes the four types of decisions and corresponding probabilities. + +```{figure} ../figures/ObservationTheory/08_decision.png +--- +height: 150px +name: decision +--- +Decision matrix for hypothesis testing. +``` \ No newline at end of file diff --git a/book/sandbox/SanPart/ObservationTheory/09_TestingSandM.md b/book/sandbox/SanPart/ObservationTheory/09_TestingSandM.md new file mode 100644 index 0000000000000000000000000000000000000000..b5a2832ae8a12c5cd470e8cc03baaddc8a85c2dd --- /dev/null +++ b/book/sandbox/SanPart/ObservationTheory/09_TestingSandM.md @@ -0,0 +1,154 @@ +# Hypothesis testing for Sensing and Monitoring +Statistical hypothesis testing is applied in many fields and in different forms. Here, we will restrict ourselves to applications in observation theory with the following general form for the hypotheses: + +$$ +\begin{align*} +\mathcal{H}_0: \mathbb{E}(Y) &= \mathrm{Ax}; \; &\mathbb{D}(Y)=\Sigma_Y \\ +\mathcal{H}_a: \mathbb{E}(Y) &= \mathrm{Ax + C\nabla} = \begin{bmatrix}\mathrm{A} & \mathrm{C}\end{bmatrix}\begin{bmatrix}\mathrm{x} \\ \nabla\end{bmatrix};\; &\mathbb{D}(Y)=\Sigma_Y +\end{align*} +$$ + +where $\nabla$ is a $q$-vector with extra parameters compared to the nominal model under $\mathcal{H}_0$, and $\mathrm{C}$ is the $m\times q$ matrix augmenting the design matrix. + +### Examples +Example for outlier +Example for linear trend versus second-order polynomial + +*** + +## Testing procedure + +### Step 1 is to apply BLUE for both the null and alternative hypotheses + +Under $\mathcal{H}_0$ we obtain the estimator of $\mathrm{x}$ and the residuals as: + +$$ +\begin{align*} +\hat{X}&=(\mathrm{A}^T\Sigma_Y^{-1}\mathrm{A})^{-1}\mathrm{A}^T\Sigma_Y^{-1}Y \\ +\hat{\epsilon} &= Y - \mathrm{A}\hat{X} +\end{align*} +$$ + +The residuals are an important measure of the misfit between individual observations and the fitted model. Recall that BLUE provides us with the solution for $\mathrm{x}$ which minimizes the weighted squared norm of residuals $\hat{\epsilon}^T\Sigma_Y^{-1}\hat{\epsilon}$ (note that $\Sigma_Y=\Sigma_{\epsilon}$), see [Weighted least-squares estimation](03_wls), and it makes sense that this is a good measure for the discrepancy between data and model. This is especially clear if we consider (the common) case in which $\Sigma_Y$ is a diagonal matrix with the variances $\sigma_i^2$ ($i=1,\ldots,m$) of the random errors on the diagonal, since then: + +$$ +\hat{\epsilon}^T\Sigma_Y^{-1}\hat{\epsilon} = \sum_{i=1}^m \frac{\hat{\epsilon}^2_i}{\sigma^2_i} +$$ + +This is thus the sum of *squared residuals* (= estimated errors) divided by the variances of the random errors. + +It can be shown that the distribution of the weighted squared norm of residuals is given by the central $\chi^2$-distribution: + +$$ +\hat{\epsilon}^T\Sigma_Y^{-1}\hat{\epsilon} \sim \chi^2 (m-n,0) +$$ + +with $m-n$ the redundancy (number of observations $m$ minus number of unknowns $n$). 
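A short scipy sketch of how this quantity could be evaluated (this anticipates the overall model test introduced below); the linear trend model, the made-up data and the choice $\sigma = 0.2$ and $\alpha = 0.05$ are all illustrative assumptions:

```python
import numpy as np
from scipy import stats

# Illustrative linear trend example (made-up numbers)
t = np.array([0., 1., 2., 3., 4.])
y = np.array([1.1, 1.9, 3.2, 3.8, 5.1])
A = np.column_stack((np.ones_like(t), t))
W = np.eye(len(y)) / 0.2**2            # Sigma_Y = 0.2^2 I, so W = Sigma_Y^{-1}

xhat = np.linalg.solve(A.T @ W @ A, A.T @ W @ y)
ehat = y - A @ xhat                    # residuals under H0
T = ehat @ W @ ehat                    # weighted squared norm of residuals

m, n = A.shape
k_alpha = stats.chi2.ppf(1 - 0.05, df=m - n)   # chi^2(m-n) critical value
print(T, k_alpha, T > k_alpha)                 # here T < k_alpha: no rejection
```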
+ +MMM +link to Chi2 distribution section + +For the alternative hypothesis, the BLUE solution follows as: + +$$ +\begin{align*} +\begin{bmatrix}\hat{X}_a\\ \hat{\nabla} \end{bmatrix}&=(\begin{bmatrix}\mathrm{A} &\mathrm{C} \end{bmatrix}^T\Sigma_Y^{-1}\begin{bmatrix}\mathrm{A} &\mathrm{C} \end{bmatrix})^{-1}\begin{bmatrix}\mathrm{A} &\mathrm{C} \end{bmatrix}^T\Sigma_Y^{-1}Y \\ +\hat{\epsilon}_a &= Y - \begin{bmatrix}\mathrm{A} &\mathrm{C} \end{bmatrix}\begin{bmatrix}\hat{X}_a\\ \hat{\nabla} \end{bmatrix} +\end{align*} +$$ + +where the subscript $_a$ is used to indicate that the solution is different from the one under the null hypothesis. Also, the corresponding weighted squared norm of residuals $\hat{\epsilon}_a^T\Sigma_Y^{-1}\hat{\epsilon}_a$ is to be computed. + +### Step 2: apply test +As a test statistic to decide between $\mathcal{H}_0$ and $\mathcal{H}_a$ the difference between the weighted squared norms of residuals is used, which is known to have a central $\chi^2$-distribution if $\mathcal{H}_0$ is true: + +$$ +T_q = \hat{\epsilon}^T\Sigma_Y^{-1}\hat{\epsilon}-\hat{\epsilon}_a^T\Sigma_Y^{-1}\hat{\epsilon}_a \sim \chi^2(q,0) +$$ + +where $q$ is equal to the number of additional parameters in $\mathcal{H}_a$. + +We would reject $\mathcal{H}_0$ if: + +$$ +T_q = \hat{\epsilon}^T\Sigma_Y^{-1}\hat{\epsilon}-\hat{\epsilon}_a^T\Sigma_Y^{-1}\hat{\epsilon}_a > k_{\alpha} +$$ + +with $k_{\alpha}$ the threshold value based on a choice for the false alarm probability $\alpha$, see {numref}`chi2` + +```{figure} ../figures/ObservationTheory/09_chi2.png +--- +height: 250px +name: chi2 +--- +PDF of the $\chi^2(q,0)$-distribution with threshold value $k_{\alpha}$ based on the chosen false alarm probability $\alpha$. +``` + +This test is known as the *Generalized Likelihood Ratio Test* (GLRT). It can be derived by using the ratio of the likelihood functions for $\mathcal{H}_0$ and $\mathcal{H}_a$ (recall that the BLUE solution is equal to the maximum likelihood solution in our case since the data are assumed to be normally distributed). This is a logical choice since the purpose of our test is to decide whether $\mathcal{H}_0$ is sufficiently likely compared to $\mathcal{H}_a$. The derivation is beyond the scope of MUDE. + +Important to mention is that this is the *most powerful* test: no other test statistic can be found that would result in a larger [detection power](betagamma) $\gamma$ for the same $\alpha$. + +## Overall model test +Many options for the alternative hypothesis are possible, but instead of immediately testing the null hypothesis against a very specific alternative, it is common to first apply the so-called *overall model test* (OMT): it is a test which just focuses on the validity of $\mathcal{H}_0$ (how well do data and model fit) without having a specific $\mathcal{H}_a$ in mind. 
This is identical to choosing the alternative hypothesis as follows: + +$$ +\mathcal{H}_a:\;\; \mathbb{E}(\underset{m\times 1}{Y})=\underset{m\times m}{\begin{bmatrix} \underset{m\times n}{\mathrm{A}} & \underset{m\times m-n}{\mathrm{C}}\end{bmatrix}}\underset{m\times 1}{\begin{bmatrix}\mathrm{x}\\\nabla\end{bmatrix}} +$$ + +Hence, $q=m-n$ and the matrix $\begin{bmatrix}\mathrm{A} & \mathrm{C}\end{bmatrix}$ becomes a square invertible matrix, such that the system becomes determined and the BLUE follows as (try to show yourself): + +$$ +\begin{bmatrix}\hat{X}_a \\ \hat{\nabla}\end{bmatrix} = \begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}^{-1}Y +$$ + +```{admonition} Answer +:class: tip, dropdown + +$$ +\begin{align*} +\begin{bmatrix}\hat{X}_a \\ \hat{\nabla}\end{bmatrix} &= (\begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}^T\Sigma_Y^{-1} \begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix})^{-1}\begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}^T\Sigma_Y^{-1}Y \\ +&= \begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}^{-1}\Sigma_Y \begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}^{-T}\begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}^T\Sigma_Y^{-1}Y \\ +&= \begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}^{-1}Y +\end{align*} +$$ +``` + +From this it follows that the residuals become zero: + +$$ +\hat{\epsilon}_a = Y- \begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}\begin{bmatrix}\hat{X}_a \\ \hat{\nabla}\end{bmatrix} = Y - \begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}\begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}^{-1}Y = 0 +$$ + +Hence, any alternative hypothesis for which the design matrix $\begin{bmatrix}\mathrm{A}&\mathrm{C}\end{bmatrix}$ is an $m\times m$ invertible matrix will result in residuals being equal to 0. This implies that any observed $\mathrm{y}$ will satisfy the alternative hypothesis, which can thus also be formulated as + +$$ +\mathcal{H}_a:\;\; Y\in \mathbb{R}^m +$$ + +The OMT is obtained from the GLRT: + +$$ +T_{q=m-n} = \hat{\epsilon}^T\Sigma_Y^{-1}\hat{\epsilon} > k_{\alpha} +$$ + +since $\hat{\epsilon}_a = 0$. + +## Estimation and testing procedure +If the OMT is accepted, you are done and can present your estimation results, including precision and testing results. However, if the OMT is rejected, the next step would be to identify an alternative hypothesis that can be accepted. This might be an iterative procedure, since many options are possible for the alternative model. + +The complete procedure is: +1. Define the nominal functional and stochastic model (under $\mathcal{H}_0$) +2. Apply BLUE +3. Apply the overall model test (OMT) +4. If the OMT is accepted, go to step 8; if rejected, go to step 5 +5. Choose (another) specific alternative hypothesis +6. Apply the generalized likelihood ratio test (GLRT) to decide whether the alternative hypothesis is indeed more likely +7. If the alternative hypothesis is accepted, go to step 8; if rejected, go back to step 5 +8. Present estimates, precision and testing results + +In some cases, you may immediately want to test between two competing models; for instance, if you are interested in the height change of a point over time, you may want to test a linear trend model (constant velocity) versus a model including a constant acceleration. In that case, the procedure would be: +1. Define the functional and stochastic models (under $\mathcal{H}_0$ and $\mathcal{H}_a$) +2. Apply BLUE for both models +3. Apply the generalized likelihood ratio test (GLRT) +4. 
Present estimates, precision and testing results for the accepted model + diff --git a/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha.png b/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha.png new file mode 100644 index 0000000000000000000000000000000000000000..f9929675ee104cc9fd055d8f1bb5ebc3605a1761 Binary files /dev/null and b/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha.png differ diff --git a/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha_2.png b/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha_2.png new file mode 100644 index 0000000000000000000000000000000000000000..1f1e98488ddbf8e0a2ae006712ed6905a44a9cb3 Binary files /dev/null and b/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha_2.png differ diff --git a/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha_alpha.png b/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha_alpha.png new file mode 100644 index 0000000000000000000000000000000000000000..fba2eabd84edcba355ac007ebd3785541380039a Binary files /dev/null and b/book/sandbox/SanPart/figures/ObservationTheory/08_H0Ha_alpha.png differ diff --git a/book/sandbox/SanPart/figures/ObservationTheory/08_decision.png b/book/sandbox/SanPart/figures/ObservationTheory/08_decision.png new file mode 100644 index 0000000000000000000000000000000000000000..744ded8afd8475f2d6950e7d61b9d4720dd424bc Binary files /dev/null and b/book/sandbox/SanPart/figures/ObservationTheory/08_decision.png differ diff --git a/book/sandbox/SanPart/figures/ObservationTheory/09_chi2.png b/book/sandbox/SanPart/figures/ObservationTheory/09_chi2.png new file mode 100644 index 0000000000000000000000000000000000000000..558eff2d2b9b5fc35d73efe45569cea349aa4c76 Binary files /dev/null and b/book/sandbox/SanPart/figures/ObservationTheory/09_chi2.png differ diff --git a/book/sandbox/SanPart/premath/00_01c_PreMath.md b/book/sandbox/SanPart/premath/00_01c_PreMath.md index a40418614e2168fc25231ca3289afe6ae80eaa2f..9f5eee7840603233acc5b4a5570726c3d4a93877 100644 --- a/book/sandbox/SanPart/premath/00_01c_PreMath.md +++ b/book/sandbox/SanPart/premath/00_01c_PreMath.md @@ -79,6 +79,7 @@ A consistent system has a unique solution if and only if the column vectors of m This can be seen as follows: assume that $\mathrm{x}$ and $\mathrm{x}' \neq \mathrm{x}$ are two different solutions. Then $\mathrm{A}\mathrm{x}=\mathrm{A}\mathrm{x}’$ or $\mathrm{A}(\mathrm{x}-\mathrm{x}')=0$. But this can only be the case if some of the column vectors of $\mathrm{A}$ are linear dependent, which contradicts the assumption of full column rank. +(determined)= ## Determined, overdetermined and underdetermined systems A system of equations $\mathrm{y}=\mathrm{A}\mathrm{x}$ with $\text{rank}(\mathrm{A}) =m=n$ is consistent and has a unique solution: $\hat{\mathrm{x}} = \mathrm{A}^{-1}\mathrm{y}$. Such a system is called *determined*. 
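A small numpy illustration of determined versus overdetermined systems (all numbers are made up):

```python
import numpy as np

# Determined system (m = n = 2): unique exact solution xhat = A^{-1} y
A = np.array([[1., 1.],
              [1., 2.]])
y = np.array([3., 5.])
print(np.linalg.solve(A, y))           # [1. 2.], fits the data exactly

# Overdetermined system (m = 3 > n = 2): redundancy m - n = 1, no exact fit
A2 = np.array([[1., 1.],
               [1., 2.],
               [1., 3.]])
y2 = np.array([3., 5., 6.8])
xhat, res, rank, sv = np.linalg.lstsq(A2, y2, rcond=None)
print(xhat)                            # least-squares compromise solution
```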
@@ -93,9 +94,9 @@ The redundancy of a system of equations is equal to $m - \text{rank}(\mathrm{A}) If we restrict ourselves to systems of observation equations that are of full column rank: $\text{rank}(\mathrm{A}) = n $, the system can either be -*determined*: $\text{rank}(\mathrm{A}) =n =m$, the redundancy is equal to 0 +*Determined systems*: $\text{rank}(\mathrm{A}) =n =m$, the redundancy is equal to 0 -*overdetermined*: $\text{rank}(\mathrm{A}) =n < m$, the redundancy is equal to $m-n>0$ +*Overdetermined systems*: $\text{rank}(\mathrm{A}) =n < m$, the redundancy is equal to $m-n>0$ diff --git a/book/sandbox/continuous/Exponential.md b/book/sandbox/continuous/Exponential.md new file mode 100644 index 0000000000000000000000000000000000000000..5c18a043dec16216e391a100d9e4b52685aed7fd --- /dev/null +++ b/book/sandbox/continuous/Exponential.md @@ -0,0 +1,70 @@ + +# Exponential distribution + +Another widely used distribution function is the Exponential distribution. For instance, it is applied to model the waiting time between successive events of a Poisson process. The PDF of the Exponential distribution is given by + +$ +f(x) = \lambda e^{-\lambda x} \hspace{1cm} for \ x \geq 0, \lambda>0 +$ + +$ +f(x) = 0 \hspace{1cm} otherwise +$ + +where $\lambda$ is the parameter of the distribution, which is often called *rate*. In the right panel of the figure below, an example of two Exponential distributions with $\lambda =1$ and $\lambda = 2$ is shown. As you can see, the maximum density in the PDF of an Exponential distribution is located at zero and it is followed by an exponential decay. The higher the parameter $\lambda$, the higher the value of the density at $x=0$ and the faster the decay. In other words, the higher the parameter $\lambda$, the more concentrated the likely values of the random variable and, thus, the lower the standard deviation. This can be seen on the left panel of the figure, where random samples of the distribution are plotted. There you can see that higher values of the random variable $x$ appear when $\lambda = 1$, showing a higher dispersion. + +```{figure} /sandbox/continuous/figures/exponential.png + +--- + +--- +Exponential distribution function: (left) random samples, and (right) PDF. +``` + + + +Integrating the PDF, the following expression of the CDF is obtained + +$ +F(x) = 1 - e^{-\lambda x} +$ + +which is visually represented in the figure below. + +```{figure} /sandbox/continuous/figures/exponential_cdf.png + +--- + +--- +Exponential distribution function: CDF. +``` + +The influence of the parameter $\lambda$ can also be observed in the CDF. The higher the parameter $\lambda$, the faster the CDF reaches values of the non-exceedance probability close to 1. Thus, lower variability of the random variable is obtained. + +## Some interesting properties + +The mean of the exponential distribution can be obtained by integrating by parts as + +$ +E[X] = \int_0^{\infty}{x \lambda e^{-\lambda x}dx} = [-xe^{-\lambda x}]_0^{\infty} + \int_0^{\infty}{e^{-\lambda x}dx}= 1/ \lambda +$ + +As previously mentioned, the Exponential distribution models the waiting time between events (e.g., floods or earthquakes) of a Poisson process. For the Poisson process, $\lambda$ is the rate at which the events occur. Therefore, $1/\lambda$ is the average time between those events, since it is the expectation of the associated Exponential distribution. In reliability analysis, it can be called *mean lifetime* or *time to failure*. 
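A quick numerical check of this property; the rate $\lambda = 0.5$ is an illustrative choice (note that numpy parameterizes the Exponential distribution by the scale $1/\lambda$):

```python
import numpy as np

lam = 0.5                                         # illustrative rate parameter
rng = np.random.default_rng(42)
x = rng.exponential(scale=1/lam, size=100_000)    # numpy uses scale = 1/lambda

print(x.mean(), 1/lam)   # sample mean approaches E[X] = 1/lambda = 2
```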
+ +The variance is given as + +$ +Var[X] = E[X^2]-(E[X])^2 = 1/\lambda^2 +$ + +It should be noted that the coefficient of variation $CV = \sqrt{Var[X]}/E[X] = 1$. + +If you remember, the Poisson process is characterized by independent events. Thus, the time of the next occurrence modelled by the Exponential distribution is independent of the present and past occurrences. This is called the *memoryless property of the exponential distribution*. We can show it by calculating the probability that the waiting time until the next occurrence $X$ is higher than $x_1+x_2$, given that it is already higher than $x_1$ (conditional probability), as follows + +$ +P[X>x_1+x_2|X>x_1]=\frac{P[X>x_1+x_2]}{P[X>x_1]}=\frac{e^{-\lambda (x_1+x_2)}}{e^{-\lambda x_1}}=e^{-\lambda x_2}=P[X>x_2] +$ + +If the waiting time $X$ is exponentially distributed, you can see that the probability of waiting an additional time ($x_2$) does not depend on the time already waited ($x_1$). + +Finally, note that this distribution is not bell-shaped and, thus, not symmetric. It presents a positive tail and its skewness is 2. \ No newline at end of file diff --git a/book/sandbox/continuous/Exponential_quiz.md b/book/sandbox/continuous/Exponential_quiz.md new file mode 100644 index 0000000000000000000000000000000000000000..83cb719b5f12e5b68a4d70306dd61295ae76a6c6 --- /dev/null +++ b/book/sandbox/continuous/Exponential_quiz.md @@ -0,0 +1,38 @@ + +# Exponential distribution: let's practice + +An engineer is the manager of the construction of a bridge across a river and is concerned about large floods (floods with discharges higher than $50 m^3/s$) in the river, since they may damage the construction works. Floods higher than that occur on average every 10 years. + +Assume that floods are independent and identically distributed. This means that they can be modelled using a Poisson process and, thus, the time between floods can be modelled using an Exponential distribution. + + +What is the rate ($\lambda$) of the Exponential distribution? + + +```{admonition} Answer +:class: tip, dropdown + +The rate ($\lambda$) represents the frequency, which is the inverse of the average waiting time. Therefore: + +$$ + \lambda = 1/10 = 0.1 +$$ + +``` + + +The construction time is planned to be 2 years. Under the previous assumptions, what is the probability of not observing a flood in those 2 years? + + +```{admonition} Answer +:class: tip, dropdown + +The probability of not observing a flood in two years can be computed as the probability of the waiting time between floods being higher than 2 years. Therefore: + +$$ + P[X>2] = 1 - F(2) = 1 - (1 - e^{-0.1 \cdot 2}) = e^{-0.1 \cdot 2} \approx 0.82 +$$ + +This means that the probability of observing a flood is 1 - 0.82 = 0.18, which is actually pretty high! Maybe the engineer should think about a tighter schedule or using some adaptive techniques so the construction works are not affected by such events. + +``` \ No newline at end of file diff --git a/book/sandbox/continuous/Gumbel.md b/book/sandbox/continuous/Gumbel.md new file mode 100644 index 0000000000000000000000000000000000000000..3fb9449ed25cee038fd9c788b8aae53ce2bfb1eb --- /dev/null +++ b/book/sandbox/continuous/Gumbel.md @@ -0,0 +1,55 @@ + +# Gumbel distribution + +The Gumbel distribution is widely used to model maximum and minimum values of natural phenomena, such as wave storms or floods. You will learn more about this application in the Extreme Value Analysis section. 
The PDF of the Gumbel distribution is given by + +$ +f(x) = \frac{1}{\beta}e^{-\left( \frac{x-\mu}{\beta} + e^{-\left( \frac{x-\mu}{\beta} \right)}\right)} +$ + +where $\mu$ is the location parameter and $\beta>0$ is the scale parameter. In the figure below the influence of these parameters is presented. + +In the right panel of the figure below, an example of the PDF of three Gumbel distributions is shown. The first two distributions (black and blue continuous lines) present the same $\beta=1$ but different $\mu_{black}=0$ and $\mu_{blue}=2$. This produces a shift of the distribution towards positive values, with the mode equal to $\mu$. Note that the shape of the PDF is identical, so $\mu$ does not influence the dispersion of the distribution. This can also be seen in the left panel, where random samples from the distribution are drawn. The samples coming from the distribution with $\mu_{blue}=2$ are higher than those coming from the one with $\mu_{black}=0$, while both present a similar dispersion. + +Regarding the third distribution (red crosses in the left panel and red dashed line in the right panel), it presents the same $\mu=0$ as the first distribution (black dots in the left panel and continuous black line in the right panel) but a different $\beta$: $\beta_{black}=1$ and $\beta_{red}=3$. In the right panel, it can be observed that both distributions present the same mode at $x=0$. However, the dispersion of the distribution with $\beta_{red}=3$ is much higher. This effect can also be seen in the random samples in the left panel. Random samples from $\beta_{black}=1$ range from -2 to 4 approximately, while random samples from $\beta_{red}=3$ range from -2 to 8 approximately, with one sample reaching a value of almost 14. + +```{figure} /sandbox/continuous/figures/gumbel.png + +--- + +--- +Gumbel distribution function: (left) random samples, and (right) PDF. +``` + +Integrating the PDF, the following expression of the CDF is derived + +$ +F(x) = e^{-e^{-\frac{x-\mu}{\beta}}} +$ + +which is displayed in the figure below. + +```{figure} /sandbox/continuous/figures/gumbel_cdf.png + +--- + +Gumbel distribution function: CDF. +``` + +The influence of the parameters can also be observed in the CDF. The continuous blue and black distributions present the same shape, while the blue distribution is shifted towards positive values due to the higher value of $\mu$. Regarding the dashed red distribution, the slope of the CDF is much gentler, showing a higher dispersion than the other two distributions due to the higher value of $\beta$. + +## Some properties + +The mean of the Gumbel distribution can be computed as + +$ +E[X]=\mu + \gamma \beta +$ + +where $\gamma \approx 0.577$ is the Euler-Mascheroni constant. The variance is given by + +$ +Var[X] = \frac{\pi^2}{6}\beta^2 +$ + +Finally, note that the Gumbel distribution is not symmetric and presents positive skewness. That is, it presents a tail towards positive values. Actually, the skewness of this distribution can be analytically computed and it is approximately 1.14. 
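These properties can be checked numerically; a short scipy sketch with illustrative parameter values (`gumbel_r` is scipy's right-skewed Gumbel distribution):

```python
import numpy as np
from scipy import stats

mu, beta = 0.0, 3.0                       # illustrative location and scale
dist = stats.gumbel_r(loc=mu, scale=beta)

print(dist.mean(), mu + np.euler_gamma * beta)   # E[X] = mu + gamma * beta
print(dist.var(), np.pi**2 / 6 * beta**2)        # Var[X] = pi^2/6 * beta^2
print(dist.stats(moments='s'))                   # skewness, approx. 1.14
```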
\ No newline at end of file diff --git a/book/sandbox/continuous/Gumbel_quiz.md b/book/sandbox/continuous/Gumbel_quiz.md new file mode 100644 index 0000000000000000000000000000000000000000..14740f26239c12f886e956738a1f45313c25b12b --- /dev/null +++ b/book/sandbox/continuous/Gumbel_quiz.md @@ -0,0 +1,53 @@ + +# Gumbel distribution: let's practice + +Groundwater quality of a shallow aquifer has been tracked for a year to determine whether it is suitable for use or whether it is getting polluted. To this end, the daily maximum concentration of chloride in milligrams per liter has been measured. This concentration has been described using a Gumbel distribution with parameters $\mu=60$ and $\beta=10$. + +Chlorides between 1mg/L and 100mg/L are normal in freshwater. Use the Gumbel distribution to compute the probability of being in that interval. + +```{admonition} Answer +:class: tip, dropdown + +The probability of being below 100mg/L is: + +$$ + P[X \leq 100] = F(100) = e^{-e^{-\frac{100-60}{10}}} \approx 0.98 +$$ + +The same way, the probability of being below 1mg/L is: + +$$ + P[X \leq 1] = F(1) = e^{-e^{-\frac{1-60}{10}}} \approx 0 +$$ + +Thus, the probability of being in the interval (1,100) is + +$$ + P[1 \leq X \leq 100] = 0.98 - 0 = 0.98 +$$ + +The figure below illustrates the area to integrate in the PDF (blue area in the left panel) and the probabilities computed in the CDF (right panel). + +```{figure} /sandbox/continuous/figures/gumbel_concentration.png + +--- + +``` + + +And what is the probability of exceeding the upper limit of 100mg/L? + +```{admonition} Answer +:class: tip, dropdown + +Making use of the probability axioms: + +The probability of exceeding 100mg/L is: + +$$ + P[X>100]=1-P[X \leq 100] = 1-F(100) = 1- 0.98 = 0.02 +$$ + +This probability is very small, so the engineers can conclude that the water in the aquifer seems to present the characteristics of fresh water regarding the concentration of chlorides. + +``` diff --git a/book/sandbox/continuous/Uniform.md b/book/sandbox/continuous/Uniform.md new file mode 100644 index 0000000000000000000000000000000000000000..5ae0b8bade1d95f40c0339d6d00798f8e7b75ddc --- /dev/null +++ b/book/sandbox/continuous/Uniform.md @@ -0,0 +1,53 @@ + +# Uniform distribution + +The uniform distribution is the simplest type of continuous parametric distribution and, as its name implies, the PDF has a constant value along a given interval $[a,b]$, where $a < b$. The PDF of the uniform distribution is given by + +$ +f(x) = \frac{1}{b-a} \hspace{1cm} for \ a\leq x \leq b +$ + +$ +f(x) = 0 \hspace{1cm} otherwise +$ + +Note that all values in the distribution are between the lower limit $a$ and the upper limit $b$ and are equally likely to occur. The left panel of the figure below shows an example of a uniform PDF with $a=-1$ and $b=1$. + +```{figure} /sandbox/continuous/figures/uniform.png + +--- + +--- +Uniform distribution function: (left) PDF, and (right) CDF. +``` +As seen in previous chapters, the CDF is the integral of the PDF. Thus, as we integrate over a constant, the CDF presents a linear shape, as shown in the right panel of the figure above. The CDF is given as + +$ +F(x) = 0 \hspace{1cm} for \ x<a +$ + +$ +F(x) = \frac{x-a}{b-a} \hspace{1cm} for \ a\leq x \leq b +$ + +$ +F(x) = 1 \hspace{1cm} for \ x>b +$ + +If we make $a=0$ and $b=1$, we obtain the standard or unity uniform distribution, which is used to generate random values from other distribution functions for simulation purposes. 
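As a sketch of that use, here is inverse-transform sampling with the Exponential distribution from an earlier section as the target ($\lambda = 0.1$ is an illustrative value):

```python
import numpy as np

rng = np.random.default_rng(0)
u = rng.uniform(0, 1, size=100_000)   # standard uniform samples

# Inverse-transform sampling: if U ~ Uniform(0,1), then F^{-1}(U) follows F.
# For the Exponential CDF F(x) = 1 - exp(-lambda*x): F^{-1}(u) = -ln(1-u)/lambda
lam = 0.1
x = -np.log(1 - u) / lam

print(x.mean())   # close to the Exponential mean 1/lambda = 10
```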
+ +## Some properties + +The mean of the uniform distribution can be computed based on its simple geometry as + +$ +E[X]=\frac{1}{2}(a+b) +$ + +The variance is given by + +$ +Var[X] = \frac{1}{12}(b-a)^2 +$ + +Finally, note that the uniform distribution is symmetric and has zero skewness. Thus, the median and the mean are identical. That is, it does not present any tail. \ No newline at end of file diff --git a/book/sandbox/continuous/figures/exponential.png b/book/sandbox/continuous/figures/exponential.png new file mode 100644 index 0000000000000000000000000000000000000000..5f585349c36c6a10079b1598d7c0b17566e90e41 Binary files /dev/null and b/book/sandbox/continuous/figures/exponential.png differ diff --git a/book/sandbox/continuous/figures/exponential_cdf.png b/book/sandbox/continuous/figures/exponential_cdf.png new file mode 100644 index 0000000000000000000000000000000000000000..f037ff524832dab4a95b6da39419403e81637e45 Binary files /dev/null and b/book/sandbox/continuous/figures/exponential_cdf.png differ diff --git a/book/sandbox/continuous/figures/gumbel.png b/book/sandbox/continuous/figures/gumbel.png new file mode 100644 index 0000000000000000000000000000000000000000..045e15d5983816f54a3cefb36787d43184c53af8 Binary files /dev/null and b/book/sandbox/continuous/figures/gumbel.png differ diff --git a/book/sandbox/continuous/figures/gumbel_cdf.png b/book/sandbox/continuous/figures/gumbel_cdf.png new file mode 100644 index 0000000000000000000000000000000000000000..2939c99244288ffdcdb63369fd5e47167f115ad5 Binary files /dev/null and b/book/sandbox/continuous/figures/gumbel_cdf.png differ diff --git a/book/sandbox/continuous/figures/gumbel_concentration.png b/book/sandbox/continuous/figures/gumbel_concentration.png new file mode 100644 index 0000000000000000000000000000000000000000..0468aef701c9da3e65f2ebd1a4a715489398538c Binary files /dev/null and b/book/sandbox/continuous/figures/gumbel_concentration.png differ diff --git a/book/sandbox/continuous/figures/one_gaussian.png b/book/sandbox/continuous/figures/one_gaussian.png new file mode 100644 index 0000000000000000000000000000000000000000..914b5247284547bc3b465e5f7e053dbcbc5fba55 Binary files /dev/null and b/book/sandbox/continuous/figures/one_gaussian.png differ diff --git a/book/sandbox/continuous/figures/uniform.png b/book/sandbox/continuous/figures/uniform.png new file mode 100644 index 0000000000000000000000000000000000000000..aad3ff98f95c329c5bac9b6aceee6f40546b62ce Binary files /dev/null and b/book/sandbox/continuous/figures/uniform.png differ diff --git a/book/sandbox/continuous/fitting.md b/book/sandbox/continuous/fitting.md index cc0aad0c9bec80155148d5563d45777699d3df29..a7bfc762fcf85c34eb6747a6e6bc0e8db8e1b16a 100644 --- a/book/sandbox/continuous/fitting.md +++ b/book/sandbox/continuous/fitting.md @@ -1,16 +1,7 @@ -# Fitting a Distribtuion +# Fitting a Distribution -In the previous section, distr. +In the previous sections, we introduced parametric distributions as mathematical models for the empirical distribution functions of observations. Also, some of the most widely used parametric distributions were presented, such as the Exponential or Normal distribution. These parametric distributions have parameters that need to be fitted to the observations. For instance, the Exponential distribution has a rate parameter ($\lambda$). In this section, the most commonly used methods to fit the parametric distribution functions are presented. Note that this assumes we already know which parametric distribution we want to fit. 
For instance, fitting an Exponential distribution to a sample of observations. -## L-moments - -moments distr = moments data - -## MLE - -MLE video - -## Quizz/exercises - -practice \ No newline at end of file +- L-moments or Method of moments +- Maximum likelihood estimation (MLE) diff --git a/book/sandbox/continuous/lmoments.md b/book/sandbox/continuous/lmoments.md new file mode 100644 index 0000000000000000000000000000000000000000..2a470e10d61f4a7b8b303c6ecebea152787483f8 --- /dev/null +++ b/book/sandbox/continuous/lmoments.md @@ -0,0 +1,4 @@ + +# Fitting a Distribution: L-moments + +In the previous sections, diff --git a/book/sandbox/continuous/moments.md b/book/sandbox/continuous/moments.md new file mode 100644 index 0000000000000000000000000000000000000000..b845559c7b58a4cb2860e55b41778bdfe82c500b --- /dev/null +++ b/book/sandbox/continuous/moments.md @@ -0,0 +1,78 @@ + +# Fitting a Distribution: Method of moments + +The method of moments consists of equating the moments[^moment] in the observations to those of the distribution we want to fit. That is, there is a relationship between the moments and the parameters of the distribution. Then, if we equate the moments of the distribution to the moments of the observations, we can obtain the values of the parameters of the distribution. Actually, you have already done this in the exercises of the Exponential distribution! + +## Let's see it with an example + +An engineer is studying the intensity of earthquakes in Rome (Italy). To this end, the engineer is using *Catalogo dei terremoti italiani dall'anno 1000 al 1980* (the Catalog of Italian earthquakes from year 1000 to 1980) edited by D. Postpischl in 1985. This catalog reports the intensity of earthquakes in terms of the Mercalli-Cancani-Sieberg (MCS) index. Due to the uncertainties associated with this natural phenomenon, the engineer considers the intensity a random variable and wants to fit a Gumbel distribution to the observations found in the catalog using the method of moments. The data found in the catalog are shown in the table below[^ref]. + + +```{list-table} MCS intensity of the recorded earthquakes in the city of Rome. +:header-rows: 1 +:name: earthquakes + +* - MCS intensity + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 +* - Number + - 113 + - 132 + - 56 + - 22 + - 4 + - 2 +``` + +Remember that the CDF of the Gumbel distribution is given by + +$ +F(x) = e^{-e^{-\frac{x-\mu}{\beta}}} +$ + +Therefore, the values of $\mu$ and $\beta$ need to be determined based on the observations to fit the distribution. + +The first thing the engineer needs to do is to calculate the moments of the observations in the table. The mean ($\overline{X}$) and variance ($\sigma^2$) are calculated as + +$ +\overline{X} = \frac{2 \cdot 113 + 3 \cdot 132 + 4 \cdot 56 + 5 \cdot 22 + 6 \cdot 4 + 7 \cdot 2}{113+132+56+22+4+2} \approx 3.02 +$ + +$ +\sigma^2 = \frac{\sum{fx^2}}{\sum{f}}-\left( \frac{\sum{fx}}{\sum{f}}\right)^2 \approx 0.99 +$ + +where $x$ is the value of the intensity and $f$ is the frequency of the value $x$. + +Based on the properties of the Gumbel distribution, we know + +$ +E[X]=\mu + \gamma \beta +$ + +$ +Var[X] = \frac{\pi^2}{6}\beta^2 +$ + +where $\gamma \approx 0.577$ is the Euler-Mascheroni constant. + +Therefore, we can equate the expectation and variance of the distribution ($E[X]$ and $Var[X]$) to the calculated moments and obtain the values of the parameters as + +$ +3.02 = \mu + 0.577 \beta +$ + +$ +0.99 = \frac{\pi^2}{6}\beta^2 +$ + +Thus, $\mu \approx 2.57$ and $\beta \approx 0.77$. 
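The same computation can also be scripted; a minimal numpy sketch reproducing the worked example above (the intensities and frequencies come from the table, and the moment relations are inverted for $\mu$ and $\beta$):

```python
import numpy as np

# MCS intensities and observed frequencies from the table above
x = np.array([2, 3, 4, 5, 6, 7])
f = np.array([113, 132, 56, 22, 4, 2])

mean = (f * x).sum() / f.sum()                  # approx. 3.02
var = (f * x**2).sum() / f.sum() - mean**2      # approx. 0.99

# Invert the Gumbel moment relations:
# Var[X] = pi^2/6 * beta^2  and  E[X] = mu + gamma * beta
beta = np.sqrt(6 * var) / np.pi
mu = mean - np.euler_gamma * beta
print(mu, beta)                                 # approx. 2.57 and 0.77
```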
+ + + +[^moment]: In statistics, moments are quantitative properties that characterize a distribution. The four most commonly used moments are the mean, the variance, the skewness and the kurtosis. +[^ref]: Data extracted from Kottegoda and Rosso (2008). \ No newline at end of file diff --git a/book/sandbox/continuous/moments_quiz.md b/book/sandbox/continuous/moments_quiz.md new file mode 100644 index 0000000000000000000000000000000000000000..4b9027fd1db89094a91d0d6f8086159f4fed2664 --- /dev/null +++ b/book/sandbox/continuous/moments_quiz.md @@ -0,0 +1,70 @@ + +# Method of moments: let's practice + +As part of the quality control of the construction of a building, lab tests are performed to determine the compressive strengths of concrete. The following values in $N/mm^2$ are obtained: 60.5, 59.8, 53.4, 56.9 and 61.9. + +The engineer responsible for quality assumes that the compressive strength follows a uniform distribution, whose CDF is given by + +$ +F(x) = 0 \hspace{1cm} for \ x<a +$ + +$ +F(x) = \frac{x-a}{b-a} \hspace{1cm} for \ a\leq x \leq b +$ + +$ +F(x) = 1 \hspace{1cm} for \ x>b +$ + +Determine the value of $a$ and $b$ based on the observations using the method of moments. + +```{admonition} Answer +:class: tip, dropdown + +The mean and variance of the observations can be calculated as: + +$$ + \overline{X} = 58.5 +$$ + + +$$ + \sigma^2 = 11.46 +$$ + +We know that the expectation and variance of the uniform distribution are given by + +$$ +E[X]=\frac{1}{2}(a+b) +$$ + +$$ +Var[X] = \frac{1}{12}(b-a)^2 +$$ + +Thus, equating the mean and variance in the observations to those of the distribution, we obtain the following system + +$$ +58.5=\frac{1}{2}(a+b) +$$ + +$$ +11.46 = \frac{1}{12}(b-a)^2 +$$ + +Solving the system, $a \approx 52.64$ and $b \approx 64.36$. Note that $a \approx 64.36$ and $b \approx 52.64$ is also a solution to the system, so we choose $a<b$ based on our definition of the uniform distribution. + +``` + +Now that the engineer knows the uniform distribution which characterizes the compressive strength of the concrete samples, he needs to compute the probability of being above $55N/mm^2$, since it is the required resistance for the construction. + +```{admonition} Answer +:class: tip, dropdown + +$$ +P[X>55N/mm^2] = 1 - F(55) = 1 - \frac{55-52.64}{64.36-52.64} \approx 0.80 +$$ + +Thus, there is a probability of 0.80 of fulfilling the required resistance and a probability of 0.20 of not doing so. He will have to contact the contractor! +``` \ No newline at end of file diff --git a/book/sandbox/continuous/other_distr.md b/book/sandbox/continuous/other_distr.md index 0a41bc2d3dab5384f8f4210b3a9484a13ac76987..9fba317713066141cd9649c294fb8aef2a71578a 100644 --- a/book/sandbox/continuous/other_distr.md +++ b/book/sandbox/continuous/other_distr.md @@ -1,22 +1,55 @@ # Non-Gaussian distributions -In the previous section, Gaussian. +So far, you have seen that we can use a parametric distribution to model the uncertainty in our observations. Also, you have revisited one of those parametric distributions, the Gaussian distribution, which is a widely used model since it has many applications such as modelling natural processes or measurement errors. Here, we will discuss an interesting property of this distribution, the lack of **asymmetry**, and introduce the concept of the **tail of the distribution**. -## Concept of tail, asymmetry -Tail +## Concepts of asymmetry and tail of the distribution + +Asymmetry is a relevant property describing the shape of a distribution. 
diff --git a/book/sandbox/continuous/other_distr.md b/book/sandbox/continuous/other_distr.md
index 0a41bc2d3dab5384f8f4210b3a9484a13ac76987..9fba317713066141cd9649c294fb8aef2a71578a 100644
--- a/book/sandbox/continuous/other_distr.md
+++ b/book/sandbox/continuous/other_distr.md
@@ -1,22 +1,55 @@
 # Non-Gaussian distributions
 
-In the previous section, Gaussian.
+So far, you have seen that we can use a parametric distribution to model the uncertainty in our observations. You have also revisited one of those parametric distributions, the Gaussian distribution, which is widely used since it has many applications, such as modelling natural processes or measurement errors. Here, we will discuss an interesting property of this distribution, its lack of **asymmetry**, and introduce the concept of the **tail of the distribution**.
 
-## Concept of tail, asymmetry
+## Concepts of asymmetry and tail of the distribution
 
-Tail
-
-## Lognormal distribution
-
-for instance
-
-## Exponential distribution
-
-for instance
-
-## Gumbel distribution
-
-List of distr is tentative
\ No newline at end of file
+Asymmetry is a relevant property describing the shape of a distribution. The Figure below shows the PDF of a Gaussian distribution.
+
+```{figure} /sandbox/continuous/figures/one_gaussian.png
+Gaussian PDF.
+```
+
+As you can see, the shape of the PDF is perfectly symmetric with respect to the mode[^mode] of the distribution. This leads to an interesting property: for perfectly symmetric distributions, the mode, mean[^mean] and median[^median] have the same value.
+
+It is also possible to analyze the asymmetry of an empirical distribution. To this end, we can visually inspect the empirical PDF and compute the *sample coefficient of skewness* from a set of observations as
+
+$$
+g_1 = \frac{\sum_{i=1}^n{(x_i-\overline{x})^3}}{ns^3}
+$$
+
+where $x_i$ are the observations, $\overline{x}$ is the mean of the observations, $n$ is the number of observations, and $s$ is the standard deviation.
+
+**And what happens when my observations are not symmetric?**
+
+In that case, the Gaussian distribution may not be the most appropriate model and we need to look for a different one. In fact, it is very common to find asymmetric distributions when modelling natural processes!
+
+If we go back to the wind data that we previously introduced and have a look at the PDF, we can easily notice that it is not symmetric. The mass in the PDF is not distributed equally on both sides of the mode. The shape is similar to a bell centered around $W_s = 5 \ m/s$, but the distribution extends towards the positive values, creating what we call the **tail of the distribution**. Since the tail extends towards positive values, we say that the PDF has *positive skewness*.
+
+```{figure} /sandbox/continuous/figures/epdf_wind.png
+Empirical probability density function of the wind speed data.
+```
+
+Many natural phenomena present positive skewness. This means that the mode, the mean and the median no longer have the same value. For a positively skewed PDF: mode < median < mean. This inequality is reversed if the PDF is negatively skewed.
+
+**And why are asymmetry and the tail so important?**
+
+As you can imagine, asymmetry is an important criterion when selecting the parametric distribution to model the uncertainty in our observations. The shape of the parametric distribution needs to fit the shape of the empirical distribution function.
+
+Moreover, the tail of the distribution models the highest and lowest values of our variable and, typically, those are the values we use for design. For instance, if we are designing a building, we would be interested in the high wind speeds that it has to withstand (loading conditions). Also, if we are considering the nutrients required to cultivate mussels, we would analyze the lowest values to ensure that the mussels have enough food to survive. Thus, it is important to choose a parametric distribution which is able to model the behavior in the tail when the observations are not symmetric.
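+
+To get a feel for the *sample coefficient of skewness* $g_1$ defined earlier on this page, here is a minimal sketch on a made-up, positively skewed sample (it takes $s$ as the population standard deviation, which makes the result match `scipy.stats.skew` with its default `bias=True`):
+
+```python
+import numpy as np
+
+# Made-up positively skewed sample (think wind speeds in m/s)
+x = np.array([2.1, 3.4, 3.9, 4.2, 4.8, 5.1, 5.6, 7.9, 9.3, 12.7])
+
+n = len(x)
+s = x.std(ddof=0)  # standard deviation (population form, as assumed here)
+g1 = np.sum((x - x.mean())**3) / (n * s**3)
+
+print(g1)  # positive: the tail extends towards high values
+# Cross-check with SciPy: from scipy.stats import skew; skew(x)
+```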
+
+In the following sections, we will introduce some common parametric distributions, as well as their characteristics, to give an overview of possible models for your data. But keep in mind that the list is not exhaustive; there are many more distributions!
+
+
+[^mode]: The mode is the most frequently observed value and, thus, the value of the random variable at which the density of the PDF is highest.
+[^mean]: Visually, from the PDF, we can define the mean as the value of the random variable which is the balance point of the distribution.
+[^median]: Visually, from the PDF, we can define the median as the value of the random variable which divides the PDF into two areas of the same size.
\ No newline at end of file
diff --git a/build-book.sh b/build-book.sh
index dc02204259afce1ae776a46f6badd21498b7931d..99bf4aa12f666cc4d16d69ef73c6ff080e605839 100755
--- a/build-book.sh
+++ b/build-book.sh
@@ -2,13 +2,16 @@
 
 set -euo pipefail
 
-set -euo pipefail
+START_SERVER=${1:-false}
+PAGE_ROOT=${2:-"/"}
 
 # Build the jupyter book, everything else is post-processing
-jupyter-book build book
+jupyter-book clean book/
+jupyter-book build book/
 
 # Note: the structure of thebe_lite mimicks where thing are needed in the html folder
 cp thebe_lite/* book/_build/html/ -r
+sed "s,const PAGE_ROOT = \"/\";,const PAGE_ROOT = \"$PAGE_ROOT\";,g" thebe_lite/_static/sphinx-thebe.js > book/_build/html/_static/sphinx-thebe.js
 rm book/_build/html/THEBE_LITE.md
 
 # Copy all non notebook, markdown or build files into the build for local access in pyodide etc.
@@ -40,11 +43,16 @@ if [ "$python_command" = "" ] ; then
 fi
 
 # Serves the files on port 8000, localhost (127.0.0.1:8000)
-START_SERVER=${1:-false}
-
 if [ "$START_SERVER" = true ] ; then
   echo "Starting server on port 8000"
   $python_command -m http.server 8000 --directory book/_build/html
 else
-  echo "Book successfully built. If you want to use interactive elements, start a server locally using the command: $python_command -m http.server 8000 --directory book/_build/html. Or run this script again: ${0} true"
+  echo "Book successfully built. If you want to use interactive elements,
+    start a server locally using the command:
+    $python_command -m http.server 8000 --directory book/_build/html
+    Or run this script again (the book will be rebuilt!):
+    ${0} true"
 fi
+
+echo "To close a Python server, run the command:
+  kill-server.sh"
\ No newline at end of file
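The `sed` line above is what makes the book relocatable: it bakes the `PAGE_ROOT` argument into the copy of `sphinx-thebe.js` that ships with the built HTML. In case the quoting is hard to read, here is the same substitution as an illustrative Python sketch; the value `/mude-book/` is a hypothetical hosting subpath, not one used by the repository:

```python
# Illustrative equivalent of the sed call in build-book.sh
from pathlib import Path

page_root = "/mude-book/"  # hypothetical subpath where the HTML is hosted

src = Path("thebe_lite/_static/sphinx-thebe.js").read_text()
patched = src.replace('const PAGE_ROOT = "/";', f'const PAGE_ROOT = "{page_root}";')
Path("book/_build/html/_static/sphinx-thebe.js").write_text(patched)
```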
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 1a09fdcc2fc9f502555729672b6f751c034fc7b6..0b1b2cf395fe0aa8bbcd210e0d59335cf6ec7382 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3.11-slim-bullseye
 WORKDIR /book
-RUN apt-get update && apt-get -y install git
+RUN apt-get update && apt-get -y install git gcc
 COPY requirements.txt requirements.txt
 RUN pip install -r requirements.txt
 ENTRYPOINT ["./build-book.sh"]
diff --git a/kill-server.sh b/kill-server.sh
index 2421a3ef2d3b97899fa80290bc4c35fc57742431..8e78ceb50066938ea094999bcc52e631916b70c2 100755
--- a/kill-server.sh
+++ b/kill-server.sh
@@ -2,3 +2,13 @@
 
 pid=$(ps aux | grep http.server | head -n 1 | awk '{ print $2 }')
 kill -9 $pid
+
+echo "If kill-server.sh did not successfully kill the server:
+  - you are probably using Windows ;)
+  - try one of the following commands
+
+  in git bash:
+  tskill python
+
+  in cmd or PowerShell:
+  TASKKILL /IM python.exe /F"
\ No newline at end of file
diff --git a/thebe_lite/_static/_hook_fetch_module_finder.py b/thebe_lite/_static/_hook_fetch_module_finder.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2e8dc2f21eb0a2bc74c602b2331a97cdfcebdfa
--- /dev/null
+++ b/thebe_lite/_static/_hook_fetch_module_finder.py
@@ -0,0 +1,36 @@
+import importlib.abc
+import importlib.machinery
+import importlib.util
+
+import sys
+import os
+
+# Finds modules by stupidly fetching them from the server
+class FetchPathFinder(importlib.abc.MetaPathFinder):
+    @classmethod
+    def find_spec(cls, fullname, path, target=None):
+        if path is None or len(path) == 0:
+            path = os.getcwd()
+        else:
+            path = path[0]
+
+        module_name = fullname.split(".")[-1]
+        suffixed = [module_name + suffix for suffix in importlib.machinery.SOURCE_SUFFIXES]
+        # Due to the lack of directory listings, we just force nested modules to have an __init__.py
+        for relpath in [*suffixed, os.path.join(module_name, "__init__.py")]:
+            fullpath = os.path.join(path, relpath)
+            try:
+                # Opening the file triggers a lazy fetch from the server
+                open(fullpath, "r").close()
+            except OSError:
+                continue
+
+            loader = importlib.machinery.SourceFileLoader(fullname, fullpath)
+            return importlib.util.spec_from_loader(fullname, loader=loader)
+        return None
+
+    @classmethod
+    def invalidate_caches(cls):
+        return
+
+sys.meta_path.append(FetchPathFinder)
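For readers unfamiliar with import hooks: on every import that is not already cached, Python walks the finders on `sys.meta_path` in order, so a finder appended at the end (like `FetchPathFinder` above) is only consulted once the standard finders have failed. A self-contained toy sketch of that protocol, with a deliberately nonexistent module name:

```python
import importlib.abc
import sys

# Toy finder that only logs lookups and defers to the next finder
class LoggingFinder(importlib.abc.MetaPathFinder):
    def find_spec(self, fullname, path, target=None):
        print(f"meta_path asked for {fullname!r} (path={path})")
        return None  # returning None lets the import machinery move on

sys.meta_path.append(LoggingFinder())

try:
    import surely_missing_module  # hypothetical name, triggers the finder above
except ImportError:
    pass
```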
diff --git a/thebe_lite/_static/sphinx-thebe.js b/thebe_lite/_static/sphinx-thebe.js
index 4b9ff23b3a9511ebc4fbf669b84fddb7ebb3de9b..6c1b9d01b79308fdf598d64d76adf6c0dadbc17b 100644
--- a/thebe_lite/_static/sphinx-thebe.js
+++ b/thebe_lite/_static/sphinx-thebe.js
@@ -1,3 +1,7 @@
+// Do not touch the following line
+const PAGE_ROOT = "/";
+// You can touch from here onwards
+
 const thebeLiteConfig = {
   requestKernel: true,
   useJupyterLite: true,
@@ -260,8 +264,8 @@ function override_pyodide_lookup(fs, server_path) {
   // 1. The default /drive home is a mount which does not play well with certain file operations
   // 2. I thought it would help fetching py files from root, but that returns index.html instead of the directory listing
   // This means py files inside the root folder of the server cannot be loaded implcitly, but you can still use open etc. to fetch them
-  // NOTE: The slash at the end is important, keep it if you change anything.
-  const home = "/home/pyodide/book/";
+  // NOTE: The LACK of slash at the end is important, keep it that way if you change anything.
+  const home = "/home/pyodide/book";
 
   // Create a file from the string without using the writeFile method
   // writeFile calls lookupPath, which may cause infinite recursion?? (haven't tested)
@@ -297,35 +301,6 @@
       currentDirectory += directory + "/";
       try {
         fs.mkdir(currentDirectory);
-
-        // Fetching the directory needs to return a directory listing
-        // This feature may not be enabled on some servers and will fail
-        let request = new XMLHttpRequest();
-        const fullPath = currentDirectory;
-        const path = currentDirectory.slice(home.length);
-        request.open("GET", path, false);
-        request.send();
-
-        if (request.status !== 200) {
-          continue;
-        }
-
-        // Find alls all the links with .py files in the directory listing
-        // We cannot use DOMParser here since it's running inside a web worker: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API
-        const regex = /\<a href=\"(.*?\.py)\"\>(.*?\.py)\<.*?\>/g;
-        regex.lastIndex = 0;
-
-        for (const match of request.response.matchAll(regex)) {
-          // match[1] contains the name of the python file
-          request.open("GET", path + match[1], false);
-          request.send();
-
-          if (request.status !== 200) {
-            continue;
-          }
-
-          createFileFromString(fullPath, match[1], request.response);
-        }
       } catch {}
     }
   }
@@ -354,8 +329,6 @@
     const name = pathList[pathList.length - 1]; // Name of file
     const parentPathList = pathList.slice(0, -1); // List of all parent directories
 
-    createDirectoryStructure(parentPathList);
-
     let request = new XMLHttpRequest();
     request.open("GET", path, false);
     request.send();
 
     if (request.status !== 200) {
       throw exception;
     }
+    createDirectoryStructure(parentPathList);
     createFileFromString(parentPathList.join("/"), name, request.response);
     return old_lookup(fullPath, opts);
   }
@@ -436,8 +410,8 @@ var initThebe = async () => {
   console.log("[sphinx-thebe]: Loading thebe...");
   $(".thebe-launch-button ").text("Loading thebe...");
 
-  await loadScriptAsync("/thebe-lite.min.js");
-  await loadScriptAsync("/index.js");
+  await loadScriptAsync(`${PAGE_ROOT}thebe-lite.min.js`);
+  await loadScriptAsync(`${PAGE_ROOT}index.js`);
 
   // Runs once the script has finished loading
   console.log("[sphinx-thebe]: Finished loading thebe...");
@@ -455,11 +429,26 @@
   // 3. Eval the string of the override_pyodide_lookup function in JS, this brings it into scope
   // 4. Execute the override_pyodide_lookup function in JS, and bake in the relative path from root in the book (the home)
   // NOTE: All functions used in override_pyodide_lookup should be nested inside it, since the web worker cannot access functions in this script
-  await thebelab.session.kernel.requestExecute({
+  thebelab.session.kernel.requestExecute({
     code: `import js; import pyodide_js; js.fs = pyodide_js.FS; js.eval("""${override_pyodide_lookup.toString()}"""); js.eval(f"override_pyodide_lookup(fs, '${
       location.pathname.split("/").slice(0, -1).join("/") + "/"
     }')")`,
-  }).done;
+  });
+
+  // Fetch the import hook source synchronously (third argument to open is false)
+  const request = new XMLHttpRequest();
+  request.open(
+    "GET",
+    `${PAGE_ROOT}_static/_hook_fetch_module_finder.py`,
+    false
+  );
+  request.send();
+
+  const fetchImportHookCode = request.response;
+
+  // Enable importing of modules from the server
+  thebelab.session.kernel.requestExecute({
+    code: fetchImportHookCode,
+  });
 
   // Fix for issues with ipywidgets in Thebe
   await thebelab.session.kernel.requestExecute({
@@ -529,5 +518,5 @@ if (document.readyState !== "loading") {
   });
 }
 
-loadStyleAsync("/thebe.css");
-loadStyleAsync("/_static/code.css");
+loadStyleAsync(`${PAGE_ROOT}thebe.css`);
+loadStyleAsync(`${PAGE_ROOT}_static/code.css`);