diff --git a/content/GA_1_4/Analysis_solution.ipynb b/content/GA_1_4/Analysis_solution.ipynb index 442553057d91fd88f62c3ad598fc7be3a7addec4..3b6e6c89b3b69cc8eb138df59e909efa4ccdfd22 100644 --- a/content/GA_1_4/Analysis_solution.ipynb +++ b/content/GA_1_4/Analysis_solution.ipynb @@ -23,14 +23,234 @@ }, { "cell_type": "markdown", - "id": "511b014c-e327-46c3-90fd-11f89d9d05be", - "metadata": { - "id": "0491cc69" - }, + "id": "00945ccf", + "metadata": {}, + "source": [ + "## Part 0: Recap of GA 1.3\n", + "\n", + "As you (hopefully!) recall, last week we\n", + "\n", + "\n", + "## Task 2: Set-up linear functional model\n", + "\n", + "We want to investigate observed displacements of a in the Green Heart of the Netherlands, where groundwater levels play a significant role. The model was defined as:\n", + "\n", + "$$\n", + "d = d_0 + vt + k \\ \\textrm{GW},\n", + "$$\n", + "where $d$ is the displacement, $t$ is time and $\\textrm{GW}$ is the groundwater level (that we assume to be deterministic). \n", + "\n", + "Therefore, the model has 3 unknowns:\n", + "1. $d_0$, as the initial displacement at $t_0$;\n", + "2. $v$, as the displacement velocity;\n", + "3. $k$, as the 'groundwater factor', which can be seen as the response of the soil to changes in the groundwater level.\n", + "\n", + "We used BLUE to create a linear model, which we conveniently stored in a dictionary." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b381f517", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from scipy import interpolate\n", + "from scipy.stats import norm\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import ipywidgets as widgets\n", + "from ipywidgets import interact, fixed\n", + "\n", + "from functions import *\n", + "\n", + "np.set_printoptions(precision=3)" + ] + }, + { + "cell_type": "markdown", + "id": "3f0287af", + "metadata": {}, + "source": [ + "<div style=\"background-color:#AABAB2; color: black; width:95%; vertical-align: middle; padding:15px; margin: 10px; border-radius: 10px\">\n", + "<p>\n", + "<b>Task 0.1: </b> \n", + " \n", + "Run the cell below to load two dictionaries (objects) into the Python variable space, <code>m1</code> and <code>m2</code>. You can uncomment and run the code to check the key:value pairs.\n", + " \n", + "</p>\n", + "</div>" + ] + }, + { + "cell_type": "markdown", + "id": "a32822ae", + "metadata": {}, + "source": [ + "<div style=\"background-color:#facb8e; color: black; width:95%; vertical-align: middle; padding:15px; margin: 10px; border-radius: 10px\"> <p>\n", + "\n", + "Note that the objects are stored in and loaded from \"pickle\" files, a native Python serialization format (in non-technical speak: a pickle file saves our variables to use them later, like we do here). The model dictionaries were defined and saved in the file <code>setup.py</code>, which you are welcome to read if you are curious.\n", + "</p></div>" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b8be3730", + "metadata": {}, + "outputs": [], + "source": [ + "m1_blue = load_pickle_file('m1_blue.pickle')\n", + "m2_blue = load_pickle_file('m2_blue.pickle')" + ] + }, + { + "cell_type": "markdown", + "id": "b4c81901", + "metadata": {}, + "source": [ + "It's not very easy to get a sense of the contents of the dictionary, so we made a simple function to help summarize it:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e02d5566", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summary of Model\n", + "----------------\n", + " Data type: InSAR\n", + " Model type: BLUE\n", + " Number of observations: 61\n", + " Model parameters:\n", + " x_0 = 9.174 +/- 2.128\n", + " x_1 = -0.024 +/- 0.001\n", + " x_2 = 0.202 +/- 0.016\n", + "----------------\n", + "\n", + "Summary of Model\n", + "----------------\n", + " Data type: GNSS\n", + " Model type: BLUE\n", + " Number of observations: 730\n", + " Model parameters:\n", + " x_0 = 1.181 +/- 4.647\n", + " x_1 = -0.021 +/- 0.003\n", + " x_2 = 0.160 +/- 0.035\n", + "----------------\n", + "\n" + ] + } + ], + "source": [ + "model_summary(m1_blue)\n", + "model_summary(m2_blue)" + ] + }, + { + "cell_type": "markdown", + "id": "0b4e08a3", + "metadata": {}, "source": [ - "<div style=\"background-color:#facb8e; color: black; width:95%; vertical-align: middle; padding:15px; margin: 10px; border-radius: 10px\"> <p><strong>IMPORTANT</strong>\n", + "<div style=\"background-color:#AABAB2; color: black; width:95%; vertical-align: middle; padding:15px; margin: 10px; border-radius: 10px\">\n", + "<p>\n", + "<b>Task 0.2: </b> \n", " \n", - "Depending on how your code is written, it is highly likely you will over-write variable names from Part 1, so be careful if you re-run cells above.</p></div>" + "Run the cell below to activate the widget. Play with the parameter values until you are completely familiar with what each one does, and confirm that the model results from last week are indeed those that minimize the mean of the squared residuals.\n", + " \n", + "</p>\n", + "</div>" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "73af3940", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3a29a2a56b5e467aba6a3d11c46a5292", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(FloatSlider(value=0.0, description='x0', max=10.0, min=-10.0), FloatSlider(value=0.0, de…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x0_slider = widgets.FloatSlider(value=0, min=-10, max=10, step=0.1, description='x0')\n", + "x1_slider = widgets.FloatSlider(value=0, min=-0.1, max=0.1, step=0.001, description='x1')\n", + "x2_slider = widgets.FloatSlider(value=1, min=-1, max=1, step=0.01, description='x2')\n", + "\n", + "interact(update_plot, x0=x0_slider, x1=x1_slider, x2=x2_slider, x3=fixed(None),\n", + " m=[('Data Type 1', m1_blue), ('Data Type 2', m2_blue)]);\n" + ] + }, + { + "cell_type": "markdown", + "id": "36948058", + "metadata": {}, + "source": [ + "### Preparing for Another Set of Models\n", + "\n", + "Hopefully you found it useful for keeping track of our models in dictionaries. If not, well---maybe you will after this GA, because we are going to do it again! The next cell takes care of initializing a new set of dictionaries for our work \n", + "\n", + "_Tip: if you would like to see a list of the keys and values in a dictionary, you can try adapting this code:_\n", + "\n", + "```\n", + "print(\"Keys and Values:\")\n", + "for key, value in YOUR_DICTIONARY_NAME.items():\n", + " print(f\"{key:16s} --> {value}\")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "62dff8b6", + "metadata": {}, + "source": [ + "<div style=\"background-color:#AABAB2; color: black; width:95%; vertical-align: middle; padding:15px; margin: 10px; border-radius: 10px\">\n", + "<p>\n", + "<b>Task 0.3: </b> \n", + " \n", + "Run the cell below to initialize the dictionaries for our non-linear least squares models. Once again, we will have a model for GNSS and another for INSAR data.\n", + " \n", + "</p>\n", + "</div>" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "712e4114", + "metadata": {}, + "outputs": [], + "source": [ + "def initialize_new_dict(d_old):\n", + " d = {}\n", + " d['data_type'] = d_old['data_type']\n", + " d['model_type'] = 'Non-Linear Least Squares'\n", + " d['times'] = d_old['times']\n", + " d['y'] = d_old['y']\n", + " d['days'] = d_old['days']\n", + " d['groundwater'] = d_old['groundwater']\n", + " return d\n", + "\n", + "m1 = initialize_new_dict(m1_blue)\n", + "m2 = initialize_new_dict(m2_blue)\n", + "\n" ] }, { @@ -40,9 +260,9 @@ "id": "80a9b60f" }, "source": [ - "## 8. Set-up non-linear functional model\n", + "## Part 1: Set-up Non-Linear Functional Model\n", "\n", - "In the model we fitted so far, we only considered a linear velocity and a dependency on the grounwater level. However, when a heavy construction is built on 'soft' soil layers, compaction of the upper layers will be observed. This compaction is relatively large in the beginning but decreases when time passes. We can approach the behavior with a simplified model, assuming an exponential decay. \n", + "In the model we fitted using BLUE, we only considered a linear velocity and a dependency on the groundwater level. However, when a heavy construction is built on 'soft' soil layers, compaction of the upper layers will be observed. This compaction is relatively large in the beginning but decreases when time passes. We can approach the behavior with a simplified model, assuming an exponential decay. \n", "\n", "*Please note that this is very simplified model that does not necessarily rely on physics.* \n", "\n", @@ -69,7 +289,7 @@ "source": [ "<div style=\"background-color:#AABAB2; color: black; width:95%; vertical-align: middle; padding:15px; margin: 10px; border-radius: 10px\">\n", "<p>\n", - "<b>Task 8.1: </b> \n", + "<b>Task 1.1: </b> \n", " \n", "Choose initial values for the model parameters. Use the code and Markdown cells below to justify your decision. We suggest two possible approaches: a) use the forward model and make a plot to see if you can get it in the right order of magnitude, or b) make an inference about what the values might be using knowledge about each term in the model.\n", " \n", diff --git a/content/GA_1_4/auxiliary_files/m1.pickle b/content/GA_1_4/auxiliary_files/m1_blue.pickle similarity index 100% rename from content/GA_1_4/auxiliary_files/m1.pickle rename to content/GA_1_4/auxiliary_files/m1_blue.pickle diff --git a/content/GA_1_4/auxiliary_files/m2.pickle b/content/GA_1_4/auxiliary_files/m2_blue.pickle similarity index 100% rename from content/GA_1_4/auxiliary_files/m2.pickle rename to content/GA_1_4/auxiliary_files/m2_blue.pickle diff --git a/content/GA_1_4/functions.py b/content/GA_1_4/functions.py index ef73a8d2e4693511b6d6fce11967b5fb7fddb082..b3491035ecaeb6f056d262f34492c239a4ebbb38 100644 --- a/content/GA_1_4/functions.py +++ b/content/GA_1_4/functions.py @@ -1,4 +1,5 @@ - +import os +import pickle import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -109,45 +110,73 @@ def plot_residual_histogram(d): return fig, ax -def xhat_slider_plot(A, y, t, Sigma_y=None): - """Interactive plot of the solution space for x_hat.""" - n, k = A.shape - - if Sigma_y is None: - Sigma_y = np.eye(n) - xhat = np.linalg.inv(A.T @ np.linalg.inv(Sigma_y) @ A) @ A.T @ np.linalg.inv(Sigma_y) @ y - Sigma_xhat = np.linalg.inv(A.T @ np.linalg.inv(Sigma_y) @ A) - std_xhat = np.sqrt(np.diag(Sigma_xhat)) - - sliders = {} - for i in range(k): - sliders[f'xhat_{i}'] = widgets.FloatSlider(value=xhat[i], - min=xhat[i] - 10*std_xhat[i], - max=xhat[i] + 10*std_xhat[i], - step=0.1*std_xhat[i], - description=f'xhat_{i}') - - def update_plot(**kwargs): - xhat_values = np.array([kwargs[f'xhat_{i}'] for i in range(k)]) - y_fit = A @ xhat_values - W = np.linalg.inv(Sigma_y) - ss_res = (y - y_fit).T @ W @ (y - y_fit) - - plt.figure(figsize=(10, 5)) - plt.plot(t, y, 'o', label='data') - plt.plot(t, y_fit, label='y_fit') - plt.title(f'Mean of squared residuals: {ss_res:.2f}') - plt.ylabel('y') - plt.xlabel('t') - plt.grid() - plt.legend() - plt.show() - - interact(update_plot, **sliders) - -# Example usage -# A, y, t should be defined before calling this function -# xhat_slider_plot(A, y, t) +def update_plot(x0, x1, x2, x3, m): + plt.figure(figsize=(15,5)) + plt.plot(m['times'], m['y'], 'o', label=m['data_type']) + plt.ylabel('Displacement [mm]') + plt.xlabel('Time') + + if x3 is None: + y_fit = m['A'] @ [x0, x1, x2] + if (x0 == 0) & (x1 == 0) & (x2 == 1): + plt.plot(m['times'], y_fit, 'r', label='Groundwater data', linewidth=2) + else: + plt.plot(m['times'], y_fit, 'r', label='Fit', linewidth=2) + else: + y_fit = m['A'] @ [x0, x1, x2, x3] + if (x0 == 0) & (x1 == 0) & (x2 == 0) & (x3 == 1): + plt.plot(m['times'], y_fit, 'r', label='Groundwater data', linewidth=2) + else: + plt.plot(m['times'], y_fit, 'r', label='Fit', linewidth=2) + + + W = np.linalg.inv(m['Sigma_Y']) + ss_res = (m['y'] - y_fit).T @ W @ (m['y'] - y_fit) + plt.title(f'Mean of squared residuals: {ss_res:.0f}') + plt.ylim(-150, 30) + plt.grid() + plt.legend() + plt.show() + +# def xhat_slider_plot(A, y, t, Sigma_y=None): +# """Interactive plot of the solution space for x_hat.""" +# n, k = A.shape + +# if Sigma_y is None: +# Sigma_y = np.eye(n) +# xhat = np.linalg.inv(A.T @ np.linalg.inv(Sigma_y) @ A) @ A.T @ np.linalg.inv(Sigma_y) @ y +# Sigma_xhat = np.linalg.inv(A.T @ np.linalg.inv(Sigma_y) @ A) +# std_xhat = np.sqrt(np.diag(Sigma_xhat)) + +# sliders = {} +# for i in range(k): +# sliders[f'xhat_{i}'] = widgets.FloatSlider(value=xhat[i], +# min=xhat[i] - 10*std_xhat[i], +# max=xhat[i] + 10*std_xhat[i], +# step=0.1*std_xhat[i], +# description=f'xhat_{i}') + +# def update_plot(**kwargs): +# xhat_values = np.array([kwargs[f'xhat_{i}'] for i in range(k)]) +# y_fit = A @ xhat_values +# W = np.linalg.inv(Sigma_y) +# ss_res = (y - y_fit).T @ W @ (y - y_fit) + +# plt.figure(figsize=(10, 5)) +# plt.plot(t, y, 'o', label='data') +# plt.plot(t, y_fit, label='y_fit') +# plt.title(f'Mean of squared residuals: {ss_res:.2f}') +# plt.ylabel('y') +# plt.xlabel('t') +# plt.grid() +# plt.legend() +# plt.show() + +# interact(update_plot, **sliders) + +# # Example usage +# # A, y, t should be defined before calling this function +# # xhat_slider_plot(A, y, t) def to_days_years(times): '''Convert the observation times to days and years.''' @@ -212,4 +241,12 @@ def model_summary(d): for i in range(len(d['x_hat'])): print(f' x_{i} = {d["x_hat"][i]:6.3f}' + f' +/- {np.sqrt(d["Sigma_X_hat"][i,i]):6.3f}') - print('----------------\n') \ No newline at end of file + print('----------------\n') + +def load_pickle_file(filename): + directory = os.path.join(os.path.dirname(__file__), 'auxiliary_files') + filepath = os.path.join(directory, filename) + # Load the data from the pickle file + with open(os.path.normpath(filepath), 'rb') as file: + data = pickle.load(file) + return data \ No newline at end of file diff --git a/content/GA_1_4/setup.py b/content/GA_1_4/setup.py index 7befd7e3119c3c68ccbd48191594f4e1c32bce2a..9e2ccab293ae5134c47bb2e06d2c84933e1d7731 100644 --- a/content/GA_1_4/setup.py +++ b/content/GA_1_4/setup.py @@ -68,12 +68,14 @@ m2 = get_CI(m2, 0.04) # Save models -with open(get_path_aux('m1.pickle'), 'wb') as file: - pickle.dump(m1, file) -print('\nModel 1 saved as m1.pickle\n') -model_summary(m1) - -with open(get_path_aux('m2.pickle'), 'wb') as file: - pickle.dump(m2, file) -print('\nModel 2 saved as m2.pickle\n') -model_summary(m2) +m1_blue = m1 +with open(get_path_aux('m1_blue.pickle'), 'wb') as file: + pickle.dump(m1_blue, file) +print('\nModel 1 saved as m1_blue.pickle\n') +model_summary(m1_blue) + +m2_blue = m2 +with open(get_path_aux('m2_blue.pickle'), 'wb') as file: + pickle.dump(m2_blue, file) +print('\nModel 2 saved as m2_blue.pickle\n') +model_summary(m2_blue)