{ "cells": [ { "cell_type": "markdown", "id": "ca114622-27c4-46db-be66-2c489639a83f", "metadata": {}, "source": [ "# Data Exploration\n", "## Volumetric Soil Water Layer 1 (0-7 cm): \"swvl1\"\n", "### 30 years of hourly values over the Maritime Continent at 0.1x0.1 degree spatial resolution\n", "### Task: \n", "1. Load a variable from 1991 to 2020 (load multiple GRIB files) \n", "2. Find out statistics (mean and standard deviation) for each year \n", "3. Find out statistics for the whole period (memory intensive): use DASK\n", "4. Save results (maps) as PNG files and store them in the Gallery folder\n", "5. Locate when and where values below 0 and the top 10 percent of values occur" ] }, { "cell_type": "code", "execution_count": 1, "id": "98ab6d49-86c5-48a9-9abe-a19ca5de8826", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:31:23.787738Z", "start_time": "2022-01-24T20:31:22.395405Z" }, "execution": { "iopub.execute_input": "2022-01-21T00:50:02.308764Z", "iopub.status.busy": "2022-01-21T00:50:02.308541Z", "iopub.status.idle": "2022-01-21T00:50:02.312282Z", "shell.execute_reply": "2022-01-21T00:50:02.311679Z", "shell.execute_reply.started": "2022-01-21T00:50:02.308747Z" }, "tags": [] }, "outputs": [], "source": [ "import metview as mv \n", "import xarray as xr\n", "from dask.distributed import Client\n", "\n", "#import dask.array as da" ] }, { "cell_type": "code", "execution_count": 2, "id": "2b1b0f2b", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:31:34.822796Z", "start_time": "2022-01-24T20:31:33.678292Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", "
\n", "

Client

\n", "

Client-9ed3a326-7d54-11ec-828f-1d280296b0a8

\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "
Connection method: Cluster objectCluster type: distributed.LocalCluster
\n", " Dashboard: http://127.0.0.1:8787/status\n", "
\n", "\n", " \n", "
\n", "

Cluster Info

\n", "
\n", "
\n", "
\n", "
\n", "

LocalCluster

\n", "

148400f2

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", "
\n", " Dashboard: http://127.0.0.1:8787/status\n", " \n", " Workers: 4\n", "
\n", " Total threads: 8\n", " \n", " Total memory: 30.47 GiB\n", "
Status: runningUsing processes: True
\n", "\n", "
\n", " \n", "

Scheduler Info

\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", "

Scheduler

\n", "

Scheduler-02d744b7-2ccf-42cd-9ad4-249fe109f620

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " Comm: tcp://127.0.0.1:35655\n", " \n", " Workers: 4\n", "
\n", " Dashboard: http://127.0.0.1:8787/status\n", " \n", " Total threads: 8\n", "
\n", " Started: Just now\n", " \n", " Total memory: 30.47 GiB\n", "
\n", "
\n", "
\n", "\n", "
\n", " \n", "

Workers

\n", "
\n", "\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 0

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:41161\n", " \n", " Total threads: 2\n", "
\n", " Dashboard: http://127.0.0.1:44279/status\n", " \n", " Memory: 7.62 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:39735\n", "
\n", " Local directory: /mnt/c/Users/andis/OneDrive - UW-Madison/ERA5-Land/swvl1/dask-worker-space/worker-9y4yz6uf\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 1

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:46883\n", " \n", " Total threads: 2\n", "
\n", " Dashboard: http://127.0.0.1:32987/status\n", " \n", " Memory: 7.62 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:40575\n", "
\n", " Local directory: /mnt/c/Users/andis/OneDrive - UW-Madison/ERA5-Land/swvl1/dask-worker-space/worker-7rzxhr59\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 2

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:38927\n", " \n", " Total threads: 2\n", "
\n", " Dashboard: http://127.0.0.1:32833/status\n", " \n", " Memory: 7.62 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:44619\n", "
\n", " Local directory: /mnt/c/Users/andis/OneDrive - UW-Madison/ERA5-Land/swvl1/dask-worker-space/worker-b8mififc\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "

Worker: 3

\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", "\n", "
\n", " Comm: tcp://127.0.0.1:43563\n", " \n", " Total threads: 2\n", "
\n", " Dashboard: http://127.0.0.1:42391/status\n", " \n", " Memory: 7.62 GiB\n", "
\n", " Nanny: tcp://127.0.0.1:43279\n", "
\n", " Local directory: /mnt/c/Users/andis/OneDrive - UW-Madison/ERA5-Land/swvl1/dask-worker-space/worker-ujyumazu\n", "
\n", "
\n", "
\n", "
\n", " \n", "\n", "
\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", " \n", "\n", "
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client = Client(n_workers=4, \n", " threads_per_worker=2, \n", " memory_limit='12GB')\n", "client" ] }, { "cell_type": "code", "execution_count": 3, "id": "a0f7c1af-909b-4913-9c4a-9b7c0e90e257", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:33:55.023571Z", "start_time": "2022-01-24T20:31:45.355174Z" }, "execution": { "iopub.execute_input": "2022-01-20T22:53:01.863457Z", "iopub.status.busy": "2022-01-20T22:53:01.863151Z", "iopub.status.idle": "2022-01-20T22:54:25.946803Z", "shell.execute_reply": "2022-01-20T22:54:25.946188Z", "shell.execute_reply.started": "2022-01-20T22:53:01.863426Z" }, "tags": [] }, "outputs": [], "source": [ "fs = mv.Fieldset(path='*_swvl1.grib')" ] }, { "cell_type": "markdown", "id": "6460dc84", "metadata": {}, "source": [ "Check the type of the variable that contains the GRIB files" ] }, { "cell_type": "code", "execution_count": 4, "id": "66728081", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:33:59.952448Z", "start_time": "2022-01-24T20:33:59.947376Z" } }, "outputs": [ { "data": { "text/plain": [ "metview.bindings.Fieldset" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(fs)" ] }, { "cell_type": "code", "execution_count": 5, "id": "cd19548d", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:40:36.960458Z", "start_time": "2022-01-24T20:40:36.956477Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "262992\n" ] } ], "source": [ "print(len(fs))" ] }, { "cell_type": "code", "execution_count": 6, "id": "631e857d", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:42:55.472549Z", "start_time": "2022-01-24T20:42:55.303101Z" }, "scrolled": true }, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", "
shortNameswvl1
nameVolumetric soil water layer 1
paramId39
unitsm**3 m**-3
typeOfLeveldepthBelowLandLayer
level0
date19910101
time0
step0
classl5
streamoper
typean
experimentVersionNumber0001
" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fs[0].describe('swvl1')" ] }, { "cell_type": "code", "execution_count": 7, "id": "7b870f45", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:42:58.505269Z", "start_time": "2022-01-24T20:42:58.496126Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Variable Type Data/Info\n", "---------------------------------------------------------\n", "Client type \n", "NamespaceMagics MetaHasTraits mespace.NamespaceMagics'>\n", "client Client eads=8, memory=30.47 GiB>\n", "fs Fieldset Fieldset (262992 fields)\n", "get_ipython function \n", "getsizeof builtin_function_or_method \n", "json module hon3.7/json/__init__.py'>\n", "mv module ges/metview/__init__.py'>\n", "np module kages/numpy/__init__.py'>\n", "var_dic_list function \n", "xr module ages/xarray/__init__.py'>\n" ] } ], "source": [ "whos" ] }, { "cell_type": "markdown", "id": "32784381", "metadata": {}, "source": [ "The metview.bindings.Fieldset will be converted to xarray.Dataset using \".to_dataset()\". 
First, tell xarray to keep the variable attributes throughout the computations" ] }, { "cell_type": "code", "execution_count": 8, "id": "3760ec91", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:43:01.537524Z", "start_time": "2022-01-24T20:43:01.533308Z" } }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xr.set_options(keep_attrs=True)" ] }, { "cell_type": "code", "execution_count": 9, "id": "cdaeaeb4", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:58:34.408910Z", "start_time": "2022-01-24T20:43:12.138927Z" } }, "outputs": [], "source": [ "ds = fs.to_dataset() # it takes 14-20 minutes\n", "\n", "# don't do this:\n", "# ds = fs.to_dataset(chunks={'time': 24*7}) since it takes forever\n", "# df = fs.to_dataframe() since there is no such function to convert fieldset to dataframe" ] }, { "cell_type": "code", "execution_count": 10, "id": "e65619b4", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:59:46.077673Z", "start_time": "2022-01-24T20:59:46.073091Z" } }, "outputs": [ { "data": { "text/plain": [ "xarray.core.dataset.Dataset" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(ds)" ] }, { "cell_type": "code", "execution_count": 11, "id": "4b2e473e", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:59:48.904877Z", "start_time": "2022-01-24T20:59:48.877731Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:              (time: 262992, latitude: 301, longitude: 801)\n",
       "Coordinates:\n",
       "    number               int64 0\n",
       "  * time                 (time) datetime64[ns] 1991-01-01 ... 2020-12-31T23:0...\n",
       "    step                 timedelta64[ns] 00:00:00\n",
       "    depthBelowLandLayer  float64 0.0\n",
       "  * latitude             (latitude) float64 15.0 14.9 14.8 ... -14.8 -14.9 -15.0\n",
       "  * longitude            (longitude) float64 90.0 90.1 90.2 ... 169.9 170.0\n",
       "    valid_time           (time) datetime64[ns] ...\n",
       "Data variables:\n",
       "    swvl1                (time, latitude, longitude) float32 ...\n",
       "Attributes:\n",
       "    GRIB_edition:            1\n",
       "    GRIB_centre:             ecmf\n",
       "    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts\n",
       "    GRIB_subCentre:          0\n",
       "    Conventions:             CF-1.7\n",
       "    institution:             European Centre for Medium-Range Weather Forecasts\n",
       "    history:                 2022-01-24T14:58 GRIB to CDM+CF via cfgrib-0.9.9...
" ], "text/plain": [ "\n", "Dimensions: (time: 262992, latitude: 301, longitude: 801)\n", "Coordinates:\n", " number int64 ...\n", " * time (time) datetime64[ns] 1991-01-01 ... 2020-12-31T23:0...\n", " step timedelta64[ns] ...\n", " depthBelowLandLayer float64 ...\n", " * latitude (latitude) float64 15.0 14.9 14.8 ... -14.8 -14.9 -15.0\n", " * longitude (longitude) float64 90.0 90.1 90.2 ... 169.9 170.0\n", " valid_time (time) datetime64[ns] ...\n", "Data variables:\n", " swvl1 (time, latitude, longitude) float32 ...\n", "Attributes:\n", " GRIB_edition: 1\n", " GRIB_centre: ecmf\n", " GRIB_centreDescription: European Centre for Medium-Range Weather Forecasts\n", " GRIB_subCentre: 0\n", " Conventions: CF-1.7\n", " institution: European Centre for Medium-Range Weather Forecasts\n", " history: 2022-01-24T14:58 GRIB to CDM+CF via cfgrib-0.9.9..." ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds" ] }, { "cell_type": "markdown", "id": "5b4a590a", "metadata": {}, "source": [ "Processing a large dataset (xarray dataset) needs multiprocessing (DASK). 
First, divide the large data into chunks for parallel processing with Dask" ] }, { "cell_type": "code", "execution_count": null, "id": "e77a6759", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T15:20:26.945968Z", "start_time": "2022-01-24T15:20:19.748091Z" } }, "outputs": [], "source": [ "swvl1_monmean = ds.swvl1.chunk(chunks={'latitude': 15, 'longitude': 40, 'time': 24*5}).groupby('time.month').mean('time')" ] }, { "cell_type": "code", "execution_count": null, "id": "3606870f", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T15:20:26.973420Z", "start_time": "2022-01-24T15:20:26.947727Z" } }, "outputs": [], "source": [ "swvl1_monmean" ] }, { "cell_type": "code", "execution_count": null, "id": "908f3dc1", "metadata": { "ExecuteTime": { "start_time": "2022-01-24T15:04:45.366Z" } }, "outputs": [], "source": [ "swvl1_monmean.compute()" ] }, { "cell_type": "code", "execution_count": null, "id": "2b555840", "metadata": { "ExecuteTime": { "start_time": "2022-01-24T15:04:45.367Z" } }, "outputs": [], "source": [ "swvl1_monmean" ] }, { "cell_type": "code", "execution_count": null, "id": "d4ea77a5", "metadata": { "ExecuteTime": { "start_time": "2022-01-24T15:04:45.368Z" } }, "outputs": [], "source": [ "swvl1_monmean.sel(month=1).plot(figsize=(12,4))" ] }, { "cell_type": "code", "execution_count": null, "id": "612a54a3", "metadata": { "ExecuteTime": { "start_time": "2022-01-24T15:04:45.369Z" } }, "outputs": [], "source": [ "ds" ] }, { "cell_type": "code", "execution_count": null, "id": "9f896a4b", "metadata": { "ExecuteTime": { "start_time": "2022-01-24T15:04:45.370Z" } }, "outputs": [], "source": [ "swvl1_hrmean = ds.swvl1.chunk(chunks={'latitude': 15, 'longitude': 40, 'time': 24*7}).groupby('time.hour').mean('time')" ] }, { "cell_type": "code", "execution_count": null, "id": "e1f29454", "metadata": { "ExecuteTime": { "start_time": "2022-01-24T15:04:45.371Z" } }, "outputs": [], "source": [ "swvl1_hrmean" ] }, { "cell_type": "code", "execution_count": null, "id": 
"a9a8532d", "metadata": { "ExecuteTime": { "start_time": "2022-01-24T15:04:45.372Z" } }, "outputs": [], "source": [ "swvl1_hrmean.compute()" ] }, { "cell_type": "code", "execution_count": null, "id": "6b90b4e0", "metadata": { "ExecuteTime": { "start_time": "2022-01-24T15:04:45.373Z" } }, "outputs": [], "source": [ "swvl1_hrmean" ] }, { "cell_type": "code", "execution_count": null, "id": "523eb723", "metadata": { "ExecuteTime": { "end_time": "2022-01-24T20:31:10.402884Z", "start_time": "2022-01-24T20:31:10.072229Z" } }, "outputs": [], "source": [ "client.shutdown()" ] }, { "cell_type": "code", "execution_count": null, "id": "2406da41", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.12" }, "toc-autonumbering": false, "toc-showcode": false, "toc-showmarkdowntxt": false, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }