{ "cells": [ { "cell_type": "markdown", "id": "9805db07", "metadata": { "tags": [] }, "source": [ "# 01 - Compute Long Term Averages\n", "For each case, compute the time mean of every variable over the last 20 years of the run, keeping the global spatial dimensions (`nlat`, `nlon`, and `z_t` for 3D fields)" ] }, { "cell_type": "markdown", "id": "89d2a8f2", "metadata": {}, "source": [ "## Imports\n", "The `%load_ext autoreload` and `%autoreload 2` lines at the top of the next cell are there so that any updates we make to the `analysis_config.yml` file are reflected in real time in this notebook" ] }, { "cell_type": "code", "execution_count": 1, "id": "cf252e5e", "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "import intake\n", "import ast\n", "import yaml\n", "from distributed import Client\n", "from ncar_jobqueue import NCARCluster\n", "import xarray as xr\n", "from config import analysis_config" ] }, { "cell_type": "markdown", "id": "812c62d2", "metadata": {}, "source": [ "## Spin up a Dask Cluster" ] }, { "cell_type": "code", "execution_count": 4, "id": "85054be4", "metadata": {}, "outputs": [], "source": [ "cluster = NCARCluster()\n", "cluster.scale(20)\n", "client = Client(cluster)" ] }, { "cell_type": "code", "execution_count": 5, "id": "49fe45d9-278e-42bc-a25c-00c4c54ce7b5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", "
\n", "

Client

\n", "

Client-1dcbcc90-1b50-11ec-a00d-3cecef1aca66

\n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "
Connection method: Cluster objectCluster type: dask_jobqueue.PBSCluster
\n", " Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status\n", "
\n", "\n", " \n", "
\n", "

Cluster Info

\n", "
\n", "
\n", "
\n", "
\n", "

PBSCluster

\n", "

d49b1dd6

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status\n", " \n", " Workers: 0\n", "
\n", " Total threads: 0\n", " \n", " Total memory: 0 B\n", "
\n", "\n", "
\n", " \n", "

Scheduler Info

\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", "

Scheduler

\n", "

Scheduler-634ffb9a-5903-401d-b456-fb1cf05d6a48

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " Comm: tcp://10.12.206.39:39709\n", " \n", " Workers: 0\n", "
\n", " Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mgrover/proxy/8787/status\n", " \n", " Total threads: 0\n", "
\n", " Started: Just now\n", " \n", " Total memory: 0 B\n", "
\n", "
\n", "
\n", "\n", "
\n", " \n", "

Workers

\n", "
\n", "\n", " \n", "\n", "
\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", " \n", "\n", "
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client" ] }, { "cell_type": "code", "execution_count": 6, "id": "3f5e8cb2", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "da686697ddf9497ab548d98948152a29", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Tab(children=(HTML(value='
None catalog with 12 dataset(s) from 11103 asset(s):

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique
component1
stream4
date2501
case3
member_id2
frequency4
variables545
path11103
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data_catalog = intake.open_esm_datastore(\n", " analysis_config[\"catalog_json\"],\n", " csv_kwargs={\"converters\": {\"variables\": ast.literal_eval}},\n", " sep=\"/\",\n", ")\n", "data_catalog" ] }, { "cell_type": "code", "execution_count": 8, "id": "8b343635", "metadata": {}, "outputs": [ { "data": { "text/html": [ "

None catalog with 3 dataset(s) from 3600 asset(s):

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique
component1
stream1
date1200
case3
member_id2
frequency1
variables434
path3600
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data_catalog_subset = data_catalog.search(\n", " frequency='month_1',\n", ")\n", "data_catalog_subset" ] }, { "cell_type": "markdown", "id": "52e684fd", "metadata": {}, "source": [ "### Subset the last 20 years of data" ] }, { "cell_type": "code", "execution_count": 9, "id": "4da47eda", "metadata": {}, "outputs": [], "source": [ "dates = sorted(data_catalog_subset.df.date.unique())" ] }, { "cell_type": "code", "execution_count": 11, "id": "d46b628a", "metadata": {}, "outputs": [], "source": [ "data_catalog_subset = data_catalog_subset.search(variables=analysis_config['variables'],\n", " date=dates[-240:])" ] }, { "cell_type": "markdown", "id": "125230fb", "metadata": {}, "source": [ "### Read in our dataset using `to_dataset_dict()`" ] }, { "cell_type": "code", "execution_count": 12, "id": "5bc90302", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "--> The keys in the returned dictionary of datasets are constructed as follows:\n", "\t'component/stream/case'\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " 100.00% [3/3 00:02<00:00]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dsets = data_catalog_subset.to_dataset_dict(cdf_kwargs={'use_cftime': True, 'chunks': {'time': 60}})" ] }, { "cell_type": "code", "execution_count": 25, "id": "02eea06d-7599-45fc-b4c6-7694511f9fa0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:           (time: 240, nlat: 384, nlon: 320, z_t: 60, z_t_150m: 15)\n",
       "Coordinates:\n",
       "  * time              (time) object 0081-02-01 00:00:00 ... 0101-01-01 00:00:00\n",
       "  * z_t               (z_t) float32 500.0 1.5e+03 ... 5.125e+05 5.375e+05\n",
       "  * z_t_150m          (z_t_150m) float32 500.0 1.5e+03 ... 1.35e+04 1.45e+04\n",
       "    ULONG             (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>\n",
       "    ULAT              (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>\n",
       "    TLONG             (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>\n",
       "    TLAT              (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>\n",
       "Dimensions without coordinates: nlat, nlon\n",
       "Data variables: (12/27)\n",
       "    SFWF              (time, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    photoC_TOT_zint   (time, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    SiO3              (time, z_t, nlat, nlon) float32 dask.array<chunksize=(1, 60, 384, 320), meta=np.ndarray>\n",
       "    BSF               (time, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    photoC_sp_zint    (time, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    photoC_diat_zint  (time, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    ...                ...\n",
       "    CaCO3_FLUX_100m   (time, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    diaz_Nfix         (time, z_t_150m, nlat, nlon) float32 dask.array<chunksize=(1, 15, 384, 320), meta=np.ndarray>\n",
       "    SHF               (time, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    DOCr              (time, z_t, nlat, nlon) float32 dask.array<chunksize=(1, 60, 384, 320), meta=np.ndarray>\n",
       "    SHF_QSW           (time, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    NH4               (time, z_t, nlat, nlon) float32 dask.array<chunksize=(1, 60, 384, 320), meta=np.ndarray>\n",
       "Attributes:\n",
       "    cell_methods:            cell_methods = time: mean ==> the variable value...\n",
       "    contents:                Diagnostic and Prognostic Variables\n",
       "    title:                   b1850.f19_g17.validation_mct.004\n",
       "    revision:                $Id$\n",
       "    time_period_freq:        month_1\n",
       "    Conventions:             CF-1.0; http://www.cgd.ucar.edu/cms/eaton/netcdf...\n",
       "    calendar:                All years have exactly  365 days.\n",
       "    source:                  CCSM POP2, the CCSM Ocean Component\n",
       "    model_doi_url:           https://doi.org/10.5065/D67H1H0V\n",
       "    intake_esm_varname:      SFWF\\nphotoC_TOT_zint\\nSiO3\\nBSF\\nphotoC_sp_zint...\n",
       "    history:                 none\n",
       "    intake_esm_dataset_key:  ocn/pop.h/b1850.f19_g17.validation_mct.004
" ], "text/plain": [ "\n", "Dimensions: (time: 240, nlat: 384, nlon: 320, z_t: 60, z_t_150m: 15)\n", "Coordinates:\n", " * time (time) object 0081-02-01 00:00:00 ... 0101-01-01 00:00:00\n", " * z_t (z_t) float32 500.0 1.5e+03 ... 5.125e+05 5.375e+05\n", " * z_t_150m (z_t_150m) float32 500.0 1.5e+03 ... 1.35e+04 1.45e+04\n", " ULONG (nlat, nlon) float64 dask.array\n", " ULAT (nlat, nlon) float64 dask.array\n", " TLONG (nlat, nlon) float64 dask.array\n", " TLAT (nlat, nlon) float64 dask.array\n", "Dimensions without coordinates: nlat, nlon\n", "Data variables: (12/27)\n", " SFWF (time, nlat, nlon) float32 dask.array\n", " photoC_TOT_zint (time, nlat, nlon) float32 dask.array\n", " SiO3 (time, z_t, nlat, nlon) float32 dask.array\n", " BSF (time, nlat, nlon) float32 dask.array\n", " photoC_sp_zint (time, nlat, nlon) float32 dask.array\n", " photoC_diat_zint (time, nlat, nlon) float32 dask.array\n", " ... ...\n", " CaCO3_FLUX_100m (time, nlat, nlon) float32 dask.array\n", " diaz_Nfix (time, z_t_150m, nlat, nlon) float32 dask.array\n", " SHF (time, nlat, nlon) float32 dask.array\n", " DOCr (time, z_t, nlat, nlon) float32 dask.array\n", " SHF_QSW (time, nlat, nlon) float32 dask.array\n", " NH4 (time, z_t, nlat, nlon) float32 dask.array\n", "Attributes:\n", " cell_methods: cell_methods = time: mean ==> the variable value...\n", " contents: Diagnostic and Prognostic Variables\n", " title: b1850.f19_g17.validation_mct.004\n", " revision: $Id$\n", " time_period_freq: month_1\n", " Conventions: CF-1.0; http://www.cgd.ucar.edu/cms/eaton/netcdf...\n", " calendar: All years have exactly 365 days.\n", " source: CCSM POP2, the CCSM Ocean Component\n", " model_doi_url: https://doi.org/10.5065/D67H1H0V\n", " intake_esm_varname: SFWF\\nphotoC_TOT_zint\\nSiO3\\nBSF\\nphotoC_sp_zint...\n", " history: none\n", " intake_esm_dataset_key: ocn/pop.h/b1850.f19_g17.validation_mct.004" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"dsets[f\"ocn/pop.h/{analysis_config['reference_case_name']}\"]" ] }, { "cell_type": "markdown", "id": "ccc89ca5", "metadata": { "tags": [] }, "source": [ "## Loop through the data and compute!\n", "For each case we compute the average over time, then concatenate the results into a single dataset along a new `case` dimension. The data were already subset above to the variables specified in the `analysis_config.yml` file. We set `keep_attrs=True` first so that the dataset attributes (including `title`) are kept when averaging" ] }, { "cell_type": "code", "execution_count": 14, "id": "e4a25693", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xr.set_options(keep_attrs=True)" ] }, { "cell_type": "code", "execution_count": 17, "id": "42604ab5", "metadata": {}, "outputs": [], "source": [ "ds_list = []\n", "for key in dsets.keys():\n", " ds = dsets[key]\n", " mean = ds.mean(dim='time')\n", " ds_list.append(mean)" ] }, { "cell_type": "code", "execution_count": 18, "id": "ba2ba644", "metadata": {}, "outputs": [], "source": [ "merged_ds = xr.concat(ds_list, dim='case')" ] }, { "cell_type": "markdown", "id": "52401b90", "metadata": {}, "source": [ "We also want to keep track of which case each average belongs to, so we label the `case` dimension with each dataset's `title` attribute (the case name)" ] }, { "cell_type": "code", "execution_count": 19, "id": "89750ef9", "metadata": {}, "outputs": [], "source": [ "cases = []\n", "for ds in ds_list:\n", " cases.append(ds.title)" ] }, { "cell_type": "code", "execution_count": 20, "id": "fa40af5f", "metadata": {}, "outputs": [], "source": [ "merged_ds['case'] = cases" ] }, { "cell_type": "code", "execution_count": 27, "id": "e6985cef", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:           (case: 3, nlat: 384, nlon: 320, z_t: 60, z_t_150m: 15)\n",
       "Coordinates:\n",
       "  * z_t               (z_t) float32 500.0 1.5e+03 ... 5.125e+05 5.375e+05\n",
       "  * z_t_150m          (z_t_150m) float32 500.0 1.5e+03 ... 1.35e+04 1.45e+04\n",
       "    ULONG             (nlat, nlon) float64 321.1 322.3 323.4 ... 319.6 320.0\n",
       "    ULAT              (nlat, nlon) float64 -78.95 -78.95 -78.95 ... 72.41 72.41\n",
       "    TLONG             (nlat, nlon) float64 320.6 321.7 322.8 ... 319.4 319.8\n",
       "    TLAT              (nlat, nlon) float64 -79.22 -79.22 -79.22 ... 72.19 72.19\n",
       "  * case              (case) <U34 'b1850.f19_g17.validation_mct.004' ... 'b18...\n",
       "Dimensions without coordinates: nlat, nlon\n",
       "Data variables: (12/27)\n",
       "    SFWF              (case, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    photoC_TOT_zint   (case, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    SiO3              (case, z_t, nlat, nlon) float32 dask.array<chunksize=(1, 60, 384, 320), meta=np.ndarray>\n",
       "    BSF               (case, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    photoC_sp_zint    (case, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    photoC_diat_zint  (case, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    ...                ...\n",
       "    CaCO3_FLUX_100m   (case, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    diaz_Nfix         (case, z_t_150m, nlat, nlon) float32 dask.array<chunksize=(1, 15, 384, 320), meta=np.ndarray>\n",
       "    SHF               (case, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    DOCr              (case, z_t, nlat, nlon) float32 dask.array<chunksize=(1, 60, 384, 320), meta=np.ndarray>\n",
       "    SHF_QSW           (case, nlat, nlon) float32 dask.array<chunksize=(1, 384, 320), meta=np.ndarray>\n",
       "    NH4               (case, z_t, nlat, nlon) float32 dask.array<chunksize=(1, 60, 384, 320), meta=np.ndarray>\n",
       "Attributes:\n",
       "    cell_methods:            cell_methods = time: mean ==> the variable value...\n",
       "    contents:                Diagnostic and Prognostic Variables\n",
       "    title:                   b1850.f19_g17.validation_mct.004\n",
       "    revision:                $Id$\n",
       "    time_period_freq:        month_1\n",
       "    Conventions:             CF-1.0; http://www.cgd.ucar.edu/cms/eaton/netcdf...\n",
       "    calendar:                All years have exactly  365 days.\n",
       "    source:                  CCSM POP2, the CCSM Ocean Component\n",
       "    model_doi_url:           https://doi.org/10.5065/D67H1H0V\n",
       "    intake_esm_varname:      SFWF\\nphotoC_TOT_zint\\nSiO3\\nBSF\\nphotoC_sp_zint...\n",
       "    history:                 none\n",
       "    intake_esm_dataset_key:  ocn/pop.h/b1850.f19_g17.validation_mct.004
" ], "text/plain": [ "\n", "Dimensions: (case: 3, nlat: 384, nlon: 320, z_t: 60, z_t_150m: 15)\n", "Coordinates:\n", " * z_t (z_t) float32 500.0 1.5e+03 ... 5.125e+05 5.375e+05\n", " * z_t_150m (z_t_150m) float32 500.0 1.5e+03 ... 1.35e+04 1.45e+04\n", " ULONG (nlat, nlon) float64 321.1 322.3 323.4 ... 319.6 320.0\n", " ULAT (nlat, nlon) float64 -78.95 -78.95 -78.95 ... 72.41 72.41\n", " TLONG (nlat, nlon) float64 320.6 321.7 322.8 ... 319.4 319.8\n", " TLAT (nlat, nlon) float64 -79.22 -79.22 -79.22 ... 72.19 72.19\n", " * case (case) \n", " photoC_TOT_zint (case, nlat, nlon) float32 dask.array\n", " SiO3 (case, z_t, nlat, nlon) float32 dask.array\n", " BSF (case, nlat, nlon) float32 dask.array\n", " photoC_sp_zint (case, nlat, nlon) float32 dask.array\n", " photoC_diat_zint (case, nlat, nlon) float32 dask.array\n", " ... ...\n", " CaCO3_FLUX_100m (case, nlat, nlon) float32 dask.array\n", " diaz_Nfix (case, z_t_150m, nlat, nlon) float32 dask.array\n", " SHF (case, nlat, nlon) float32 dask.array\n", " DOCr (case, z_t, nlat, nlon) float32 dask.array\n", " SHF_QSW (case, nlat, nlon) float32 dask.array\n", " NH4 (case, z_t, nlat, nlon) float32 dask.array\n", "Attributes:\n", " cell_methods: cell_methods = time: mean ==> the variable value...\n", " contents: Diagnostic and Prognostic Variables\n", " title: b1850.f19_g17.validation_mct.004\n", " revision: $Id$\n", " time_period_freq: month_1\n", " Conventions: CF-1.0; http://www.cgd.ucar.edu/cms/eaton/netcdf...\n", " calendar: All years have exactly 365 days.\n", " source: CCSM POP2, the CCSM Ocean Component\n", " model_doi_url: https://doi.org/10.5065/D67H1H0V\n", " intake_esm_varname: SFWF\\nphotoC_TOT_zint\\nSiO3\\nBSF\\nphotoC_sp_zint...\n", " history: none\n", " intake_esm_dataset_key: ocn/pop.h/b1850.f19_g17.validation_mct.004" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_ds" ] }, { "cell_type": "markdown", "id": "e9a6c29f", "metadata": {}, "source": [ "### 
Export our data\n", "We write the merged dataset of time averages to a Zarr store; `mode='w'` overwrites any existing store at that path" ] }, { "cell_type": "code", "execution_count": 23, "id": "6440c3bb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_ds.to_zarr('cached_output/averages_year_081_100.zarr', mode='w')" ] }, { "cell_type": "code", "execution_count": null, "id": "0e83058b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:miniconda3-cesm2-marbl]", "language": "python", "name": "conda-env-miniconda3-cesm2-marbl-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }