Contents
%load_ext autoreload
%autoreload 2
import os

from itertools import product

import pandas as pd
import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.colors as colors
import cmocean

import cartopy
import cartopy.crs as ccrs

import xpersist as xp
cache_dir = '/glade/p/cgd/oce/projects/cesm2-marbl/xpersist_cache/3d_fields'
if (os.path.isdir(cache_dir)):
    xp.settings['cache_dir'] = cache_dir
os.makedirs(cache_dir, exist_ok=True)

os.environ['CESMDATAROOT'] = '/glade/scratch/mclong/inputdata'
import pop_tools


import climo_utils as cu
import utils
import discrete_obs 

import plot
import intake
catalog = intake.open_esm_datastore('data/campaign-cesm2-cmip6-timeseries.json')
df = catalog.search(experiment='historical', component='ocn', stream='pop.h').df
variables = df.variable.unique()
[v for v in variables if 'Fe' in v or 'iron' in v.lower() or 'sed' in v.lower() or 'dust' in v.lower()]
['ATM_COARSE_DUST_FLUX_CPL',
 'ATM_FINE_DUST_FLUX_CPL',
 'Fe',
 'Fe_RIV_FLUX',
 'Fe_scavenge',
 'Fe_scavenge_rate',
 'Fefree',
 'IRON_FLUX',
 'Jint_100m_Fe',
 'P_iron_FLUX_100m',
 'P_iron_FLUX_IN',
 'P_iron_PROD',
 'P_iron_REMIN',
 'SEAICE_DUST_FLUX_CPL',
 'SedDenitrif',
 'bsiToSed',
 'calcToSed',
 'calcToSed_ALT_CO2',
 'diatFe',
 'diat_Fe_lim_Cweight_avg_100m',
 'diat_Fe_lim_surf',
 'diazFe',
 'diaz_Fe_lim_Cweight_avg_100m',
 'diaz_Fe_lim_surf',
 'dustToSed',
 'dust_FLUX_IN',
 'dust_REMIN',
 'pfeToSed',
 'photoFe_diat',
 'photoFe_diaz',
 'photoFe_sp',
 'pocToSed',
 'ponToSed',
 'popToSed',
 'spFe',
 'sp_Fe_lim_Cweight_avg_100m',
 'sp_Fe_lim_surf',
 'tend_zint_100m_Fe']
cluster, client = utils.get_ClusterClient()
cluster.scale(12) #adapt(minimum_jobs=0, maximum_jobs=24)
client
/glade/work/mclong/miniconda3/envs/cesm2-marbl/lib/python3.7/site-packages/distributed/dashboard/core.py:79: UserWarning: 
Port 8787 is already in use. 
Perhaps you already have a cluster running?
Hosting the diagnostics dashboard on a random port instead.
  warnings.warn("\n" + msg)

Client

Cluster

  • Workers: 0
  • Cores: 0
  • Memory: 0 B
ds_grid = pop_tools.get_grid('POP_gx1v7')
masked_area = ds_grid.TAREA.where(ds_grid.REGION_MASK > 0).fillna(0.).expand_dims('region')
masked_area #.plot()
/glade/work/mclong/miniconda3/envs/cesm2-marbl/lib/python3.7/site-packages/numba/np/ufunc/parallel.py:365: NumbaWarning: The TBB threading layer requires TBB version 2019.5 or later i.e., TBB_INTERFACE_VERSION >= 11005. Found TBB_INTERFACE_VERSION = 6103. The TBB threading layer is disabled.
  warnings.warn(problem)
<xarray.DataArray 'TAREA' (region: 1, nlat: 384, nlon: 320)>
array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [1.52530781e+13, 1.52530781e+13, 1.52530781e+13, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00]]])
Dimensions without coordinates: region, nlat, nlon
Attributes:
    units:        cm^2
    long_name:    area of T cells
    coordinates:  TLONG TLAT

dust_to_Fe: conversion of dust to iron (nmol Fe/g Dust)

dust remin gDust = 0.035 gFe       mol Fe     1e3 mmolFe
                    --------- *  ----------- * ----------
                      gDust      molw_Fe gFe      molFe
molw_Fe =  55.845
dust_to_Fe_mmol = 0.035 / molw_Fe * 1.0e3
dust_to_Fe_mmol
0.6267347121497001
percm2_to_perm2 = 1e4
pers_to_peryr = 86400. * 365.
nmolcm3_to_nM = 1e3
nmolcm2s_to_mmolm2yr = 1e-9 * 1e3 * 1e4 * 86400 * 365.
µmolm2d_to_mmolm2yr = 1e-3 * 365.
mmols_to_Gmolyr = 1e-12 * 86400. * 365.
nmols_to_Gmolyr = 1e-18 * 86400. * 365.

time_slice = slice("1990-01-15", "2015-01-15")
varlist = [
    'Fe',
    'IRON_FLUX',
    'Fe_RIV_FLUX',
    'pfeToSed',   
    'dust_REMIN',
]
ds_list = []
for variable in varlist:
    xp_func = xp.persist_ds(cu.read_CESM_var, name=f'{variable}', trust_cache=True)    
    ds_list.append(xp_func(
        time_slice, 
        variable, 
        mean_dims=['member_id', 'time'], 
    ))
    
ds = xr.merge(ds_list)
ds['dz'] = ds_grid.dz.drop('z_t') # drop z_t because precision issues cause diffs
ds['TAREA'] = ds_grid.TAREA

# add data from forcing files
ds = xr.merge((ds, cu.get_fesedflux_forcing(),))

# compute dust remin component 
assert ds.dust_REMIN.attrs['units'] == 'g/cm^3/s'
ds['Fe_from_dust'] = (ds.dust_REMIN * dust_to_Fe_mmol * ds.dz).sum('z_t') * percm2_to_perm2 * pers_to_peryr 
ds.Fe_from_dust.attrs['units'] = 'mmol m$^{-2}$ yr$^{-1}$'

ds['Fe_aeolian_input'] = ds.Fe_from_dust + ds.IRON_FLUX
ds.Fe_aeolian_input.attrs['units'] = 'mmol m$^{-2}$ yr$^{-1}$'

# compute global integrals
convert_glb_Gmolyr = dict(
    IRON_FLUX=mmols_to_Gmolyr * 1e-4,
    Fe_from_dust=1e-12 * 1e-4,
    pfeToSed=-1. * nmols_to_Gmolyr,
    Fe_ventflux=1e-12 * 1e-4,
    Fe_sedflux=1e-12 * 1e-4,
    Fe_RIV_FLUX=nmols_to_Gmolyr,
)
ds_glb = xr.Dataset()
for v in convert_glb_Gmolyr.keys():
    ds_glb[v] = (masked_area * ds[v]).sum(['nlat', 'nlon']) * convert_glb_Gmolyr[v]
    ds_glb[v].attrs['units'] = 'Gmol yr$^{-1}$'
    
    
global_Fe_inv = (ds.Fe * ds.TAREA * ds.dz).sum() * 1e-18
global_Fe_inv.attrs['units'] = 'Gmol'
    
# convert to nice units    
with xr.set_options(keep_attrs=True):
    
    for v in ['Fe', 'Fe_RIV_FLUX', 'pfeToSed', 'IRON_FLUX']:
        if ds[v].attrs['units'] == 'mmol/m^3':
            ds[v] = ds[v] * nmolcm3_to_nM
            ds[v].attrs['units'] = 'nM'
    
        elif ds[v].attrs['units'] in ['nmol/cm^2/s', 'mmol/m^3 cm/s']:
            ds[v] = ds[v] * nmolcm2s_to_mmolm2yr
            ds[v].attrs['units'] = 'mmol m$^{-2}$ yr$^{-1}$'        
        
        elif ds[v].attrs['units'] in ['mmol/m^2/s']:
            ds[v] = ds[v] * 86400. * 365. #* 1e4
            ds[v].attrs['units'] = 'mmol m$^{-2}$ yr$^{-1}$'        
    
           
# make dataset map-plottable
dsp = utils.pop_add_cyclic(ds)
dsp.info()
assuming cache is correct
reading cached file: /glade/p/cgd/oce/projects/cesm2-marbl/xpersist_cache/3d_fields/Fe.nc
assuming cache is correct
reading cached file: /glade/p/cgd/oce/projects/cesm2-marbl/xpersist_cache/3d_fields/IRON_FLUX.nc
assuming cache is correct
reading cached file: /glade/p/cgd/oce/projects/cesm2-marbl/xpersist_cache/3d_fields/Fe_RIV_FLUX.nc
assuming cache is correct
reading cached file: /glade/p/cgd/oce/projects/cesm2-marbl/xpersist_cache/3d_fields/pfeToSed.nc
assuming cache is correct
reading cached file: /glade/p/cgd/oce/projects/cesm2-marbl/xpersist_cache/3d_fields/dust_REMIN.nc
xarray.Dataset {
dimensions:
	nlat = 384 ;
	nlon = 321 ;
	z_t = 60 ;

variables:
	float32 z_t(z_t) ;
	float64 TLAT(nlat, nlon) ;
	float64 TLONG(nlat, nlon) ;
	float32 Fe(z_t, nlat, nlon) ;
		Fe:long_name = Dissolved Inorganic Iron ;
		Fe:units = nM ;
		Fe:grid_loc = 3111 ;
		Fe:cell_methods = time: mean ;
	float32 IRON_FLUX(nlat, nlon) ;
		IRON_FLUX:long_name = Atmospheric Iron Flux ;
		IRON_FLUX:units = mmol m$^{-2}$ yr$^{-1}$ ;
		IRON_FLUX:grid_loc = 2110 ;
		IRON_FLUX:cell_methods = time: mean ;
	float32 Fe_RIV_FLUX(nlat, nlon) ;
		Fe_RIV_FLUX:long_name = Dissolved Inorganic Iron Riverine Flux ;
		Fe_RIV_FLUX:units = mmol m$^{-2}$ yr$^{-1}$ ;
		Fe_RIV_FLUX:grid_loc = 2110 ;
		Fe_RIV_FLUX:cell_methods = time: mean ;
	float32 pfeToSed(nlat, nlon) ;
		pfeToSed:long_name = pFe Flux to Sediments ;
		pfeToSed:units = mmol m$^{-2}$ yr$^{-1}$ ;
		pfeToSed:grid_loc = 2110 ;
		pfeToSed:cell_methods = time: mean ;
	float32 dust_REMIN(z_t, nlat, nlon) ;
		dust_REMIN:long_name = Dust Remineralization ;
		dust_REMIN:units = g/cm^3/s ;
		dust_REMIN:grid_loc = 3111 ;
		dust_REMIN:cell_methods = time: mean ;
	float64 dz(z_t) ;
		dz:units = cm ;
		dz:long_name = thickness of layer k ;
	float64 TAREA(nlat, nlon) ;
		TAREA:units = cm^2 ;
		TAREA:long_name = area of T cells ;
		TAREA:coordinates = TLONG TLAT ;
	float32 Fe_ventflux(nlat, nlon) ;
		Fe_ventflux:units = mmol m$^{-2}$ yr$^{-1}$ ;
	float32 Fe_sedflux(nlat, nlon) ;
		Fe_sedflux:units = mmol m$^{-2}$ yr$^{-1}$ ;
	float64 Fe_from_dust(nlat, nlon) ;
		Fe_from_dust:units = mmol m$^{-2}$ yr$^{-1}$ ;
	float64 Fe_aeolian_input(nlat, nlon) ;
		Fe_aeolian_input:units = mmol m$^{-2}$ yr$^{-1}$ ;

// global attributes:
}
df_glb = ds_glb.to_dataframe() #.sum(axis=1)
print(f'Global imbalance: {df_glb.sum(axis=1).values[0]:0.3f} Gmol/yr')
df_glb
Global imbalance: -0.032 Gmol/yr
IRON_FLUX Fe_from_dust pfeToSed Fe_ventflux Fe_sedflux Fe_RIV_FLUX
region
0 8.05681 5.566933 -38.614945 4.905846 19.681022 0.372556
client.close()
cluster.close()
del client
del cluster
from matplotlib.colors import LogNorm

nvar = len(ds_glb.data_vars)
nrow = int(np.sqrt(nvar))
ncol = int(nvar/nrow) + min(1, nvar%nrow)
figsize=(6, 4)
fig, axs = plt.subplots(
    nrow, ncol, 
    figsize=(figsize[0]*ncol, figsize[1]*nrow),
    squeeze=False)

for n, v in enumerate(ds_glb.data_vars):
    i, j = np.unravel_index(n, axs.shape)
    ds[v].plot(ax=axs[i, j], norm=LogNorm(vmin=0.001, vmax=10.))
    axs[i, j].set_title(v)
../_images/dFe-comparison_11_0.png
titles = dict(
    Fe_aeolian_input=f'Aeolian deposition: {df_glb["IRON_FLUX"].values[0] + df_glb["Fe_from_dust"].values[0]:0.2f} Gmol yr$^{{-1}}$',
    Fe_sedflux=f'Sediment input: {df_glb["Fe_sedflux"].values[0]:0.2f} Gmol yr$^{{-1}}$',
    Fe_ventflux=f'Geothermal input: {df_glb["Fe_ventflux"].values[0]:0.2f} Gmol yr$^{{-1}}$',
    pfeToSed=f'Loss to sediment: {df_glb["pfeToSed"].values[0]:0.2f} Gmol yr$^{{-1}}$',
)
titles
{'Fe_aeolian_input': 'Aeolian deposition: 13.62 Gmol yr$^{-1}$',
 'Fe_sedflux': 'Sediment input: 19.68 Gmol yr$^{-1}$',
 'Fe_ventflux': 'Geothermal input: 4.91 Gmol yr$^{-1}$',
 'pfeToSed': 'Loss to sediment: -38.61 Gmol yr$^{-1}$'}
fields = ['Fe_aeolian_input', 'Fe_sedflux', 'Fe_ventflux', 'pfeToSed']

log_levels = [0., 0.001]
for scale in 10**np.arange(-3., 1., 1.):
    log_levels.extend(list(np.array([3., 6., 10.]) * scale))
    
levels = {k: log_levels for k in fields}


fig = plt.figure(figsize=(12, 6))
prj = ccrs.Robinson(central_longitude=305.0)

nrow, ncol = 2, 2 
gs = gridspec.GridSpec(
    nrows=nrow, ncols=ncol+1, 
    width_ratios=(1, 1, 0.02),
    wspace=0.15, 
    hspace=0.1,
)

axs = np.empty((nrow, ncol)).astype(object)
caxs= np.empty((nrow, ncol)).astype(object)
for i, j in product(range(nrow), range(ncol)):    
    axs[i, j] = plt.subplot(gs[i, j], projection=prj)
cax = plt.subplot(gs[:, -1])

cmap_field = cmocean.cm.dense


for n, field in enumerate(fields):
    
    i, j = np.unravel_index(n, axs.shape)
    
    ax = axs[i, j]
   
    cf = ax.contourf(
        dsp.TLONG,dsp.TLAT, dsp[field],
        levels=levels[field],
        extend='max',
        cmap=cmap_field,
        norm=colors.BoundaryNorm(levels[field], ncolors=cmap_field.N),
        transform=ccrs.PlateCarree(),
    )

    land = ax.add_feature(
        cartopy.feature.NaturalEarthFeature(
            'physical','land','110m',
            edgecolor='face',
            facecolor='gray'
        )
    )  
                             
    ax.set_title(titles[field]) #dsp[field].attrs['title_str'])
    
cb = plt.colorbar(cf, cax=cax, ticks=log_levels)
if 'units' in dsp[field].attrs:
    cb.ax.set_title(dsp[field].attrs['units'])
    cb.ax.set_yticklabels([f'{f:g}' for f in log_levels])
    
utils.label_plots(fig, [ax for ax in axs.ravel()], xoff=0.02, yoff=0)       
utils.savefig('iron-budget-maps.pdf')
../_images/dFe-comparison_13_0.png
regions = {
    'Southern Ocean': 1,
    'Atlantic': 6, 
    'Indian': 3,
    'Pacific': 2,     
}
ds_grid.REGION_MASK.plot(vmax=6)
<matplotlib.collections.QuadMesh at 0x2b9b93561bd0>
../_images/dFe-comparison_14_1.png
df = discrete_obs.open_datastream('dFe')
df.obs_stream.add_model_field(ds.Fe)
df.obs_stream.add_model_field(ds_grid.REGION_MASK, method='nearest')
df
/glade/u/home/mclong/p/cesm2-marbl/notebooks/discrete_obs/tools.py:75: UserWarning: registration of accessor <class 'discrete_obs.tools.obs_datastream'> under name 'obs_stream' for type <class 'pandas.core.frame.DataFrame'> is overriding a preexisting attribute with the same name.
  @pd.api.extensions.register_dataframe_accessor('obs_stream')
lon lat depth dFe_obs Fe REGION_MASK
0 210.010 -16.0018 20.0 0.540000 0.065604 2.0
1 210.010 -16.0018 35.0 0.440000 0.065562 2.0
2 210.010 -16.0019 50.0 0.480000 0.066896 2.0
3 210.010 -16.0019 80.0 0.400000 0.081158 2.0
4 210.010 -16.0020 100.0 0.390000 0.096782 2.0
... ... ... ... ... ... ...
27777 160.051 47.0032 3929.6 0.825681 0.850402 2.0
27778 160.051 47.0032 3929.8 0.902248 0.850392 2.0
27779 160.051 47.0032 4900.4 0.555630 NaN 2.0
27780 160.051 47.0032 4900.9 0.621851 NaN 2.0
27781 160.051 47.0032 5210.1 0.573220 NaN 2.0

27782 rows × 6 columns

plt.plot(df.dFe_obs, df.Fe, '.')
plt.xlabel('CESM2 dFe [nM]')
plt.ylabel('Obs dFe [nM]')
plt.plot([0, 5], [0, 5], 'r-')
[<matplotlib.lines.Line2D at 0x2b9cbabdfa50>]
../_images/dFe-comparison_16_1.png
fig, axs = plot.canvas(3, 1, figsize=(6, 2.5), use_gridspec=True, hspace=0.06)

dx = 0.05
bin_edges = np.arange(0., 1.5+dx, dx)
bins = np.vstack((bin_edges[:-1], bin_edges[1:])).mean(axis=0)

depth_ranges = {
    '–100m < z': (0., 100.),
    '-500m < z < -100 m': (100., 500.),
    '   z < -500m': (500., 1e36),
}

for n, (key, depth_range) in enumerate(depth_ranges.items()):
    ax = axs[n, 0]
    df_sub = df.loc[(depth_range[0] <= df.depth) & (df.depth <= depth_range[1])]
    
    hist, _ = np.histogram(df_sub.dFe_obs.values, bin_edges)
    ax.bar(bins, hist, width=dx, alpha=0.7, label='Obs')
    
    hist, _ = np.histogram(df_sub.Fe.values, bin_edges)
    ax.bar(bins, hist, width=dx, alpha=0.6, label='CESM2-MARBL')
    
    if n == 0:
        ax.legend(loc='upper right', frameon=False)
    if n < 2:
        ax.set_xticklabels([])
    ylm = ax.get_ylim()
    ax.text(0.75, ylm[1] - 0.12 * np.diff(ylm), key, 
            fontweight='bold',
            fontsize=12,
            ha='center',
           )
    if n == 1:
        ax.set_ylabel('Number of samples')
    if n == 2:
        ax.set_xlabel('Dissolved iron concentration [nM]')
        
utils.label_plots(fig, [ax for ax in axs.ravel()], xoff=-0.05, yoff=-0.02)               
utils.savefig('iron-global-ocean-obs-PDF')
../_images/dFe-comparison_17_0.png
residence_time = -(global_Fe_inv / ds_glb.pfeToSed).values[0]
print(f'Fe residence time: {residence_time:0.2f} yr')
Fe residence time: 28.33 yr
field = 'Fe'

levels = {'Fe': [0., 0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.8, 1., 2.5, 5.0]}


fig = plt.figure(figsize=(10, 8))
prj = ccrs.Robinson(central_longitude=305.0)

nrow, ncol = 3, 2 
gs = gridspec.GridSpec(
    nrows=nrow, ncols=ncol+1, 
    width_ratios=(1, 1, 0.03),
    wspace=0.05, 
    hspace=0.2,
)

axs = np.empty((nrow, ncol)).astype(object)
caxs= np.empty((nrow, ncol)).astype(object)
for i, j in product(range(nrow), range(ncol)):    
    axs[i, j] = plt.subplot(gs[i, j], projection=prj)
cax = plt.subplot(gs[:, -1])

cmap_field = cmocean.cm.dense


for i, (key, depth_range) in enumerate(depth_ranges.items()):
    for j in range(2):
        ax = axs[i, j]
        if j == 0:
            zslice = slice(depth_range[0]*100., depth_range[1]*100.)

            cf = ax.contourf(
                dsp.TLONG,dsp.TLAT, dsp[field].sel(z_t=zslice).mean('z_t'),
                levels=levels[field],
                extend='max',
                cmap=cmap_field,
                norm=colors.BoundaryNorm(levels[field], ncolors=cmap_field.N),
                transform=ccrs.PlateCarree(),
            )
        else:
            df_sub = df.loc[(depth_range[0] <= df.depth) & (df.depth <= depth_range[1])]
            sc = ax.scatter(
                df_sub.lon, df_sub.lat, c=df_sub.dFe_obs.values,
                cmap=cmap_field,
                norm=colors.BoundaryNorm(levels[field], ncolors=cmap_field.N),
                s=6,
                transform=ccrs.PlateCarree(),
            )

        land = ax.add_feature(
            cartopy.feature.NaturalEarthFeature(
                'physical','land','110m',
                edgecolor='face',
                facecolor='gray'
            )
        )  

cb = plt.colorbar(cf, cax=cax, ticks=levels['Fe'])
if 'units' in dsp[field].attrs:
    cb.ax.set_title(dsp[field].attrs['units'])
    cb.ax.set_yticklabels([f'{f:g}' for f in levels['Fe']])

utils.subplot_col_labels(axs[0, :], ['CESM2', 'Observations'])    
utils.subplot_row_labels(axs[:, 0], depth_ranges.keys(), xoff=60)
    
utils.label_plots(fig, [ax for ax in axs.ravel()], xoff=0.02, yoff=0)       
utils.savefig('iron-concentration-maps.pdf')
../_images/dFe-comparison_19_0.png