header

Read and download regional data

2023-04-27 READ_AND_DOWNLOAD_REGIONAL_DATA


Authors: CLS & Datlas Copyright: 2023 CLS & Datlas License: MIT

Read and download regional data

This notebook illustrates how to read the global data online, select the regional data of interest, and save them locally in NetCDF files. The example is given here for the Gulf Stream region (see the DC_2020 and DC_2021 data challenges).


General Note 1: Execute each cell with the Run button in the top menu (or with the keyboard shortcut Shift + Enter). General Note 2: If, for any reason, the kernel stops working, click on the Restart button in the top menu. Then, in the top menu, click on “Cell” and select “Run All Above Selected Cell”.

[1]:
from glob import glob
import numpy as np
import os
[2]:
import sys
# Add the parent directory (relative to the current working directory) to the
# module search path so that the local `src` package can be imported.
sys.path.append('..')
# NOTE(review): the star imports below are presumably what bring `xr` (xarray)
# and `retrieve_list_of_files_from_url` into scope for the download cells
# further down -- verify against the src modules before replacing them with
# explicit imports.
from src.mod_plot import *
from src.mod_stat import *
from src.mod_read import *
from src.mod_spectral import *
[3]:
import logging

# Grab the root logger and lower its threshold to INFO.
logger = logging.getLogger()
logger.setLevel("INFO")
[4]:
# --- Region of interest: Gulf Stream example ---
lon_min = 295     # western bound of the domain (longitude)
lon_max = 305     # eastern bound of the domain (longitude)
lat_min = 33.0    # southern bound of the domain (latitude)
lat_max = 43.0    # northern bound of the domain (latitude)

# --- Evaluation period (shorten it to analyse a sub-period) ---
time_min = '2019-01-01'    # first date of the analysis
time_max = '2019-12-31'    # last date of the analysis

# --- Output location and file names (file names are relative to saving_dir) ---
saving_dir = '../data/'                                    # root directory for the saved files
name_maps = 'maps/DUACS_GS.nc'                             # regional maps file
name_alg = 'independent_alongtrack/indep_nadir_GS.nc'      # regional independent nadir file
name_drift = 'independent_drifters/indep_drifters_GS.nc'   # regional independent drifters file

Data for reconstruction: Sea Surface Height from available nadirs (except Saral/AltiKa)

[ ]:

Data for evaluation: Sea Surface Height from Saral/AltiKa

[5]:
%%time
path_catalog = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/catalog/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/independent_alongtrack/alg/2019/catalog.html"
path_data =    "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/independent_alongtrack/alg/2019/"


list_of_files = retrieve_list_of_files_from_url(path_catalog, path_data)

xr.set_options(file_cache_maxsize=12)
ds_alg = xr.open_mfdataset(sorted(list_of_files)[:],chunks={'time':47151},concat_dim='time',combine='nested')
ds_alg = ds_alg.where((ds_alg.time >= np.datetime64(time_min)) & (ds_alg.time <=  np.datetime64(time_max)), drop=True)
ds_alg = ds_alg.sortby('time')
lon=np.array(ds_alg.longitude.values)
lat=np.array(ds_alg.latitude.values)
ind_lonmax = lon<lon_max
ind_lonmin = lon>lon_min
ind_latmax = lat<lat_max
ind_latmin = lat>lat_min
ind_sel_time = (ind_lonmax*ind_lonmin*ind_latmax*ind_latmin)
ds_alg = ds_alg.isel({'time':ind_sel_time})
ds_alg.to_netcdf(saving_dir+name_alg)

CPU times: user 5min 3s, sys: 38.9 s, total: 5min 42s
Wall time: 10min 5s

Data for evaluation: Drifters

[5]:
%%time
path_catalog = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/catalog/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/independent_drifters/catalog.html"
path_data =    "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/independent_drifters/"


list_of_files = retrieve_list_of_files_from_url(path_catalog, path_data,'uv_')

xr.set_options(file_cache_maxsize=12)
ds_drift = xr.open_mfdataset(sorted(list_of_files)[:],chunks={'time':6091},concat_dim='time',combine='nested')
ds_drift = ds_drift.where((ds_drift.time >= np.datetime64(time_min)) & (ds_drift.time <=  np.datetime64(time_max)), drop=True)
ds_drift = ds_drift.sortby('time')
lon=np.array(ds_drift.longitude.values)
lat=np.array(ds_drift.latitude.values)
lon[lon<0]=lon[lon<0] + 360
ind_lonmax = lon<lon_max
ind_lonmin = lon>lon_min
ind_latmax = lat<lat_max
ind_latmin = lat>lat_min
ind_sel_time = (ind_lonmax*ind_lonmin*ind_latmax*ind_latmin)
ds_drift = ds_drift.isel({'time':ind_sel_time})
ds_drift.to_netcdf(saving_dir+name_drift)

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/file_manager.py:209, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    208 try:
--> 209     file = self._cache[self._key]
    210 except KeyError:

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/lru_cache.py:55, in LRUCache.__getitem__(self, key)
     54 with self._lock:
---> 55     value = self._cache[key]
     56     self._cache.move_to_end(key)

KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/Users/sammymetref/Documents/DataChallenges_testing/2023a_SSH_mapping_OSE/data/independent_drifters/indep_drifters_GS.nc',), 'a', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), '0f541993-496c-441b-ac11-e9f405007329']

During handling of the above exception, another exception occurred:

PermissionError                           Traceback (most recent call last)
File <timed exec>:20

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/core/dataset.py:1903, in Dataset.to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
   1900     encoding = {}
   1901 from ..backends.api import to_netcdf
-> 1903 return to_netcdf(  # type: ignore  # mypy cannot resolve the overloads:(
   1904     self,
   1905     path,
   1906     mode=mode,
   1907     format=format,
   1908     group=group,
   1909     engine=engine,
   1910     encoding=encoding,
   1911     unlimited_dims=unlimited_dims,
   1912     compute=compute,
   1913     multifile=False,
   1914     invalid_netcdf=invalid_netcdf,
   1915 )

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/api.py:1213, in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
   1209     else:
   1210         raise ValueError(
   1211             f"unrecognized option 'invalid_netcdf' for engine {engine}"
   1212         )
-> 1213 store = store_open(target, mode, format, group, **kwargs)
   1215 if unlimited_dims is None:
   1216     unlimited_dims = dataset.encoding.get("unlimited_dims", None)

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:376, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
    370 kwargs = dict(
    371     clobber=clobber, diskless=diskless, persist=persist, format=format
    372 )
    373 manager = CachingFileManager(
    374     netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
    375 )
--> 376 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:323, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
    321 self._group = group
    322 self._mode = mode
--> 323 self.format = self.ds.data_model
    324 self._filename = self.ds.filepath()
    325 self.is_remote = is_remote_uri(self._filename)

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:385, in NetCDF4DataStore.ds(self)
    383 @property
    384 def ds(self):
--> 385     return self._acquire()

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:379, in NetCDF4DataStore._acquire(self, needs_lock)
    378 def _acquire(self, needs_lock=True):
--> 379     with self._manager.acquire_context(needs_lock) as root:
    380         ds = _nc4_require_group(root, self._group, self._mode)
    381     return ds

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/contextlib.py:135, in _GeneratorContextManager.__enter__(self)
    133 del self.args, self.kwds, self.func
    134 try:
--> 135     return next(self.gen)
    136 except StopIteration:
    137     raise RuntimeError("generator didn't yield") from None

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/file_manager.py:197, in CachingFileManager.acquire_context(self, needs_lock)
    194 @contextlib.contextmanager
    195 def acquire_context(self, needs_lock=True):
    196     """Context manager for acquiring a file."""
--> 197     file, cached = self._acquire_with_cache_info(needs_lock)
    198     try:
    199         yield file

File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/file_manager.py:215, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    213     kwargs = kwargs.copy()
    214     kwargs["mode"] = self._mode
--> 215 file = self._opener(*self._args, **kwargs)
    216 if self._mode == "w":
    217     # ensure file doesn't get overridden when opened again
    218     self._mode = "a"

File src/netCDF4/_netCDF4.pyx:2463, in netCDF4._netCDF4.Dataset.__init__()

File src/netCDF4/_netCDF4.pyx:2026, in netCDF4._netCDF4._ensure_nc_success()

PermissionError: [Errno 13] Permission denied: b'/Users/sammymetref/Documents/DataChallenges_testing/2023a_SSH_mapping_OSE/data/independent_drifters/indep_drifters_GS.nc'

Data for comparison: DUACS maps

[6]:
%%time
path_catalog = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/catalog/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/maps/DUACS_global_allsat-alg/catalog.html"
path_data =    "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/maps/DUACS_global_allsat-alg/"


list_of_files = retrieve_list_of_files_from_url(path_catalog, path_data)

xr.set_options(file_cache_maxsize=12)
ds_maps = xr.open_mfdataset(sorted(list_of_files)[:],chunks={'time':1,'latitude':720,'longitude':1440},concat_dim='time',combine='nested')
ds_maps = ds_maps.sel(time=slice(time_min, time_max))

# We select the region size 0.5° wider on each side to avoid interpolation issues later

ds_maps = ds_maps.sel({'longitude':slice(lon_min-0.5,lon_max+0.5)})
ds_maps = ds_maps.sel({'latitude':slice(lat_min-0.5,lat_max+0.5)})
ds_maps.to_netcdf(saving_dir+name_maps)
CPU times: user 1min 30s, sys: 11.7 s, total: 1min 42s
Wall time: 3min 24s

You can now use the evaluation notebooks by changing the data directory and file names!