
Read and download regional data
2023-04-27 READ_AND_DOWNLOAD_REGIONAL_DATA
Authors: CLS & Datlas Copyright: 2023 CLS & Datlas License: MIT
Read and download regional data
Read and download regional data
This notebook illustrates how to read the global data online, select the regional data of interest and save them locally in a netCDF file. The example given here is for the Gulf Stream region (see the DC_2020 and DC_2021 data challenges).
General Note 1: Execute each cell using the Run button in the top menu (or the keyboard shortcut Shift + Enter). General Note 2: If, for any reason, the kernel stops working, click the Restart button in the top menu. Then, in the top menu, click on “Cell” and select “Run All Above Selected Cell”. ***
Learning outcomes
At the end of this notebook you will know how to:
read the global data online,
select the regional data of interest,
save them locally in a netCDF file.
[1]:
from glob import glob
import numpy as np
import os
[2]:
import sys
sys.path.append('..')
from src.mod_plot import *
from src.mod_stat import *
from src.mod_read import *
from src.mod_spectral import *
[3]:
import logging
# Calling getLogger() without a name returns the root logger; set its
# threshold to INFO and keep a handle on it under the name `logger`.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.INFO)
Experiment setup
[4]:
# Region of interest: example box over the Gulf Stream.
# Longitude bounds are expressed on a 0-360 axis, which is why the
# western-Atlantic box sits around 295-305.
lon_min, lon_max = 295, 305      # domain min / max longitude
lat_min, lat_max = 33., 43.      # domain min / max latitude

# Evaluation period; narrow it if you are interested in a shorter window.
time_min, time_max = '2019-01-01', '2019-12-31'   # time min / max for analysis

# Output location: every file name below is appended to saving_dir.
saving_dir = '../data/'                                     # saving directory path
name_maps = 'maps/DUACS_GS.nc'                              # regional maps file name
name_alg = 'independent_alongtrack/indep_nadir_GS.nc'       # regional independent nadir file name
name_drift = 'independent_drifters/indep_drifters_GS.nc'    # regional independent drifters file name
Read online global data, select region and locally save file
Data for reconstruction: Sea Surface Height from available nadirs (except Saral/AltiKa)
[ ]:
Data for evaluation: Sea Surface Height from Saral/AltiKa
[5]:
%%time
# Independent along-track SSH (Saral/AltiKa, 2019) used for evaluation.
# Steps: list the remote files, open them as one lazy dataset, keep the
# samples inside [time_min, time_max] and inside the regional box, then
# write the regional subset to saving_dir + name_alg.
path_catalog = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/catalog/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/independent_alongtrack/alg/2019/catalog.html"
path_data = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/independent_alongtrack/alg/2019/"
list_of_files = retrieve_list_of_files_from_url(path_catalog, path_data)

# Cap the number of remote files xarray keeps open at the same time.
xr.set_options(file_cache_maxsize=12)
ds_alg = xr.open_mfdataset(sorted(list_of_files), chunks={'time': 47151}, concat_dim='time', combine='nested')

# Temporal selection first, then make sure samples are in chronological order.
in_period = (ds_alg.time >= np.datetime64(time_min)) & (ds_alg.time <= np.datetime64(time_max))
ds_alg = ds_alg.where(in_period, drop=True)
ds_alg = ds_alg.sortby('time')

# Spatial selection: along-track data are indexed by time only, so the box
# is applied as a boolean mask over the time dimension.
lon = np.array(ds_alg.longitude.values)
lat = np.array(ds_alg.latitude.values)
# The box is expressed on a 0-360 longitude axis; wrap any negative
# longitudes so they are compared on the same axis (no-op if there are none).
lon = np.where(lon < 0, lon + 360, lon)
ind_sel_time = (lon > lon_min) & (lon < lon_max) & (lat > lat_min) & (lat < lat_max)
ds_alg = ds_alg.isel({'time': ind_sel_time})

# to_netcdf does not create missing parent directories, so create the
# target folder before writing.
out_path = saving_dir + name_alg
os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)
ds_alg.to_netcdf(out_path)
CPU times: user 5min 3s, sys: 38.9 s, total: 5min 42s
Wall time: 10min 5s
Data for evaluation: Drifters
[5]:
%%time
# Independent drifter data used for evaluation.
# Steps: list the remote files, open them as one lazy dataset, keep the
# samples inside [time_min, time_max] and inside the regional box, then
# write the regional subset to saving_dir + name_drift.
path_catalog = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/catalog/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/independent_drifters/catalog.html"
path_data = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/independent_drifters/"
# NOTE(review): the third argument is presumably a file-name filter that
# keeps only the 'uv_' files -- confirm against retrieve_list_of_files_from_url.
list_of_files = retrieve_list_of_files_from_url(path_catalog, path_data,'uv_')

# Cap the number of remote files xarray keeps open at the same time.
xr.set_options(file_cache_maxsize=12)
ds_drift = xr.open_mfdataset(sorted(list_of_files), chunks={'time': 6091}, concat_dim='time', combine='nested')

# Temporal selection first, then make sure samples are in chronological order.
in_period = (ds_drift.time >= np.datetime64(time_min)) & (ds_drift.time <= np.datetime64(time_max))
ds_drift = ds_drift.where(in_period, drop=True)
ds_drift = ds_drift.sortby('time')

# Spatial selection: samples are indexed by time only, so the box is applied
# as a boolean mask over the time dimension.
lon = np.array(ds_drift.longitude.values)
lat = np.array(ds_drift.latitude.values)
# The box is expressed on a 0-360 longitude axis; shift negative longitudes
# onto that axis before comparing.
lon[lon < 0] += 360
ind_sel_time = (lon > lon_min) & (lon < lon_max) & (lat > lat_min) & (lat < lat_max)
ds_drift = ds_drift.isel({'time': ind_sel_time})

# to_netcdf does not create missing parent directories, so create the
# target folder before writing.
# NOTE(review): a missing 'independent_drifters/' folder is a likely cause of
# the PermissionError recorded in this cell's output -- confirm; the same
# error can also appear if the target file is still open elsewhere.
out_path = saving_dir + name_drift
os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)
ds_drift.to_netcdf(out_path)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/file_manager.py:209, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
208 try:
--> 209 file = self._cache[self._key]
210 except KeyError:
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/lru_cache.py:55, in LRUCache.__getitem__(self, key)
54 with self._lock:
---> 55 value = self._cache[key]
56 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/Users/sammymetref/Documents/DataChallenges_testing/2023a_SSH_mapping_OSE/data/independent_drifters/indep_drifters_GS.nc',), 'a', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), '0f541993-496c-441b-ac11-e9f405007329']
During handling of the above exception, another exception occurred:
PermissionError Traceback (most recent call last)
File <timed exec>:20
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/core/dataset.py:1903, in Dataset.to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
1900 encoding = {}
1901 from ..backends.api import to_netcdf
-> 1903 return to_netcdf( # type: ignore # mypy cannot resolve the overloads:(
1904 self,
1905 path,
1906 mode=mode,
1907 format=format,
1908 group=group,
1909 engine=engine,
1910 encoding=encoding,
1911 unlimited_dims=unlimited_dims,
1912 compute=compute,
1913 multifile=False,
1914 invalid_netcdf=invalid_netcdf,
1915 )
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/api.py:1213, in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
1209 else:
1210 raise ValueError(
1211 f"unrecognized option 'invalid_netcdf' for engine {engine}"
1212 )
-> 1213 store = store_open(target, mode, format, group, **kwargs)
1215 if unlimited_dims is None:
1216 unlimited_dims = dataset.encoding.get("unlimited_dims", None)
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:376, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
370 kwargs = dict(
371 clobber=clobber, diskless=diskless, persist=persist, format=format
372 )
373 manager = CachingFileManager(
374 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
375 )
--> 376 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:323, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
321 self._group = group
322 self._mode = mode
--> 323 self.format = self.ds.data_model
324 self._filename = self.ds.filepath()
325 self.is_remote = is_remote_uri(self._filename)
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:385, in NetCDF4DataStore.ds(self)
383 @property
384 def ds(self):
--> 385 return self._acquire()
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:379, in NetCDF4DataStore._acquire(self, needs_lock)
378 def _acquire(self, needs_lock=True):
--> 379 with self._manager.acquire_context(needs_lock) as root:
380 ds = _nc4_require_group(root, self._group, self._mode)
381 return ds
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/contextlib.py:135, in _GeneratorContextManager.__enter__(self)
133 del self.args, self.kwds, self.func
134 try:
--> 135 return next(self.gen)
136 except StopIteration:
137 raise RuntimeError("generator didn't yield") from None
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/file_manager.py:197, in CachingFileManager.acquire_context(self, needs_lock)
194 @contextlib.contextmanager
195 def acquire_context(self, needs_lock=True):
196 """Context manager for acquiring a file."""
--> 197 file, cached = self._acquire_with_cache_info(needs_lock)
198 try:
199 yield file
File ~/miniconda3/envs/env-dc-global-ose0/lib/python3.10/site-packages/xarray/backends/file_manager.py:215, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
213 kwargs = kwargs.copy()
214 kwargs["mode"] = self._mode
--> 215 file = self._opener(*self._args, **kwargs)
216 if self._mode == "w":
217 # ensure file doesn't get overridden when opened again
218 self._mode = "a"
File src/netCDF4/_netCDF4.pyx:2463, in netCDF4._netCDF4.Dataset.__init__()
File src/netCDF4/_netCDF4.pyx:2026, in netCDF4._netCDF4._ensure_nc_success()
PermissionError: [Errno 13] Permission denied: b'/Users/sammymetref/Documents/DataChallenges_testing/2023a_SSH_mapping_OSE/data/independent_drifters/indep_drifters_GS.nc'
Data for comparison: DUACS maps
[6]:
%%time
# DUACS gridded maps used as the comparison product.
# Steps: list the remote files, open them as one lazy dataset, cut out the
# evaluation period and a slightly enlarged regional box, then write the
# subset to saving_dir + name_maps.
# NOTE(review): this catalog URL is built differently from the along-track
# and drifter cells ('fileServer/.../catalog/' here versus
# 'catalog/.../extract/' there) -- confirm it resolves as intended.
path_catalog = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/catalog/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/maps/DUACS_global_allsat-alg/catalog.html"
path_data = "https://ige-meom-opendap.univ-grenoble-alpes.fr/thredds/fileServer/meomopendap/extract/MEOM/OCEAN_DATA_CHALLENGES/2023a_SSH_mapping_OSE/maps/DUACS_global_allsat-alg/"
list_of_files = retrieve_list_of_files_from_url(path_catalog, path_data)

# Cap the number of remote files xarray keeps open at the same time.
xr.set_options(file_cache_maxsize=12)
ds_maps = xr.open_mfdataset(sorted(list_of_files), chunks={'time': 1, 'latitude': 720, 'longitude': 1440}, concat_dim='time', combine='nested')

# Label-based selection on the three coordinates at once.
# The region is taken 0.5 degrees wider on each side to avoid interpolation
# issues later.
ds_maps = ds_maps.sel(
    time=slice(time_min, time_max),
    longitude=slice(lon_min - 0.5, lon_max + 0.5),
    latitude=slice(lat_min - 0.5, lat_max + 0.5),
)

# to_netcdf does not create missing parent directories, so create the
# target folder before writing.
out_path = saving_dir + name_maps
os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)
ds_maps.to_netcdf(out_path)
CPU times: user 1min 30s, sys: 11.7 s, total: 1min 42s
Wall time: 3min 24s