Source code for core.save

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


from contextlib import nullcontext
from pathlib import Path
from typing import Literal, Optional

import xarray as xr
from dask.diagnostics.progress import ProgressBar

from core.fileutils import filegen
from core import log


[docs]
def to_netcdf(
    ds: xr.Dataset,
    filename: Path,
    *,
    engine: str = "h5netcdf",
    zlib: bool = True,
    complevel: int = 5,
    verbose: bool = True,
    tmpdir: Optional[Path] = None,
    lock_timeout: int = 0,
    if_exists: Literal["skip", "overwrite", "backup", "error"] = "error",
    clean_attrs: bool = True,
    **kwargs
):
    """
    Write an xarray Dataset `ds` using `.to_netcdf` with several additional features:

    - Use file compression
    - Wrapped by filegen: use temporary files, detect existing output files...

    Args:
        ds (xr.Dataset): Input dataset
        filename (Path): Output file path
        engine (str, optional): Engine driver to use. Defaults to 'h5netcdf'.
        zlib (bool, optional): activate zlib compression of all data variables.
            Defaults to True.
        complevel (int, optional): Compression level. Defaults to 5.
        verbose (bool, optional): Verbosity. When enabled, the output path is
            logged and the write is wrapped in a dask ProgressBar.
            Defaults to True.
        tmpdir (Path, optional): use a given temporary directory. Defaults to None.
        lock_timeout (int): timeout in case of existing lock file
        if_exists (str, optional): what to do if output file exists. Defaults to 'error'.
        clean_attrs: whether to remove attributes in the xarray object, that cannot
            be written to netcdf. The cleaning is applied to `ds` itself (in place).
        other kwargs are passed to ds.to_netcdf. An `encoding` mapping may be
            provided: its per-variable entries are merged with (and take
            precedence over) the compression settings built from `zlib` and
            `complevel`.

    Raises:
        AssertionError: when `ds` is not an xarray Dataset (reported through
            `log.error` with `e=AssertionError`; presumably raised there).
    """
    if not isinstance(ds, xr.Dataset):
        # Give a hint for the most common mistake: passing a DataArray
        soluce = ', got an xarray DataArray. Please use .to_dataset method and ' \
                 'specify a variable name' if isinstance(ds, xr.DataArray) else ''
        log.error("to_netcdf expects an xarray Dataset" + soluce,
                  e=AssertionError)

    # `encoding` is always passed explicitly below: take a user-provided one out
    # of kwargs, otherwise the call fails with "multiple values for keyword
    # argument 'encoding'".
    user_encoding = dict(kwargs.pop("encoding", None) or {})

    if zlib:
        # Start from the user entries (they may target variables that are not
        # data variables), then add the compression defaults to every data
        # variable, letting the user settings override them.
        encoding = dict(user_encoding)
        for var in ds.data_vars:
            encoding[var] = {
                "zlib": True,
                "complevel": complevel,
                **user_encoding.get(var, {}),
            }
    else:
        encoding = user_encoding or None

    if clean_attrs:
        # Modifies the attributes of `ds` in place
        clean_attributes(ds)

    # Any truthy `verbose` activates the progress bar
    PBar = ProgressBar if verbose else nullcontext

    with PBar():
        if verbose:
            log.info("Writing:", filename)

        # NOTE(review): the leading 0 presumably designates the position of the
        # output path among the arguments of the wrapped function — confirm
        # against core.fileutils.filegen.
        filegen(
            0,
            tmpdir=tmpdir,
            lock_timeout=lock_timeout,
            if_exists=if_exists,
            verbose=verbose,
        )(ds.to_netcdf)(filename, engine=engine, encoding=encoding, **kwargs)




[docs]
def clean_attributes(obj: xr.Dataset|xr.DataArray):
    """
    Remove attributes that can not be written to netcdf

    The attributes of `obj` are modified in place:

    - boolean attributes (python `bool` or numpy `bool_`) are replaced by
      their string representation ("True" / "False")
    - attributes that are not a str, float, int, numpy array or numpy number
      are deleted

    When `obj` is a Dataset, the same cleaning is applied to the attributes
    of each of its variables, coordinates included.

    Args:
        obj: the xarray Dataset or DataArray to clean
    """
    import numpy as np

    # Attribute types that are kept untouched
    writable = (str, float, int, np.ndarray, np.number)

    # Loop over a copy of the keys, since entries are deleted along the way
    for name in list(obj.attrs):
        value = obj.attrs[name]
        # Booleans are tested first: `bool` is a subclass of `int` and would
        # otherwise be kept as is, while `np.bool_` is not a `np.number` and
        # would otherwise be deleted.
        if isinstance(value, (bool, np.bool_)):
            obj.attrs[name] = str(value)
        elif not isinstance(value, writable):
            del obj.attrs[name]

    # recursively clean attributes in the individual variables
    if isinstance(obj, xr.Dataset):
        # Iterating over a Dataset only yields its data variables: go through
        # `variables` so that the coordinate variables are cleaned as well.
        for var in list(obj.variables):
            clean_attributes(obj[var])