""">>> help(buzz.Dataset)"""
# pylint: disable=too-many-lines
import sys
import pathlib
import itertools
from types import MappingProxyType
import os
from osgeo import osr
import numpy as np
from buzzard._tools import conv, deprecation_pool
from buzzard._tools import GDALErrorCatcher as Catch
from buzzard._footprint import Footprint
from buzzard import _tools
from buzzard._dataset_back import BackDataset
from buzzard._a_source import ASource
from buzzard._gdal_file_raster import GDALFileRaster, BackGDALFileRaster
from buzzard._gdal_file_vector import GDALFileVector, BackGDALFileVector
from buzzard._gdal_mem_raster import GDALMemRaster
from buzzard._gdal_memory_vector import GDALMemoryVector
from buzzard._dataset_register import DatasetRegisterMixin
from buzzard._numpy_raster import NumpyRaster
from buzzard._cached_raster_recipe import CachedRasterRecipe
from buzzard._a_pooled_emissary import APooledEmissary
import buzzard.utils
[docs]class Dataset(DatasetRegisterMixin):
"""**Dataset** is a class that stores references to sources. A source is either a raster, or a
vector. A `Dataset` allows:
+ quick manipulations by optionally assigning a key to each registered source, \
(see :ref:`Sources Registering` below)
+ closing all source at once by closing the Dataset object.
But also inter-sources operations, like:
+ spatial reference harmonization (see :ref:`On the fly re-projections in buzzard` below),
+ workload scheduling on pools when using async rasters (see :ref:`Scheduler` below),
+ other features in the future (like data visualization).
For actions specific to opened sources, see those classes:
- :doc:`source_gdal_file_raster`
- :doc:`source_gdal_mem_raster`
- :doc:`source_numpy_raster`
- :doc:`source_cached_raster_recipe`
- :doc:`source_gdal_file_vector`
- :doc:`source_gdal_memory_vector`
.. warning::
This class is not equivalent to the `gdal.Dataset` class.
Parameters
----------
sr_work: None or string
In order to set a spatial reference, use a string that can be `converted to WKT by GDAL
<https://gdal.org/doxygen/classOGRSpatialReference.html#aec3c6a49533fe457ddc763d699ff8796>`_.
(see :ref:`On the fly re-projections in buzzard` below)
sr_fallback: None or string
In order to set a spatial reference, use a string that can be `converted to WKT by GDAL
<https://gdal.org/doxygen/classOGRSpatialReference.html#aec3c6a49533fe457ddc763d699ff8796>`_.
(see :ref:`On the fly re-projections in buzzard` below)
sr_forced: None or string
In order to set a spatial reference, use a string that can be `converted to WKT by GDAL
<https://gdal.org/doxygen/classOGRSpatialReference.html#aec3c6a49533fe457ddc763d699ff8796>`_.
(see :ref:`On the fly re-projections in buzzard` below)
analyse_transformation: bool
Whether or not to perform a basic analysis on two `sr` to check their compatibility.
if True: Read the `buzz.env.significant` variable and raise an exception if a spatial
reference conversions is too lossy in precision.
if False: Skip all checks.
(see :ref:`On the fly re-projections in buzzard` below)
allow_none_geometry: bool
Whether or not a vector geometry should raise an exception when encountering a None geometry
allow_interpolation: bool
Whether or not a raster geometry should raise an exception when remapping with interpolation
is necessary.
max_active: nbr >= 1
Maximum number of pooled sources active at the same time.
(see :ref:`Sources activation / deactivation` below)
debug_observers: sequence of object
    Entry points to observe what is happening in the Dataset's scheduler.
Examples
--------
>>> import buzzard as buzz
Creating a Dataset.
>>> ds = buzz.Dataset()
    Opening a file and registering it under the 'roofs' key. There are four ways to access an
opened source.
>>> r = ds.open_vector('roofs', 'path/to/roofs.shp')
... feature_count = len(ds.roofs)
... feature_count = len(ds['roofs'])
... feature_count = len(ds.get('roofs'))
... feature_count = len(r)
Opening a file anonymously. There is only one way to access the source.
>>> r = ds.aopen_raster('path/to/dem.tif')
... data_type = r.dtype
Opening, reading and closing two raster files with context management.
>>> with ds.open_raster('rgb', 'path/to/rgb.tif').close:
... data_type = ds.rgb.fp
... arr = ds.rgb.get_data()
>>> with ds.aopen_raster('path/to/rgb.tif').close as rgb:
... data_type = rgb.dtype
... arr = rgb.get_data()
Creating two files
>>> ds.create_vector('targets', 'path/to/targets.geojson', 'point', driver='GeoJSON')
... geometry_type = ds.targets.type
>>> with ds.acreate_raster('/tmp/cache.tif', ds.dem.fp, 'float32', 1).delete as cache:
... file_footprint = cache.fp
    ...     cache.set_data(ds.dem.get_data())
Sources Types
-------------
- Raster sources
- GDAL drivers http://www.gdal.org/formats_list.html (e.g. 'GTIff', 'JPEG', 'PNG', ...)
- numpy.ndarray
- recipes
- Vector sources
- OGR drivers: https://www.gdal.org/ogr_formats.html (e.g. 'ESRI Shapefile', 'GeoJSON', 'DXF', ...)
.. _Sources Registering:
Sources Registering
-------------------
There are always two ways to create a source, with a key or anonymously.
When creating a source using a key, said key (e.g. the string "my_source_name") must be provided
by user. Each key identify one source and should thus be unique. There are then three ways to
access that source:
- from the object returned by the method that created the source,
- from the Dataset with the attribute syntax: `ds.my_source_name`,
- from the Dataset with the item syntax: ds["my_source_name"].
All keys should be unique.
When creating a source anonymously you don't have to provide a key, but the only way to access
this source is to use the object returned by the method that created the source.
.. _Sources activation / deactivation:
Sources activation / deactivation
---------------------------------
The sources that inherit from `APooledEmissary` (like `GDALFileVector` and `GDALFileRaster`) are
flexible about their underlying driver object. Those sources may be temporary deactivated
(useful to limit the number of file descriptors active), or activated multiple time at the
    same time (useful to perform concurrent reads).
Those sources are automatically activated and deactivated given the current needs and
constraints. Setting a `max_active` lower than `np.inf` in the Dataset constructor, will
ensure that no more than `max_active` driver objects are active at the same time, by
deactivating the LRU ones.
.. _On the fly re-projections in buzzard:
On the fly re-projections in buzzard
------------------------------------
A Dataset may perform spatial reference conversions on the fly, like a GIS does. Several
modes are available, a set of rules define how each mode work. Those conversions concern both
read operations and write operations, all are performed by the OSR library.
    Those conversions are only performed on vector's data/metadata and raster's Footprints.
This implies that classic raster warping is not included (yet) in those conversions, only raster
shifting/scaling/rotation work.
The `z` coordinates of vectors geometries are also converted, on the other hand elevations are
not converted in DEM rasters.
If `analyse_transformation` is set to `True` (default), all coordinates conversions are
tested against `buzz.env.significant` on file opening to ensure their feasibility or
raise an exception otherwise. This system is naive and very restrictive, use with caution.
Although, disabling those tests is not recommended, ignoring floating point precision errors
can create unpredictable behaviors at the pixel level deep in your code. Those bugs can be
witnessed when zooming to infinity with tools like `qgis` or `matplotlib`.
On the fly re-projections in buzzard - Terminology
--------------------------------------------------
`sr`
Spatial reference
`sr_work`
The sr of all interactions with a Dataset (i.e. Footprints, extents, Polygons...),
may be None.
`sr_stored`
The sr that can be found in the metadata of a raster/vector storage, may be None.
`sr_virtual`
The sr considered to be written in the metadata of a raster/vector storage, it is
often the same as `sr_stored`. When a raster/vector is read, a conversion is performed from
`sr_virtual` to `sr_work`. When writing vector data, a conversion is performed from
`sr_work` to `sr_virtual`.
`sr_forced`
A `sr_virtual` provided by user to ignore all `sr_stored`. This is for example
useful when the `sr` stored in the input files are corrupted.
`sr_fallback`
A `sr_virtual` provided by user to be used when `sr_stored` is missing. This is
        for example useful when an input file can't store a `sr` (e.g. DXF).
On the fly re-projections in buzzard - Dataset parameters and modes
--------------------------------------------------------------------
+------+----------+--------------+------------+-------------------------------------------------------------------------------+
| mode | sr_work | sr_fallback | sr_forced | How is the `sr_virtual` of a source determined |
+======+==========+==============+============+===============================================================================+
| 1 | None | None | None | Use `sr_stored`, no conversion is performed for the lifetime of this Dataset |
+------+----------+--------------+------------+-------------------------------------------------------------------------------+
| 2 | string | None | None | Use `sr_stored`, if None raises an exception |
+------+----------+--------------+------------+-------------------------------------------------------------------------------+
| 3 | string | string | None | Use `sr_stored`, if None it is considered to be `sr_fallback` |
+------+----------+--------------+------------+-------------------------------------------------------------------------------+
| 4 | string | None | string | Use `sr_forced` |
+------+----------+--------------+------------+-------------------------------------------------------------------------------+
On the fly re-projections in buzzard - Use cases
------------------------------------------------
- If all opened files are known to be written in a same sr in advance, use `mode 1`.
No conversions will be performed, this is the safest way to work.
- If all opened files are known to be written in the same sr but you wish to work in a \
different sr, use `mode 4`.
The huge benefit of this mode is that the `driver` specific behaviors
concerning spatial references have no impacts on the data you manipulate.
- On the other hand if you don't have a priori information on files' `sr`, `mode 2` or \
`mode 3` should be used.
.. warning::
Side note: Since the GeoJSON driver cannot store a `sr`, it is impossible to open or
create a GeoJSON file in `mode 2`.
On the fly re-projections in buzzard - Examples
-----------------------------------------------
mode 1 - No conversions at all
>>> ds = buzz.Dataset()
mode 2 - Working with WGS84 coordinates
>>> ds = buzz.Dataset(
... sr_work='WGS84',
... )
mode 3 - Working in UTM with DXF files in WGS84 coordinates
>>> ds = buzz.Dataset(
... sr_work='EPSG:32632',
... sr_fallback='WGS84',
... )
mode 4 - Working in UTM with unreliable LCC input files
>>> ds = buzz.Dataset(
... sr_work='EPSG:32632',
... sr_forced='EPSG:27561',
    ... )
.. _Scheduler:
Scheduler
---------
To handle *async rasters* living in a Dataset, a thread is to manage requests made to those
rasters. It will start as soon as you create an *async raster* and stop when the Dataset is
closed or collected. If one of your callbacks to be called by the scheduler raises an exception,
the scheduler will stop and the exception will be propagated to the main thread as soon as
possible.
Thread-safety
-------------
Thread safety is one of the main concern of buzzard. Everything is thread-safe except:
- The raster write methods
- The vector write methods
- The raster read methods when using the `GDAL::MEM` driver
- The vector read methods when using the `GDAL::Memory` driver
"""
def __init__(self, sr_work=None, sr_fallback=None, sr_forced=None,
             analyse_transformation=True,
             allow_none_geometry=False,
             allow_interpolation=False,
             max_active=np.inf,
             debug_observers=(),
             **kwargs):
    """Construct a Dataset. See the class docstring for the semantics of each parameter.

    Raises
    ------
    TypeError
        If an unexpected keyword argument is provided.
    ValueError
        If a `sr_*` string cannot be converted to WKT, if the `sr_*` combination is
        invalid, or if `max_active` is lower than 1.
    """
    # Deprecated parameters ************************************************
    sr_fallback, kwargs = deprecation_pool.handle_param_renaming_with_kwargs(
        new_name='sr_fallback', old_names={'sr_implicit': '0.4.4'}, context='Dataset.__init__',
        new_name_value=sr_fallback,
        new_name_is_provided=sr_fallback is not None,
        user_kwargs=kwargs,
    )
    sr_forced, kwargs = deprecation_pool.handle_param_renaming_with_kwargs(
        new_name='sr_forced', old_names={'sr_origin': '0.4.4'}, context='Dataset.__init__',
        new_name_value=sr_forced,
        new_name_is_provided=sr_forced is not None,
        user_kwargs=kwargs,
    )
    max_active, kwargs = deprecation_pool.handle_param_renaming_with_kwargs(
        new_name='max_active', old_names={'max_activated': '0.5.0'}, context='Dataset.__init__',
        new_name_value=max_active,
        new_name_is_provided=max_active != np.inf,
        user_kwargs=kwargs,
    )
    if kwargs: # pragma: no cover
        raise TypeError("__init__() got an unexpected keyword argument '{}'".format(
            list(kwargs.keys())[0]
        ))

    # Parameter checking ***************************************************
    def _to_wkt(sr, name):
        # Convert a user-provided spatial reference string to WKT via GDAL,
        # raising a descriptive ValueError on failure.
        success, payload = Catch(osr.GetUserInputAsWKT, nonzero_int_is_error=True)(sr)
        if not success:
            raise ValueError('Could not transform `{}` to `wkt` (gdal error: `{}`)'.format(
                name, payload[1]
            ))
        return payload

    # The (work, fallback, forced) presence triple selects one of the 4 modes
    # documented in the class docstring; any other combination is invalid.
    mode = (sr_work is not None, sr_fallback is not None, sr_forced is not None)
    wkt_work, wkt_fallback, wkt_forced = None, None, None
    if mode == (False, False, False):
        pass
    elif mode == (True, False, False):
        wkt_work = _to_wkt(sr_work, 'sr_work')
    elif mode == (True, True, False):
        wkt_work = _to_wkt(sr_work, 'sr_work')
        wkt_fallback = _to_wkt(sr_fallback, 'sr_fallback')
    elif mode == (True, False, True):
        wkt_work = _to_wkt(sr_work, 'sr_work')
        wkt_forced = _to_wkt(sr_forced, 'sr_forced')
    else:
        raise ValueError('Bad combination of `sr_*` parameters') # pragma: no cover
    del sr_work, sr_fallback, sr_forced

    if max_active < 1: # pragma: no cover
        # `max_active == 1` is legal (see class docstring: "nbr >= 1")
        raise ValueError('`max_active` should be at least 1')

    allow_interpolation = bool(allow_interpolation)
    allow_none_geometry = bool(allow_none_geometry)
    analyse_transformation = bool(analyse_transformation)

    # Construction *********************************************************
    self._ds_closed = False
    self._back = BackDataset(
        wkt_work=wkt_work,
        wkt_fallback=wkt_fallback,
        wkt_forced=wkt_forced,
        analyse_transformation=analyse_transformation,
        allow_none_geometry=allow_none_geometry,
        allow_interpolation=allow_interpolation,
        max_active=max_active,
        ds_id=id(self),
        debug_observers=debug_observers,
    )
    super(Dataset, self).__init__()
# Raster entry points *********************************************************************** **
def open_raster(self, key, path, driver='GTiff', options=(), mode='r'):
    """Open a raster file within this Dataset under `key`. Only metadata are kept in memory.

    >>> help(GDALFileRaster)

    Parameters
    ----------
    key: hashable (like a string)
        File identifier within Dataset
        To avoid using a `key`, you may use :py:meth:`aopen_raster`
    path: string
        ..
    driver: string
        gdal driver to use when opening the file
        http://www.gdal.org/formats_list.html
    options: sequence of str
        options for gdal
    mode: one of {'r', 'w'}
        ..

    Returns
    -------
    source: GDALFileRaster
        ..

    Example
    -------
    >>> ds.open_raster('ortho', '/path/to/ortho.tif')
    >>> file_proj4 = ds.ortho.proj4_stored

    >>> ds.open_raster('dem', '/path/to/dem.tif', mode='w')
    >>> nodata_value = ds.dem.nodata

    See Also
    --------
    - :py:meth:`Dataset.aopen_raster`: To skip the `key` assigment
    - :py:func:`buzzard.open_raster`: To skip the `key` assigment and the explicit `Dataset` instanciation
    """
    # Parameter checking ***************************************************
    path = str(path)
    driver = str(driver)
    options = [str(arg) for arg in options]
    _ = conv.of_of_mode(mode)  # raises if `mode` is not a valid open mode

    # Construction dispatch ************************************************
    if driver.lower() == 'mem': # pragma: no cover
        # MEM rasters have no backing file to open; they can only be created
        raise ValueError("Can't open a MEM raster, use create_raster")
    # Allocation is deferred so the pooled source can (re)open the file on demand
    allocator = lambda: BackGDALFileRaster.open_file(
        path, driver, options, mode
    )
    prox = GDALFileRaster(self, allocator, options, mode)

    # Dataset Registering **************************************************
    if not isinstance(key, _AnonymousSentry):
        self._register([key], prox)
    else:
        self._register([], prox)
    return prox
def aopen_raster(self, path, driver='GTiff', options=(), mode='r'):
    """Open a raster file anonymously within this Dataset. Only metadata are kept in memory.

    See :py:meth:`~Dataset.open_raster`

    Example
    -------
    >>> ortho = ds.aopen_raster('/path/to/ortho.tif')
    >>> file_wkt = ortho.wkt_stored

    See Also
    --------
    - :py:meth:`Dataset.open_raster`: To assign a `key` to this source within the `Dataset`
    - :py:func:`buzzard.open_raster`: To skip the explicit `Dataset` instanciation
    """
    # Delegate to `open_raster` with a sentry key so no name is registered
    sentry = _AnonymousSentry()
    return self.open_raster(sentry, path, driver=driver, options=options, mode=mode)
def create_raster(self, key, path, fp, dtype, channel_count, channels_schema=None,
                  driver='GTiff', options=(), sr=None, ow=False, **kwargs):
    """Create a raster file and register it under `key` within this Dataset. Only metadata are
    kept in memory.

    The raster's values are initialized with `channels_schema['nodata']` or `0`.

    >>> help(GDALFileRaster)
    >>> help(GDALMemRaster)

    Parameters
    ----------
    key: hashable (like a string)
        File identifier within Dataset
        To avoid using a `key`, you may use :py:meth:`acreate_raster`
    path: string
        Anything that makes sense to GDAL:
        + A path to a file
        + An empty string when using `driver=MEM`
        + A path or an xml string when using `driver=VRT`
    fp: Footprint
        Description of the location and size of the raster to create.
    dtype: numpy type (or any alias)
        ..
    channel_count: integer
        number of channels
    channels_schema: dict or None
        Channel(s) metadata. (see `Channels schema fields` below)
    driver: string
        gdal driver to use when opening the file
        http://www.gdal.org/formats_list.html
    options: sequence of str
        options for gdal
        http://www.gdal.org/frmt_gtiff.html
    sr: string or None
        Spatial reference of the new file.
        In order not to set a spatial reference, use `None`.
        In order to set a spatial reference, use a string that can be `converted to WKT by GDAL
        <https://gdal.org/doxygen/classOGRSpatialReference.html#aec3c6a49533fe457ddc763d699ff8796>`_.
    ow: bool
        Overwrite. Whether or not to erase the existing files.

    Returns
    -------
    source: GDALFileRaster or GDALMemRaster
        The type depends on the `driver` parameter

    Example
    -------
    >>> ds.create_raster('dem_copy', 'dem_copy.tif', ds.dem.fp, ds.dsm.dtype, len(ds.dem))
    >>> array = ds.dem.get_data()
    >>> ds.dem_copy.set_data(array)

    Channel schema fields
    ---------------------
    Fields:
        'nodata': None or number
        'interpretation': None or str
        'offset': None or number
        'scale': None or number
        'mask': None or str
    Interpretation values:
        undefined, grayindex, paletteindex, redband, greenband, blueband, alphaband, hueband,
        saturationband, lightnessband, cyanband, magentaband, yellowband, blackband
    Mask values:
        all_valid, per_dataset, alpha, nodata
    Additionally:
    - A field missing or None is kept to default value.
    - A field can be passed as
        - a value: All bands are set to this value
        - a sequence of values of length `channel_count`: All bands will be set to their respective state

    Caveat
    ------
    When using the GTiff driver, specifying a `mask` or `interpretation` field may lead to unexpected results.

    See Also
    --------
    - :py:meth:`Dataset.acreate_raster`: To skip the `key` assigment
    - :py:func:`buzzard.create_raster`: To skip the `key` assigment and the explicit `Dataset` instanciation
    """
    # Deprecated parameters ************************************************
    channels_schema, kwargs = deprecation_pool.handle_param_renaming_with_kwargs(
        new_name='channels_schema', old_names={'band_schema': '0.6.0'},
        context='Dataset.create_raster',
        new_name_value=channels_schema,
        new_name_is_provided=channels_schema is not None,
        user_kwargs=kwargs,
    )
    if kwargs: # pragma: no cover
        raise TypeError("create_raster() got an unexpected keyword argument '{}'".format(
            list(kwargs.keys())[0]
        ))

    # Parameter checking ***************************************************
    ow = bool(ow)
    path = str(path)
    if not isinstance(fp, Footprint): # pragma: no cover
        raise TypeError('`fp` should be a Footprint')
    dtype = np.dtype(dtype)
    channel_count = int(channel_count)
    if channel_count <= 0:
        raise ValueError('`channel_count` should be >0')
    channels_schema = _tools.sanitize_channels_schema(channels_schema, channel_count)
    driver = str(driver)
    options = [str(arg) for arg in options]
    if sr is not None:
        success, payload = Catch(osr.GetUserInputAsWKT, nonzero_int_is_error=True)(sr)
        if not success:
            raise ValueError('Could not transform `sr` to `wkt` (gdal error: `{}`)'.format(
                payload[1]
            ))
        wkt = payload
    else:
        wkt = None
    del sr
    if wkt is not None:
        # The user provided `fp` in `sr_work` coordinates; store it in the file's sr
        fp = self._back.convert_footprint(fp, wkt)

    # Construction dispatch ************************************************
    if driver.lower() == 'mem':
        # TODO for 0.5.0: Check async_ is False
        prox = GDALMemRaster(
            self, fp, dtype, channel_count, channels_schema, options, wkt,
        )
    else:
        # Allocation is deferred so the pooled source can (re)create/open on demand
        allocator = lambda: BackGDALFileRaster.create_file(
            path, fp, dtype, channel_count, channels_schema, driver, options, wkt, ow,
        )
        prox = GDALFileRaster(self, allocator, options, 'w')

    # Dataset Registering **************************************************
    if not isinstance(key, _AnonymousSentry):
        self._register([key], prox)
    else:
        self._register([], prox)
    return prox
def acreate_raster(self, path, fp, dtype, channel_count, channels_schema=None,
                   driver='GTiff', options=(), sr=None, ow=False, **kwargs):
    """Create a raster file anonymously within this Dataset. Only metadata are kept in memory.

    See :py:meth:`~Dataset.create_raster`

    Example
    -------
    >>> mask = ds.acreate_raster('mask.tif', ds.dem.fp, bool, 1, options=['SPARSE_OK=YES'])
    >>> open_options = mask.open_options

    >>> channels_schema = {
    ...     'nodata': -32767,
    ...     'interpretation': ['blackband', 'cyanband'],
    ... }
    >>> out = ds.acreate_raster('output.tif', ds.dem.fp, 'float32', 2, channels_schema)
    >>> band_interpretation = out.channels_schema['interpretation']

    See Also
    --------
    - :py:meth:`Dataset.create_raster`: To assign a `key` to this source within the `Dataset`
    - :py:func:`buzzard.create_raster`: To skip the explicit `Dataset` instanciation
    """
    # Delegate to `create_raster` with a sentry key so no name is registered
    return self.create_raster(
        _AnonymousSentry(), path, fp, dtype, channel_count,
        channels_schema=channels_schema, driver=driver, options=options,
        sr=sr, ow=ow, **kwargs
    )
def wrap_numpy_raster(self, key, fp, array, channels_schema=None, sr=None, mode='w', **kwargs):
    """Register a numpy array as a raster under `key` within this Dataset.

    >>> help(NumpyRaster)

    Parameters
    ----------
    key: hashable (like a string)
        File identifier within Dataset
        To avoid using a `key`, you may use :py:meth:`awrap_numpy_raster`
    fp: Footprint of shape (Y, X)
        Description of the location and size of the raster to create.
    array: ndarray of shape (Y, X) or (Y, X, C)
        ..
    channels_schema: dict or None
        Channel(s) metadata. (see `Channels schema fields` below)
    sr: string or None
        Spatial reference of the new file
        In order not to set a spatial reference, use `None`.
        In order to set a spatial reference, use a string that can be `converted to WKT by GDAL
        <https://gdal.org/doxygen/classOGRSpatialReference.html#aec3c6a49533fe457ddc763d699ff8796>`_.
    mode: one of {'r', 'w'}
        ..

    Returns
    -------
    source: NumpyRaster
        ..

    Channel schema fields
    ---------------------
    Fields:
        'nodata': None or number
        'interpretation': None or str
        'offset': None or number
        'scale': None or number
        'mask': None or str
    Interpretation values:
        undefined, grayindex, paletteindex, redband, greenband, blueband, alphaband, hueband,
        saturationband, lightnessband, cyanband, magentaband, yellowband, blackband
    Mask values:
        all_valid, per_dataset, alpha, nodata
    Additionally:
    - A field missing or None is kept to default value.
    - A field can be passed as
        - a value: All bands are set to this value
        - a sequence of values of length `channel_count`: All bands will be set to their respective state

    See Also
    --------
    - :py:meth:`Dataset.awrap_numpy_raster`: To skip the `key` assigment
    - :py:meth:`buzzard.wrap_numpy_raster`: To skip the `key` assigment and the explicit `Dataset` instanciation
    """
    # Deprecated parameters ************************************************
    channels_schema, kwargs = deprecation_pool.handle_param_renaming_with_kwargs(
        new_name='channels_schema', old_names={'band_schema': '0.6.0'},
        context='Dataset.wrap_numpy_raster',
        new_name_value=channels_schema,
        new_name_is_provided=channels_schema is not None,
        user_kwargs=kwargs,
    )
    if kwargs: # pragma: no cover
        raise TypeError("wrap_numpy_raster() got an unexpected keyword argument '{}'".format(
            list(kwargs.keys())[0]
        ))

    # Parameter checking ***************************************************
    if not isinstance(fp, Footprint): # pragma: no cover
        raise TypeError('`fp` should be a Footprint')
    array = np.asarray(array)
    # Validate ndim before comparing shapes, so a 1-d (or >3-d) array gets the
    # accurate "2 or 3 dimensions" error instead of a misleading shape mismatch
    if array.ndim not in [2, 3]: # pragma: no cover
        raise ValueError('Array should have 2 or 3 dimensions')
    if array.shape[:2] != tuple(fp.shape): # pragma: no cover
        raise ValueError('Incompatible shape between `array` and `fp`')
    channel_count = 1 if array.ndim == 2 else array.shape[-1]
    channels_schema = _tools.sanitize_channels_schema(channels_schema, channel_count)
    if sr is not None:
        success, payload = Catch(osr.GetUserInputAsWKT, nonzero_int_is_error=True)(sr)
        if not success:
            raise ValueError('Could not transform `sr` to `wkt` (gdal error: `{}`)'.format(
                payload[1]
            ))
        wkt = payload
    else:
        wkt = None
    del sr
    _ = conv.of_of_mode(mode)  # raises if `mode` is not a valid open mode
    if wkt is not None:
        # The user provided `fp` in `sr_work` coordinates; store it in the raster's sr
        fp = self._back.convert_footprint(fp, wkt)

    # Construction *********************************************************
    prox = NumpyRaster(self, fp, array, channels_schema, wkt, mode)

    # Dataset Registering **************************************************
    if not isinstance(key, _AnonymousSentry):
        self._register([key], prox)
    else:
        self._register([], prox)
    return prox
def awrap_numpy_raster(self, fp, array, channels_schema=None, sr=None, mode='w', **kwargs):
    """Register a numpy array as a raster anonymously within this Dataset.

    See Also
    --------
    - :py:meth:`Dataset.wrap_numpy_raster`: To assign a `key` to this source within the `Dataset`
    - :py:meth:`buzzard.wrap_numpy_raster`: To skip the `key` assigment and the explicit `Dataset` instanciation
    """
    # Delegate to `wrap_numpy_raster` with a sentry key so no name is registered
    key = _AnonymousSentry()
    return self.wrap_numpy_raster(key, fp, array, channels_schema=channels_schema,
                                  sr=sr, mode=mode, **kwargs)
[docs] def create_raster_recipe(
self, key,
# raster attributes
fp, dtype, channel_count, channels_schema=None, sr=None,
# callbacks running on pool
compute_array=None, merge_arrays=buzzard.utils.concat_arrays,
# primitives
queue_data_per_primitive=MappingProxyType({}), convert_footprint_per_primitive=None,
# pools
computation_pool='cpu', merge_pool='cpu', resample_pool='cpu',
# misc
computation_tiles=None, max_computation_size=None,
max_resampling_size=None, automatic_remapping=True,
debug_observers=(),
):
"""
.. warning::
This method is not yet implemented. It exists for documentation purposes.
Create a *raster recipe* and register it under `key` within this Dataset.
A *raster recipe* implements the same interfaces as all other rasters, but internally it
computes data on the fly by calling a callback. The main goal of the *raster recipes* is to
provide a boilerplate-free interface that automatize those cumbersome tasks:
- tiling,
- parallelism
- caching
- file reads
- resampling
- lazy evaluation
- backpressure prevention and
- optimised task scheduling.
If you are familiar with `create_cached_raster_recipe` two parameters are new here:
`automatic_remapping` and `max_computation_size`.
Parameters
----------
key:
see :py:meth:`Dataset.create_raster`
fp:
see :py:meth:`Dataset.create_raster`
dtype:
see :py:meth:`Dataset.create_raster`
channel_count:
see :py:meth:`Dataset.create_raster`
channels_schema:
see :py:meth:`Dataset.create_raster`
sr:
see :py:meth:`Dataset.create_raster`
compute_array: callable
see :ref:`Computation Function` below
merge_arrays: callable
see :ref:`Merge Function` below
queue_data_per_primitive: dict of hashable (like a string) to a `queue_data` method pointer
see :ref:`Primitives` below
convert_footprint_per_primitive: None or dict of hashable (like a string) to a callable
see :ref:`Primitives` below
computation_pool:
see :ref:`Pools` below
merge_pool:
see :ref:`Pools` below
resample_pool:
see :ref:`Pools` below
computation_tiles: None or (int, int) or numpy.ndarray of Footprint
see :ref:`Computation Tiling` below
max_computation_size: None or int or (int, int)
see :ref:`Computation Tiling` below
max_resampling_size: None or int or (int, int)
Optionally define a maximum resampling size. If a larger resampling has to be performed,
it will be performed tile by tile in parallel.
automatic_remapping: bool
see :ref:`Automatic Remapping` below
debug_observers: sequence of object
Entry points that observe what is happening with this raster in the Dataset's scheduler.
Returns
-------
source: NocacheRasterRecipe
..
.. _Computation Function:
Computation Function
--------------------
The function that will map a Footprint to a numpy.ndarray. If `queue_data_per_primitive`
is not empty, it will map a Footprint and primitive arrays to a numpy.ndarray.
It will be called in parallel according to the `computation_pool` parameter provided at
construction.
The function will be called with the following positional parameters:
- fp: Footprint of shape (Y, X)
The location at which the pixels should be computed
- primitive_fps: dict of hashable to Footprint
For each primitive defined through the `queue_data_per_primitive` parameter, the input
Footprint.
- primitive_arrays: dict of hashable to numpy.ndarray
For each primitive defined through the `queue_data_per_primitive` parameter, the input
numpy.ndarray that was automatically computed.
- raster: CachedRasterRecipe or None
The Raster object of the ongoing computation.
It should return either:
- a single ndarray of shape (Y, X) if only one channel was computed
..
- a single ndarray of shape (Y, X, C) if one or more channels were computed
..
If `computation_pool` points to a process pool, the `compute_array` function must be
picklable and the `raster` parameter will be None.
.. _Computation Tiling:
Computation Tiling
------------------
You may sometimes want to have control on the Footprints that are requested to the
`compute_array` function, for example:
- If pixels computed by `compute_array` are long to compute, you want to tile to increase
parallelism.
- If the `compute_array` function scales badly in term of memory or time, you want to tile
to reduce complexity.
- If `compute_array` can work only on certain Footprints, you want a hard constraint on the
set of Footprint that can be queried from `compute_array`. (This may happen with
*convolutional neural networks*)
To do so use the `computation_tiles` or `max_computation_size` parameter (not both).
If `max_computation_size` is provided, a Footprint to be computed will be tiled given this
parameter.
If `computation_tiles` is a numpy.ndarray of Footprint, it should be a tiling of the `fp`
parameter. Only the Footprints contained in this tiling will be asked to the
`computation_tiles`.
If `computation_tiles` is (int, int), a tiling will be constructed using Footprint.tile
using those two ints.
.. _Merge Function:
Merge Function
--------------
The function that will map several pairs of Footprint/numpy.ndarray to a single
numpy.ndarray. If the `computation_tiles` is None, it will never be called.
It will be called in parallel according to the `merge_pool` parameter provided at
construction.
The function will be called with the following positional parameters:
- fp: Footprint of shape (Y, X)
The location at which the pixels should be computed.
- array_per_fp: dict of Footprint to numpy.ndarray
The pairs of Footprint/numpy.ndarray of each arrays that were computed by
`compute_array` and that overlap with `fp`.
- raster: CachedRasterRecipe or None
The Raster object of the ongoing computation.
It should return either:
- a single ndarray of shape (Y, X) if only one channel was computed
..
- a single ndarray of shape (Y, X, C) if one or more channels were computed
..
If `merge_pool` points to a process pool, the `merge_array` function must be picklable and
the `raster` parameter will be None.
.. _Automatic Remapping:
Automatic Remapping
-------------------
When creating a recipe you give a *Footprint* through the `fp` parameter. When calling your
`compute_array` function the scheduler will only ask for slices of `fp`. This means that the
scheduler takes care of those boilerplate steps:
- If you request a *Footprint* on a different grid in a `get_data()` call, the scheduler
**takes care of resampling** the outputs of your `compute*array` function.
- If you request a *Footprint* partially or fully outside of the raster's extent, the
scheduler will call your `compute_array` function to get the interior pixels and then
**pad the output with nodata**.
This system is flexible and can be deactivated by passing `automatic_remapping=False` to
the constructor of a *NocacheRasterRecipe*, in this case the scheduler will call your
`compute_array` function for any kind of *Footprint*; thus your function must be able to
comply with any request.
.. _Primitives:
Primitives
----------
The `queue_data_per_primitive` and `convert_footprint_per_primitive` parameters can be used
to create dependencies between `dependee` *async rasters* and the *raster recipe* being
created. The dependee/dependent relation is called primitive/derived throughout buzzard.
A derived recipe can itself be the primitive of another raster. Pipelines of any depth and
width can be instanciated that way.
In `queue_data_per_primitive` you declare a `dependee` by giving it a key of your choice and
the pointer to the `queue_data` method of `dependee` raster. You can parameterize the
connection by *currying* the `channels`, `dst_nodata`, `interpolation` and `max_queue_size`
parameters using `functools.partial`.
The `convert_footprint_per_primitive` dict should contain the same keys as
`queue_data_per_primitive`. A value in the dict should be a function that maps a Footprint
to another Footprint. It can be used for example to request larger rectangles of primitives
data to compute a derived array.
e.g. If the primitive raster is an `rgb` image, and the derived raster only needs the green
channel but with a context of 10 additional pixels on all 4 sides:
>>> derived = ds.create_raster_recipe(
... # <other parameters>
... queue_data_per_primitive={'green': functools.partial(primitive.queue_data, channels=1)},
... convert_footprint_per_primitive={'green': lambda fp: fp.dilate(10)},
... )
.. _Pools:
Pools
-----
The `*_pool` parameters can be used to select where certain computations occur. Those
parameters can be of the following types:
- A *multiprocessing.pool.ThreadPool*, should be the default choice.
- A *multiprocessing.pool.Pool*, a process pool. Useful for computations that requires the
GIL or that leaks memory.
- `None`, to request the scheduler thread to perform the tasks itself. Should be used when
the computation is very light.
- A *hashable* (like a *string*), that will map to a pool registered in the *Dataset*. If
that key is missing from the *Dataset*, a *ThreadPool* with
`multiprocessing.cpu_count()` workers will be automatically instanciated. When the
Dataset is closed, the pools instanciated that way will be joined.
See Also
--------
- :py:meth:`Dataset.acreate_raster_recipe`: To skip the `key` assigment
- :py:meth:`Dataset.create_raster_recipe`: For results `caching`
- :py:meth:`Dataset.acreate_cached_raster_recipe`: To skip the `key` assigment
"""
raise NotImplementedError()
[docs] def create_cached_raster_recipe(
self, key,
# raster attributes
fp, dtype, channel_count, channels_schema=None, sr=None,
# callbacks running on pool
compute_array=None, merge_arrays=buzzard.utils.concat_arrays,
# filesystem
cache_dir=None, ow=False,
# primitives
queue_data_per_primitive=MappingProxyType({}), convert_footprint_per_primitive=None,
# pools
computation_pool='cpu', merge_pool='cpu', io_pool='io', resample_pool='cpu',
# misc
cache_tiles=(512, 512), computation_tiles=None, max_resampling_size=None,
debug_observers=()
):
"""Create a *cached raster recipe* and register it under `key` within this Dataset.
Compared to a `NocacheRasterRecipe`, in a `CachedRasterRecipe` the pixels are never computed
twice. Cache files are used to store and reuse pixels from computations. The cache can even
be reused between python sessions.
If you are familiar with `create_raster_recipe` four parameters are new here: `io_pool`,
`cache_tiles`, `cache_dir` and `ow`. They are all related to file system operations.
See `create_raster_recipe` method, since it shares most of the features:
>>> help(CachedRasterRecipe)
Parameters
----------
key:
see :py:meth:`Dataset.create_raster` method
fp:
see :py:meth:`Dataset.create_raster` method
dtype:
see :py:meth:`Dataset.create_raster` method
channel_count:
see :py:meth:`Dataset.create_raster` method
channels_schema:
see :py:meth:`Dataset.create_raster` method
sr:
see :py:meth:`Dataset.create_raster` method
compute_array:
see :py:meth:`Dataset.create_raster_recipe` method
merge_arrays:
see :py:meth:`Dataset.create_raster_recipe` method
cache_dir: str or pathlib.Path
Path to the directory that holds the cache files associated with this raster. If cache
files are present, they will be reused (or erased if corrupted). If a cache file is
needed and missing, it will be computed.
ow: bool
Overwrite. Whether or not to erase the old cache files contained in `cache_dir`.
.. warning::
not only the tiles needed (hence computed) but all buzzard cache files in
`cache_dir` will be deleted.
queue_data_per_primitive:
see :py:meth:`Dataset.create_raster_recipe` method
convert_footprint_per_primitive:
see :py:meth:`Dataset.create_raster_recipe` method
computation_pool:
see :py:meth:`Dataset.create_raster_recipe` method
merge_pool:
see :py:meth:`Dataset.create_raster_recipe` method
io_pool:
see :py:meth:`Dataset.create_raster_recipe` method
resample_pool:
see :py:meth:`Dataset.create_raster_recipe` method
cache_tiles: (int, int) or numpy.ndarray of Footprint
A tiling of the `fp` parameter. Each tile will correspond to one cache file.
if (int, int): Construct the tiling by calling Footprint.tile with this parameter
computation_tiles:
if None: Use the same tiling as `cache_tiles`
else: see `create_raster_recipe` method
max_resampling_size: None or int or (int, int)
see :py:meth:`Dataset.create_raster_recipe` method
debug_observers: sequence of object
see :py:meth:`Dataset.create_raster_recipe` method
Returns
-------
source: CachedRasterRecipe
..
See Also
--------
- :py:meth:`Dataset.create_raster_recipe`: To skip the `caching`
- :py:meth:`Dataset.acreate_cached_raster_recipe`: To skip the `key` assigment
"""
# Parameter checking ***************************************************
# Classic RasterSource parameters *******************
if not isinstance(fp, Footprint): # pragma: no cover
raise TypeError('`fp` should be a Footprint')
dtype = np.dtype(dtype)
channel_count = int(channel_count)
if channel_count <= 0:
raise ValueError('`channel_count` should be >0')
channels_schema = _tools.sanitize_channels_schema(channels_schema, channel_count)
if sr is not None:
success, payload = Catch(osr.GetUserInputAsWKT, nonzero_int_is_error=True)(sr)
if not success:
raise ValueError('Could not transform `sr` to `wkt` (gdal error: `{}`)'.format(
payload[1]
))
wkt = payload
else:
wkt = None
del sr
if wkt is not None:
fp = self._back.convert_footprint(fp, wkt)
# Callables ****************************************
if compute_array is None:
raise ValueError('Missing `compute_array` parameter')
if not callable(compute_array):
raise TypeError('`compute_array` should be callable')
if not callable(merge_arrays):
raise TypeError('`merge_arrays` should be callable')
# Primitives ***************************************
if convert_footprint_per_primitive is None:
convert_footprint_per_primitive = {
name: (lambda fp: fp)
for name in queue_data_per_primitive.keys()
}
if queue_data_per_primitive.keys() != convert_footprint_per_primitive.keys():
err = 'There should be the same keys in `queue_data_per_primitive` and '
err += '`convert_footprint_per_primitive`.'
if queue_data_per_primitive.keys() - convert_footprint_per_primitive.keys():
err += '\n{} are missing from `convert_footprint_per_primitive`.'.format(
queue_data_per_primitive.keys() - convert_footprint_per_primitive.keys()
)
if convert_footprint_per_primitive.keys() - queue_data_per_primitive.keys():
err += '\n{} are missing from `queue_data_per_primitive`.'.format(
convert_footprint_per_primitive.keys() - queue_data_per_primitive.keys()
)
raise ValueError(err)
primitives_back = {}
primitives_kwargs = {}
for name, met in queue_data_per_primitive.items():
primitives_back[name], primitives_kwargs[name] = _tools.shatter_queue_data_method(met, name)
if primitives_back[name].back_ds is not self._back:
raise ValueError('The `{}` primitive comes from another Dataset'.format(
name
))
for name, func in convert_footprint_per_primitive.items():
if not callable(func):
raise TypeError('convert_footprint_per_primitive[{}] should be callable'.format(
name
))
# Pools ********************************************
computation_pool = self._back.pools_container._normalize_pool_parameter(
computation_pool, 'computation_pool'
)
merge_pool = self._back.pools_container._normalize_pool_parameter(
merge_pool, 'merge_pool'
)
io_pool = self._back.pools_container._normalize_pool_parameter(
io_pool, 'io_pool'
)
resample_pool = self._back.pools_container._normalize_pool_parameter(
resample_pool, 'resample_pool'
)
# Tilings ******************************************
if isinstance(cache_tiles, np.ndarray) and cache_tiles.dtype == np.object:
if not _tools.is_tiling_covering_fp(
cache_tiles, fp,
allow_outer_pixels=False, allow_overlapping_pixels=False,
):
raise ValueError("`cache_tiles` should be a tiling of raster's Footprint, " +\
"without overlap, with `boundary_effect='shrink'`"
)
else:
# Defer the parameter checking to fp.tile
cache_tiles = fp.tile(cache_tiles, 0, 0, boundary_effect='shrink')
if computation_tiles is None:
computation_tiles = cache_tiles
elif isinstance(computation_tiles, np.ndarray) and computation_tiles.dtype == np.object:
if not _tools.is_tiling_covering_fp(
cache_tiles, fp,
allow_outer_pixels=True, allow_overlapping_pixels=True,
):
raise ValueError("`computation_tiles` should be a tiling covering raster's Footprint")
else:
# Defer the parameter checking to fp.tile
computation_tiles = fp.tile(computation_tiles, 0, 0, boundary_effect='shrink')
# Misc *********************************************
if max_resampling_size is not None:
max_resampling_size = int(max_resampling_size)
if max_resampling_size <= 0:
raise ValueError('`max_resampling_size` should be >0')
if cache_dir is None:
raise ValueError('Missing `cache_dir` parameter')
if not isinstance(cache_dir, (str, pathlib.Path)):
raise TypeError('cache_dir should be a string')
cache_dir = str(cache_dir)
overwrite = bool(ow)
del ow
# Construction *********************************************************
prox = CachedRasterRecipe(
self,
fp, dtype, channel_count, channels_schema, wkt,
compute_array, merge_arrays,
cache_dir, overwrite,
primitives_back, primitives_kwargs, convert_footprint_per_primitive,
computation_pool, merge_pool, io_pool, resample_pool,
cache_tiles, computation_tiles,
max_resampling_size,
debug_observers,
)
# Dataset Registering ***********************************************
if not isinstance(key, _AnonymousSentry):
self._register([key], prox)
else:
self._register([], prox)
return prox
[docs] def acreate_cached_raster_recipe(
self,
# raster attributes
fp, dtype, channel_count, channels_schema=None, sr=None,
# callbacks running on pool
compute_array=None, merge_arrays=buzzard.utils.concat_arrays,
# filesystem
cache_dir=None, ow=False,
# primitives
queue_data_per_primitive=MappingProxyType({}), convert_footprint_per_primitive=None,
# pools
computation_pool='cpu', merge_pool='cpu', io_pool='io', resample_pool='cpu',
# misc
cache_tiles=(512, 512), computation_tiles=None, max_resampling_size=None,
debug_observers=()
):
"""Create a cached raster reciped anonymously within this Dataset.
See Dataset.create_cached_raster_recipe
See Also
--------
- :py:meth:`Dataset.create_raster_recipe`: To skip the `caching`
- :py:meth:`Dataset.create_cached_raster_recipe`: To assign a `key` to this source within the `Dataset`
"""
return self.create_cached_raster_recipe(
_AnonymousSentry(),
fp, dtype, channel_count, channels_schema, sr,
compute_array, merge_arrays,
cache_dir, ow,
queue_data_per_primitive, convert_footprint_per_primitive,
computation_pool, merge_pool, io_pool, resample_pool,
cache_tiles, computation_tiles, max_resampling_size,
debug_observers,
)
# Vector entry points *********************************************************************** **
[docs] def open_vector(self, key, path, layer=None, driver='ESRI Shapefile', options=(), mode='r'):
"""Open a vector file within this Dataset under `key`. Only metadata are kept in memory.
>>> help(GDALFileVector)
Parameters
----------
key: hashable (like a string)
File identifier within Dataset
To avoid using a `key`, you may use :py:meth:`aopen_vector`
path: string
..
layer: None or int or string
..
driver: string
ogr driver to use when opening the file
http://www.gdal.org/ogr_formats.html
options: sequence of str
options for ogr
mode: one of {'r', 'w'}
..
Returns
-------
source: GDALFileVector
..
Example
-------
>>> ds.open_vector('trees', '/path/to.shp')
>>> feature_count = len(ds.trees)
>>> ds.open_vector('roofs', '/path/to.json', driver='GeoJSON', mode='w')
>>> fields_list = ds.roofs.fields
See Also
--------
- :py:meth:`Dataset.aopen_vector`: To skip the `key` assigment
- :py:func:`buzzard.open_vector`: To skip the `key` assigment and the explicit `Dataset` instanciation
"""
# Parameter checking ***************************************************
path = str(path)
if layer is None:
layer = 0
elif np.all(np.isreal(layer)):
layer = int(layer)
else:
layer = str(layer)
driver = str(driver)
options = [str(arg) for arg in options]
_ = conv.of_of_mode(mode)
# Construction dispatch ************************************************
if driver.lower() == 'memory': # pragma: no cover
raise ValueError("Can't open a MEMORY vector, user create_vector")
elif True:
allocator = lambda: BackGDALFileVector.open_file(
path, layer, driver, options, mode
)
prox = GDALFileVector(self, allocator, options, mode)
else:
pass
# Dataset Registering ***********************************************
if not isinstance(key, _AnonymousSentry):
self._register([key], prox)
else:
self._register([], prox)
return prox
[docs] def aopen_vector(self, path, layer=None, driver='ESRI Shapefile', options=(), mode='r'):
"""Open a vector file anonymously within this Dataset. Only metadata are kept in memory.
See :py:meth:`~Dataset.open_vector`
Example
-------
>>> trees = ds.aopen_vector('/path/to.shp')
>>> features_bounds = trees.bounds
See Also
--------
- :py:meth:`Dataset.open_vector`: To assign a `key` to this source within the `Dataset`
- :py:func:`buzzard.open_vector`: To skip the `key` assigment and the explicit `Dataset` instanciation
"""
return self.open_vector(_AnonymousSentry(), path, layer, driver, options, mode)
[docs] def create_vector(self, key, path, type, fields=(), layer=None,
driver='ESRI Shapefile', options=(), sr=None, ow=False):
"""Create an empty vector file and register it under `key` within this Dataset. Only metadata
are kept in memory.
>>> help(GDALFileVector)
>>> help(GDALMemoryVector)
Parameters
----------
key: hashable (like a string)
File identifier within Dataset
To avoid using a `key`, you may use :py:meth:`acreate_vector`
path: string
Anything that makes sense to GDAL:
+ A path to a file
+ An empty string when using `driver=Memory`
type: string
name of a wkb geometry type, without the `wkb` prefix.
list: http://www.gdal.org/ogr__core_8h.html#a800236a0d460ef66e687b7b65610f12a
fields: sequence of dict
Attributes of fields, one dict per field. (see :ref:`Field Attributes` below)
layer: None or string
..
driver: string
ogr driver to use when opening the file
http://www.gdal.org/ogr_formats.html
options: sequence of str
options for ogr
sr: string or None
Spatial reference of the new file
In order not to set a spatial reference, use `None`.
In order to set a spatial reference, use a string that can be `converted to WKT by GDAL
<https://gdal.org/doxygen/classOGRSpatialReference.html#aec3c6a49533fe457ddc763d699ff8796>`_.
ow: bool
Overwrite. Whether or not to erase the existing files.
Returns
-------
source: GDALFileVector or GDALMemoryVector
The type depends on the `driver` parameter
Example
-------
>>> ds.create_vector('lines', '/path/to.shp', 'linestring')
>>> geometry_type = ds.lines.type
>>> ds.lines.insert_data([[0, 0], [1, 1], [1, 2]])
>>> fields = [
{'name': 'name', 'type': str},
{'name': 'count', 'type': 'int32'},
{'name': 'area', 'type': np.float64, 'width': 5, precision: 18},
{'name': 'when', 'type': np.datetime64},
]
>>> ds.create_vector('zones', '/path/to.shp', 'polygon', fields)
>>> field0_type = ds.zones.fields[0]['type']
>>> ds.zones.insert_data(shapely.geometry.box(10, 10, 15, 15))
.. _Field Attributes:
Field Attributes
----------------
Attributes:
- "name": string
- "type": string (see :ref:`Field Types` below)
- "precision": int
- "width": int
- "nullable": bool
- "default": same as `type`
An attribute missing or None is kept to default value.
.. _Field Types:
Field Types
-----------
+---------------+------------------------------------------------------------------------+
| Type | Type names |
+===============+========================================================================+
| Binary | "binary", bytes, np.bytes\_, aliases of np.bytes\_ |
+---------------+------------------------------------------------------------------------+
| Date | "date" |
+---------------+------------------------------------------------------------------------+
| DateTime | "datetime", datetime.datetime, np.datetime64, aliases of np.datetime64 |
+---------------+------------------------------------------------------------------------+
| Time | "time" |
+---------------+------------------------------------------------------------------------+
| Integer | "integer" np.int32, aliases of np.int32 |
+---------------+------------------------------------------------------------------------+
| Integer64 | "integer64", int, np.int64, aliases of np.int64 |
+---------------+------------------------------------------------------------------------+
| Real | "real", float, np.float64, aliases of np.float64 |
+---------------+------------------------------------------------------------------------+
| String | "string", str, np.str\_, aliases of np.str\_ |
+---------------+------------------------------------------------------------------------+
| Integer64List | "integer64list", "int list" |
+---------------+------------------------------------------------------------------------+
| IntegerList | "integerlist" |
+---------------+------------------------------------------------------------------------+
| RealList | "reallist", "float list" |
+---------------+------------------------------------------------------------------------+
See Also
--------
- :py:meth:`Dataset.acreate_vector`: To skip the `key` assigment
- :py:func:`buzzard.create_vector`: To skip the `key` assigment and the explicit `Dataset` instanciation
"""
type_ = type
del type
# Parameter checking ***************************************************
path = str(path)
type_ = conv.str_of_wkbgeom(conv.wkbgeom_of_str(type_))
fields = _tools.normalize_fields_defn(fields)
if layer is None:
layer = '.'.join(os.path.basename(path).split('.')[:-1])
else:
layer = str(layer)
driver = str(driver)
options = [str(arg) for arg in options]
ow = bool(ow)
if sr is None:
wkt = None
else:
success, payload = Catch(osr.GetUserInputAsWKT, nonzero_int_is_error=True)(sr)
if not success:
raise ValueError('Could not transform `sr` to `wkt` (gdal error: `{}`)'.format(
payload[1]
))
wkt = payload
# Construction dispatch ************************************************
if driver.lower() == 'memory':
allocator = lambda: BackGDALFileVector.create_file(
'', type_, fields, layer, 'Memory', options, wkt, False,
)
prox = GDALMemoryVector(self, allocator, options)
elif True:
allocator = lambda: BackGDALFileVector.create_file(
path, type_, fields, layer, driver, options, wkt, ow
)
prox = GDALFileVector(self, allocator, options, 'w')
else:
pass
# Dataset Registering ***********************************************
if not isinstance(key, _AnonymousSentry):
self._register([key], prox)
else:
self._register([], prox)
return prox
[docs] def acreate_vector(self, path, type, fields=(), layer=None,
driver='ESRI Shapefile', options=(), sr=None, ow=False):
"""Create a vector file anonymously within this Dataset. Only metadata are kept in memory.
See :py:meth:`~Dataset.create_vector`
Example
-------
>>> lines = ds.acreate_vector('/path/to.shp', 'linestring')
>>> file_proj4 = lines.proj4_stored
See Also
--------
- :py:meth:`Dataset.create_vector`: To assign a `key` to this source within the `Dataset`
- :py:func:`buzzard.create_vector`: To skip the `key` assigment and the explicit `Dataset` instanciation
"""
return self.create_vector(_AnonymousSentry(), path, type, fields, layer,
driver, options, sr, ow)
# Cleanup *********************************************************************************** **
[docs] def __del__(self):
if not self._ds_closed:
self.close()
    @property
    def close(self):
        """Close the Dataset with a call or a context management.
        The `close` attribute returns an object that can be both called and used in a with statement

        The Dataset can be closed manually or automatically when garbage collected, it is safer
        to do it manually.

        The internal steps are:
        - Stopping the scheduler
        - Joining the mp.Pool that have been automatically allocated
        - Closing all sources

        Examples
        --------
        >>> ds = buzz.Dataset()
        ... # code...
        ... ds.close()

        >>> with buzz.Dataset().close as ds
        ... # code...

        Caveat
        ------
        When using a scheduler, some memory leaks may still occur after closing a Dataset.
        Possible origins:
        - https://bugs.python.org/issue34172 (update your python to >=3.6.7)
        - Gdal cache not flushed (not a leak)
        - The gdal version
        - https://stackoverflow.com/a/1316799 (not a leak)
        - Some unknown leak in the python `threading` or `multiprocessing` standard library
        - Some unknown library leaking memory on the `C` side
        - Some unknown library storing data in global variables

        You can use a `debug_observer` with an `on_object_allocated` method to track large objects
        allocated in the scheduler. It will likely not be the source of the problem. If you
        even find a source of leaks please contact the buzzard team.
        https://github.com/airware/buzzard/issues
        """
        # Fail fast when accessing `.close` on an already-closed Dataset.
        if self._ds_closed:
            raise RuntimeError("Dataset already closed")
        def _close():
            # Re-checked here: `close` may have been invoked between the
            # property access and the actual call of the routine.
            if self._ds_closed:
                raise RuntimeError("Dataset already closed")
            self._ds_closed = True
            # Tell scheduler to stop, wait until it is done
            self._back.stop_scheduler()
            # Safely release all resources
            self._back.pools_container._close()
            # Iterate over a snapshot: `source.close()` mutates `_keys_of_source`.
            for source in list(self._keys_of_source.keys()):
                source.close()
        # `_CloseRoutine` (see module bottom) makes `_close` both callable and
        # usable as a context manager.
        return _CloseRoutine(self, _close)
# Source infos ******************************************************************************* **
    def __getitem__(self, key):
        """Retrieve a source from its key"""
        # Plain dict lookup; raises KeyError for unknown keys.
        return self._source_of_key[key]
[docs] def __contains__(self, item):
"""Is key or source registered in Dataset"""
if isinstance(item, ASource):
return item in self._keys_of_source
return item in self._source_of_key
[docs] def items(self):
"""Generate the pair of (keys_of_source, source) for all proxies"""
for source, keys in self._keys_of_source.items():
yield list(keys), source
[docs] def keys(self):
"""Generate all source keys"""
for source, keys in self._keys_of_source.items():
for key in keys:
yield key
[docs] def values(self):
"""Generate all proxies"""
for source, _ in self._keys_of_source.items():
yield source
    def __len__(self):
        """Retrieve source count registered within this Dataset"""
        # One entry per source, no matter how many keys reference it.
        return len(self._keys_of_source)
# Spatial reference getters ***************************************************************** **
@property
def proj4(self):
"""Dataset's work spatial reference in WKT proj4.
Returns None if `mode 1`.
"""
if self._back.wkt_work is None:
return None
return osr.SpatialReference(self._back.wkt_work).ExportToProj4()
@property
def wkt(self):
"""Dataset's work spatial reference in WKT format.
Returns None if `mode 1`.
"""
return self._back.wkt_work
# Activation mechanisms ********************************************************************* **
@property
def active_count(self):
"""Count how many driver objects are currently active"""
return self._back.active_count()
    def activate_all(self):
        """Activate all deactivable proxies.
        May raise an exception if the number of sources is greater than `max_activated`
        """
        # Only pooled emissaries can be (de)activated.
        proxs = [
            prox
            for prox in self._keys_of_source.keys()
            if isinstance(prox, APooledEmissary)
        ]
        total = len(proxs)
        if self._back.max_active < total:
            raise RuntimeError("Can't activate all pooled sources at the same time: {} pooled sources and max_activated is {}".format(
                total, self._back.max_active,
            ))
        # Hacky implementation to get the expected behavior
        # TODO: Implement that routine in the back driver pool. Is it possible? We need to call `.activate`
        # Keep cycling through the proxies until `total` consecutive ones are
        # observed active. `i` counts the current streak of active proxies; it
        # resets to 1 (not 0) after an activation because the freshly activated
        # proxy itself starts the new streak. NOTE(review): the cycling
        # presumably guards against an activation deactivating another proxy in
        # the underlying pool — confirm against the back driver pool behavior.
        i = 0
        for prox in itertools.cycle(proxs):
            if i == total:
                break
            if not prox.active:
                prox.activate()
                i = 1
            else:
                i += 1
[docs] def deactivate_all(self):
"""Deactivate all deactivable proxies. Useful to flush all files to disk"""
for prox in self._keys_of_source.keys():
if prox.active:
prox.deactivate()
# Pools infos ******************************************************************************* **
@property
def pools(self):
"""Get the Pool Container.
>>> help(PoolsContainer)
"""
return self._back.pools_container
# Deprecation ******************************************************************************* **
open_araster = deprecation_pool.wrap_method(
aopen_raster,
'0.4.4'
)
create_araster = deprecation_pool.wrap_method(
acreate_raster,
'0.4.4'
)
open_avector = deprecation_pool.wrap_method(
aopen_vector,
'0.4.4'
)
create_avector = deprecation_pool.wrap_method(
acreate_vector,
'0.4.4'
)
# The end *********************************************************************************** **
# ******************************************************************************************* **
if sys.version_info < (3, 6):
    # Python < 3.6 does not invoke `__set_name__` on descriptors at class
    # creation time (PEP 487); emulate that hook for Dataset's attributes.
    # https://www.python.org/dev/peps/pep-0487/
    for k, v in Dataset.__dict__.items():
        if hasattr(v, '__set_name__'):
            v.__set_name__(Dataset, k)
def open_raster(*args, **kwargs):
    """Shortcut for `Dataset().aopen_raster`

    >>> help(Dataset.open_raster)

    See Also
    --------
    - :py:func:`Dataset.open_raster`
    - :py:meth:`Dataset.aopen_raster`
    """
    # Build a throwaway Dataset and open the raster anonymously in it.
    ds = Dataset()
    return ds.aopen_raster(*args, **kwargs)
def create_raster(*args, **kwargs):
    """Shortcut for `Dataset().acreate_raster`

    >>> help(Dataset.create_raster)

    See Also
    --------
    - :py:func:`Dataset.create_raster`
    - :py:meth:`Dataset.acreate_raster`
    """
    # Build a throwaway Dataset and create the raster anonymously in it.
    ds = Dataset()
    return ds.acreate_raster(*args, **kwargs)
def open_vector(*args, **kwargs):
    """Shortcut for `Dataset().aopen_vector`

    >>> help(Dataset.open_vector)

    See Also
    --------
    - :py:func:`Dataset.open_vector`
    - :py:meth:`Dataset.aopen_vector`
    """
    # Build a throwaway Dataset and open the vector anonymously in it.
    ds = Dataset()
    return ds.aopen_vector(*args, **kwargs)
def create_vector(*args, **kwargs):
    """Shortcut for `Dataset().acreate_vector`

    >>> help(Dataset.create_vector)

    See Also
    --------
    - :py:func:`Dataset.create_vector`
    - :py:meth:`Dataset.acreate_vector`
    """
    # Build a throwaway Dataset and create the vector anonymously in it.
    ds = Dataset()
    return ds.acreate_vector(*args, **kwargs)
def wrap_numpy_raster(*args, **kwargs):
    """Shortcut for `Dataset().awrap_numpy_raster`

    >>> help(Dataset.wrap_numpy_raster)

    See Also
    --------
    - :py:func:`Dataset.wrap_numpy_raster`
    - :py:meth:`Dataset.awrap_numpy_raster`
    """
    # Build a throwaway Dataset and wrap the array anonymously in it.
    ds = Dataset()
    return ds.awrap_numpy_raster(*args, **kwargs)
# Object returned by `Dataset.close`: `_tools.CallOrContext` makes it both
# callable and usable as a context manager; it reuses `close`'s docstring.
_CloseRoutine = type('_CloseRoutine', (_tools.CallOrContext,), {
    '__doc__': Dataset.close.__doc__,
})

# Deprecated alias of `Dataset` (deprecated since v0.6.0).
DataSource = deprecation_pool.wrap_class(Dataset, 'DataSource', '0.6.0')
class _AnonymousSentry(object):
"""Sentry object used to instanciate anonymous proxies"""