diff -Nru datacube-1.8.7/conda-environment.yml datacube-1.8.9/conda-environment.yml --- datacube-1.8.7/conda-environment.yml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/conda-environment.yml 2022-11-17 00:47:28.000000000 +0000 @@ -22,14 +22,15 @@ - pyproj - shapely - jsonschema - - lark-parser + - lark - netcdf4 - numpy - pandas - psycopg2 - python-dateutil - pyyaml - - rasterio >=1.0.2 + - rasterio >=1.3.2 - sqlalchemy - - xarray >=0.9 + - GeoAlchemy2 + - xarray >=0.9,!=2022.6.0 - toolz diff -Nru datacube-1.8.7/datacube/api/core.py datacube-1.8.9/datacube/api/core.py --- datacube-1.8.7/datacube/api/core.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/api/core.py 2022-11-17 00:47:28.000000000 +0000 @@ -27,7 +27,7 @@ from ..drivers import new_datasource -class TerminateCurrentLoad(Exception): +class TerminateCurrentLoad(Exception): # noqa: N818 """ This exception is raised by user code from `progress_cbk` to terminate currently running `.load` """ @@ -126,16 +126,16 @@ # Optionally compute dataset count for each product and add to row/cols # Product lists are sorted by product name to ensure 1:1 match - if dataset_count: - + if dataset_count: + # Load counts counts = [(p.name, c) for p, c in self.index.datasets.count_by_product()] - + # Sort both rows and counts by product name from operator import itemgetter rows = sorted(rows, key=itemgetter(0)) counts = sorted(counts, key=itemgetter(0)) - + # Add sorted count to each existing row rows = [row + [count[1]] for row, count in zip(rows, counts)] cols = cols + ['dataset_count'] @@ -181,10 +181,11 @@ #: pylint: disable=too-many-arguments, too-many-locals def load(self, product=None, measurements=None, output_crs=None, resolution=None, resampling=None, - skip_broken_datasets=False, dask_chunks=None, like=None, fuse_func=None, align=None, - datasets=None, dataset_predicate=None, progress_cbk=None, **query): + skip_broken_datasets=False, dask_chunks=None, like=None, fuse_func=None, align=None, + datasets=None, dataset_predicate=None, progress_cbk=None, patch_url=None, **query): """ - Load data as an ``xarray.Dataset`` object. Each measurement will be a data variable in the :class:`xarray.Dataset`. + Load data as an ``xarray.Dataset`` object. + Each measurement will be a data variable in the :class:`xarray.Dataset`. See the `xarray documentation `_ for usage of the :class:`xarray.Dataset` and :class:`xarray.DataArray` objects. @@ -279,7 +280,8 @@ :param list(str) measurements: Measurements name or list of names to be included, as listed in :meth:`list_measurements`. - These will be loaded as individual ``xr.DataArray`` variables in the output ``xarray.Dataset`` object. + These will be loaded as individual ``xr.DataArray`` variables in + the output ``xarray.Dataset`` object. If a list is specified, the measurements will be returned in the order requested. By default all available measurements are included. @@ -289,8 +291,8 @@ For example: ``'x', 'y', 'time', 'crs'``. :param str output_crs: - The CRS of the returned data, for example ``EPSG:3577``. If no CRS is supplied, the CRS of the stored data is used - if available. + The CRS of the returned data, for example ``EPSG:3577``. + If no CRS is supplied, the CRS of the stored data is used if available. This differs from the ``crs`` parameter desribed above, which is used to define the CRS of the coordinates in the query itself. @@ -358,7 +360,8 @@ :param function dataset_predicate: Optional. A function that can be passed to restrict loaded datasets. 
A predicate function should - take a :class:`datacube.model.Dataset` object (e.g. as returned from :meth:`find_datasets`) and return a boolean. + take a :class:`datacube.model.Dataset` object (e.g. as returned from :meth:`find_datasets`) and + return a boolean. For example, loaded data could be filtered to January observations only by passing the following predicate function that returns True for datasets acquired in January:: @@ -373,6 +376,10 @@ if supplied will be called for every file read with ``files_processed_so_far, total_files``. This is only applicable to non-lazy loads, ignored when using dask. + :param Callable[[str], str], patch_url: + if supplied, will be used to patch/sign the url(s), as required to access some commercial archives + (e.g. Microsoft Planetary Computer). + :return: Requested data in a :class:`xarray.Dataset` @@ -431,7 +438,8 @@ dask_chunks=dask_chunks, skip_broken_datasets=skip_broken_datasets, progress_cbk=progress_cbk, - extra_dims=extra_dims) + extra_dims=extra_dims, + patch_url=patch_url) return result @@ -618,7 +626,7 @@ @staticmethod def _dask_load(sources, geobox, measurements, dask_chunks, - skip_broken_datasets=False, extra_dims=None): + skip_broken_datasets=False, extra_dims=None, patch_url=None): chunk_sizes = _calculate_chunk_sizes(sources, geobox, dask_chunks, extra_dims) needed_irr_chunks = chunk_sizes[0] if extra_dims: @@ -648,14 +656,16 @@ measurement, chunks=chunks, skip_broken_datasets=skip_broken_datasets, - extra_dims=extra_dims) + extra_dims=extra_dims, + patch_url=patch_url) return Datacube.create_storage(sources.coords, geobox, measurements, data_func, extra_dims) @staticmethod def _xr_load(sources, geobox, measurements, skip_broken_datasets=False, - progress_cbk=None, extra_dims=None): + progress_cbk=None, extra_dims=None, + patch_url=None): def mk_cbk(cbk): if cbk is None: @@ -700,7 +710,8 @@ try: _fuse_measurement(data_slice, datasets, geobox, m, skip_broken_datasets=skip_broken_datasets, - progress_cbk=_cbk, extra_dim_index=extra_dim_index) + progress_cbk=_cbk, extra_dim_index=extra_dim_index, + patch_url=patch_url) except (TerminateCurrentLoad, KeyboardInterrupt): data.attrs['dc_partial_load'] = True return data @@ -710,7 +721,7 @@ @staticmethod def load_data(sources, geobox, measurements, resampling=None, fuse_func=None, dask_chunks=None, skip_broken_datasets=False, - progress_cbk=None, extra_dims=None, + progress_cbk=None, extra_dims=None, patch_url=None, **extra): """ Load data from :meth:`group_datasets` into an :class:`xarray.Dataset`. @@ -754,6 +765,9 @@ :param ExtraDimensions extra_dims: A ExtraDimensions describing the any additional dimensions on top of (t, y, x) + :param Callable[[str], str], patch_url: + if supplied, will be used to patch/sign the url(s), as required to access some commercial archives. + :rtype: xarray.Dataset .. 
seealso:: :meth:`find_datasets` :meth:`group_datasets` @@ -763,12 +777,14 @@ if dask_chunks is not None: return Datacube._dask_load(sources, geobox, measurements, dask_chunks, skip_broken_datasets=skip_broken_datasets, - extra_dims=extra_dims) + extra_dims=extra_dims, + patch_url=patch_url) else: return Datacube._xr_load(sources, geobox, measurements, skip_broken_datasets=skip_broken_datasets, progress_cbk=progress_cbk, - extra_dims=extra_dims) + extra_dims=extra_dims, + patch_url=patch_url) def __str__(self): return "Datacube".format(self.index) @@ -882,24 +898,29 @@ yield dataset -def fuse_lazy(datasets, geobox, measurement, skip_broken_datasets=False, prepend_dims=0, extra_dim_index=None): +def fuse_lazy(datasets, geobox, measurement, + skip_broken_datasets=False, prepend_dims=0, extra_dim_index=None, patch_url=None): prepend_shape = (1,) * prepend_dims data = numpy.full(geobox.shape, measurement.nodata, dtype=measurement.dtype) _fuse_measurement(data, datasets, geobox, measurement, skip_broken_datasets=skip_broken_datasets, - extra_dim_index=extra_dim_index) + extra_dim_index=extra_dim_index, + patch_url=patch_url) return data.reshape(prepend_shape + geobox.shape) def _fuse_measurement(dest, datasets, geobox, measurement, skip_broken_datasets=False, progress_cbk=None, - extra_dim_index=None): + extra_dim_index=None, + patch_url=None): srcs = [] for ds in datasets: src = None with ignore_exceptions_if(skip_broken_datasets): - src = new_datasource(BandInfo(ds, measurement.name, extra_dim_index=extra_dim_index)) + src = new_datasource( + BandInfo(ds, measurement.name, extra_dim_index=extra_dim_index, patch_url=patch_url) + ) if src is None: if not skip_broken_datasets: @@ -975,7 +996,8 @@ measurement, chunks, skip_broken_datasets=False, - extra_dims=None): + extra_dims=None, + patch_url=None): dsk = dsk.copy() # this contains mapping from dataset id to dataset object token = uuid.uuid4().hex @@ -1030,11 +1052,11 @@ # Do extra_dim subsetting here index_subset = extra_dims.measurements_index(measurement.extra_dim) for result_index, extra_dim_index in enumerate(range(*index_subset)): - dsk[key_prefix + (result_index,) + idx] = val + (extra_dim_index,) + dsk[key_prefix + (result_index,) + idx] = val + (extra_dim_index, patch_url) else: # Get extra_dim index if available extra_dim_index = measurement.get('extra_dim_index', None) - dsk[key_prefix + idx] = val + (extra_dim_index,) + dsk[key_prefix + idx] = val + (extra_dim_index, patch_url) y_shapes = [grid_chunks[0]]*gbt.shape[0] x_shapes = [grid_chunks[1]]*gbt.shape[1] diff -Nru datacube-1.8.7/datacube/api/grid_workflow.py datacube-1.8.9/datacube/api/grid_workflow.py --- datacube-1.8.7/datacube/api/grid_workflow.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/api/grid_workflow.py 2022-11-17 00:47:28.000000000 +0000 @@ -3,9 +3,7 @@ # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import logging -import numpy import xarray -from itertools import groupby from collections import OrderedDict import pandas as pd diff -Nru datacube-1.8.7/datacube/api/query.py datacube-1.8.9/datacube/api/query.py --- datacube-1.8.7/datacube/api/query.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/api/query.py 2022-11-17 00:47:28.000000000 +0000 @@ -46,7 +46,7 @@ self.sort_key = sort_key if group_key is None: - group_key = lambda datasets: group_by_func(datasets[0]) + group_key = lambda datasets: group_by_func(datasets[0]) # noqa: E731 self.group_key = group_key diff -Nru 
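# Illustrative usage sketch: the new ``patch_url`` hook threaded through
# Datacube.load() / load_data() above lets callers patch/sign URLs before
# they are read, e.g. for Microsoft Planetary Computer assets.  A minimal
# sketch, assuming the third-party ``planetary_computer`` package (whose
# ``sign`` helper takes a URL and returns a signed URL) and a hypothetical
# product name:
import datacube
import planetary_computer

dc = datacube.Datacube()
data = dc.load(
    product="sentinel_2_l2a",               # hypothetical product name
    x=(149.0, 149.2), y=(-35.4, -35.2),
    time=("2022-01-01", "2022-01-31"),
    output_crs="EPSG:3577", resolution=(-10, 10),
    patch_url=planetary_computer.sign,      # Callable[[str], str], applied to each URL read
)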
datacube-1.8.7/datacube/_celery_runner.py datacube-1.8.9/datacube/_celery_runner.py --- datacube-1.8.7/datacube/_celery_runner.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/_celery_runner.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,272 +0,0 @@ -# This file is part of the Open Data Cube, see https://opendatacube.org for more information -# -# Copyright (c) 2015-2020 ODC Contributors -# SPDX-License-Identifier: Apache-2.0 -# -# type: ignore -import cloudpickle -import logging -from celery import Celery -from time import sleep -import redis -import os -import kombu.serialization - -from celery.backends import base as celery_base -_LOG = logging.getLogger(__name__) - -# This can be changed via environment variable `REDIS` -REDIS_URL = 'redis://localhost:6379/0' - -kombu.serialization.registry.register( - 'cloudpickle', - cloudpickle.dumps, cloudpickle.loads, - content_type='application/x-python-cloudpickle', - content_encoding='binary' -) - -# Tell celery that it's ok to serialise exceptions using cloudpickle. -celery_base.EXCEPTION_ABLE_CODECS = celery_base.EXCEPTION_ABLE_CODECS.union({'cloudpickle'}) - - -def mk_celery_app(addr=None): - if addr is None: - url = os.environ.get('REDIS', REDIS_URL) - else: - url = 'redis://{}:{}/0'.format(*addr) - - _app = Celery('datacube_task', broker=url, backend=url) - - _app.conf.update( - task_serializer='cloudpickle', - result_serializer='cloudpickle', - event_serializer='cloudpickle', - accept_content=['cloudpickle', 'json', 'pickle'] - ) - - return _app - - -# Celery worker launch script expects to see app object at the top level -# pylint: disable=invalid-name -app = mk_celery_app() - - -def set_address(host, port=6379, db=0, password=None): - if password is None: - url = 'redis://{}:{}/{}'.format(host, port, db) - else: - url = 'redis://:{}@{}:{}/{}'.format(password, host, port, db) - - app.conf.update(result_backend=url, - broker_url=url) - - -@app.task() -def run_function(func, *args, **kwargs): - return func(*args, **kwargs) - - -def launch_worker(host, port=6379, password=None, nprocs=None): - if password == '': - password = get_redis_password(generate_if_missing=False) - - set_address(host, port, password=password) - - argv = ['worker', '-A', 'datacube._celery_runner', '-E', '-l', 'INFO'] - if nprocs is not None: - argv.extend(['-c', str(nprocs)]) - - app.worker_main(argv) - - -def get_redis_password(generate_if_missing=False): - from .utils import write_user_secret_file, slurp, gen_password - - REDIS_PASSWORD_FILE = '.datacube-redis' - - password = slurp(REDIS_PASSWORD_FILE, in_home_dir=True) - if password is not None: - return password - - if generate_if_missing: - password = gen_password(12) - write_user_secret_file(password, REDIS_PASSWORD_FILE, in_home_dir=True) - - return password - - -class CeleryExecutor(object): - def __init__(self, host=None, port=None, password=None): - # print('Celery: {}:{}'.format(host, port)) - self._shutdown = None - - if port or host or password: - if password == '': - password = get_redis_password(generate_if_missing=True) - - set_address(host if host else 'localhost', - port if port else 6379, - password=password) - - host = host if host else 'localhost' - port = port if port else 6379 - - if not check_redis(host, port, password): - if host in ['localhost', '127.0.0.1']: - self._shutdown = launch_redis(port if port else 6379, password=password) - else: - raise IOError("Can't connect to redis server @ {}:{}".format(host, port)) - - def __del__(self): - if self._shutdown: - 
app.control.shutdown() - sleep(1) - self._shutdown() - - def __repr__(self): - return 'CeleryRunner' - - def submit(self, func, *args, **kwargs): - _LOG.warning("WARNING: Celery executor is deprecated and will be removed in a future release.") - return run_function.delay(func, *args, **kwargs) - - def map(self, func, iterable): - return [self.submit(func, data) for data in iterable] - - @staticmethod - def get_ready(futures): - completed = [] - failed = [] - pending = [] - for f in futures: - if f.ready(): - if f.failed(): - failed.append(f) - else: - completed.append(f) - else: - pending.append(f) - return completed, failed, pending - - @staticmethod - def as_completed(futures): - while len(futures) > 0: - pending = [] - - for promise in futures: - if promise.ready(): - yield promise - else: - pending.append(promise) - - if len(pending) == len(futures): - # If no change detected sleep for a bit - # TODO: this is sub-optimal, not sure what other options are - # though? - sleep(0.1) - - futures = pending - - @classmethod - def next_completed(cls, futures, default): - results = list(futures) - if not results: - return default, results - result = next(cls.as_completed(results), default) - results.remove(result) - return result, results - - @staticmethod - def results(futures): - return [future.get() for future in futures] - - @staticmethod - def result(future): - return future.get() - - @staticmethod - def release(future): - future.forget() - - -def check_redis(host='localhost', port=6379, password=None): - if password == '': - password = get_redis_password() - - server = redis.Redis(host, port, password=password) - try: - server.ping() - except redis.exceptions.ConnectionError: - return False - except redis.exceptions.ResponseError as error: - print('Redis responded with an error: {}'.format(error)) - return False - return True - - -def launch_redis(port=6379, password=None, **kwargs): - import tempfile - from os import path - import subprocess - import shutil - from .utils import write_user_secret_file - - def stringify(v): - if isinstance(v, str): - return '"' + v + '"' if v.find(' ') >= 0 else v - - if isinstance(v, bool): - return {True: 'yes', False: 'no'}[v] - - return str(v) - - def fix_key(k): - return k.replace('_', '-') - - def write_config(params, cfgfile): - lines = ['{} {}'.format(fix_key(k), stringify(v)) for k, v in params.items()] - cfg_txt = '\n'.join(lines) - write_user_secret_file(cfg_txt, cfgfile) - - workdir = tempfile.mkdtemp(prefix='redis-') - - defaults = dict(maxmemory_policy='noeviction', - daemonize=True, - port=port, - databases=4, - maxmemory="100mb", - hz=50, - loglevel='notice', - pidfile=path.join(workdir, 'redis.pid'), - logfile=path.join(workdir, 'redis.log')) - - if password is not None: - if password == '': - password = get_redis_password(generate_if_missing=True) - - defaults['requirepass'] = password - else: - password = defaults.get('requirepass', None) - - defaults.update(kwargs) - - cfgfile = path.join(workdir, 'redis.cfg') - write_config(defaults, cfgfile) - - def cleanup(): - shutil.rmtree(workdir) - - def shutdown(): - server = redis.Redis('localhost', port, password=password) - server.shutdown() - sleep(1) - cleanup() - - try: - subprocess.check_call(['redis-server', cfgfile]) - except subprocess.CalledProcessError: - cleanup() - return False - - return shutdown diff -Nru datacube-1.8.7/datacube/config.py datacube-1.8.9/datacube/config.py --- datacube-1.8.7/datacube/config.py 2022-06-07 00:39:59.000000000 +0000 +++ 
datacube-1.8.9/datacube/config.py 2022-11-17 00:47:28.000000000 +0000 @@ -10,7 +10,7 @@ from pathlib import Path import configparser from urllib.parse import unquote_plus, urlparse, parse_qsl -from typing import Any, Dict, Iterable, MutableMapping, Optional, Tuple, Union, cast +from typing import Any, Dict, Iterable, MutableMapping, Optional, Tuple, Union PathLike = Union[str, 'os.PathLike[Any]'] diff -Nru datacube-1.8.7/datacube/drivers/netcdf/_safestrings.py datacube-1.8.9/datacube/drivers/netcdf/_safestrings.py --- datacube-1.8.7/datacube/drivers/netcdf/_safestrings.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/netcdf/_safestrings.py 2022-11-17 00:47:28.000000000 +0000 @@ -75,7 +75,7 @@ return _VariableProxy(var) #: pylint: disable=invalid-name - def createVariable(self, *args, **kwargs): + def createVariable(self, *args, **kwargs): # noqa: N802 new_var = super(_NC4DatasetProxy, self).createVariable(*args, **kwargs) return _VariableProxy(new_var) diff -Nru datacube-1.8.7/datacube/drivers/netcdf/writer.py datacube-1.8.9/datacube/drivers/netcdf/writer.py --- datacube-1.8.7/datacube/drivers/netcdf/writer.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/netcdf/writer.py 2022-11-17 00:47:28.000000000 +0000 @@ -192,7 +192,7 @@ # e.g. http://spatialreference.org/ref/sr-org/mexico-inegi-lambert-conformal-conic/ crs_var.grid_mapping_name = cf['grid_mapping_name'] - crs_var.standard_parallel = cf['standard_parallel'] + crs_var.standard_parallel = cf['standard_parallel'] crs_var.latitude_of_projection_origin = cf['latitude_of_projection_origin'] crs_var.longitude_of_central_meridian = cf['longitude_of_central_meridian'] crs_var.false_easting = cf['false_easting'] diff -Nru datacube-1.8.7/datacube/drivers/postgis/_api.py datacube-1.8.9/datacube/drivers/postgis/_api.py --- datacube-1.8.7/datacube/drivers/postgis/_api.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/_api.py 2022-11-17 00:47:28.000000000 +0000 @@ -13,51 +13,50 @@ Persistence API implementation for postgis. """ +import json import logging import uuid # noqa: F401 from sqlalchemy import cast -from sqlalchemy import delete -from sqlalchemy import select, text, bindparam, and_, or_, func, literal, distinct +from sqlalchemy import delete, update +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy import select, text, and_, or_, func from sqlalchemy.dialects.postgresql import INTERVAL -from sqlalchemy.dialects.postgresql import JSONB, insert -from sqlalchemy.exc import IntegrityError -from typing import Iterable, Tuple +from typing import Iterable, Tuple, Sequence -from datacube.index.exceptions import MissingRecordError from datacube.index.fields import OrExpression from datacube.model import Range +from datacube.utils import geometry +from datacube.utils.geometry import CRS, Geometry from . import _core from . 
import _dynamic as dynamic from ._fields import parse_fields, Expression, PgField, PgExpression # noqa: F401 from ._fields import NativeField, DateDocField, SimpleDocField -from ._schema import DATASET, DATASET_SOURCE, METADATA_TYPE, DATASET_LOCATION, PRODUCT +from ._schema import MetadataType, Product, \ + Dataset, DatasetSource, DatasetLocation, SelectedDatasetLocation +from ._spatial import geom_alchemy from .sql import escape_pg_identifier -def _dataset_uri_field(table): - return table.c.uri_scheme + ':' + table.c.uri_body - - -# Fields for selecting dataset with uris -# Need to alias the table, as queries may join the location table for filtering. -SELECTED_DATASET_LOCATION = DATASET_LOCATION.alias('selected_dataset_location') -_DATASET_SELECT_FIELDS = ( - DATASET, - # All active URIs, from newest to oldest - func.array( - select([ - _dataset_uri_field(SELECTED_DATASET_LOCATION) - ]).where( - and_( - SELECTED_DATASET_LOCATION.c.dataset_ref == DATASET.c.id, - SELECTED_DATASET_LOCATION.c.archived == None - ) - ).order_by( - SELECTED_DATASET_LOCATION.c.added.desc(), - SELECTED_DATASET_LOCATION.c.id.desc() +# Make a function because it's broken +def _dataset_select_fields(): + return ( + Dataset, + # All active URIs, from newest to oldest + func.array( + select( + SelectedDatasetLocation.uri + ).where( + and_( + SelectedDatasetLocation.dataset_ref == Dataset.id, + SelectedDatasetLocation.archived == None + ) + ).order_by( + SelectedDatasetLocation.added.desc(), + SelectedDatasetLocation.id.desc() + ).label('uris') ).label('uris') - ).label('uris') -) + ) + PGCODE_UNIQUE_CONSTRAINT = '23505' PGCODE_FOREIGN_KEY_VIOLATION = '23503' @@ -83,42 +82,42 @@ 'id': NativeField( 'id', 'Dataset UUID', - DATASET.c.id + Dataset.id ), 'indexed_time': NativeField( 'indexed_time', 'When dataset was indexed', - DATASET.c.added + Dataset.added ), 'indexed_by': NativeField( 'indexed_by', 'User who indexed the dataset', - DATASET.c.added_by + Dataset.added_by ), 'product': NativeField( 'product', 'Product name', - PRODUCT.c.name + Product.name ), - 'dataset_type_id': NativeField( - 'dataset_type_id', + 'product_id': NativeField( + 'product_id', 'ID of a dataset type', - DATASET.c.dataset_type_ref + Dataset.product_ref ), 'metadata_type': NativeField( 'metadata_type', 'Metadata type name of dataset', - METADATA_TYPE.c.name + MetadataType.name ), 'metadata_type_id': NativeField( 'metadata_type_id', 'ID of a metadata type', - DATASET.c.metadata_type_ref + Dataset.metadata_type_ref ), 'metadata_doc': NativeField( 'metadata_doc', 'Full metadata document', - DATASET.c.metadata + Dataset.metadata_doc ), # Fields that can affect row selection @@ -127,8 +126,8 @@ 'uri': NativeField( 'uri', "Dataset URI", - DATASET_LOCATION.c.uri_body, - alchemy_expression=_dataset_uri_field(DATASET_LOCATION), + DatasetLocation.uri_body, + alchemy_expression=DatasetLocation.uri, affects_row_selection=True ), } @@ -144,21 +143,21 @@ creation_time=DateDocField( 'creation_time', 'Time when dataset was created (processed)', - DATASET.c.metadata, + Dataset.metadata_doc, False, offset=dataset_section.get('creation_dt') or ['creation_dt'] ), format=SimpleDocField( 'format', 'File format (GeoTiff, NetCDF)', - DATASET.c.metadata, + Dataset.metadata_doc, False, offset=dataset_section.get('format') or ['format', 'name'] ), label=SimpleDocField( 'label', 'Label', - DATASET.c.metadata, + Dataset.metadata_doc, False, offset=dataset_section.get('label') or ['label'] ), @@ -168,20 +167,27 @@ fields.update( parse_fields( 
dataset_section['search_fields'], - DATASET.c.metadata + Dataset.metadata_doc ) ) return fields class PostgisDbAPI(object): - def __init__(self, connection): + def __init__(self, parentdb, connection): + self._db = parentdb self._connection = connection @property def in_transaction(self): return self._connection.in_transaction() + def begin(self): + self._connection.execute(text('BEGIN')) + + def commit(self): + self._connection.execute(text('COMMIT')) + def rollback(self): self._connection.execute(text('ROLLBACK')) @@ -197,25 +203,16 @@ :return: whether it was inserted :rtype: bool """ - dataset_type_ref = bindparam('dataset_type_ref') + metadata_subquery = select(Product.metadata_type_ref).where(Product.id == product_id).scalar_subquery() ret = self._connection.execute( - insert(DATASET).from_select( - ['id', 'dataset_type_ref', 'metadata_type_ref', 'metadata'], - select([ - bindparam('id'), dataset_type_ref, - select([ - PRODUCT.c.metadata_type_ref - ]).where( - PRODUCT.c.id == dataset_type_ref - ).label('metadata_type_ref'), - bindparam('metadata', type_=JSONB) - ]) + insert(Dataset).values( + id=dataset_id, + product_ref=product_id, + metadata=metadata_doc, + metadata_type_ref=metadata_subquery ).on_conflict_do_nothing( index_elements=['id'] - ), - id=dataset_id, - dataset_type_ref=product_id, - metadata=metadata_doc + ) ) return ret.rowcount > 0 @@ -227,11 +224,10 @@ :type product_id: int """ res = self._connection.execute( - DATASET.update().returning(DATASET.c.id).where( - and_( - DATASET.c.id == dataset_id, - DATASET.c.dataset_type_ref == product_id - ) + update(Dataset).returning(Dataset.id).where( + Dataset.id == dataset_id + ).where( + Dataset.product_ref == product_id ).values( metadata=metadata_doc ) @@ -252,23 +248,90 @@ scheme, body = _split_uri(uri) r = self._connection.execute( - insert(DATASET_LOCATION).on_conflict_do_nothing( + insert(DatasetLocation).on_conflict_do_nothing( index_elements=['uri_scheme', 'uri_body', 'dataset_ref'] - ), - dataset_ref=dataset_id, - uri_scheme=scheme, - uri_body=body, + ).values( + dataset_ref=dataset_id, + uri_scheme=scheme, + uri_body=body, + ) ) return r.rowcount > 0 + @staticmethod + def _sanitise_extent(extent, crs): + if not crs.valid_region: + # No valid region on CRS, just reproject + return extent.to_crs(crs) + geo_extent = extent.to_crs(CRS("EPSG:4326")) + if crs.valid_region.contains(geo_extent): + # Valid region contains extent, just reproject + return extent.to_crs(crs) + if not crs.valid_region.intersects(geo_extent): + # Extent is entirely outside of valid region - return None + return None + # Clip to valid region and reproject + valid_extent = geo_extent & crs.valid_region + if valid_extent.wkt == "POLYGON EMPTY": + # Extent is entirely outside of valid region - return None + return None + return valid_extent.to_crs(crs) + + def insert_dataset_spatial(self, dataset_id, crs, extent): + """ + Add a spatial index entry for a dataset if it is not already recorded. 
+ + Returns True if success, False if this location already existed + + :type dataset_id: str or uuid.UUID + :type crs: CRS + :type extent: Geometry + :rtype bool: + """ + extent = self._sanitise_extent(extent, crs) + if extent is None: + return False + SpatialIndex = self._db.spatial_index(crs) # noqa: N806 + geom_alch = geom_alchemy(extent) + r = self._connection.execute( + insert( + SpatialIndex + ).values( + dataset_ref=dataset_id, + extent=geom_alch, + ).on_conflict_do_update( + index_elements=[SpatialIndex.dataset_ref], + set_=dict(extent=geom_alch) + ) + ) + return r.rowcount > 0 + + def spatial_extent(self, ids, crs): + SpatialIndex = self._db.spatial_index(crs) # noqa: N806 + if SpatialIndex is None: + return None + result = self._connection.execute( + select([ + func.ST_AsGeoJSON(func.ST_Union(SpatialIndex.extent)) + ]).select_from( + SpatialIndex + ).where( + SpatialIndex.dataset_ref.in_(ids) + ) + ) + for r in result: + extent_json = r[0] + if extent_json is None: + return None + return Geometry(json.loads(extent_json), crs=crs) + return None + def contains_dataset(self, dataset_id): return bool( self._connection.execute( - select( - [DATASET.c.id] - ).where( - DATASET.c.id == dataset_id + select(Dataset.id).where( + Dataset.id == dataset_id ) ).fetchone() ) @@ -276,12 +339,14 @@ def datasets_intersection(self, dataset_ids): """ Compute set intersection: db_dataset_ids & dataset_ids """ - return [r[0] - for r in self._connection.execute(select( - [DATASET.c.id] + return [ds.id for ds in self._connection.execute( + select( + Dataset.id ).where( - DATASET.c.id.in_(dataset_ids) - )).fetchall()] + Dataset.id.in_(dataset_ids) + ) + ).fetchall() + ] def get_datasets_for_location(self, uri, mode=None): scheme, body = _split_uri(uri) @@ -290,152 +355,111 @@ mode = 'exact' if body.count('#') > 0 else 'prefix' if mode == 'exact': - body_query = DATASET_LOCATION.c.uri_body == body + body_query = DatasetLocation.uri_body == body elif mode == 'prefix': - body_query = DATASET_LOCATION.c.uri_body.startswith(body) + body_query = DatasetLocation.uri_body.startswith(body) else: raise ValueError('Unsupported query mode {}'.format(mode)) return self._connection.execute( select( - _DATASET_SELECT_FIELDS - ).select_from( - DATASET_LOCATION.join(DATASET) + _dataset_select_fields() + ).join( + Dataset.locations ).where( - and_(DATASET_LOCATION.c.uri_scheme == scheme, body_query) + and_(DatasetLocation.uri_scheme == scheme, body_query) ) ).fetchall() def all_dataset_ids(self, archived: bool): - query = select( - DATASET.c.id # type: ignore[arg-type] - ).select_from( - DATASET - ) + query = select(Dataset.id) if archived: query = query.where( - DATASET.c.archived != None + Dataset.archived != None ) else: query = query.where( - DATASET.c.archived == None + Dataset.archived == None ) return self._connection.execute(query).fetchall() def insert_dataset_source(self, classifier, dataset_id, source_dataset_id): - try: - r = self._connection.execute( - insert(DATASET_SOURCE).on_conflict_do_nothing( - index_elements=['classifier', 'dataset_ref'] - ), + r = self._connection.execute( + insert(DatasetSource).on_conflict_do_nothing( + index_elements=['classifier', 'dataset_ref'] + ).values( classifier=classifier, dataset_ref=dataset_id, source_dataset_ref=source_dataset_id ) - return r.rowcount > 0 - except IntegrityError as e: - if e.orig.pgcode == PGCODE_FOREIGN_KEY_VIOLATION: - raise MissingRecordError("Referenced source dataset doesn't exist") - raise + ) + return r.rowcount > 0 def archive_dataset(self, 
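# Illustrative usage sketch of the new per-CRS spatial index, assuming a
# PostgisDbAPI instance ``conn`` and an iterable of dataset UUIDs
# ``dataset_ids``: spatial_extent() above returns the union footprint of
# those datasets in the requested CRS, or None when nothing is indexed.
from datacube.utils.geometry import CRS

footprint = conn.spatial_extent(dataset_ids, CRS("EPSG:4326"))
if footprint is not None:
    print(footprint.json)                   # GeoJSON-style dict of the combined extent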
dataset_id): - self._connection.execute( - DATASET.update().where( - DATASET.c.id == dataset_id + r = self._connection.execute( + update(Dataset).where( + Dataset.id == dataset_id ).where( - DATASET.c.archived == None + Dataset.archived == None ).values( archived=func.now() ) ) + return r.rowcount > 0 def restore_dataset(self, dataset_id): - self._connection.execute( - DATASET.update().where( - DATASET.c.id == dataset_id + r = self._connection.execute( + update(Dataset).where( + Dataset.id == dataset_id ).values( archived=None ) ) + return r.rowcount > 0 def delete_dataset(self, dataset_id): self._connection.execute( - DATASET_LOCATION.delete().where( - DATASET_LOCATION.c.dataset_ref == dataset_id + delete(DatasetLocation).where( + DatasetLocation.dataset_ref == dataset_id ) ) self._connection.execute( - DATASET_SOURCE.delete().where( - DATASET_SOURCE.c.dataset_ref == dataset_id + delete(DatasetSource).where( + DatasetSource.dataset_ref == dataset_id ) ) - self._connection.execute( - DATASET.delete().where( - DATASET.c.id == dataset_id + for crs in self._db.spatial_indexes(): + SpatialIndex = self._db.spatial_index(crs) # noqa: N806 + self._connection.execute( + delete( + SpatialIndex + ).where( + SpatialIndex.dataset_ref == dataset_id + ) + ) + + r = self._connection.execute( + delete(Dataset).where( + Dataset.id == dataset_id ) ) + return r.rowcount > 0 def get_dataset(self, dataset_id): return self._connection.execute( - select(_DATASET_SELECT_FIELDS).where(DATASET.c.id == dataset_id) + select(_dataset_select_fields()).where(Dataset.id == dataset_id) ).first() def get_datasets(self, dataset_ids): return self._connection.execute( - select(_DATASET_SELECT_FIELDS).where(DATASET.c.id.in_(dataset_ids)) + select(_dataset_select_fields()).where(Dataset.id.in_(dataset_ids)) ).fetchall() def get_derived_datasets(self, dataset_id): - return self._connection.execute( - select( - _DATASET_SELECT_FIELDS - ).select_from( - DATASET.join(DATASET_SOURCE, DATASET.c.id == DATASET_SOURCE.c.dataset_ref) - ).where( - DATASET_SOURCE.c.source_dataset_ref == dataset_id - ) - ).fetchall() + raise NotImplementedError def get_dataset_sources(self, dataset_id): - # recursively build the list of (dataset_ref, source_dataset_ref) pairs starting from dataset_id - # include (dataset_ref, NULL) [hence the left join] - sources = select( - [DATASET.c.id.label('dataset_ref'), - DATASET_SOURCE.c.source_dataset_ref, - DATASET_SOURCE.c.classifier] - ).select_from( - DATASET.join(DATASET_SOURCE, - DATASET.c.id == DATASET_SOURCE.c.dataset_ref, - isouter=True) - ).where( - DATASET.c.id == dataset_id - ).cte(name="sources", recursive=True) - - sources = sources.union_all( - select( - [sources.c.source_dataset_ref.label('dataset_ref'), - DATASET_SOURCE.c.source_dataset_ref, - DATASET_SOURCE.c.classifier] - ).select_from( - sources.join(DATASET_SOURCE, - sources.c.source_dataset_ref == DATASET_SOURCE.c.dataset_ref, - isouter=True) - ).where(sources.c.source_dataset_ref != None)) - - # turn the list of pairs into adjacency list (dataset_ref, [source_dataset_ref, ...]) - # some source_dataset_ref's will be NULL - aggd = select( - [sources.c.dataset_ref, - func.array_agg(sources.c.source_dataset_ref).label('sources'), - func.array_agg(sources.c.classifier).label('classes')] - ).group_by(sources.c.dataset_ref).alias('aggd') - - # join the adjacency list with datasets table - query = select( - _DATASET_SELECT_FIELDS + (aggd.c.sources, aggd.c.classes) - ).select_from(aggd.join(DATASET, DATASET.c.id == aggd.c.dataset_ref)) - - return 
self._connection.execute(query).fetchall() + raise NotImplementedError def search_datasets_by_metadata(self, metadata): """ @@ -446,7 +470,7 @@ """ # Find any storage types whose 'dataset_metadata' document is a subset of the metadata. return self._connection.execute( - select(_DATASET_SELECT_FIELDS).where(DATASET.c.metadata.contains(metadata)) + select(_dataset_select_fields()).where(Dataset.metadata_doc.contains(metadata)) ).fetchall() @staticmethod @@ -458,17 +482,22 @@ return [raw_expr(expression) for expression in expressions] - @staticmethod - def search_datasets_query(expressions, source_exprs=None, - select_fields=None, with_source_ids=False, limit=None): + def search_datasets_query(self, + expressions, source_exprs=None, + select_fields=None, with_source_ids=False, + limit=None, geom=None): """ :type expressions: Tuple[Expression] :type source_exprs: Tuple[Expression] :type select_fields: Iterable[PgField] :type with_source_ids: bool :type limit: int + :type geom: Geometry :rtype: sqlalchemy.Expression """ + # TODO: lineage handling and source search + assert source_exprs is None + assert not with_source_ids if select_fields: select_columns = tuple( @@ -476,91 +505,52 @@ for f in select_fields ) else: - select_columns = _DATASET_SELECT_FIELDS + select_columns = _dataset_select_fields() - if with_source_ids: - # Include the IDs of source datasets - select_columns += ( - select( - (func.array_agg(DATASET_SOURCE.c.source_dataset_ref),) - ).select_from( - DATASET_SOURCE - ).where( - DATASET_SOURCE.c.dataset_ref == DATASET.c.id - ).group_by( - DATASET_SOURCE.c.dataset_ref - ).label('dataset_refs'), - ) + if geom: + # Check geom CRS - do we have a spatial index for this CRS? + # Yes? Use it! + # No? Convert to 4326 which we should always have a spatial index for by default + if not geom.crs: + raise ValueError("Search geometry must have a CRS") + SpatialIndex = self._db.spatial_index(geom.crs) # noqa: N806 + if SpatialIndex is None: + _LOG.info("No spatial index for crs %s - converting to 4326", geom.crs) + default_crs = CRS("EPSG:4326") + geom = geom.to_crs(default_crs) + SpatialIndex = self._db.spatial_index(default_crs) # noqa: N806 + geom_sql = geom_alchemy(geom) + _LOG.info("query geometry = %s (%s)", geom.json, geom.crs) + spatialquery = func.ST_Intersects(SpatialIndex.extent, geom_sql) + else: + spatialquery = None + SpatialIndex = None # noqa: N806 raw_expressions = PostgisDbAPI._alchemify_expressions(expressions) - from_expression = PostgisDbAPI._from_expression(DATASET, expressions, select_fields) - where_expr = and_(DATASET.c.archived == None, *raw_expressions) - - if not source_exprs: - return ( - select( - select_columns - ).select_from( - from_expression - ).where( - where_expr - ).limit( - limit - ) - ) - base_query = ( - select( - select_columns + (DATASET_SOURCE.c.source_dataset_ref, - literal(1).label('distance'), - DATASET_SOURCE.c.classifier.label('path')) - ).select_from( - from_expression.join(DATASET_SOURCE, DATASET.c.id == DATASET_SOURCE.c.dataset_ref) - ).where( - where_expr - ) - ).cte(name="base_query", recursive=True) - - recursive_query = base_query.union_all( - select( - [col for col in base_query.columns - if col.name not in ['source_dataset_ref', 'distance', 'path'] - ] + [ - DATASET_SOURCE.c.source_dataset_ref, - (base_query.c.distance + 1).label('distance'), - (base_query.c.path + '.' 
+ DATASET_SOURCE.c.classifier).label('path') - ] - ).select_from( - base_query.join( - DATASET_SOURCE, base_query.c.source_dataset_ref == DATASET_SOURCE.c.dataset_ref - ) - ) - ) - - return ( - select( - [distinct(recursive_query.c.id) - ] + [ - col for col in recursive_query.columns - if col.name not in ['id', 'source_dataset_ref', 'distance', 'path']] - ).select_from( - recursive_query.join(DATASET, DATASET.c.id == recursive_query.c.source_dataset_ref) - ).where( - and_(DATASET.c.archived == None, *PostgisDbAPI._alchemify_expressions(source_exprs)) - ).limit( - limit - ) - ) + join_tables = PostgisDbAPI._join_tables(Dataset, expressions, select_fields) + where_expr = and_(Dataset.archived == None, *raw_expressions) + query = select(select_columns).select_from(Dataset) + for join in join_tables: + query = query.join(join) + if spatialquery is not None: + where_expr = and_(where_expr, spatialquery) + query = query.join(SpatialIndex) + query = query.where(where_expr).limit(limit) + return query def search_datasets(self, expressions, source_exprs=None, select_fields=None, - with_source_ids=False, limit=None): + with_source_ids=False, limit=None, + geom=None): """ :type with_source_ids: bool :type select_fields: tuple[datacube.drivers.postgis._fields.PgField] :type expressions: tuple[datacube.drivers.postgis._fields.PgExpression] """ select_query = self.search_datasets_query(expressions, source_exprs, - select_fields, with_source_ids, limit) + select_fields, with_source_ids, + limit, geom=geom) + _LOG.debug("search_datasets SQL: %s", str(select_query)) return self._connection.execute(select_query) @staticmethod @@ -574,59 +564,8 @@ in dataset_location per dataset basis if required. It returns the construted query. """ - - # expressions involving DATASET_SOURCE cannot not done for now - for expression in expressions: - assert expression.field.required_alchemy_table != DATASET_SOURCE, \ - 'Joins with dataset_source cannot be done for this query' - - # expressions involving 'uri' and 'uris' will be handled different - expressions = [expression for expression in expressions - if expression.field.required_alchemy_table != DATASET_LOCATION] - - if select_fields: - select_columns = [] - for field in select_fields: - if field.name in {'uri', 'uris'}: - # All active URIs, from newest to oldest - uris_field = func.array( - select([ - _dataset_uri_field(SELECTED_DATASET_LOCATION) - ]).where( - and_( - SELECTED_DATASET_LOCATION.c.dataset_ref == DATASET.c.id, - SELECTED_DATASET_LOCATION.c.archived == None - ) - ).order_by( - SELECTED_DATASET_LOCATION.c.added.desc(), - SELECTED_DATASET_LOCATION.c.id.desc() - ).label('uris') - ).label('uris') - select_columns.append(uris_field) - else: - select_columns.append(field.alchemy_expression.label(field.name)) - else: - select_columns = _DATASET_SELECT_FIELDS - - raw_expressions = PostgisDbAPI._alchemify_expressions(expressions) - - # We don't need 'DATASET_LOCATION table in the from expression - select_fields_ = [field for field in select_fields if field.name not in {'uri', 'uris'}] - - from_expression = PostgisDbAPI._from_expression(DATASET, expressions, select_fields_) - where_expr = and_(DATASET.c.archived == None, *raw_expressions) - - return ( - select( - select_columns - ).select_from( - from_expression - ).where( - where_expr - ).limit( - limit - ) - ) + # TODO + raise NotImplementedError() def search_unique_datasets(self, expressions, select_fields=None, limit=None): """ @@ -643,21 +582,25 @@ return self._connection.execute(select_query) def 
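# Illustrative usage sketch of spatially filtered search with the new
# ``geom`` argument, assuming a PostgisDbAPI instance ``conn``.  As
# implemented in search_datasets_query() above, when no spatial index
# exists for the geometry's CRS the query geometry is reprojected to
# EPSG:4326, for which an index is expected to exist by default.
from datacube.utils.geometry import CRS, polygon

aoi = polygon(
    [(1500000, -3950000), (1600000, -3950000), (1600000, -3850000),
     (1500000, -3850000), (1500000, -3950000)],
    crs=CRS("EPSG:3577"),                    # hypothetical AOI in Australian Albers (metres)
)
rows = conn.search_datasets(expressions=(), geom=aoi)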
get_duplicates(self, match_fields, expressions): + # TODO # type: (Tuple[PgField], Tuple[PgExpression]) -> Iterable[tuple] group_expressions = tuple(f.alchemy_expression for f in match_fields) + join_tables = PostgisDbAPI._join_tables(Dataset, expressions, match_fields) + + query = select( + (func.array_agg(Dataset.id),) + group_expressions + ).select_from(Dataset) + for join in join_tables: + query = query.join(join) - select_query = select( - (func.array_agg(DATASET.c.id),) + group_expressions - ).select_from( - PostgisDbAPI._from_expression(DATASET, expressions, match_fields) - ).where( - and_(DATASET.c.archived == None, *(PostgisDbAPI._alchemify_expressions(expressions))) + query = query.where( + and_(Dataset.archived == None, *(PostgisDbAPI._alchemify_expressions(expressions))) ).group_by( *group_expressions ).having( - func.count(DATASET.c.id) > 1 + func.count(Dataset.id) > 1 ) - return self._connection.execute(select_query) + return self._connection.execute(query) def count_datasets(self, expressions): """ @@ -669,14 +612,13 @@ select_query = ( select( - [func.count('*')] - ).select_from( - self._from_expression(DATASET, expressions) + func.count(Dataset.id) + ).where( + Dataset.archived == None ).where( - and_(DATASET.c.archived == None, *raw_expressions) + *raw_expressions ) ) - return self._connection.scalar(select_query) def count_datasets_through_time(self, start, end, period, time_field, expressions): @@ -721,57 +663,125 @@ ) ).alias('time_ranges') - count_query = ( - select( - (func.count('*'),) - ).select_from( - self._from_expression(DATASET, expressions) - ).where( - and_( - time_field.alchemy_expression.overlaps(time_ranges.c.time_period), - DATASET.c.archived == None, - *raw_expressions - ) + count_query = select(func.count('*')) + join_tables = self._join_tables(Dataset, expressions) + for join in join_tables: + count_query = count_query.join(join) + count_query = count_query.where( + and_( + time_field.alchemy_expression.overlaps(time_ranges.c.time_period), + Dataset.archived == None, + *raw_expressions ) ) return select((time_ranges.c.time_period, count_query.label('dataset_count'))) + def update_spindex(self, crs_seq: Sequence[CRS] = [], + product_names: Sequence[str] = [], + dsids: Sequence[str] = []) -> int: + """ + Update a spatial index + :param crs: CRSs for Spatial Indexes to update. Default=all indexes + :param product_names: Product names to update + :param dsids: Dataset IDs to update + + if neither product_names nor dataset ids are supplied, update for all datasets. + + if both are supplied, both the named products and identified datasets are updated. + + :return: Number of spatial index entries updated or verified as unindexed. + """ + verified = 0 + if crs_seq: + crses = [crs for crs in crs_seq] + else: + crses = self._db.spatial_indexes() + + # Update implementation. 
+ # Design will change, but this method should be fairly low level to be as efficient as possible + query = select( + Dataset.id, + Dataset.metadata_doc["grid_spatial"]["projection"] + ).select_from(Dataset) + if product_names: + query = query.join(Product) + if product_names and dsids: + query = query.where( + or_( + Product.name.in_(product_names), + Dataset.id.in_(dsids) + ) + ) + elif product_names: + query = query.where( + Product.name.in_(product_names) + ) + elif dsids: + query = query.where( + Dataset.id.in_(dsids) + ) + + def xytuple(o): + return (o['x'], o['y']) + + for result in self._connection.execute(query): + dsid = result[0] + native_crs = CRS(result[1]["spatial_reference"]) + geom = None + valid_data = result[1].get('valid_data') + if valid_data: + geom = geometry.Geometry(valid_data, crs=native_crs) + else: + geo_ref_points = result[1].get('geo_ref_points') + if geo_ref_points: + geom = geometry.polygon( + [xytuple(geo_ref_points[key]) for key in ('ll', 'ul', 'ur', 'lr', 'll')], + crs=native_crs + ) + if not geom: + verified += 1 + continue + for crs in crses: + self.insert_dataset_spatial(dsid, crs, geom) + verified += 1 + + return verified + @staticmethod - def _from_expression(source_table, expressions=None, fields=None): + def _join_tables(source_table, expressions=None, fields=None): join_tables = set() if expressions: join_tables.update(expression.field.required_alchemy_table for expression in expressions) if fields: join_tables.update(field.required_alchemy_table for field in fields) - join_tables.discard(source_table) - - table_order_hack = [DATASET_SOURCE, DATASET_LOCATION, DATASET, PRODUCT, METADATA_TYPE] - - from_expression = source_table - for table in table_order_hack: - if table in join_tables: - from_expression = from_expression.join(table) - return from_expression + join_tables.discard(source_table.__table__) + # TODO: Current architecture must sort-hack. Better join awareness required at field level. + sort_order_hack = [DatasetLocation, Dataset, Product, MetadataType] + return [ + orm_table + for orm_table in sort_order_hack + if orm_table.__table__ in join_tables + ] def get_product(self, id_): return self._connection.execute( - PRODUCT.select().where(PRODUCT.c.id == id_) + select(Product).where(Product.id == id_) ).first() def get_metadata_type(self, id_): return self._connection.execute( - METADATA_TYPE.select().where(METADATA_TYPE.c.id == id_) + select(MetadataType).where(MetadataType.id == id_) ).first() def get_product_by_name(self, name): return self._connection.execute( - PRODUCT.select().where(PRODUCT.c.name == name) + select(Product).where(Product.name == name) ).first() def get_metadata_type_by_name(self, name): return self._connection.execute( - METADATA_TYPE.select().where(METADATA_TYPE.c.name == name) + select(MetadataType).where(MetadataType.name == name) ).first() def insert_product(self, @@ -783,7 +793,7 @@ concurrently=True): res = self._connection.execute( - PRODUCT.insert().values( + insert(Product).values( name=name, metadata=metadata, metadata_type_ref=metadata_type_id, @@ -794,6 +804,7 @@ type_id = res.inserted_primary_key[0] # Initialise search fields. + # TODO: Isn't definition['metadata'] the same as metadata? 
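# Illustrative sketch of how update_spindex() above derives a native-CRS
# footprint when a dataset document carries no ``valid_data`` polygon: the
# corner ``geo_ref_points`` are closed into a polygon.  ``doc`` is a
# hypothetical dataset metadata document with a grid_spatial/projection
# section.
from datacube.utils import geometry
from datacube.utils.geometry import CRS

projection = doc["grid_spatial"]["projection"]
native_crs = CRS(projection["spatial_reference"])
points = projection["geo_ref_points"]
footprint = geometry.polygon(
    [(points[k]["x"], points[k]["y"]) for k in ("ll", "ul", "ur", "lr", "ll")],
    crs=native_crs,
)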
self._setup_product_fields(type_id, name, search_fields, definition['metadata'], concurrently=concurrently) return type_id @@ -803,39 +814,42 @@ metadata, metadata_type_id, search_fields, - definition, update_metadata_type=False, concurrently=False): + definition, + update_metadata_type=False, concurrently=False): + # TODO: Isn't definition['metadata'] the same as metadata? res = self._connection.execute( - PRODUCT.update().returning(PRODUCT.c.id).where( - PRODUCT.c.name == name + update(Product).returning(Product.id).where( + Product.name == name ).values( metadata=metadata, metadata_type_ref=metadata_type_id, definition=definition ) ) - type_id = res.first()[0] + prod_id = res.first()[0] if update_metadata_type: if not self._connection.in_transaction(): raise RuntimeError('Must update metadata types in transaction') self._connection.execute( - DATASET.update().where( - DATASET.c.dataset_type_ref == type_id + update(Dataset).where( + Dataset.product_ref == prod_id ).values( metadata_type_ref=metadata_type_id, ) ) # Initialise search fields. - self._setup_product_fields(type_id, name, search_fields, definition['metadata'], + # TODO: Isn't definition['metadata'] the same as metadata? + self._setup_product_fields(prod_id, name, search_fields, definition['metadata'], concurrently=concurrently, rebuild_view=True) - return type_id + return prod_id def insert_metadata_type(self, name, definition, concurrently=False): res = self._connection.execute( - METADATA_TYPE.insert().values( + insert(MetadataType).values( name=name, definition=definition ) @@ -849,8 +863,8 @@ def update_metadata_type(self, name, definition, concurrently=False): res = self._connection.execute( - METADATA_TYPE.update().returning(METADATA_TYPE.c.id).where( - METADATA_TYPE.c.name == name + update(MetadataType).returning(MetadataType.id).where( + MetadataType.name == name ).values( name=name, definition=definition @@ -886,15 +900,6 @@ def _setup_metadata_type_fields(self, id_, name, fields, rebuild_indexes=False, rebuild_views=False, concurrently=True): - # Metadata fields are no longer used (all queries are per-dataset-type): exclude all. - # This will have the effect of removing any old indexes that still exist. 
- exclude_fields = tuple(fields) - - dataset_filter = and_(DATASET.c.archived == None, DATASET.c.metadata_type_ref == id_) - dynamic.check_dynamic_fields(self._connection, concurrently, dataset_filter, - exclude_fields, fields, name, - rebuild_indexes=rebuild_indexes, rebuild_view=rebuild_views) - for product in self._get_products_for_metadata_type(id_): self._setup_product_fields( product['id'], @@ -908,7 +913,7 @@ def _setup_product_fields(self, id_, name, fields, metadata_doc, rebuild_indexes=False, rebuild_view=False, concurrently=True): - dataset_filter = and_(DATASET.c.archived == None, DATASET.c.dataset_type_ref == id_) + dataset_filter = and_(Dataset.archived == None, Dataset.product_ref == id_) excluded_field_names = tuple(self._get_active_field_names(fields, metadata_doc)) dynamic.check_dynamic_fields(self._connection, concurrently, dataset_filter, @@ -928,32 +933,33 @@ def get_all_products(self): return self._connection.execute( - PRODUCT.select().order_by(PRODUCT.c.name.asc()) + select(Product).order_by(Product.name.asc()) ).fetchall() def _get_products_for_metadata_type(self, id_): return self._connection.execute( - PRODUCT.select( - ).where( - PRODUCT.c.metadata_type_ref == id_ + select(Product).where( + Product.metadata_type_ref == id_ ).order_by( - PRODUCT.c.name.asc() + Product.name.asc() )).fetchall() def get_all_metadata_types(self): - return self._connection.execute(METADATA_TYPE.select().order_by(METADATA_TYPE.c.name.asc())).fetchall() + return self._connection.execute(select(MetadataType).order_by(MetadataType.name.asc())).fetchall() def get_locations(self, dataset_id): return [ record[0] for record in self._connection.execute( - select([ - _dataset_uri_field(DATASET_LOCATION) - ]).where( - and_(DATASET_LOCATION.c.dataset_ref == dataset_id, DATASET_LOCATION.c.archived == None) + select( + DatasetLocation.uri + ).where( + DatasetLocation.dataset_ref == dataset_id + ).where( + DatasetLocation.archived == None ).order_by( - DATASET_LOCATION.c.added.desc(), - DATASET_LOCATION.c.id.desc() + DatasetLocation.added.desc(), + DatasetLocation.id.desc() ) ).fetchall() ] @@ -965,12 +971,14 @@ return [ (location_uri, archived_time) for location_uri, archived_time in self._connection.execute( - select([ - _dataset_uri_field(DATASET_LOCATION), DATASET_LOCATION.c.archived - ]).where( - and_(DATASET_LOCATION.c.dataset_ref == dataset_id, DATASET_LOCATION.c.archived != None) + select( + DatasetLocation.uri, DatasetLocation.archived + ).where( + DatasetLocation.dataset_ref == dataset_id + ).where( + DatasetLocation.archived != None ).order_by( - DATASET_LOCATION.c.added.desc() + DatasetLocation.added.desc() ) ).fetchall() ] @@ -983,12 +991,12 @@ """ scheme, body = _split_uri(uri) res = self._connection.execute( - delete(DATASET_LOCATION).where( - and_( - DATASET_LOCATION.c.dataset_ref == dataset_id, - DATASET_LOCATION.c.uri_scheme == scheme, - DATASET_LOCATION.c.uri_body == body, - ) + delete(DatasetLocation).where( + DatasetLocation.dataset_ref == dataset_id + ).where( + DatasetLocation.uri_scheme == scheme + ).where( + DatasetLocation.uri_body == body ) ) return res.rowcount > 0 @@ -996,13 +1004,14 @@ def archive_location(self, dataset_id, uri): scheme, body = _split_uri(uri) res = self._connection.execute( - DATASET_LOCATION.update().where( - and_( - DATASET_LOCATION.c.dataset_ref == dataset_id, - DATASET_LOCATION.c.uri_scheme == scheme, - DATASET_LOCATION.c.uri_body == body, - DATASET_LOCATION.c.archived == None, - ) + update(DatasetLocation).where( + DatasetLocation.dataset_ref == 
dataset_id + ).where( + DatasetLocation.uri_scheme == scheme + ).where( + DatasetLocation.uri_body == body + ).where( + DatasetLocation.archived == None ).values( archived=func.now() ) @@ -1012,13 +1021,14 @@ def restore_location(self, dataset_id, uri): scheme, body = _split_uri(uri) res = self._connection.execute( - DATASET_LOCATION.update().where( - and_( - DATASET_LOCATION.c.dataset_ref == dataset_id, - DATASET_LOCATION.c.uri_scheme == scheme, - DATASET_LOCATION.c.uri_body == body, - DATASET_LOCATION.c.archived != None, - ) + update(DatasetLocation).where( + DatasetLocation.dataset_ref == dataset_id + ).where( + DatasetLocation.uri_scheme == scheme + ).where( + DatasetLocation.uri_body == body + ).where( + DatasetLocation.archived != None ).values( archived=None ) @@ -1037,7 +1047,7 @@ from pg_roles group_role inner join pg_auth_members am on am.roleid = group_role.oid inner join pg_roles user_role on am.member = user_role.oid - where (group_role.rolname like 'agdc_%%') and not (user_role.rolname like 'agdc_%%') + where (group_role.rolname like 'odc_%%') and not (user_role.rolname like 'odc_%%') order by group_role.oid asc, user_role.oid asc; """) for row in result: diff -Nru datacube-1.8.7/datacube/drivers/postgis/_connections.py datacube-1.8.9/datacube/drivers/postgis/_connections.py --- datacube-1.8.7/datacube/drivers/postgis/_connections.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/_connections.py 2022-11-17 00:47:28.000000000 +0000 @@ -17,18 +17,21 @@ import os import re from contextlib import contextmanager -from typing import Callable, Optional, Union +from typing import Any, Callable, Iterable, Mapping, Optional, Union, Type -from sqlalchemy import event, create_engine, text +from sqlalchemy import event, create_engine from sqlalchemy.engine import Engine -from sqlalchemy.engine.url import URL as EngineUrl +from sqlalchemy.engine.url import URL as EngineUrl # noqa: N811 import datacube from datacube.index.exceptions import IndexSetupError from datacube.utils import jsonify_document +from datacube.utils.geometry import CRS from . import _api from . import _core +from ._spatial import ensure_spindex, spindexes, spindex_for_crs +from ._schema import SpatialIndex _LIB_ID = 'odc-' + str(datacube.__version__) @@ -67,6 +70,7 @@ # We don't recommend using this constructor directly as it may change. # Use static methods PostGisDb.create() or PostGisDb.from_config() self._engine = engine + self._spindexes: Optional[Mapping[CRS, Any]] = None @classmethod def from_config(cls, config, application_name=None, validate_connection=True): @@ -206,8 +210,42 @@ return is_new + def _refresh_spindexes(self): + self._spindexes = spindexes(self._engine) + + @property + def spindexes(self) -> Mapping[CRS, Type[SpatialIndex]]: + if self._spindexes is None: + self._refresh_spindexes() + return self._spindexes + + def create_spatial_index(self, crs: "datacube.utils.geometry.CRS") -> Optional[Type[SpatialIndex]]: + """ + Create a spatial index across the database, for the named CRS. 
+ + :param crs_str: + :return: + """ + spidx = self.spindexes.get(crs) + if spidx is None: + spidx = spindex_for_crs(crs) + if spidx is None: + _LOG.warning("Could not dynamically model an index for CRS %s", crs._str) + return None + ensure_spindex(self._engine, spidx) + self.spindexes[crs] = spidx + return spidx + + def spatial_index(self, crs: CRS) -> Optional[Type[SpatialIndex]]: + return self.spindexes.get(crs) + + def spatial_indexes(self, refresh=False) -> Iterable[CRS]: + if refresh: + self._refresh_spindexes() + return list(self.spindexes.keys()) + @contextmanager - def connect(self): + def _connect(self): """ Borrow a connection from the pool. @@ -220,35 +258,12 @@ The connection can raise errors if not following this advice ("server closed the connection unexpectedly"), as some servers will aggressively close idle connections (eg. DEA's NCI servers). It also prevents the connection from being reused while borrowed. - """ - with self._engine.connect() as connection: - yield _api.PostgisDbAPI(connection) - connection.close() - - @contextmanager - def begin(self): - """ - Start a transaction. - - Returns an instance that will maintain a single connection in a transaction. - - Call commit() or rollback() to complete the transaction or use a context manager: - - with db.begin() as trans: - trans.insert_dataset(...) - - (Don't share an instance between threads) - :rtype: PostgresDBAPI + Low level context manager, use ._db_connection instead """ with self._engine.connect() as connection: - connection.execute(text('BEGIN')) try: - yield _api.PostgisDbAPI(connection) - connection.execute(text('COMMIT')) - except Exception: # pylint: disable=broad-except - connection.execute(text('ROLLBACK')) - raise + yield _api.PostgisDbAPI(self, connection) finally: connection.close() diff -Nru datacube-1.8.7/datacube/drivers/postgis/_core.py datacube-1.8.9/datacube/drivers/postgis/_core.py --- datacube-1.8.7/datacube/drivers/postgis/_core.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/_core.py 2022-11-17 00:47:28.000000000 +0000 @@ -9,18 +9,16 @@ import logging from datacube.drivers.postgis.sql import (INSTALL_TRIGGER_SQL_TEMPLATE, - SCHEMA_NAME, TYPES_INIT_SQL, - UPDATE_COLUMN_MIGRATE_SQL_TEMPLATE, - ADDED_COLUMN_MIGRATE_SQL_TEMPLATE, - UPDATE_TIMESTAMP_SQL, - escape_pg_identifier, - pg_column_exists, pg_exists) + SCHEMA_NAME, TYPES_INIT_SQL, + UPDATE_COLUMN_MIGRATE_SQL_TEMPLATE, + UPDATE_TIMESTAMP_SQL, + escape_pg_identifier) from sqlalchemy import MetaData from sqlalchemy.engine import Engine from sqlalchemy.schema import CreateSchema -USER_ROLES = ('agdc_user', 'agdc_ingest', 'agdc_manage', 'agdc_admin') +USER_ROLES = ('odc_user', 'odc_ingest', 'odc_manage', 'odc_admin') SQL_NAMING_CONVENTIONS = { "ix": 'ix_%(column_0_label)s', @@ -40,10 +38,10 @@ def install_timestamp_trigger(connection): from . import _schema - TABLE_NAMES = [ - _schema.METADATA_TYPE.name, - _schema.PRODUCT.name, - _schema.DATASET.name, + TABLE_NAMES = [ # noqa: N806 + _schema.MetadataType.__tablename__, + _schema.Product.__tablename__, + _schema.Dataset.__tablename__, ] # Create trigger capture function connection.execute(UPDATE_TIMESTAMP_SQL) @@ -53,11 +51,6 @@ connection.execute(UPDATE_COLUMN_MIGRATE_SQL_TEMPLATE.format(schema=SCHEMA_NAME, table=name)) connection.execute(INSTALL_TRIGGER_SQL_TEMPLATE.format(schema=SCHEMA_NAME, table=name)) -def install_added_column(connection): - from . 
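# Illustrative usage sketch of the spatial-index management methods added
# above, assuming an initialised PostGisDb instance ``db``: register an
# additional per-CRS index, then list the CRSs that are indexed.
from datacube.utils.geometry import CRS

db.create_spatial_index(CRS("EPSG:3577"))    # e.g. Australian Albers
for crs in db.spatial_indexes(refresh=True):
    print(crs)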
import _schema - TABLE_NAME = _schema.DATASET_LOCATION.name - connection.execute(ADDED_COLUMN_MIGRATE_SQL_TEMPLATE.format(schema=SCHEMA_NAME, table=TABLE_NAME)) - def schema_qualified(name): """ @@ -85,15 +78,17 @@ quoted_db_name, quoted_user = _get_quoted_connection_info(c) + _ensure_extension(c, 'POSTGIS') + if with_permissions: _LOG.info('Ensuring user roles.') - _ensure_role(c, 'agdc_user') - _ensure_role(c, 'agdc_ingest', inherits_from='agdc_user') - _ensure_role(c, 'agdc_manage', inherits_from='agdc_ingest') - _ensure_role(c, 'agdc_admin', inherits_from='agdc_manage', add_user=True) + _ensure_role(c, 'odc_user') + _ensure_role(c, 'odc_ingest', inherits_from='odc_user') + _ensure_role(c, 'odc_manage', inherits_from='odc_ingest') + _ensure_role(c, 'odc_admin', inherits_from='odc_manage', add_user=True) c.execute(""" - grant all on database {db} to agdc_admin; + grant all on database {db} to odc_admin; """.format(db=quoted_db_name)) if not has_schema(engine, c): @@ -101,19 +96,20 @@ try: c.execute('begin') if with_permissions: - # Switch to 'agdc_admin', so that all items are owned by them. - c.execute('set role agdc_admin') + # Switch to 'odc_admin', so that all items are owned by them. + c.execute('set role odc_admin') _LOG.info('Creating schema.') c.execute(CreateSchema(SCHEMA_NAME)) _LOG.info('Creating tables.') c.execute(TYPES_INIT_SQL) - METADATA.create_all(c) + from ._schema import orm_registry, ALL_STATIC_TABLES + _LOG.info("Dataset indexes: %s", repr(orm_registry.metadata.tables["odc.dataset"].indexes)) + orm_registry.metadata.create_all(c, tables=ALL_STATIC_TABLES) _LOG.info("Creating triggers.") install_timestamp_trigger(c) - _LOG.info("Creating added column.") - install_added_column(c) c.execute('commit') - except: + except: # noqa: E722 + _LOG.error("Unhandled SQLAlchemy error.") c.execute('rollback') raise finally: @@ -123,20 +119,20 @@ if with_permissions: _LOG.info('Adding role grants.') c.execute(""" - grant usage on schema {schema} to agdc_user; - grant select on all tables in schema {schema} to agdc_user; - grant execute on function {schema}.common_timestamp(text) to agdc_user; + grant usage on schema {schema} to odc_user; + grant select on all tables in schema {schema} to odc_user; + grant execute on function {schema}.common_timestamp(text) to odc_user; grant insert on {schema}.dataset, - {schema}.dataset_location, - {schema}.dataset_source to agdc_ingest; - grant usage, select on all sequences in schema {schema} to agdc_ingest; + {schema}.location, + {schema}.dataset_lineage to odc_ingest; + grant usage, select on all sequences in schema {schema} to odc_ingest; -- (We're only granting deletion of types that have nothing written yet: they can't delete the data itself) - grant insert, delete on {schema}.dataset_type, - {schema}.metadata_type to agdc_manage; + grant insert, delete on {schema}.product, + {schema}.metadata_type to odc_manage; -- Allow creation of indexes, views - grant create on schema {schema} to agdc_manage; + grant create on schema {schema} to odc_manage; """.format(schema=SCHEMA_NAME)) c.close() @@ -184,24 +180,12 @@ See the `schema_is_latest()` function above: this should apply updates that it requires. """ - # This will typically check if something exists (like a newly added column), and - # run the SQL of the change inside a single transaction. + # TODO: implement migrations + - # Empty, as no schema changes have been made recently. - # -> If you need to write one, look at the Git history of this - # function for some examples. 
- - # Post 1.8 DB Incremental Sync triggers - if not pg_column_exists(engine, schema_qualified('dataset'), 'updated'): - _LOG.info("Adding 'updated'/'added' fields and triggers to schema.") - c = engine.connect() - c.execute('begin') - install_timestamp_trigger(c) - install_added_column(c) - c.execute('commit') - c.close() - else: - _LOG.info("No schema updates required.") +def _ensure_extension(engine, extension_name="POSTGIS"): + sql = f'create extension if not exists {extension_name}' + engine.execute(sql) def _ensure_role(engine, name, inherits_from=None, add_user=False, create_db=False): @@ -251,13 +235,13 @@ Why are we even doing this? Can't we use the same names internally and externally? >>> to_pg_role('ingest') - 'agdc_ingest' + 'odc_ingest' >>> to_pg_role('fake') Traceback (most recent call last): ... ValueError: Unknown role 'fake'. Expected one of ... """ - pg_role = 'agdc_' + role.lower() + pg_role = 'odc_' + role.lower() if pg_role not in USER_ROLES: raise ValueError( 'Unknown role %r. Expected one of %r' % @@ -270,7 +254,7 @@ """ Convert a PostgreSQL role name back to an ODC name. - >>> from_pg_role('agdc_admin') + >>> from_pg_role('odc_admin') 'admin' >>> from_pg_role('fake') Traceback (most recent call last): diff -Nru datacube-1.8.7/datacube/drivers/postgis/_dynamic.py datacube-1.8.9/datacube/drivers/postgis/_dynamic.py --- datacube-1.8.7/datacube/drivers/postgis/_dynamic.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/_dynamic.py 2022-11-17 00:47:28.000000000 +0000 @@ -12,11 +12,12 @@ from sqlalchemy import select from ._core import schema_qualified -from ._schema import DATASET, PRODUCT, METADATA_TYPE from .sql import pg_exists, CreateView _LOG = logging.getLogger(__name__) +# TODO: Dynamic indexes currently disable in postgis driver. + def contains_all(d_, *keys): """ @@ -36,6 +37,11 @@ """ Ensure a view exists for the given fields """ + view_name = schema_qualified('dv_{}_dataset'.format(name)) + _LOG.warning("Skipping dynamic Index %s ", view_name) + + +def _old_ensure_view(conn, fields, name, replace_existing, where_expression): # Create a view of search fields (for debugging convenience). # 'dv_' prefix: dynamic view. To distinguish from views that are created as part of the schema itself. view_name = schema_qualified('dv_{}_dataset'.format(name)) @@ -47,14 +53,18 @@ exists = False if not exists: _LOG.debug('Creating view: %s', view_name) + # old_query = select( + # [field.alchemy_expression.label(field.name) for field in fields.values() + # if not field.affects_row_selection] + # ).select_from( + # Dataset.join(Product).join(MetadataType) + # ).where(where_expression) conn.execute( CreateView( view_name, select( - [field.alchemy_expression.label(field.name) for field in fields.values() + [field.alchemy_expression for field in fields.values() if not field.affects_row_selection] - ).select_from( - DATASET.join(PRODUCT).join(METADATA_TYPE) ).where(where_expression) ) ) @@ -71,7 +81,12 @@ """ Check that we have expected indexes and views for the given fields """ + _LOG.warning("Skipping all dynamic indexes for %s", name) + return + +def old_check_dynamic_fields(conn, concurrently, dataset_filter, excluded_field_names, fields, name, + rebuild_indexes=False, rebuild_view=False): # If this type has time/space fields, create composite indexes (as they are often searched together) # We will probably move these into product configuration in the future. 
composite_indexes = ( diff -Nru datacube-1.8.7/datacube/drivers/postgis/_fields.py datacube-1.8.9/datacube/drivers/postgis/_fields.py --- datacube-1.8.7/datacube/drivers/postgis/_fields.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/_fields.py 2022-11-17 00:47:28.000000000 +0000 @@ -491,7 +491,7 @@ @property def alchemy_expression(self): return self.field.alchemy_expression.overlaps( - self._range_class(self.low_value, self.high_value) + self._range_class(self.low_value, self.high_value, bounds='[]') ) diff -Nru datacube-1.8.7/datacube/drivers/postgis/__init__.py datacube-1.8.9/datacube/drivers/postgis/__init__.py --- datacube-1.8.7/datacube/drivers/postgis/__init__.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/__init__.py 2022-11-17 00:47:28.000000000 +0000 @@ -9,5 +9,6 @@ """ from ._connections import PostGisDb +from ._api import PostgisDbAPI -__all__ = ['PostGisDb'] +__all__ = ['PostGisDb', 'PostgisDbAPI'] diff -Nru datacube-1.8.7/datacube/drivers/postgis/_schema.py datacube-1.8.9/datacube/drivers/postgis/_schema.py --- datacube-1.8.7/datacube/drivers/postgis/_schema.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/_schema.py 2022-11-17 00:47:28.000000000 +0000 @@ -7,9 +7,11 @@ """ import logging +from typing import Type -from sqlalchemy import ForeignKey, UniqueConstraint, PrimaryKeyConstraint, CheckConstraint, SmallInteger -from sqlalchemy import Table, Column, Integer, String, DateTime +from sqlalchemy.orm import aliased, registry, relationship, column_property +from sqlalchemy import ForeignKey, UniqueConstraint, PrimaryKeyConstraint, CheckConstraint, SmallInteger, Text +from sqlalchemy import Column, Integer, String, DateTime from sqlalchemy.dialects import postgresql as postgres from sqlalchemy.sql import func @@ -18,123 +20,185 @@ _LOG = logging.getLogger(__name__) -METADATA_TYPE = Table( - 'metadata_type', _core.METADATA, - Column('id', SmallInteger, primary_key=True, autoincrement=True), +orm_registry = registry() - Column('name', String, unique=True, nullable=False), - - Column('definition', postgres.JSONB, nullable=False), - - # When it was added and by whom. - Column('added', DateTime(timezone=True), server_default=func.now(), nullable=False), - Column('added_by', sql.PGNAME, server_default=func.current_user(), nullable=False), - - # Note that the `updated` column is not included here to maintain backwards-compatibility - # with pre-1.8.3 datacubes (and it is not used by any internal ODC functionality yet anyway) - - # Name must be alphanumeric + underscores. - CheckConstraint(r"name ~* '^\w+$'", name='alphanumeric_name'), -) - -PRODUCT = Table( - 'dataset_type', _core.METADATA, - Column('id', SmallInteger, primary_key=True, autoincrement=True), - - # A name/label for this type (eg. 'ls7_nbar'). Specified by users. - Column('name', String, unique=True, nullable=False), - - # All datasets of this type should contain these fields. - # (newly-ingested datasets may be matched against these fields to determine the dataset type) - Column('metadata', postgres.JSONB, nullable=False), - - # The metadata format expected (eg. what fields to search by) - # Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern. - Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False), # type: ignore[call-overload] - - Column('definition', postgres.JSONB, nullable=False), - - # When it was added and by whom. 
- Column('added', DateTime(timezone=True), server_default=func.now(), nullable=False), - Column('added_by', sql.PGNAME, server_default=func.current_user(), nullable=False), - - # Note that the `updated` column is not included here to maintain backwards-compatibility - # with pre-1.8.3 datacubes (and it is not used by any internal ODC functionality yet anyway) - - # Name must be alphanumeric + underscores. - CheckConstraint(r"name ~* '^\w+$'", name='alphanumeric_name'), -) - -DATASET = Table( - 'dataset', _core.METADATA, - Column('id', postgres.UUID(as_uuid=True), primary_key=True), - - # Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern. - Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False), # type: ignore[call-overload] - # Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern. - Column('dataset_type_ref', None, ForeignKey(PRODUCT.c.id), index=True, nullable=False), # type: ignore[call-overload] - - Column('metadata', postgres.JSONB, index=False, nullable=False), - - # Date it was archived. Null for active datasets. - Column('archived', DateTime(timezone=True), default=None, nullable=True), - - # When it was added and by whom. - Column('added', DateTime(timezone=True), server_default=func.now(), nullable=False), - Column('added_by', sql.PGNAME, server_default=func.current_user(), nullable=False), - - # Note that the `updated` column is not included here to maintain backwards-compatibility - # with pre-1.8.3 datacubes (and it is not used by any internal ODC functionality yet anyway) -) - -DATASET_LOCATION = Table( - 'dataset_location', _core.METADATA, - Column('id', Integer, primary_key=True, autoincrement=True), - # Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern. - Column('dataset_ref', None, ForeignKey(DATASET.c.id), index=True, nullable=False), # type: ignore[call-overload] - - # The base URI to find the dataset. - # - # All paths in the dataset metadata can be computed relative to this. - # (it is often the path of the source metadata file) - # - # eg 'file:///g/data/datasets/LS8_NBAR/agdc-metadata.yaml' or 'ftp://eo.something.com/dataset' - # 'file' is a scheme, '///g/data/datasets/LS8_NBAR/agdc-metadata.yaml' is a body. - Column('uri_scheme', String, nullable=False), - Column('uri_body', String, nullable=False), +@orm_registry.mapped +class MetadataType: + __tablename__ = "metadata_type" + __table_args__ = ( + _core.METADATA, + CheckConstraint(r"name ~* '^\w+$'", name='alphanumeric_name'), + { + "schema": sql.SCHEMA_NAME, + "comment": "Metadata type, defining search fields requiring dynamic indexes", + } + ) + id = Column(SmallInteger, primary_key=True, autoincrement=True) + name = Column(String, unique=True, nullable=True, comment="A human-friendly name/label for this metadata type") + definition = Column(postgres.JSONB, nullable=False, comment="metadata schema with search fields") # When it was added and by whom. - Column('added', DateTime(timezone=True), server_default=func.now(), nullable=False), - Column('added_by', sql.PGNAME, server_default=func.current_user(), nullable=False), + added = Column(DateTime(timezone=True), server_default=func.now(), nullable=False, comment="when added") + added_by = Column(Text, server_default=func.current_user(), nullable=False, comment="added by whom") - # Date it was archived. Null for active locations. 
- Column('archived', DateTime(timezone=True), default=None, nullable=True), + products = relationship("Product") + datasets = relationship("Dataset") - UniqueConstraint('uri_scheme', 'uri_body', 'dataset_ref'), - # Note that the `updated` column is not included here to maintain backwards-compatibility - # with pre-1.8.3 datacubes (and it is not used by any internal ODC functionality yet anyway) -) - -# Link datasets to their source datasets. -DATASET_SOURCE = Table( - 'dataset_source', _core.METADATA, - # Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern. - Column('dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False), # type: ignore[call-overload] - - # An identifier for this source dataset. - # -> Usually it's the dataset type ('ortho', 'nbar'...), as there's typically only one source - # of each type. - Column('classifier', String, nullable=False), - # Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern. - Column('source_dataset_ref', None, ForeignKey(DATASET.c.id), nullable=False), # type: ignore[call-overload] - - PrimaryKeyConstraint('dataset_ref', 'classifier'), - UniqueConstraint('source_dataset_ref', 'dataset_ref'), - - # Note that the `updated` column is not included here to maintain backwards-compatibility - # with pre-1.8.3 datacubes (and it is not used by any internal ODC functionality yet anyway) - - # This table is immutable and uses a migrations based `added` column to keep track of new - # dataset locations being added. The added column defaults to `now()` -) +@orm_registry.mapped +class Product: + __tablename__ = "product" + __table_args__ = ( + _core.METADATA, + CheckConstraint(r"name ~* '^\w+$'", name='alphanumeric_name'), + { + "schema": sql.SCHEMA_NAME, + "comment": "A product or dataset type, family of related datasets." + } + ) + id = Column(SmallInteger, primary_key=True, autoincrement=True) + name = Column(String, unique=True, nullable=False, comment="A human-friendly name/label for this product") + # DB column named metadata for (temporary) backwards compatibility, + # but is forbidden by SQLAlchemy declarative style + metadata_doc = Column(name="metadata", + type_=postgres.JSONB, nullable=False, + comment="""The product metadata document (subset of the full definition) +All datasets of this type should contain these fields. +(newly-ingested datasets may be matched against these fields to determine the dataset type)""") + metadata_type_ref = Column(SmallInteger, ForeignKey(MetadataType.id), nullable=False, + comment="The metadata type - how to interpret the metadata") + definition = Column('definition', postgres.JSONB, nullable=False, comment="Full product definition document") + added = Column(DateTime(timezone=True), server_default=func.now(), nullable=False, comment="when added") + added_by = Column(Text, server_default=func.current_user(), nullable=False, comment="added by whom") + + datasets = relationship("Dataset") + + +@orm_registry.mapped +class Dataset: + __tablename__ = "dataset" + __table_args__ = ( + _core.METADATA, + { + "schema": sql.SCHEMA_NAME, + "comment": "A dataset." 
+ } + ) + id = Column(postgres.UUID(as_uuid=True), primary_key=True) + metadata_type_ref = Column(SmallInteger, ForeignKey(MetadataType.id), nullable=False, + comment="The metadata type - how to interpret the metadata") + product_ref = Column(SmallInteger, ForeignKey(Product.id), nullable=False, + comment="The product this dataset belongs to") + # DB column named metadata for (temporary) backwards compatibility, + # but is forbidden by SQLAlchemy declarative style + metadata_doc = Column(name="metadata", type_=postgres.JSONB, index=False, nullable=False, + comment="The dataset metadata document") + archived = Column(DateTime(timezone=True), default=None, nullable=True, + comment="when archived, null if active") + added = Column(DateTime(timezone=True), server_default=func.now(), nullable=False, comment="when added") + added_by = Column(Text, server_default=func.current_user(), nullable=False, comment="added by whom") + + locations = relationship("DatasetLocation", viewonly=True) + active_locations = relationship("DatasetLocation", + primaryjoin="and_(Dataset.id==DatasetLocation.dataset_ref, " + "DatasetLocation.archived==None)", + viewonly=True, + order_by="desc(DatasetLocation.added)") + archived_locations = relationship("DatasetLocation", + viewonly=True, + primaryjoin="and_(Dataset.id==DatasetLocation.dataset_ref, " + "DatasetLocation.archived!=None)" + ) + + +@orm_registry.mapped +class DatasetLocation: + __tablename__ = "location" + __table_args__ = ( + _core.METADATA, + UniqueConstraint('uri_scheme', 'uri_body', 'dataset_ref'), + { + "schema": sql.SCHEMA_NAME, + "comment": "Where data for the dataset can be found (uri)." + } + ) + id = Column(Integer, primary_key=True, autoincrement=True) + dataset_ref = Column(postgres.UUID(as_uuid=True), ForeignKey(Dataset.id), nullable=False, + comment="The product this dataset belongs to") + uri_scheme = Column(String, nullable=False, comment="The scheme of the uri.") + uri_body = Column(String, nullable=False, comment="""The body of the uri. + +The uri scheme and body make up the base URI to find the dataset. + +All paths in the dataset metadata can be computed relative to this. 
+(it is often the path of the source metadata file) + +eg 'file:///g/data/datasets/LS8_NBAR/odc-metadata.yaml' or 'ftp://eo.something.com/dataset' +'file' is a scheme, '///g/data/datasets/LS8_NBAR/odc-metadata.yaml' is a body.""") + added = Column(DateTime(timezone=True), server_default=func.now(), nullable=False, comment="when added") + added_by = Column(Text, server_default=func.current_user(), nullable=False, comment="added by whom") + archived = Column(DateTime(timezone=True), default=None, nullable=True, + comment="when archived, null for the active location") + uri = column_property(uri_scheme + ':' + uri_body) + dataset = relationship("Dataset") + + +SelectedDatasetLocation = aliased(DatasetLocation, name="sel_loc") + + +@orm_registry.mapped +class DatasetSource: + __tablename__ = "dataset_lineage" + __table_args__ = ( + _core.METADATA, + PrimaryKeyConstraint('dataset_ref', 'classifier'), + UniqueConstraint('source_dataset_ref', 'dataset_ref'), + { + "schema": sql.SCHEMA_NAME, + "comment": "Represents a source-lineage relationship between two datasets" + } + ) + dataset_ref = Column(postgres.UUID(as_uuid=True), nullable=False, index=True, + comment="The downstream derived dataset produced from the upstream source dataset.") + source_dataset_ref = Column( + postgres.UUID(as_uuid=True), nullable=False, index=True, + comment="An upstream source dataset that the downstream derived dataset was produced from." + ) + classifier = Column(String, nullable=False, comment="""An identifier for this source dataset. +E.g. the dataset type ('ortho', 'nbar'...) if there's only one source of each type, or a datestamp +for a time-range summary.""") + + +class SpatialIndex: + """ + Base class for dynamically SpatialIndex ORM models (See _spatial.py) + """ + + +@orm_registry.mapped +class SpatialIndexRecord: + __tablename__ = "spatial_indicies" + __table_args__ = ( + _core.METADATA, + { + "schema": sql.SCHEMA_NAME, + "comment": "Record of the existence of a Spatial Index Table for an SRID/CRS", + } + ) + srid = Column(SmallInteger, primary_key=True, autoincrement=False) + table_name = Column(String, + unique=True, nullable=True, + comment="The name of the table implementing the index - DO NOT CHANGE") + added = Column(DateTime(timezone=True), server_default=func.now(), nullable=False, comment="when added") + added_by = Column(Text, server_default=func.current_user(), nullable=False, comment="added by whom") + + @classmethod + def from_spindex(cls, spindex: Type[SpatialIndex]) -> "SpatialIndexRecord": + return cls(srid=spindex.__tablename__[8:], + table_name=spindex.__tablename__) + + +ALL_STATIC_TABLES = [ + MetadataType.__table__, Product.__table__, Dataset.__table__, + DatasetLocation.__table__, DatasetSource.__table__, SpatialIndexRecord.__table__ +] diff -Nru datacube-1.8.7/datacube/drivers/postgis/_spatial.py datacube-1.8.9/datacube/drivers/postgis/_spatial.py --- datacube-1.8.7/datacube/drivers/postgis/_spatial.py 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/_spatial.py 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,165 @@ +# This file is part of the Open Data Cube, see https://opendatacube.org for more information +# +# Copyright (c) 2015-2020 ODC Contributors +# SPDX-License-Identifier: Apache-2.0 +""" +Tracking spatial indexes +""" + +import logging +from threading import Lock +from typing import Mapping, Optional, Type, Union + +from sqlalchemy import ForeignKey, select +from sqlalchemy.dialects import postgresql as postgres +from geoalchemy2 import Geometry 
+ +from sqlalchemy.engine import Connectable +from sqlalchemy import Column +from sqlalchemy.orm import Session + +from datacube.utils.geometry import CRS, Geometry as Geom, multipolygon +from ._core import METADATA +from .sql import SCHEMA_NAME +from ._schema import orm_registry, Dataset, SpatialIndex, SpatialIndexRecord + +_LOG = logging.getLogger(__name__) + + +# In theory we could just use the SQLAlchemy registry for this, but it is not indexed +# in a useful way. +class SpatialIndexORMRegistry: + """Threadsafe global registry of SpatialIndex ORM classes, indexed by EPSG/SRID code.""" + _registry: Mapping[int, Type[SpatialIndex]] = {} + _lock = Lock() + + def __init__(self): + self._registry = self.__class__._registry + self._lock = self.__class__._lock + + def _to_epsg(self, epsg_or_crs: Union[CRS, int]) -> int: + """Utility method to convert a epsg_or_crs to an epsg.""" + if isinstance(epsg_or_crs, CRS): + return epsg_or_crs.epsg + else: + return epsg_or_crs + + def register(self, epsg_or_crs: Union[CRS, int]) -> bool: + """Ensure that SpatialIndex ORM clss is registered for this EPSG/SRID""" + epsg = self._to_epsg(epsg_or_crs) + added = False + with self._lock: + if epsg not in self._registry: + self._registry[epsg] = self._mint_new_spindex(epsg) + added = True + return added + + def get(self, epsg_or_crs: Union[CRS, int]) -> Optional[Type[SpatialIndex]]: + """Retrieve the registered SpatialIndex ORM class""" + epsg = self._to_epsg(epsg_or_crs) + return self._registry.get(epsg) + + def _mint_new_spindex(self, epsg: int): + """ + Dynamically create a new ORM class for a EPSG/SRID. + + Note: Called within registry lock. + """ + table_name = f"spatial_{epsg}" + attributes = { + '__tablename__': table_name, + '__table_args__': ( + METADATA, + { + "schema": SCHEMA_NAME, + "comment": "A product or dataset type, family of related datasets." + } + ), + "dataset_ref": Column(postgres.UUID(as_uuid=True), ForeignKey(Dataset.id), + primary_key=True, + nullable=False, + comment="The dataset being indexed") + } + # Add geometry column + attributes["extent"] = Column(Geometry('MULTIPOLYGON', srid=epsg), + nullable=False, + comment="The extent of the dataset") + return orm_registry.mapped(type(f'SpatialIdx{epsg}', (SpatialIndex,), attributes)) + + +def spindex_for_epsg(epsg: int) -> Type[SpatialIndex]: + """Return ORM class of a SpatialIndex for EPSG/SRID - dynamically creating if necessary""" + sir = SpatialIndexORMRegistry() + spindex = sir.get(epsg) + if spindex is None: + sir.register(epsg) + spindex = sir.get(epsg) + return spindex + + +def spindex_for_crs(crs: CRS) -> Type[SpatialIndex]: + """Return ORM class of a SpatialIndex for CRS - dynamically creating if necessary""" + if not str(crs).startswith("EPSG:") and crs.epsg is None: + # Postgis identifies CRSs by a numeric "SRID" which is equivalent to EPSG number. 
+ _LOG.error("Cannot create a postgis spatial index for a non-EPSG-style CRS.") + return None + + return spindex_for_epsg(crs.epsg) + + +def spindex_for_record(rec: SpatialIndexRecord) -> Type[SpatialIndex]: + """Convert a Record of a SpatialIndex created in a particular database to an ORM class""" + return spindex_for_crs(rec.crs) + + +def ensure_spindex(engine: Connectable, sp_idx: Type[SpatialIndex]) -> None: + """Ensure a Spatial Index exists in a particular database.""" + with Session(engine) as session: + results = session.execute( + select(SpatialIndexRecord.srid).where(SpatialIndexRecord.srid == sp_idx.__tablename__[8:]) + ) + for result in results: + # SpatialIndexRecord exists - actual index assumed to exist too. + return + # SpatialIndexRecord doesn't exist - create the index table... + orm_registry.metadata.create_all(engine, [sp_idx.__table__]) + # ... and add a SpatialIndexRecord + session.add(SpatialIndexRecord.from_spindex(sp_idx)) + session.flush() + return + + +def spindexes(engine: Connectable) -> Mapping[CRS, Type[SpatialIndex]]: + """ + Return a CRS-to-Spatial Index ORM class mapping for indexes that exist in a particular database. + """ + out = {} + sir = SpatialIndexORMRegistry() + with Session(engine) as session: + results = session.execute(select(SpatialIndexRecord.srid)) + for result in results: + epsg = int(result[0]) + spindex = spindex_for_epsg(epsg) + crs = CRS(f'EPSG:{epsg}') + out[crs] = spindex + return out + + +def promote_to_multipolygon(geom: Geom) -> Geom: + # Assumes input is a polygon or multipolygon - does not work on lines or points + if geom.type == "Multipolygon": + return geom + elif geom.type == "Polygon": + # Promote to multipolygon (is there a more elegant way to do this?? + polycoords = [list(geom.geom.exterior.coords)] + for interior in geom.geom.interiors: + polycoords.append(list(interior.coords)) + geom = multipolygon([polycoords], crs=geom.crs) + return geom + else: + raise ValueError(f"Cannot promote geometry type {geom.type} to multi-polygon") + + +def geom_alchemy(geom: Geom) -> str: + geom = promote_to_multipolygon(geom) + return f"SRID={geom.crs.epsg};{geom.wkt}" diff -Nru datacube-1.8.7/datacube/drivers/postgis/sql.py datacube-1.8.9/datacube/drivers/postgis/sql.py --- datacube-1.8.7/datacube/drivers/postgis/sql.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgis/sql.py 2022-11-17 00:47:28.000000000 +0000 @@ -18,6 +18,7 @@ class CreateView(Executable, ClauseElement): inherit_cache = True + def __init__(self, name, select): self.name = name self.select = select diff -Nru datacube-1.8.7/datacube/drivers/postgres/_api.py datacube-1.8.9/datacube/drivers/postgres/_api.py --- datacube-1.8.7/datacube/drivers/postgres/_api.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgres/_api.py 2022-11-17 00:47:28.000000000 +0000 @@ -182,9 +182,15 @@ def in_transaction(self): return self._connection.in_transaction() + def begin(self): + self._connection.execute(text('BEGIN')) + def rollback(self): self._connection.execute(text('ROLLBACK')) + def commit(self): + self._connection.execute(text('COMMIT')) + def execute(self, command): return self._connection.execute(command) diff -Nru datacube-1.8.7/datacube/drivers/postgres/_connections.py datacube-1.8.9/datacube/drivers/postgres/_connections.py --- datacube-1.8.7/datacube/drivers/postgres/_connections.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgres/_connections.py 2022-11-17 00:47:28.000000000 +0000 @@ -19,9 
+19,9 @@ from contextlib import contextmanager from typing import Callable, Optional, Union -from sqlalchemy import event, create_engine, text +from sqlalchemy import event, create_engine from sqlalchemy.engine import Engine -from sqlalchemy.engine.url import URL as EngineUrl +from sqlalchemy.engine.url import URL as EngineUrl # noqa: N811 import datacube from datacube.index.exceptions import IndexSetupError @@ -207,7 +207,7 @@ return is_new @contextmanager - def connect(self): + def _connect(self): """ Borrow a connection from the pool. @@ -220,35 +220,12 @@ The connection can raise errors if not following this advice ("server closed the connection unexpectedly"), as some servers will aggressively close idle connections (eg. DEA's NCI servers). It also prevents the connection from being reused while borrowed. - """ - with self._engine.connect() as connection: - yield _api.PostgresDbAPI(connection) - connection.close() - - @contextmanager - def begin(self): - """ - Start a transaction. - - Returns an instance that will maintain a single connection in a transaction. - - Call commit() or rollback() to complete the transaction or use a context manager: - - with db.begin() as trans: - trans.insert_dataset(...) - - (Don't share an instance between threads) - :rtype: PostgresDBAPI + Low level context manager, use ._db_connection instead """ with self._engine.connect() as connection: - connection.execute(text('BEGIN')) try: yield _api.PostgresDbAPI(connection) - connection.execute(text('COMMIT')) - except Exception: # pylint: disable=broad-except - connection.execute(text('ROLLBACK')) - raise finally: connection.close() diff -Nru datacube-1.8.7/datacube/drivers/postgres/_core.py datacube-1.8.9/datacube/drivers/postgres/_core.py --- datacube-1.8.7/datacube/drivers/postgres/_core.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgres/_core.py 2022-11-17 00:47:28.000000000 +0000 @@ -14,7 +14,7 @@ ADDED_COLUMN_MIGRATE_SQL_TEMPLATE, UPDATE_TIMESTAMP_SQL, escape_pg_identifier, - pg_column_exists, pg_exists) + pg_column_exists) from sqlalchemy import MetaData from sqlalchemy.engine import Engine from sqlalchemy.schema import CreateSchema @@ -40,7 +40,7 @@ def install_timestamp_trigger(connection): from . import _schema - TABLE_NAMES = [ + TABLE_NAMES = [ # noqa: N806 _schema.METADATA_TYPE.name, _schema.PRODUCT.name, _schema.DATASET.name, @@ -53,9 +53,10 @@ connection.execute(UPDATE_COLUMN_MIGRATE_SQL_TEMPLATE.format(schema=SCHEMA_NAME, table=name)) connection.execute(INSTALL_TRIGGER_SQL_TEMPLATE.format(schema=SCHEMA_NAME, table=name)) + def install_added_column(connection): from . 
import _schema - TABLE_NAME = _schema.DATASET_LOCATION.name + TABLE_NAME = _schema.DATASET_LOCATION.name # noqa: N806 connection.execute(ADDED_COLUMN_MIGRATE_SQL_TEMPLATE.format(schema=SCHEMA_NAME, table=TABLE_NAME)) @@ -113,7 +114,7 @@ _LOG.info("Creating added column.") install_added_column(c) c.execute('commit') - except: + except: # noqa: E722 c.execute('rollback') raise finally: diff -Nru datacube-1.8.7/datacube/drivers/postgres/_fields.py datacube-1.8.9/datacube/drivers/postgres/_fields.py --- datacube-1.8.7/datacube/drivers/postgres/_fields.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgres/_fields.py 2022-11-17 00:47:28.000000000 +0000 @@ -491,7 +491,7 @@ @property def alchemy_expression(self): return self.field.alchemy_expression.overlaps( - self._range_class(self.low_value, self.high_value) + self._range_class(self.low_value, self.high_value, bounds="[]") ) diff -Nru datacube-1.8.7/datacube/drivers/postgres/__init__.py datacube-1.8.9/datacube/drivers/postgres/__init__.py --- datacube-1.8.7/datacube/drivers/postgres/__init__.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgres/__init__.py 2022-11-17 00:47:28.000000000 +0000 @@ -9,5 +9,6 @@ """ from ._connections import PostgresDb +from ._api import PostgresDbAPI -__all__ = ['PostgresDb'] +__all__ = ['PostgresDb', 'PostgresDbAPI'] diff -Nru datacube-1.8.7/datacube/drivers/postgres/_schema.py datacube-1.8.9/datacube/drivers/postgres/_schema.py --- datacube-1.8.7/datacube/drivers/postgres/_schema.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgres/_schema.py 2022-11-17 00:47:28.000000000 +0000 @@ -72,7 +72,7 @@ # Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern. Column('metadata_type_ref', None, ForeignKey(METADATA_TYPE.c.id), nullable=False), # type: ignore[call-overload] # Typing note: sqlalchemy-stubs doesn't handle this legitimate calling pattern. 
- Column('dataset_type_ref', None, ForeignKey(PRODUCT.c.id), index=True, nullable=False), # type: ignore[call-overload] + Column('dataset_type_ref', None, ForeignKey(PRODUCT.c.id), index=True, nullable=False), # type: ignore[call-overload] # noqa: E501 Column('metadata', postgres.JSONB, index=False, nullable=False), diff -Nru datacube-1.8.7/datacube/drivers/postgres/sql.py datacube-1.8.9/datacube/drivers/postgres/sql.py --- datacube-1.8.7/datacube/drivers/postgres/sql.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/drivers/postgres/sql.py 2022-11-17 00:47:28.000000000 +0000 @@ -18,6 +18,7 @@ class CreateView(Executable, ClauseElement): inherit_cache = True + def __init__(self, name, select): self.name = name self.select = select diff -Nru datacube-1.8.7/datacube/executor.py datacube-1.8.9/datacube/executor.py --- datacube-1.8.7/datacube/executor.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/executor.py 2022-11-17 00:47:28.000000000 +0000 @@ -242,15 +242,3 @@ return concurrent_exec return SerialExecutor() - - -def mk_celery_executor(host, port, password=''): - """ - :param host: Address of the redis database server - :param port: Port of the redis database server - :password: Authentication for redis or None or '' - '' -- load from home folder, or generate if missing, - None -- no authentication - """ - from ._celery_runner import CeleryExecutor - return CeleryExecutor(host, port, password=password) diff -Nru datacube-1.8.7/datacube/helpers.py datacube-1.8.9/datacube/helpers.py --- datacube-1.8.7/datacube/helpers.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/helpers.py 2022-11-17 00:47:28.000000000 +0000 @@ -9,8 +9,6 @@ """ import numpy as np -import rasterio # type: ignore[import] -import warnings DEFAULT_PROFILE = { 'blockxsize': 256, @@ -22,55 +20,6 @@ 'tiled': True} -def write_geotiff(filename, dataset, profile_override=None): - """ - DEPRECATED: use datacube.utils.cog.write_cog instead. - - Write an ODC style xarray.Dataset to a GeoTIFF file. - - :param filename: Output filename - :param dataset: xarray dataset containing one or more bands to write to a file. - :param profile_override: option dict, overrides rasterio file creation options. - :param time_index: DEPRECATED - """ - warnings.warn("""Function datacube.helpers.write_geotiff is deprecated, -please use datacube.utils.cog.write_cog instead""", - category=DeprecationWarning) - - profile_override = profile_override or {} - - geobox = getattr(dataset, 'geobox', None) - - if geobox is None: - raise ValueError('Can only write datasets with specified `crs` attribute') - - try: - dtypes = {val.dtype for val in dataset.data_vars.values()} - assert len(dtypes) == 1 # Check for multiple dtypes - except AttributeError: - dtypes = [dataset.dtype] - - profile = DEFAULT_PROFILE.copy() - height, width = geobox.shape - - profile.update({ - 'width': width, - 'height': height, - 'transform': geobox.affine, - 'crs': str(geobox.crs), - 'count': len(dataset.data_vars), - 'dtype': str(dtypes.pop()) - }) - profile.update(profile_override) - - _calculate_blocksize(profile) - - with rasterio.open(str(filename), 'w', **profile) as dest: - if hasattr(dataset, 'data_vars'): - for bandnum, data in enumerate(dataset.data_vars.values(), start=1): - dest.write(data.data, bandnum) - - def _calculate_blocksize(profile): # Block size must be smaller than the image size, and for geotiffs must be divisible by 16 # Fix for small images. 
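
Editorial note (not part of the patch): the helpers.py hunk above removes the long-deprecated write_geotiff() helper, whose own deprecation message pointed callers at datacube.utils.cog.write_cog. As an illustrative sketch only, the replacement call for a band returned by dc.load() would look roughly like the following, assuming an xarray.DataArray named ``band`` that still carries its geobox/CRS metadata::

    from datacube.utils.cog import write_cog

    # band: an xarray.DataArray produced by dc.load(), with CRS/geobox attached
    # writes a Cloud-Optimised GeoTIFF in place of the removed write_geotiff()
    write_cog(geo_im=band, fname="output.tif", overwrite=True)
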
diff -Nru datacube-1.8.7/datacube/index/abstract.py datacube-1.8.9/datacube/index/abstract.py --- datacube-1.8.7/datacube/index/abstract.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/abstract.py 2022-11-17 00:47:28.000000000 +0000 @@ -3,20 +3,27 @@ # Copyright (c) 2015-2022 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import datetime +import logging from pathlib import Path +from threading import Lock from abc import ABC, abstractmethod from typing import (Any, Iterable, Iterator, List, Mapping, Optional, - Tuple, Union) + Tuple, Union, Sequence) from uuid import UUID from datacube.config import LocalConfig +from datacube.index.exceptions import TransactionException from datacube.index.fields import Field from datacube.model import Dataset, MetadataType, Range from datacube.model import DatasetType as Product from datacube.utils import cached_property, read_documents, InvalidDocException from datacube.utils.changes import AllowPolicy, Change, Offset +from datacube.utils.generic import thread_local_cache +from datacube.utils.geometry import CRS, Geometry, box + +_LOG = logging.getLogger(__name__) class AbstractUserResource(ABC): @@ -238,9 +245,11 @@ :returns: All available MetadataType models """ + QueryField = Union[str, float, int, Range, datetime.datetime] QueryDict = Mapping[str, QueryField] + class AbstractProductResource(ABC): """ Abstract base class for the Product portion of an index api. @@ -539,9 +548,8 @@ """ @abstractmethod - def add(self, - dataset: Dataset, - with_lineage: Optional[bool] = None + def add(self, dataset: Dataset, + with_lineage: bool = True ) -> Dataset: """ Add ``dataset`` to the index. No-op if it is already present. @@ -549,7 +557,7 @@ :param dataset: Unpersisted dataset model :param with_lineage: - - ``True|None`` attempt adding lineage datasets if missing + - ``True (default)`` attempt adding lineage datasets if missing - ``False`` record lineage relations, but do not attempt adding lineage datasets to the db @@ -756,7 +764,7 @@ """ Perform a search, returning results as Dataset objects. - :param limit: Limit number of dataset (None/default = unlimited) + :param limit: Limit number of datasets per product (None/default = unlimited) :param query: search query parameters :return: Matching datasets """ @@ -901,6 +909,244 @@ property functions). """ + @abstractmethod + def spatial_extent(self, ids: Iterable[DSID], crs: CRS = CRS("EPSG:4326")) -> Optional[Geometry]: + """ + Return the combined spatial extent of the nominated datasets. + + Uses spatial index. + Returns None if no index for the CRS, or if no identified datasets are indexed in the relevant spatial index. + Result will not include extents of datasets that cannot be validly projected into the CRS. + + :param ids: An iterable of dataset IDs + :param crs: A CRS (defaults to EPSG:4326) + :return: The combined spatial extents of the datasets. + """ + + def _extract_geom_from_query(self, q: Mapping[str, QueryField]) -> Optional[Geometry]: + """ + Utility method for index drivers supporting spatial indexes. + + Extract a Geometry from a dataset query. Backwards compatible with old lat/lon style queries. + + :param q: A query dictionary + :return: A polygon or multipolygon type Geometry. None if no spatial query clauses. + """ + geom: Optional[Geometry] = None + if "geometry" in q: + # New geometry-style spatial query + geom_term = q.pop("geometry") + try: + geom = Geometry(geom_term) + except ValueError: + # Can't convert to single Geometry. 
If it is an iterable of Geometries, return the union + for term in geom_term: + if geom is None: + geom = Geometry(term) + else: + geom = geom.union(Geometry(term)) + if "lat" in q or "lon" in q: + raise ValueError("Cannot specify lat/lon AND geometry in the same query") + assert geom.crs + else: + # Old lat/lon--style spatial query (or no spatial query) + lat = q.pop("lat", None) + lon = q.pop("lon", None) + if lat is None and lon is None: + # No spatial query + _LOG.info("No spatial query") + return None + + # Old lat/lon--style spatial query + if lat is None: + lat = Range(begin=-90, end=90) + if lon is None: + lon = Range(begin=-180, end=180) + delta = 0.000001 + if isinstance(lat, Range) and isinstance(lon, Range): + # ranges for both - build a box. + geom = box(lon.begin, lat.begin, lon.end, lat.end, crs=CRS("EPSG:4326")) + elif isinstance(lat, Range): + if isinstance(lon, (int, float)): + # lat is a range, but lon is scalar - geom is ideally a line + # datacube.utils.geometry is always (x, y) order - ignore lat,lon order specified by EPSG:4326 + geom = box(lon - delta, lat.begin, lon + delta, lat.end, crs=CRS("EPSG:4326")) + else: + raise ValueError("lon search term must be a Range or a numeric scalar") + elif isinstance(lon, Range): + if isinstance(lat, (int, float)): + # lon is a range, but lat is scalar - geom is ideally a line + # datacube.utils.geometry is always (x, y) order - ignore lat,lon order specified by EPSG:4326 + geom = box(lon.begin, lat - delta, lon.end, lat + delta, crs=CRS("EPSG:4326")) + else: + raise ValueError("lat search term must be a Range or a numeric scalar") + else: + if isinstance(lon, (int, float)) and isinstance(lat, (int, float)): + # Lat and Lon are both scalars - geom is ideally point + # datacube.utils.geometry is always (x, y) order - ignore lat,lon order specified by EPSG:4326 + geom = box(lon - delta, lat - delta, lon + delta, lat + delta, crs=CRS("EPSG:4326")) + else: + raise ValueError("lat and lon search terms must be of type Range or a numeric scalar") + _LOG.info("Spatial Query Geometry: %s", geom.wkt) + return geom + + +class AbstractTransaction(ABC): + """ + Abstract base class for a Transaction Manager. All index implementations should extend this base class. + + Thread-local storage and locks ensures one active transaction per index per thread. + """ + + def __init__(self, index_id: str): + self._connection: Any = None + self._tls_id = f"txn-{index_id}" + self._obj_lock = Lock() + + # Main Transaction API + def begin(self) -> None: + """ + Start a new transaction. + + Raises an error if a transaction is already active for this thread. + + Calls implementation-specific _new_connection() method and manages thread local storage and locks. + """ + with self._obj_lock: + if self._connection is not None: + raise ValueError("Cannot start a new transaction as one is already active") + self._tls_stash() + + def commit(self) -> None: + """ + Commit the transaction. + + Raises an error if transaction is not active. + + Calls implementation-specific _commit() method, and manages thread local storage and locks. + """ + with self._obj_lock: + if self._connection is None: + raise ValueError("Cannot commit inactive transaction") + self._commit() + self._release_connection() + self._connection = None + self._tls_purge() + + def rollback(self) -> None: + """ + Rollback the transaction. + + Raises an error if transaction is not active. + + Calls implementation-specific _rollback() method, and manages thread local storage and locks. 
+ """ + with self._obj_lock: + if self._connection is None: + raise ValueError("Cannot rollback inactive transaction") + self._rollback() + self._release_connection() + self._connection = None + self._tls_purge() + + @property + def active(self): + """ + :return: True if the transaction is active. + """ + return self._connection is not None + + # Manage thread-local storage + def _tls_stash(self) -> None: + """ + Check TLS is empty, create a new connection and stash it. + :return: + """ + stored_val = thread_local_cache(self._tls_id) + if stored_val is not None: + raise ValueError("Cannot start a new transaction as one is already active for this thread") + self._connection = self._new_connection() + thread_local_cache(self._tls_id, purge=True) + thread_local_cache(self._tls_id, self) + + def _tls_purge(self) -> None: + thread_local_cache(self._tls_id, purge=True) + + # Commit/Rollback exceptions for Context Manager usage patterns + def commit_exception(self, errmsg: str) -> TransactionException: + return TransactionException(errmsg, commit=True) + + def rollback_exception(self, errmsg: str) -> TransactionException: + return TransactionException(errmsg, commit=False) + + # Context Manager Interface + def __enter__(self): + self.begin() + return self + + def __exit__(self, exc_type, exc_value, traceback): + if not self.active: + # User has already manually committed or rolled back. + return True + if exc_type is not None and issubclass(exc_type, TransactionException): + # User raised a TransactionException, Commit or rollback as per exception + if exc_value.commit: + self.commit() + else: + self.rollback() + # Tell runtime exception is caught and handled. + return True + elif exc_value is not None: + # Any other exception - rollback + self.rollback() + # Instruct runtime to rethrow exception + return False + else: + # Exited without exception - commit and continue + self.commit() + return True + + # Internal abstract methods for implementation-specific functionality + @abstractmethod + def _new_connection(self) -> Any: + """ + :return: a new index driver object representing a database connection or equivalent against which transactions + will be executed. + """ + + @abstractmethod + def _commit(self) -> None: + """ + Commit the transaction. + """ + + @abstractmethod + def _rollback(self) -> None: + """ + Rollback the transaction. + """ + + @abstractmethod + def _release_connection(self) -> None: + """ + Release the connection object stored in self._connection + """ + + +class UnhandledTransaction(AbstractTransaction): + # Minimal implementation for index drivers with no transaction handling. + def _new_connection(self) -> Any: + return True + + def _commit(self) -> None: + pass + + def _rollback(self) -> None: + pass + + def _release_connection(self) -> None: + pass + class AbstractIndex(ABC): """ @@ -908,25 +1154,43 @@ inherit from this base class and implement all abstract methods. """ + # Interface contracts + # supports add() update() remove() etc methods. + supports_persistance = True + # supports legacy ODCv1 EO style metadata types. + supports_legacy = True + # supports non-geospatial (e.g. 
telemetry) metadata types + supports_nongeo = True + # supports lineage + supports_lineage = True + supports_source_filters = True + # Supports ACID transactions + supports_transactions = False + @property @abstractmethod - def url(self) -> str: pass + def url(self) -> str: + """A string representing the index""" @property @abstractmethod - def users(self) -> AbstractUserResource: pass + def users(self) -> AbstractUserResource: + """A User Resource instance for the index""" @property @abstractmethod - def metadata_types(self) -> AbstractMetadataTypeResource: pass + def metadata_types(self) -> AbstractMetadataTypeResource: + """A MetadataType Resource instance for the index""" @property @abstractmethod - def products(self) -> AbstractProductResource: pass + def products(self) -> AbstractProductResource: + """A Product Resource instance for the index""" @property @abstractmethod - def datasets(self) -> AbstractDatasetResource: pass + def datasets(self) -> AbstractDatasetResource: + """A Dataset Resource instance for the index""" @classmethod @abstractmethod @@ -935,22 +1199,95 @@ application_name: Optional[str] = None, validate_connection: bool = True ) -> "AbstractIndex": - pass + """Instantiate a new index from a LocalConfig object""" @classmethod @abstractmethod def get_dataset_fields(cls, doc: dict ) -> Mapping[str, Field]: - pass + """Return dataset search fields from a metadata type document""" @abstractmethod def init_db(self, with_default_types: bool = True, - with_permissions: bool = True) -> bool: pass + with_permissions: bool = True) -> bool: + """ + Initialise an empty database. + + :param with_default_types: Whether to create default metadata types + :param with_permissions: Whether to create db permissions + :return: true if the database was created, false if already exists + """ + + @abstractmethod + def close(self) -> None: + """ + Close and cleanup the Index. + """ + + @property + @abstractmethod + def index_id(self) -> str: + """ + :return: Unique ID for this index + (e.g. same database/dataset storage + same index driver implementation = same id) + """ + + @abstractmethod + def transaction(self) -> AbstractTransaction: + """ + :return: a Transaction context manager for this index. + """ @abstractmethod - def close(self) -> None: pass + def create_spatial_index(self, crs: CRS) -> bool: + """ + Create a spatial index using the nominated CRS. + + :param crs: The CRS to use in the spatial index. + :return: True is the index was successfully created or already exists. + None if spatial indexes are not supported. + """ + + def thread_transaction(self) -> Optional["AbstractTransaction"]: + """ + :return: The existing Transaction object cached in thread-local storage for this index, if there is one. + """ + return thread_local_cache(f"txn-{self.index_id}", None) + + def spatial_indexes(self, refresh=False) -> Iterable[CRS]: + """ + Return a list of CRSs for which spatiotemporal indexes exist in the database. + + :param refresh: If true, re-read from database record (e.g. to catch spatial + indexes recently created in another datacube session. + :return: + """ + _LOG.warning("Spatial index API is unstable and may change between releases.") + return [] + + def update_spatial_index(self, + crses: Sequence[CRS] = [], + product_names: Sequence[str] = [], + dataset_ids: Sequence[DSID] = [] + ) -> int: + """ + Update a spatial index + :param crs: CRSs for Spatial Indexes to update. 
Default=all indexes + :param product_names: Product names to update + :param dsids: Dataset IDs to update + + If neither product_names nor dataset ids are supplied, update for all datasets. + + If both are supplied, both the named products and identified datasets are updated. + + If spatial indexes are not supported by the index driver, always return zero. + + :return: Number of spatial index entries updated or verified as unindexed. + """ + _LOG.warning("Spatial index API is unstable and may change between releases.") + return 0 def __enter__(self): return self @@ -970,14 +1307,13 @@ application_name: Optional[str] = None, validate_connection: bool = True ) -> "AbstractIndex": - pass + ... @staticmethod @abstractmethod - def metadata_type_from_doc( - definition: dict + def metadata_type_from_doc(definition: dict ) -> MetadataType: - pass + ... # The special handling of grid_spatial, etc appears to NOT apply to EO3. @@ -1003,4 +1339,4 @@ @property def bounds(self): - return Dataset.bounds.__get__(self) \ No newline at end of file + return Dataset.bounds.__get__(self) diff -Nru datacube-1.8.7/datacube/index/default-metadata-types.yaml datacube-1.8.9/datacube/index/default-metadata-types.yaml --- datacube-1.8.7/datacube/index/default-metadata-types.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/default-metadata-types.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -310,4 +310,4 @@ max_offset: - [image, satellite_ref_point_end, y] # If an end is not specified, use the start. - - [image, satellite_ref_point_start, y] \ No newline at end of file + - [image, satellite_ref_point_start, y] diff -Nru datacube-1.8.7/datacube/index/eo3.py datacube-1.8.9/datacube/index/eo3.py --- datacube-1.8.7/datacube/index/eo3.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/eo3.py 2022-11-17 00:47:28.000000000 +0000 @@ -7,17 +7,16 @@ # TODO: typehints need attention """ Tools for working with EO3 metadata """ -from types import SimpleNamespace from affine import Affine -import toolz # type: ignore[import] -from typing import Dict, Any, Optional +from functools import reduce +from typing import Dict, Any, Iterable, Optional, Tuple, Union +from uuid import UUID from datacube.utils.geometry import ( SomeCRS, CRS, Geometry, polygon, - bbox_union, CoordList, BoundingBox, lonlat_bounds, @@ -26,69 +25,68 @@ EO3_SCHEMA = "https://schemas.opendatacube.org/dataset" -def _norm_grid(grid: Dict[str, Any]) -> Any: - shape = grid.get('shape') - transform = grid.get('transform') - if shape is None or transform is None: - raise ValueError("Each grid must have .shape and .transform") - return SimpleNamespace(shape=shape, - transform=Affine(*transform[:6])) - - -def grid2points(grid: Dict[str, Any], - ring: bool = False) -> CoordList: - grid = _norm_grid(grid) - - ny, nx = (float(dim) for dim in grid.shape) - transform = grid.transform - pts = [(0.0, 0.0), (nx, 0.0), (nx, ny), (0.0, ny)] - if ring: - pts += pts[:1] - return [transform*pt for pt in pts] - - -def grid2ref_points(grid: Dict[str, Any]) -> Dict[str, Any]: - nn = ['ul', 'ur', 'lr', 'll'] - return {n: dict(x=x, y=y) - for n, (x, y) in zip(nn, grid2points(grid))} - - -def grid2polygon(grid: Dict[str, Any], crs: SomeCRS) -> Geometry: - return polygon(grid2points(grid, ring=True), crs) - - -def eo3_lonlat_bbox(doc: Dict[str, Any], +class EO3Grid: + def __init__(self, grid: Dict[str, Any]) -> None: + shape = grid.get("shape") + if shape is None: + raise ValueError("Each grid must have a shape") + if len(shape) != 2: + raise 
ValueError("Grid shape must be two dimensional") + self.shape: Tuple[int, int] = tuple(int(x) for x in shape) + xform = grid.get("transform") + if xform is None: + raise ValueError("Each grid must have a transform") + if len(xform) != 6 and len(xform) != 9: + raise ValueError("Grid transform must have 6 or 9 elements.") + for elem in xform: + if type(elem) not in (int, float): + raise ValueError("All grid transform elements must be numbers") + if len(xform) == 9 and list(xform[6:]) != [0, 0, 1]: + raise ValueError("Grid transform must be a valid Affine matrix") + self.transform = Affine(*xform[:6]) + + def points(self, ring: bool = False) -> CoordList: + ny, nx = (float(dim) for dim in self.shape) + pts = [(0.0, 0.0), (nx, 0.0), (nx, ny), (0.0, ny)] + if ring: + pts += pts[:1] + return [self.transform * pt for pt in pts] + + def ref_points(self) -> Dict[str, Dict[str, float]]: + nn = ['ul', 'ur', 'lr', 'll'] + return {n: dict(x=x, y=y) + for n, (x, y) in zip(nn, self.points())} + + def polygon(self, crs: Optional[SomeCRS] = None) -> Geometry: + return polygon(self.points(ring=True), crs=crs) + + +def eo3_lonlat_bbox(grids: Iterable[EO3Grid], + crs: CRS, + valid_data: Optional[Geometry] = None, resolution: Optional[float] = None) -> BoundingBox: - """ Compute bounding box in Lon/Lat for a given EO3 document. + """ Compute bounding box for all grids in Lon/Lat """ - crs = doc.get('crs') - grids = doc.get('grids') - - if crs is None or grids is None: - raise ValueError("Input must have crs and grids") + if valid_data is not None: + return lonlat_bounds(valid_data, resolution=resolution) - crs = CRS(crs) - geom = doc.get('geometry', None) - if geom is not None: - geom = Geometry(geom, crs) - return lonlat_bounds(geom, resolution=resolution) - - bounds = [lonlat_bounds(grid2polygon(grid, crs), resolution=resolution) - for grid in grids.values()] - - return bbox_union(bounds) + all_grids_extent = reduce(lambda x, y: x.union(y), (grid.polygon(crs) for grid in grids)) + return lonlat_bounds(all_grids_extent, resolution=resolution) def eo3_grid_spatial(doc: Dict[str, Any], - resolution: Optional[float] = None) -> Dict[str, Any]: + resolution: Optional[float] = None, + grid_name: str = "default") -> Dict[str, Any]: """Using doc[grids|crs|geometry] compute EO3 style grid spatial: Note that `geo_ref_points` are set to the 4 corners of the default grid only, while lon/lat bounds are computed using all the grids, unless tighter valid region is defined via `geometry` key, in which case it is used to determine lon/lat bounds instead. + Uses the default grid. 
inputs: + ``` crs: "<:str>" geometry: <:GeoJSON object> # optional @@ -120,25 +118,32 @@ ``` """ - grid = toolz.get_in(['grids', 'default'], doc, None) + gridspecs = doc.get("grids", {}) crs = doc.get('crs', None) - if crs is None or grid is None: - raise ValueError("Input must have crs and grids.default") + if crs is None or not gridspecs: + raise ValueError("Input must have crs and grids.") + grids = {name: EO3Grid(grid_spec) for name, grid_spec in gridspecs.items()} + grid = grids.get(grid_name) + if not grid: + raise ValueError(f"Input must have grids.{grid_name}") geometry = doc.get('geometry') - if geometry is not None: - valid_data = dict(valid_data=geometry) + valid_data: Dict[str, Any] = dict(valid_data=geometry) + valid_geom: Optional[Geometry] = polygon(valid_data["valid_data"]["coordinates"][0], crs=crs) else: - valid_data = {} + valid_data = dict(valid_data=grid.polygon().json) + valid_geom = None oo = dict(grid_spatial=dict(projection={ 'spatial_reference': crs, - 'geo_ref_points': grid2ref_points(grid), + 'geo_ref_points': grid.ref_points(), **valid_data, })) - x1, y1, x2, y2 = eo3_lonlat_bbox(doc, resolution=resolution) + x1, y1, x2, y2 = eo3_lonlat_bbox(grids.values(), crs, + valid_data=valid_geom, + resolution=resolution) oo['extent'] = dict(lon=dict(begin=x1, end=x2), lat=dict(begin=y1, end=y2)) return oo @@ -179,6 +184,25 @@ raise ValueError(f'Unsupported dataset schema: {schema!r}') +def is_doc_geo(doc: Dict[str, Any], check_eo3: bool = True) -> bool: + """ Is this document geospatial? + + :param doc: Parsed ODC Dataset metadata document + :param check_url: Set to false to skip the EO3 check and assume doc isn't EO3. + + :returns: + True if this document specifies geospatial dimensions + False if this document does not specify geospatial dimensions (e.g. telemetry only) + + :raises ValueError: For an unsupported document + """ + # EO3 is geospatial + if check_eo3 and is_doc_eo3(doc): + return True + # Does this cover EO legacy datasets ok? at all?? 
+ return "extent" in doc or "grid_spatial" in doc + + def prep_eo3(doc: Dict[str, Any], auto_skip: bool = False, resolution: Optional[float] = None) -> Dict[str, Any]: @@ -194,6 +218,11 @@ if not is_doc_eo3(doc): return doc + def stringify(u: Optional[Union[str, UUID]]) -> Optional[str]: + return u if isinstance(u, str) else str(u) if u else None + + doc['id'] = stringify(doc.get('id', None)) + doc = add_eo3_parts(doc, resolution=resolution) lineage = doc.pop('lineage', {}) @@ -205,11 +234,11 @@ if isinstance(uuids, dict) or isinstance(uuids[0], dict): raise ValueError("Embedded lineage not supported for eo3 metadata types") if len(uuids) == 1: - return {name: {'id': uuids[0]}} + return {name: {'id': stringify(uuids[0])}} out = {} for idx, uuid in enumerate(uuids, start=1): - out[name+str(idx)] = {'id': uuid} + out[name+str(idx)] = {'id': stringify(uuid)} return out sources = {} diff -Nru datacube-1.8.7/datacube/index/exceptions.py datacube-1.8.9/datacube/index/exceptions.py --- datacube-1.8.7/datacube/index/exceptions.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/exceptions.py 2022-11-17 00:47:28.000000000 +0000 @@ -2,6 +2,8 @@ # # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 + + class DuplicateRecordError(Exception): pass @@ -12,3 +14,9 @@ class IndexSetupError(Exception): pass + + +class TransactionException(Exception): # noqa: N818 + def __init__(self, *args, commit=False, **kwargs): + super().__init__(*args, **kwargs) + self.commit = commit diff -Nru datacube-1.8.7/datacube/index/fields.py datacube-1.8.9/datacube/index/fields.py --- datacube-1.8.7/datacube/index/fields.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/fields.py 2022-11-17 00:47:28.000000000 +0000 @@ -29,6 +29,8 @@ def __init__(self, *exprs): super(OrExpression, self).__init__() self.exprs = exprs + # Or expressions built by dc.load are always made up of simple expressions that share the same field. 
+ self.field = exprs[0].field def evaluate(self, ctx): return any(expr.evaluate(ctx) for expr in self.exprs) diff -Nru datacube-1.8.7/datacube/index/hl.py datacube-1.8.9/datacube/index/hl.py --- datacube-1.8.7/datacube/index/hl.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/hl.py 2022-11-17 00:47:28.000000000 +0000 @@ -7,17 +7,28 @@ """ import json import toolz -from types import SimpleNamespace +from uuid import UUID +from typing import cast, Any, Callable, Optional, Iterable, List, Mapping, Sequence, Tuple, Union, MutableMapping -from datacube.model import Dataset +from datacube.model import Dataset, DatasetType as Product +from datacube.index.abstract import AbstractIndex from datacube.utils import changes, InvalidDocException, SimpleDocNav, jsonify_document from datacube.model.utils import BadMatch, dedup_lineage, remap_lineage_doc, flatten_datasets from datacube.utils.changes import get_doc_changes -from .eo3 import prep_eo3, is_doc_eo3 # type: ignore[attr-defined] +from .eo3 import prep_eo3, is_doc_eo3, is_doc_geo # type: ignore[attr-defined] -def load_rules_from_types(index, product_names=None, excluding=None): - products = [] +class ProductRule: + def __init__(self, product: Product, signature: Mapping[str, Any]): + self.product = product + self.signature = signature + + +def load_rules_from_types(index: AbstractIndex, + product_names: Optional[Iterable[str]] = None, + excluding: Optional[Iterable[str]] = None + ) -> Union[Tuple[List[ProductRule], None], Tuple[None, str]]: + products: List[Product] = [] if product_names: for name in product_names: product = index.products.get_by_name(name) @@ -32,23 +43,23 @@ products = [p for p in products if p.name not in excluding] if len(products) == 0: - return None, 'Found no products in the database' + return None, 'Found no matching products in the database' - return [SimpleNamespace(product=p, signature=p.metadata_doc) for p in products], None + return [ProductRule(p, p.metadata_doc) for p in products], None -def product_matcher(rules): +def product_matcher(rules: Sequence[ProductRule]) -> Callable[[Mapping[str, Any]], Product]: """Given product matching rules return a function mapping a document to a matching product. 
""" assert len(rules) > 0 - def matches(doc, rule): + def matches(doc: Mapping[str, Any], rule: ProductRule) -> bool: return changes.contains(doc, rule.signature) def single_product_matcher(rule): - def match(doc): + def match(doc: Mapping[str, Any]) -> bool: if matches(doc, rule): return rule.product @@ -64,7 +75,7 @@ if len(rules) == 1: return single_product_matcher(rules[0]) - def match(doc): + def match(doc: Mapping[str, Any]) -> Product: matched = [rule.product for rule in rules if changes.contains(doc, rule.signature)] if len(matched) == 1: @@ -82,7 +93,7 @@ return match -def check_dataset_consistent(dataset): +def check_dataset_consistent(dataset: Dataset) -> Tuple[bool, Optional[str]]: """ :type dataset: datacube.model.Dataset :return: (Is consistent, [error message|None]) @@ -113,7 +124,7 @@ return True, None -def check_consistent(a, b): +def check_consistent(a: Mapping[str, Any], b: Mapping[str, Any]) -> Tuple[bool, Optional[str]]: diffs = get_doc_changes(a, b) if len(diffs) == 0: return True, None @@ -125,14 +136,20 @@ return False, ", ".join([render_diff(offset, a, b) for offset, a, b in diffs]) -def dataset_resolver(index, - product_matching_rules, - fail_on_missing_lineage=False, - verify_lineage=True, - skip_lineage=False): +DatasetOrError = Union[ + Tuple[Dataset, None], + Tuple[None, Union[str, Exception]] +] + + +def dataset_resolver(index: AbstractIndex, + product_matching_rules: Sequence[ProductRule], + fail_on_missing_lineage: bool = False, + verify_lineage: bool = True, + skip_lineage: bool = False) -> Callable[[SimpleDocNav, str], DatasetOrError]: match_product = product_matcher(product_matching_rules) - def resolve_no_lineage(ds, uri): + def resolve_no_lineage(ds: SimpleDocNav, uri: str) -> DatasetOrError: doc = ds.doc_without_lineage_sources try: product = match_product(doc) @@ -141,44 +158,57 @@ return Dataset(product, doc, uris=[uri], sources={}), None - def resolve(main_ds, uri): + def resolve(main_ds_doc: SimpleDocNav, uri: str) -> DatasetOrError: try: - main_ds = SimpleDocNav(dedup_lineage(main_ds)) + main_ds = SimpleDocNav(dedup_lineage(main_ds_doc)) except InvalidDocException as e: return None, e main_uuid = main_ds.id + if not main_uuid: + return None, "No id defined in dataset doc" + ds_by_uuid = toolz.valmap(toolz.first, flatten_datasets(main_ds)) all_uuid = list(ds_by_uuid) - db_dss = {str(ds.id): ds for ds in index.datasets.bulk_get(all_uuid)} + db_dss = {ds.id: ds for ds in index.datasets.bulk_get(all_uuid)} lineage_uuids = set(filter(lambda x: x != main_uuid, all_uuid)) missing_lineage = lineage_uuids - set(db_dss) if missing_lineage and fail_on_missing_lineage: - return None, "Following lineage datasets are missing from DB: %s" % (','.join(missing_lineage)) + return None, "Following lineage datasets are missing from DB: %s" % ( + ','.join(str(m) for m in missing_lineage)) - if verify_lineage and not is_doc_eo3(main_ds.doc): - bad_lineage = [] - - for uuid in lineage_uuids: - if uuid in db_dss: - ok, err = check_consistent(jsonify_document(ds_by_uuid[uuid].doc_without_lineage_sources), - db_dss[uuid].metadata_doc) - if not ok: - bad_lineage.append((uuid, err)) - - if len(bad_lineage) > 0: - error_report = '\n'.join('Inconsistent lineage dataset {}:\n> {}'.format(uuid, err) - for uuid, err in bad_lineage) - return None, error_report + if not is_doc_eo3(main_ds.doc): + if is_doc_geo(main_ds.doc, check_eo3=False): + if not index.supports_legacy: + return None, "Legacy metadata formats not supported by the current index driver." 
+ else: + if not index.supports_nongeo: + return None, "Non-geospatial metadata formats not supported by the current index driver." + if verify_lineage: + bad_lineage = [] + + for uuid in lineage_uuids: + if uuid in db_dss: + ok, err = check_consistent(jsonify_document(ds_by_uuid[uuid].doc_without_lineage_sources), + db_dss[uuid].metadata_doc) + if not ok: + bad_lineage.append((uuid, err)) + + if len(bad_lineage) > 0: + error_report = '\n'.join('Inconsistent lineage dataset {}:\n> {}'.format(uuid, err) + for uuid, err in bad_lineage) + return None, error_report - def with_cache(v, k, cache): + def with_cache(v: Dataset, k: UUID, cache: MutableMapping[UUID, Dataset]) -> Dataset: cache[k] = v return v - def resolve_ds(ds, sources, cache=None): + def resolve_ds(ds: SimpleDocNav, + sources: Optional[Mapping[UUID, Dataset]], + cache: MutableMapping[UUID, Dataset]) -> Dataset: cached = cache.get(ds.id) if cached is not None: return cached @@ -194,12 +224,10 @@ product = match_product(doc) return with_cache(Dataset(product, doc, uris=uris, sources=sources), ds.id, cache) - try: return remap_lineage_doc(main_ds, resolve_ds, cache={}), None except BadMatch as e: return None, e - return resolve_no_lineage if skip_lineage else resolve @@ -238,13 +266,17 @@ :param eo3: 'auto'/True/False by default auto-detect EO3 datasets and pre-process them """ def __init__(self, - index, - products=None, - exclude_products=None, - fail_on_missing_lineage=False, - verify_lineage=True, - skip_lineage=False, - eo3='auto'): + index: AbstractIndex, + products: Optional[Sequence[str]] = None, + exclude_products: Optional[Sequence[str]] = None, + fail_on_missing_lineage: bool = False, + verify_lineage: bool = True, + skip_lineage: bool = False, + eo3: Union[bool, str] = 'auto'): + if not index.supports_legacy and not index.supports_nongeo: + if not eo3: + raise ValueError("EO3 cannot be set to False for a non-legacy geo-only index.") + eo3 = True rules, err_msg = load_rules_from_types(index, product_names=products, excluding=exclude_products) @@ -258,7 +290,7 @@ verify_lineage=verify_lineage, skip_lineage=skip_lineage) - def __call__(self, doc, uri): + def __call__(self, doc_in: Union[SimpleDocNav, Mapping[str, Any]], uri: str) -> DatasetOrError: """Attempt to construct dataset from metadata document and a uri. 
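Doc2Dataset now rejects documents that the active index driver cannot handle (legacy or non-geospatial metadata), but its calling convention is unchanged. A minimal indexing sketch, assuming a configured ODC database and an EO3 metadata document at the hypothetical path used below:

import yaml
from datacube import Datacube
from datacube.index.hl import Doc2Dataset

dc = Datacube(app="indexing-sketch")
resolver = Doc2Dataset(dc.index, skip_lineage=True)    # eo3 defaults to 'auto'

with open("odc-metadata.yaml") as f:                   # hypothetical path
    doc = yaml.safe_load(f)

dataset, err = resolver(doc, "file:///data/scene/odc-metadata.yaml")
if dataset is None:
    raise RuntimeError(f"Could not resolve dataset: {err}")
dc.index.datasets.add(dataset)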
:param doc: Dictionary or SimpleDocNav object @@ -267,8 +299,10 @@ :return: (dataset, None) is successful, :return: (None, ErrorMessage) on failure """ - if not isinstance(doc, SimpleDocNav): - doc = SimpleDocNav(doc) + if isinstance(doc_in, SimpleDocNav): + doc: SimpleDocNav = doc_in + else: + doc = SimpleDocNav(doc_in) if self._eo3: auto_skip = self._eo3 == 'auto' @@ -276,10 +310,10 @@ dataset, err = self._ds_resolve(doc, uri) if dataset is None: - return None, err + return None, cast(Union[str, Exception], err) is_consistent, reason = check_dataset_consistent(dataset) if not is_consistent: - return None, reason + return None, cast(Union[str, Exception], reason) return dataset, None diff -Nru datacube-1.8.7/datacube/index/memory/_datasets.py datacube-1.8.9/datacube/index/memory/_datasets.py --- datacube-1.8.7/datacube/index/memory/_datasets.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/memory/_datasets.py 2022-11-17 00:47:28.000000000 +0000 @@ -72,8 +72,7 @@ return (self.has(id_) for id_ in ids_) def add(self, dataset: Dataset, - with_lineage: Optional[bool] = None, - **kwargs) -> Dataset: + with_lineage: bool = True) -> Dataset: if with_lineage is None: with_lineage = True _LOG.info('indexing %s', dataset.id) @@ -330,9 +329,9 @@ raise ValueError(f"Unsupported query mode: {mode}") ids: Set[DSID] = set() if mode == "exact": - test: Callable[[str], bool] = lambda l: l == uri + test: Callable[[str], bool] = lambda l: l == uri # noqa: E741 else: - test = lambda l: l.startswith(uri) + test = lambda l: l.startswith(uri) # noqa: E731 for id_, locs in self.locations.items(): for loc in locs: if test(loc): @@ -401,9 +400,10 @@ if not product_queries: raise ValueError(f"No products match source filter: {source_filter}") if len(product_queries) > 1: - raise RuntimeError(f"Multiproduct source_filters are not supported. Try adding 'product' field.") + raise RuntimeError("Multiproduct source_filters are not supported. 
Try adding 'product' field.") source_queries, source_product = product_queries[0] - source_exprs = tuple(fields.to_expressions(source_product.metadata_type.dataset_fields.get, **source_queries)) + source_exprs = tuple(fields.to_expressions(source_product.metadata_type.dataset_fields.get, + **source_queries)) else: source_product = None source_exprs = () @@ -462,10 +462,10 @@ **query: QueryField ) -> Iterable[Dataset]: return cast(Iterable[Dataset], self._search( - return_format=self.RET_FORMAT_DATASETS, - limit=limit, - source_filter=source_filter, - **query) + return_format=self.RET_FORMAT_DATASETS, + limit=limit, + source_filter=source_filter, + **query) ) def _search_grouped( @@ -475,10 +475,10 @@ **query: QueryField ) -> Iterable[Tuple[Iterable[Dataset], Product]]: return cast(Iterable[Tuple[Iterable[Dataset], Product]], self._search( - return_format=self.RET_FORMAT_PRODUCT_GROUPED, - limit=limit, - source_filter=source_filter, - **query) + return_format=self.RET_FORMAT_PRODUCT_GROUPED, + limit=limit, + source_filter=source_filter, + **query) ) def _get_prod_queries(self, **query: QueryField) -> Iterable[Tuple[Mapping[str, QueryField], Product]]: @@ -503,8 +503,8 @@ for ds in self.search(limit=limit, **query): # type: ignore[arg-type] ds_fields = get_dataset_fields(ds.type.metadata_type.definition) result_vals = { - fn: ds_fields[fn].extract(ds.metadata_doc) # type: ignore[attr-defined] - for fn in field_names + fn: ds_fields[fn].extract(ds.metadata_doc) # type: ignore[attr-defined] + for fn in field_names } yield result_type(**result_vals) @@ -516,8 +516,8 @@ yield (prod, len(list(datasets))) def count_by_product_through_time(self, - period: str, - **query: QueryField + period: str, + **query: QueryField ) -> Iterable[ Tuple[ Product, @@ -543,6 +543,7 @@ if precision <= 0: raise ValueError('Invalid period string. 
Must specify a natural number of days, weeks, months or years') unit = match.group("unit") + def next_period(prev: datetime.datetime) -> datetime.datetime: if unit == 'day': return prev + datetime.timedelta(days=precision) @@ -598,7 +599,7 @@ ], ] ]: - YieldType = Tuple[Product, Iterable[Tuple[Range, int]]] + YieldType = Tuple[Product, Iterable[Tuple[Range, int]]] # noqa: N806 query = dict(query) try: start, end = cast(Range, query.pop('time')) @@ -665,7 +666,7 @@ min_time = dsmin if max_time is None or dsmax > max_time: max_time = dsmax - return (cast(datetime.datetime, min_time), cast(datetime.datetime,max_time)) + return (cast(datetime.datetime, min_time), cast(datetime.datetime, max_time)) # pylint: disable=redefined-outer-name def search_returning_datasets_light( @@ -679,6 +680,7 @@ custom_fields = build_custom_fields(custom_offsets) else: custom_fields = {} + def make_ds_light(ds: Dataset) -> Tuple: fields = { fname: ds.metadata_type.dataset_fields[fname] @@ -718,3 +720,6 @@ indexed_time=datetime.datetime.now() if for_save and orig.indexed_time is None else orig.indexed_time, archived_time=None if for_save else orig.archived_time ) + + def spatial_extent(self, ids, crs=None): + return None diff -Nru datacube-1.8.7/datacube/index/memory/_fields.py datacube-1.8.9/datacube/index/memory/_fields.py --- datacube-1.8.7/datacube/index/memory/_fields.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/memory/_fields.py 2022-11-17 00:47:28.000000000 +0000 @@ -2,6 +2,7 @@ from datacube.model.fields import SimpleField, Field, get_dataset_fields as generic_get_dataset_fields from datacube.index.abstract import Offset + # TODO: SimpleFields cannot handle non-metadata fields because e.g. the extract API expects a doc, not a Dataset model def get_native_fields() -> MutableMapping[str, Field]: return { @@ -37,11 +38,13 @@ ), } + def get_dataset_fields(metadata_definition: Mapping[str, Any]) -> Mapping[str, Field]: fields = get_native_fields() fields.update(generic_get_dataset_fields(metadata_definition)) return fields + def build_custom_fields(custom_offsets: Mapping[str, Offset]): return { name: SimpleField( diff -Nru datacube-1.8.7/datacube/index/memory/index.py datacube-1.8.9/datacube/index/memory/index.py --- datacube-1.8.7/datacube/index/memory/index.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/memory/index.py 2022-11-17 00:47:28.000000000 +0000 @@ -3,18 +3,24 @@ # Copyright (c) 2015-2022 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import logging +from threading import Lock from datacube.index.memory._datasets import DatasetResource # type: ignore from datacube.index.memory._fields import get_dataset_fields from datacube.index.memory._metadata_types import MetadataTypeResource from datacube.index.memory._products import ProductResource from datacube.index.memory._users import UserResource -from datacube.index.abstract import AbstractIndex, AbstractIndexDriver +from datacube.index.abstract import AbstractIndex, AbstractIndexDriver, UnhandledTransaction from datacube.model import MetadataType +from datacube.utils.geometry import CRS _LOG = logging.getLogger(__name__) +counter = 0 +counter_lock = Lock() + + class Index(AbstractIndex): """ Lightweight in-memory index driver @@ -25,6 +31,10 @@ self._metadata_types = MetadataTypeResource() self._products = ProductResource(self.metadata_types) self._datasets = DatasetResource(self.products) + global counter + with counter_lock: + counter = counter + 1 + self._index_id = f"memory={counter}" 
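Each in-memory index instance now receives a unique index_id from the lock-protected module counter above, and gains stub implementations of the new transaction and spatial-index APIs. A quick sketch, assuming datacube >= 1.8.9:

from datacube.index.memory.index import Index
from datacube.utils.geometry import CRS

idx_a, idx_b = Index(), Index()
print(idx_a.index_id, idx_b.index_id)                 # e.g. memory=1 memory=2

txn = idx_a.transaction()                             # an UnhandledTransaction tied to index_id
print(idx_a.create_spatial_index(CRS("EPSG:4326")))   # False, with a warning logged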
@property def users(self) -> UserResource: @@ -46,6 +56,13 @@ def url(self) -> str: return "memory" + @property + def index_id(self) -> str: + return self._index_id + + def transaction(self) -> UnhandledTransaction: + return UnhandledTransaction(self.index_id) + @classmethod def from_config(cls, config, application_name=None, validate_connection=True): return cls() @@ -60,6 +77,10 @@ def close(self): pass + def create_spatial_index(self, crs: CRS) -> bool: + _LOG.warning("memory index driver does not support spatio-temporal indexes") + return False + def __repr__(self): return "Index" diff -Nru datacube-1.8.7/datacube/index/memory/_metadata_types.py datacube-1.8.9/datacube/index/memory/_metadata_types.py --- datacube-1.8.7/datacube/index/memory/_metadata_types.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/memory/_metadata_types.py 2022-11-17 00:47:28.000000000 +0000 @@ -31,6 +31,7 @@ MetadataType.validate(metadata_type.definition) # type: ignore[attr-defined] if metadata_type.name in self.by_name: # Error unless it's the exact same metadata_type + _LOG.warning("Metadata Type exists, checking for differences") check_doc_unchanged(self.by_name[metadata_type.name].definition, jsonify_document(metadata_type.definition), f"Metadata Type {metadata_type.name}") @@ -74,7 +75,7 @@ can_update, safe_changes, unsafe_changes = self.can_update(metadata_type, allow_unsafe_updates) if not safe_changes and not unsafe_changes: - _LOG.info(f"No changes detected for metadata type {metadata_type.name}") + _LOG.warning(f"No changes detected for metadata type {metadata_type.name}") return cast(MetadataType, self.get_by_name(metadata_type.name)) if not can_update: diff -Nru datacube-1.8.7/datacube/index/memory/_products.py datacube-1.8.9/datacube/index/memory/_products.py --- datacube-1.8.7/datacube/index/memory/_products.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/memory/_products.py 2022-11-17 00:47:28.000000000 +0000 @@ -10,7 +10,7 @@ from datacube.model import DatasetType as Product from datacube.utils import changes, jsonify_document, _readable_offset from datacube.utils.changes import AllowPolicy, Change, Offset, check_doc_unchanged, get_doc_changes, classify_changes -from typing import Iterable, Iterator, Mapping, Tuple, Union, cast +from typing import Iterable, Iterator, Mapping, Tuple, cast _LOG = logging.getLogger(__name__) @@ -26,6 +26,7 @@ Product.validate(product.definition) # type: ignore[attr-defined] existing = self.get_by_name(product.name) if existing: + _LOG.warning(f"Product {product.name} is already in the database, checking for differences") check_doc_unchanged( existing.definition, jsonify_document(product.definition), @@ -36,7 +37,7 @@ if mdt is None: _LOG.warning(f'Adding metadata_type "{product.metadata_type.name}" as it doesn\'t exist') product.metadata_type = self.metadata_type_resource.add(product.metadata_type, - allow_table_lock=allow_table_lock) + allow_table_lock=allow_table_lock) clone = self.clone(product) clone.id = self.next_id self.next_id += 1 @@ -90,7 +91,7 @@ can_update, safe_changes, unsafe_changes = self.can_update(product, allow_unsafe_updates) if not safe_changes and not unsafe_changes: - _LOG.info(f"No changes detected for product {product.name}") + _LOG.warning(f"No changes detected for product {product.name}") return cast(Product, self.get_by_name(product.name)) if not can_update: @@ -100,7 +101,9 @@ existing = cast(Product, self.get_by_name(product.name)) if product.metadata_type.name != 
existing.metadata_type.name: raise ValueError("Unsafe change: cannot (currently) switch metadata types for a product") + _LOG.info(f"Updating product {product.name}") + persisted = self.clone(product) persisted.id = existing.id self.by_id[persisted.id] = persisted diff -Nru datacube-1.8.7/datacube/index/memory/_users.py datacube-1.8.9/datacube/index/memory/_users.py --- datacube-1.8.7/datacube/index/memory/_users.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/memory/_users.py 2022-11-17 00:47:28.000000000 +0000 @@ -5,6 +5,7 @@ from typing import Iterable, Optional, Tuple from datacube.index.abstract import AbstractUserResource + class User: def __init__(self, username: str, password: str, role: str, description: Optional[str] = None): @@ -29,7 +30,7 @@ "agdc_ingest", "agdc_manage", "agdc_admin", - + # For forwards compatibility with future driver(s) "odc_user", "odc_ingest", diff -Nru datacube-1.8.7/datacube/index/null/_datasets.py datacube-1.8.9/datacube/index/null/_datasets.py --- datacube-1.8.7/datacube/index/null/_datasets.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/null/_datasets.py 2022-11-17 00:47:28.000000000 +0000 @@ -2,10 +2,10 @@ # # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 -from typing import Iterable, Union, Optional from datacube.index.abstract import AbstractDatasetResource, DSID from datacube.model import Dataset, DatasetType +from typing import Iterable class DatasetResource(AbstractDatasetResource): @@ -28,8 +28,7 @@ return [False for id_ in ids_] def add(self, dataset: Dataset, - with_lineage: Optional[bool] = None, - **kwargs) -> Dataset: + with_lineage: bool = True) -> Dataset: raise NotImplementedError() def search_product_duplicates(self, product: DatasetType, *args): @@ -113,3 +112,6 @@ # pylint: disable=redefined-outer-name def search_returning_datasets_light(self, field_names: tuple, custom_offsets=None, limit=None, **query): return [] + + def spatial_extent(self, ids, crs=None): + return None diff -Nru datacube-1.8.7/datacube/index/null/index.py datacube-1.8.9/datacube/index/null/index.py --- datacube-1.8.7/datacube/index/null/index.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/null/index.py 2022-11-17 00:47:28.000000000 +0000 @@ -8,9 +8,10 @@ from datacube.index.null._metadata_types import MetadataTypeResource from datacube.index.null._products import ProductResource from datacube.index.null._users import UserResource -from datacube.index.abstract import AbstractIndex, AbstractIndexDriver +from datacube.index.abstract import AbstractIndex, AbstractIndexDriver, UnhandledTransaction from datacube.model import MetadataType from datacube.model.fields import get_dataset_fields +from datacube.utils.geometry import CRS _LOG = logging.getLogger(__name__) @@ -19,6 +20,8 @@ """ (Sub-)Minimal (non-)implementation of the Index API. 
""" + # Supports everything but persistance + supports_persistance = False def __init__(self) -> None: self._users = UserResource() @@ -46,6 +49,13 @@ def url(self) -> str: return "null" + @property + def index_id(self) -> str: + return "null" + + def transaction(self) -> UnhandledTransaction: + return UnhandledTransaction(self.index_id) + @classmethod def from_config(cls, config, application_name=None, validate_connection=True): return cls() @@ -60,6 +70,10 @@ def close(self): pass + def create_spatial_index(self, crs: CRS) -> bool: + _LOG.warning("null driver does not support spatio-temporal indexes") + return False + def __repr__(self): return "Index" diff -Nru datacube-1.8.7/datacube/index/null/_metadata_types.py datacube-1.8.9/datacube/index/null/_metadata_types.py --- datacube-1.8.7/datacube/index/null/_metadata_types.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/null/_metadata_types.py 2022-11-17 00:47:28.000000000 +0000 @@ -29,7 +29,7 @@ def get_by_name_unsafe(self, name): raise KeyError(name) - def check_field_indexes(self, allow_table_lock=False, rebuild_all=None, + def check_field_indexes(self, allow_table_lock=False, rebuild_views=False, rebuild_indexes=False): raise NotImplementedError diff -Nru datacube-1.8.7/datacube/index/null/_users.py datacube-1.8.9/datacube/index/null/_users.py --- datacube-1.8.7/datacube/index/null/_users.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/null/_users.py 2022-11-17 00:47:28.000000000 +0000 @@ -5,6 +5,7 @@ from typing import Iterable, Optional, Tuple from datacube.index.abstract import AbstractUserResource + class UserResource(AbstractUserResource): def __init__(self) -> None: pass diff -Nru datacube-1.8.7/datacube/index/postgis/_datasets.py datacube-1.8.9/datacube/index/postgis/_datasets.py --- datacube-1.8.7/datacube/index/postgis/_datasets.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgis/_datasets.py 2022-11-17 00:47:28.000000000 +0000 @@ -9,19 +9,21 @@ import logging import warnings from collections import namedtuple -from typing import Iterable, Tuple, Union, List, Optional +from typing import Iterable, Union, List, Optional from uuid import UUID from sqlalchemy import select, func from datacube.drivers.postgis._fields import SimpleDocField, DateDocField -from datacube.drivers.postgis._schema import DATASET -from datacube.index.abstract import AbstractDatasetResource, DSID -from datacube.model import Dataset, DatasetType +from datacube.drivers.postgis._schema import Dataset as SQLDataset +from datacube.index.abstract import AbstractDatasetResource, DatasetSpatialMixin, DSID +from datacube.index.postgis._transaction import IndexResourceAddIn +from datacube.model import Dataset, Product from datacube.model.fields import Field from datacube.model.utils import flatten_datasets -from datacube.utils import jsonify_document, _readable_offset, changes, cached_property +from datacube.utils import jsonify_document, _readable_offset, changes from datacube.utils.changes import get_doc_changes +from datacube.utils.geometry import CRS, Geometry from datacube.index import fields _LOG = logging.getLogger(__name__) @@ -30,43 +32,22 @@ # It's a public api, so we can't reorganise old methods. 
# pylint: disable=too-many-public-methods, too-many-lines -class DatasetSpatialMixin(object): - __slots__ = () - @property - def _gs(self): - return self.grid_spatial - - @property - def crs(self): - return Dataset.crs.__get__(self) - - @cached_property - def extent(self): - return Dataset.extent.func(self) - - @property - def transform(self): - return Dataset.transform.__get__(self) - - @property - def bounds(self): - return Dataset.bounds.__get__(self) - - -class DatasetResource(AbstractDatasetResource): +class DatasetResource(AbstractDatasetResource, IndexResourceAddIn): """ :type _db: datacube.drivers.postgis._connections.PostgresDb :type types: datacube.index._products.ProductResource """ - def __init__(self, db, dataset_type_resource): + def __init__(self, db, index): """ :type db: datacube.drivers.postgis._connections.PostgresDb - :type dataset_type_resource: datacube.index._products.ProductResource + :type product_resource: datacube.index._products.ProductResource """ self._db = db - self.types = dataset_type_resource + self._index = index + self.types = self._index.products # types is a compatibility alias for products. + self.products = self._index.products def get(self, id_: Union[str, UUID], include_sources=False): """ @@ -79,7 +60,7 @@ if isinstance(id_, str): id_ = UUID(id_) - with self._db.connect() as connection: + with self._db_connection() as connection: if not include_sources: dataset = connection.get_dataset(id_) return self._make(dataset, full_info=True) if dataset else None @@ -108,7 +89,7 @@ ids = [to_uuid(i) for i in ids] - with self._db.connect() as connection: + with self._db_connection() as connection: rows = connection.get_datasets(ids) return [self._make(r, full_info=True) for r in rows] @@ -121,7 +102,7 @@ """ if not isinstance(id_, UUID): id_ = UUID(id_) - with self._db.connect() as connection: + with self._db_connection() as connection: return [ self._make(result, full_info=True) for result in connection.get_derived_datasets(id_) @@ -134,7 +115,7 @@ :param typing.Union[UUID, str] id_: dataset id :rtype: bool """ - with self._db.connect() as connection: + with self._db_connection() as connection: return connection.contains_dataset(id_) def bulk_has(self, ids_): @@ -147,58 +128,53 @@ :rtype: [bool] """ - with self._db.connect() as connection: + with self._db_connection() as connection: existing = set(connection.datasets_intersection(ids_)) return [x in existing for x in map((lambda x: UUID(x) if isinstance(x, str) else x), ids_)] def add(self, dataset: Dataset, - with_lineage: Optional[bool] = None, - **kwargs) -> Dataset: + with_lineage: bool = True) -> Dataset: """ Add ``dataset`` to the index. No-op if it is already present. :param dataset: dataset to add :param with_lineage: - - ``True|None`` attempt adding lineage datasets if missing + - ``True (default)`` attempt adding lineage datasets if missing - ``False`` record lineage relations, but do not attempt adding lineage datasets to the db - :param kwargs: only used to support deprecated behaviour :rtype: Dataset """ + sp_crses = self._db.spatial_indexes() + def process_bunch(dss, main_ds, transaction): edges = [] - - # First insert all new datasets + dsids_for_spatial_indexing = [] + # 1: Loop over datasets for ds in dss: + # 1a. insert (if not already exists) is_new = transaction.insert_dataset(ds.metadata_doc_without_lineage(), ds.id, ds.type.id) sources = ds.sources + # 1b. 
Build edge graph for new datasets if is_new and sources is not None: edges.extend((name, ds.id, src.id) for name, src in sources.items()) - - # Second insert lineage graph edges + # 1c. Prepare spatial index extents + if is_new: + dsids_for_spatial_indexing.append(ds.id) + # 2: insert lineage graph edges for ee in edges: transaction.insert_dataset_source(*ee) - + # 3: insert spatial indexes + transaction.update_spindex(dsids=dsids_for_spatial_indexing) # Finally update location for top-level dataset only if main_ds.uris is not None: self._ensure_new_locations(main_ds, transaction=transaction) - if with_lineage is None: - policy = kwargs.pop('sources_policy', None) - if policy is not None: - _LOG.debug('Use of sources_policy is deprecated') - with_lineage = (policy != "skip") - if policy == 'verify': - _LOG.debug('Verify is no longer done inside add') - else: - with_lineage = True - _LOG.info('Indexing %s', dataset.id) if with_lineage: @@ -219,12 +195,12 @@ dss = [dataset] - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: process_bunch(dss, dataset, transaction) return dataset - def search_product_duplicates(self, product: DatasetType, *args): + def search_product_duplicates(self, product: Product, *args): """ Find dataset ids who have duplicates of the given set of field names. @@ -244,7 +220,7 @@ expressions = [product.metadata_type.dataset_fields.get('product') == product.name] - with self._db.connect() as connection: + with self._db_connection() as connection: for record in connection.get_duplicates(group_fields, expressions): dataset_ids = set(record[0]) grouped_fields = tuple(record[1:]) @@ -259,6 +235,8 @@ :rtype: bool,list[change],list[change] """ need_sources = dataset.sources is not None + # TODO: Source retrieval is broken. + need_sources = False existing = self.get(dataset.id, include_sources=need_sources) if not existing: raise ValueError('Unknown dataset %s, cannot update – did you intend to add it?' % dataset.id) @@ -312,7 +290,7 @@ _LOG.info("Updating dataset %s", dataset.id) product = self.types.get_by_name(dataset.type.name) - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: if not transaction.update_dataset(dataset.metadata_doc_without_lineage(), dataset.id, product.id): raise ValueError("Failed to update dataset %s..." 
% dataset.id) @@ -331,7 +309,7 @@ # front of a stack for uri in new_uris[::-1]: if transaction is None: - with self._db.begin() as tr: + with self._db_connection(transaction=True) as tr: insert_one(uri, tr) else: insert_one(uri, transaction) @@ -342,7 +320,7 @@ :param Iterable[UUID] ids: list of dataset ids to archive """ - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: for id_ in ids: transaction.archive_dataset(id_) @@ -352,7 +330,7 @@ :param Iterable[UUID] ids: list of dataset ids to restore """ - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: for id_ in ids: transaction.restore_dataset(id_) @@ -362,7 +340,7 @@ :param ids: iterable of dataset ids to purge """ - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: for id_ in ids: transaction.delete_dataset(id_) @@ -376,7 +354,7 @@ :param archived: :rtype: list[UUID] """ - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: return [dsid[0] for dsid in transaction.all_dataset_ids(archived)] def get_field_names(self, product_name=None): @@ -403,7 +381,7 @@ :param typing.Union[UUID, str] id_: dataset id :rtype: list[str] """ - with self._db.connect() as connection: + with self._db_connection() as connection: return connection.get_locations(id_) def get_archived_locations(self, id_): @@ -413,7 +391,7 @@ :param typing.Union[UUID, str] id_: dataset id :rtype: list[str] """ - with self._db.connect() as connection: + with self._db_connection() as connection: return [uri for uri, archived_dt in connection.get_archived_locations(id_)] def get_archived_location_times(self, id_): @@ -423,7 +401,7 @@ :param typing.Union[UUID, str] id_: dataset id :rtype: List[Tuple[str, datetime.datetime]] """ - with self._db.connect() as connection: + with self._db_connection() as connection: return list(connection.get_archived_locations(id_)) def add_location(self, id_, uri): @@ -438,7 +416,7 @@ warnings.warn("Cannot add empty uri. (dataset %s)" % id_) return False - with self._db.connect() as connection: + with self._db_connection() as connection: return connection.insert_dataset_location(id_, uri) def get_datasets_for_location(self, uri, mode=None): @@ -449,7 +427,7 @@ :param str mode: 'exact', 'prefix' or None (to guess) :return: """ - with self._db.connect() as connection: + with self._db_connection() as connection: return (self._make(row) for row in connection.get_datasets_for_location(uri, mode=mode)) def remove_location(self, id_, uri): @@ -460,7 +438,7 @@ :param str uri: fully qualified uri :returns bool: Was one removed? 
""" - with self._db.connect() as connection: + with self._db_connection() as connection: was_removed = connection.remove_location(id_, uri) return was_removed @@ -472,7 +450,7 @@ :param str uri: fully qualified uri :return bool: location was able to be archived """ - with self._db.connect() as connection: + with self._db_connection() as connection: was_archived = connection.archive_location(id_, uri) return was_archived @@ -484,7 +462,7 @@ :param str uri: fully qualified uri :return bool: location was able to be restored """ - with self._db.connect() as connection: + with self._db_connection() as connection: was_restored = connection.restore_location(id_, uri) return was_restored @@ -499,7 +477,7 @@ else: uris = [] - product = product or self.types.get(dataset_res.dataset_type_ref) + product = product or self.types.get(dataset_res.product_ref) return Dataset( type_=product, @@ -525,7 +503,7 @@ :param dict metadata: :rtype: list[Dataset] """ - with self._db.connect() as connection: + with self._db_connection() as connection: for dataset in self._make_many(connection.search_datasets_by_metadata(metadata)): yield dataset @@ -548,7 +526,7 @@ Perform a search, returning datasets grouped by product type. :param dict[str,str|float|datacube.model.Range] query: - :rtype: __generator[(DatasetType, __generator[Dataset])]] + :rtype: __generator[(Product, __generator[Dataset])]] """ for product, datasets in self._do_search_by_product(query): yield product, self._make_many(datasets, product) @@ -572,7 +550,6 @@ return_fields=True, select_field_names=field_names, limit=limit): - for columns in results: yield result_type(*columns) @@ -596,7 +573,7 @@ :param dict[str,str|float|datacube.model.Range] query: :returns: Sequence of (product, count) - :rtype: __generator[(DatasetType, int)]] + :rtype: __generator[(Product, int)]] """ return self._do_count_by_product(query) @@ -608,7 +585,7 @@ :param dict[str,str|float|datacube.model.Range] query: :param str period: Time range for each slice: '1 month', '1 day' etc. :returns: For each matching product type, a list of time ranges and their count. - :rtype: __generator[(DatasetType, list[(datetime.datetime, datetime.datetime), int)]] + :rtype: __generator[(Product, list[(datetime.datetime, datetime.datetime), int)]] """ return self._do_time_count(period, query) @@ -626,7 +603,7 @@ """ return next(self._do_time_count(period, query, ensure_single=True))[1] - def _get_dataset_types(self, q): + def _get_products(self, q): types = set() if 'product' in q.keys(): types.add(self.types.get_by_name(q['product'])) @@ -640,27 +617,15 @@ def _get_product_queries(self, query): for product, q in self.types.search_robust(**query): - q['dataset_type_id'] = product.id + q['product_id'] = product.id yield q, product # pylint: disable=too-many-locals def _do_search_by_product(self, query, return_fields=False, select_field_names=None, with_source_ids=False, source_filter=None, limit=None): - if source_filter: - product_queries = list(self._get_product_queries(source_filter)) - if not product_queries: - # No products match our source filter, so there will be no search results regardless. - raise ValueError('No products match source filter: ' % source_filter) - if len(product_queries) > 1: - raise RuntimeError("Multi-product source filters are not supported. 
Try adding 'product' field") - - source_queries, source_product = product_queries[0] - dataset_fields = source_product.metadata_type.dataset_fields - source_exprs = tuple(fields.to_expressions(dataset_fields.get, **source_queries)) - else: - source_exprs = None - + assert not with_source_ids + assert source_filter is None product_queries = list(self._get_product_queries(query)) if not product_queries: product = query.get('product', None) @@ -670,6 +635,12 @@ raise ValueError(f"No such product: {product}") for q, product in product_queries: + _LOG.warning("Querying product %s", product) + # Extract Geospatial search geometry + geom = self._extract_geom_from_query(q) + assert "lat" not in q + assert "lon" not in q + dataset_fields = product.metadata_type.dataset_fields query_exprs = tuple(fields.to_expressions(dataset_fields.get, **q)) select_fields = None @@ -681,14 +652,14 @@ else: select_fields = tuple(dataset_fields[field_name] for field_name in select_field_names) - with self._db.connect() as connection: + with self._db_connection() as connection: yield (product, connection.search_datasets( query_exprs, - source_exprs, select_fields=select_fields, limit=limit, - with_source_ids=with_source_ids + with_source_ids=with_source_ids, + geom=geom )) def _do_count_by_product(self, query): @@ -697,7 +668,7 @@ for q, product in product_queries: dataset_fields = product.metadata_type.dataset_fields query_exprs = tuple(fields.to_expressions(dataset_fields.get, **q)) - with self._db.connect() as connection: + with self._db_connection() as connection: count = connection.count_datasets(query_exprs) if count > 0: yield product, count @@ -722,7 +693,7 @@ for q, product in product_queries: dataset_fields = product.metadata_type.dataset_fields query_exprs = tuple(fields.to_expressions(dataset_fields.get, **q)) - with self._db.connect() as connection: + with self._db_connection() as connection: yield product, list(connection.count_datasets_through_time( start, end, @@ -740,7 +711,9 @@ """ for _, results in self._do_search_by_product(query, return_fields=True): for columns in results: - yield dict(columns) + output = dict(columns) + _LOG.warning("search results: %s (%s)", output["id"], output["product"]) + yield output def get_product_time_bounds(self, product: str): """ @@ -755,24 +728,24 @@ time_min = DateDocField('aquisition_time_min', 'Min of time when dataset was acquired', - DATASET.c.metadata, + SQLDataset.metadata_doc, False, # is it indexed offset=min_offset, selection='least') time_max = DateDocField('aquisition_time_max', 'Max of time when dataset was acquired', - DATASET.c.metadata, + SQLDataset.metadata_doc, False, # is it indexed offset=max_offset, selection='greatest') - with self._db.connect() as connection: + with self._db_connection() as connection: result = connection.execute( select( [func.min(time_min.alchemy_expression), func.max(time_max.alchemy_expression)] ).where( - DATASET.c.dataset_type_ref == product.id + SQLDataset.product_ref == product.id ) ).first() @@ -821,7 +794,7 @@ class DatasetLight(result_type): # type: ignore __slots__ = () - with self._db.connect() as connection: + with self._db_connection() as connection: results = connection.search_unique_datasets( query_exprs, select_fields=select_fields, @@ -859,13 +832,13 @@ grid_spatial = dataset_section.get('grid_spatial') if grid_spatial: select_fields.append(SimpleDocField( - 'grid_spatial', 'grid_spatial', DATASET.c.metadata, + 'grid_spatial', 'grid_spatial', SQLDataset.metadata_doc, False, offset=grid_spatial )) elif 
custom_offsets and field_name in custom_offsets: select_fields.append(SimpleDocField( - field_name, field_name, DATASET.c.metadata, + field_name, field_name, SQLDataset.metadata_doc, False, offset=custom_offsets[field_name] )) @@ -912,9 +885,13 @@ for key in custom_query: # for now we assume all custom query fields are SimpleDocFields custom_field = SimpleDocField( - custom_query[key], custom_query[key], DATASET.c.metadata, + custom_query[key], custom_query[key], Dataset.metadata, False, offset=custom_offsets[key] ) custom_exprs.append(fields.as_expression(custom_field, custom_query[key])) return custom_exprs + + def spatial_extent(self, ids: Iterable[DSID], crs: CRS = CRS("EPSG:4326")) -> Optional[Geometry]: + with self._db_connection() as connection: + return connection.spatial_extent(ids, crs) diff -Nru datacube-1.8.7/datacube/index/postgis/index.py datacube-1.8.9/datacube/index/postgis/index.py --- datacube-1.8.7/datacube/index/postgis/index.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgis/index.py 2022-11-17 00:47:28.000000000 +0000 @@ -3,14 +3,18 @@ # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import logging +from contextlib import contextmanager +from typing import Iterable, Sequence -from datacube.drivers.postgis import PostGisDb -from datacube.index.postgis._datasets import DatasetResource # type: ignore +from datacube.drivers.postgis import PostGisDb, PostgisDbAPI +from datacube.index.postgis._transaction import PostgisTransaction +from datacube.index.postgis._datasets import DatasetResource, DSID # type: ignore from datacube.index.postgis._metadata_types import MetadataTypeResource from datacube.index.postgis._products import ProductResource from datacube.index.postgis._users import UserResource -from datacube.index.abstract import AbstractIndex, AbstractIndexDriver, default_metadata_type_docs +from datacube.index.abstract import AbstractIndex, AbstractIndexDriver, default_metadata_type_docs, AbstractTransaction from datacube.model import MetadataType +from datacube.utils.geometry import CRS _LOG = logging.getLogger(__name__) @@ -27,8 +31,8 @@ other connections are active. Or else use a separate instance of this class in each process. :ivar datacube.index._datasets.DatasetResource datasets: store and retrieve :class:`datacube.model.Dataset` - :ivar datacube.index._products.ProductResource products: store and retrieve :class:`datacube.model.DatasetType`\ - (should really be called Product) + :ivar datacube.index._products.ProductResource products: store and retrieve :class:`datacube.model.Product`\ + (formerly called DatasetType) :ivar datacube.index._metadata_types.MetadataTypeResource metadata_types: store and retrieve \ :class:`datacube.model.MetadataType` :ivar UserResource users: user management @@ -39,17 +43,26 @@ :type metadata_types: datacube.index._metadata_types.MetadataTypeResource """ + # Postgis driver does not need to support pre-EO3 metadata formats + supports_legacy = False + # Hopefully can reinstate non-geo support, but dropping for now will make progress easier. + supports_nongeo = False + # Hopefully can reinstate a simpler form of lineage support, but leave for now + supports_lineage = False + supports_source_filters = False + supports_transactions = True + def __init__(self, db: PostGisDb) -> None: # POSTGIS driver is not stable with respect to database schema or internal APIs. _LOG.warning("""WARNING: The POSTGIS index driver implementation is considered EXPERIMENTAL. 
-WARNING: +WARNING: WARNING: Database schema and internal APIs may change significantly between releases. Use at your own risk.""") self._db = db - self._users = UserResource(db) - self._metadata_types = MetadataTypeResource(db) - self._products = ProductResource(db, self.metadata_types) - self._datasets = DatasetResource(db, self.products) + self._users = UserResource(db, self) + self._metadata_types = MetadataTypeResource(db, self) + self._products = ProductResource(db, self) + self._datasets = DatasetResource(db, self) @property def users(self) -> UserResource: @@ -74,14 +87,14 @@ @classmethod def from_config(cls, config, application_name=None, validate_connection=True): db = PostGisDb.from_config(config, application_name=application_name, - validate_connection=validate_connection) + validate_connection=validate_connection) return cls(db) @classmethod def get_dataset_fields(cls, doc): return PostGisDb.get_dataset_fields(doc) - def init_db(self, with_default_types=True, with_permissions=True): + def init_db(self, with_default_types=True, with_permissions=True, with_default_spatial_index=True): is_new = self._db.init(with_permissions=with_permissions) if is_new and with_default_types: @@ -89,6 +102,9 @@ for doc in default_metadata_type_docs(): self.metadata_types.add(self.metadata_types.from_doc(doc), allow_table_lock=True) + if is_new and with_default_spatial_index: + self.create_spatial_index(CRS("EPSG:4326")) + return is_new def close(self): @@ -102,9 +118,72 @@ """ self._db.close() + @property + def index_id(self) -> str: + return self.url + + def transaction(self) -> AbstractTransaction: + return PostgisTransaction(self._db, self.index_id) + + def create_spatial_index(self, crs: CRS) -> bool: + sp_idx = self._db.create_spatial_index(crs) + return sp_idx is not None + + def spatial_indexes(self, refresh=False) -> Iterable[CRS]: + return self._db.spatial_indexes(refresh) + + def update_spatial_index(self, + crses: Sequence[CRS] = [], + product_names: Sequence[str] = [], + dataset_ids: Sequence[DSID] = [] + ) -> int: + with self._active_connection(transaction=True) as conn: + return conn.update_spindex(crses, product_names, dataset_ids) + def __repr__(self): return "Index".format(self._db) + @contextmanager + def _active_connection(self, transaction: bool = False) -> PostgisDbAPI: + """ + Context manager representing a database connection. + + If there is an active transaction for this index in the current thread, the connection object from that + transaction is returned, with the active transaction remaining in control of commit and rollback. + + If there is no active transaction and the transaction argument is True, a new transactionised connection + is returned, with this context manager handling commit and rollback. + + If there is no active transaction and the transaction argument is False (the default), a new connection + is returned with autocommit semantics. + + Note that autocommit behaviour is NOT available if there is an active transaction for the index + and the active thread. + + :param transaction: Use a transaction if one is not already active for the thread. + :return: A PostgresDbAPI object, with the specified transaction semantics. 
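The postgis driver wires the new spatial-index maintenance methods straight through to the database layer, following the semantics described in index/abstract.py earlier in this patch. A usage sketch, assuming a configured "postgis" environment (the driver is still flagged experimental) and illustrative product names:

from datacube import Datacube
from datacube.utils.geometry import CRS

dc = Datacube(env="postgis", app="spindex-maintenance")

dc.index.create_spatial_index(CRS("EPSG:3577"))        # True on success
print(list(dc.index.spatial_indexes(refresh=True)))    # CRSs that currently have a spatial index

# Restrict the update to named products; with no arguments, all datasets are refreshed.
updated = dc.index.update_spatial_index(product_names=["ga_ls8c_ard_3"])
print(f"{updated} spatial index entries updated")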
+ """ + trans = self.thread_transaction() + closing = False + if trans is not None: + # Use active transaction + yield trans._connection + elif transaction: + closing = True + with self._db._connect() as conn: + conn.begin() + try: + yield conn + conn.commit() + except Exception: # pylint: disable=broad-except + conn.rollback() + raise + else: + closing = True + # Autocommit behaviour: + with self._db._connect() as conn: + yield conn + class DefaultIndexDriver(AbstractIndexDriver): @staticmethod @@ -116,6 +195,7 @@ """ :param definition: """ + # TODO: Validate metadata is ODCv2 compliant - e.g. either non-raster or EO3. MetadataType.validate(definition) # type: ignore return MetadataType(definition, dataset_search_fields=Index.get_dataset_fields(definition)) diff -Nru datacube-1.8.7/datacube/index/postgis/_metadata_types.py datacube-1.8.9/datacube/index/postgis/_metadata_types.py --- datacube-1.8.7/datacube/index/postgis/_metadata_types.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgis/_metadata_types.py 2022-11-17 00:47:28.000000000 +0000 @@ -3,23 +3,25 @@ # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import logging -import warnings from cachetools.func import lru_cache from datacube.index.abstract import AbstractMetadataTypeResource +from datacube.index.postgis._transaction import IndexResourceAddIn from datacube.model import MetadataType from datacube.utils import jsonify_document, changes, _readable_offset from datacube.utils.changes import check_doc_unchanged, get_doc_changes _LOG = logging.getLogger(__name__) -class MetadataTypeResource(AbstractMetadataTypeResource): - def __init__(self, db): + +class MetadataTypeResource(AbstractMetadataTypeResource, IndexResourceAddIn): + def __init__(self, db, index): """ :type db: datacube.drivers.postgis._connections.PostgresDb """ self._db = db + self._index = index self.get_unsafe = lru_cache()(self.get_unsafe) self.get_by_name_unsafe = lru_cache()(self.get_by_name_unsafe) @@ -60,13 +62,14 @@ existing = self.get_by_name(metadata_type.name) if existing: # They've passed us the same one again. Make sure it matches what is stored. 
+ _LOG.warning(f"Metadata Type {metadata_type.name} is already in the database, checking for differences") check_doc_unchanged( existing.definition, jsonify_document(metadata_type.definition), 'Metadata Type {}'.format(metadata_type.name) ) else: - with self._db.connect() as connection: + with self._db_connection() as connection: connection.insert_metadata_type( name=metadata_type.name, definition=metadata_type.definition, @@ -127,7 +130,7 @@ can_update, safe_changes, unsafe_changes = self.can_update(metadata_type, allow_unsafe_updates) if not safe_changes and not unsafe_changes: - _LOG.info("No changes detected for metadata type %s", metadata_type.name) + _LOG.warning("No changes detected for metadata type %s", metadata_type.name) return self.get_by_name(metadata_type.name) if not can_update: @@ -140,7 +143,7 @@ _LOG.info("Updating metadata type %s", metadata_type.name) - with self._db.connect() as connection: + with self._db_connection() as connection: connection.update_metadata_type( name=metadata_type.name, definition=metadata_type.definition, @@ -166,7 +169,7 @@ # This is memoized in the constructor # pylint: disable=method-hidden def get_unsafe(self, id_): # type: ignore - with self._db.connect() as connection: + with self._db_connection() as connection: record = connection.get_metadata_type(id_) if record is None: raise KeyError('%s is not a valid MetadataType id') @@ -175,13 +178,13 @@ # This is memoized in the constructor # pylint: disable=method-hidden def get_by_name_unsafe(self, name): # type: ignore - with self._db.connect() as connection: + with self._db_connection() as connection: record = connection.get_metadata_type_by_name(name) if not record: raise KeyError('%s is not a valid MetadataType name' % name) return self._make_from_query_row(record) - def check_field_indexes(self, allow_table_lock=False, rebuild_all=None, + def check_field_indexes(self, allow_table_lock=False, rebuild_views=False, rebuild_indexes=False): """ Create or replace per-field indexes and views. @@ -191,14 +194,7 @@ If false, creation will be slightly slower and cannot be done in a transaction. 
""" - if rebuild_all is not None: - warnings.warn( - "The rebuild_all option of check_field_indexes() is deprecated.", - "Instead, use rebuild_views=True or rebuild_indexes=True as needed.", - DeprecationWarning) - rebuild_views = rebuild_indexes = rebuild_all - - with self._db.connect() as connection: + with self._db_connection() as connection: connection.check_dynamic_fields( concurrently=not allow_table_lock, rebuild_indexes=rebuild_indexes, @@ -211,7 +207,7 @@ :rtype: iter[datacube.model.MetadataType] """ - with self._db.connect() as connection: + with self._db_connection() as connection: return self._make_many(connection.get_all_metadata_types()) def _make_many(self, query_rows): diff -Nru datacube-1.8.7/datacube/index/postgis/_products.py datacube-1.8.9/datacube/index/postgis/_products.py --- datacube-1.8.7/datacube/index/postgis/_products.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgis/_products.py 2022-11-17 00:47:28.000000000 +0000 @@ -8,7 +8,8 @@ from datacube.index import fields from datacube.index.abstract import AbstractProductResource -from datacube.model import DatasetType, MetadataType +from datacube.index.postgis._transaction import IndexResourceAddIn +from datacube.model import Product, MetadataType from datacube.utils import jsonify_document, changes, _readable_offset from datacube.utils.changes import check_doc_unchanged, get_doc_changes @@ -17,19 +18,20 @@ _LOG = logging.getLogger(__name__) -class ProductResource(AbstractProductResource): +class ProductResource(AbstractProductResource, IndexResourceAddIn): """ :type _db: datacube.drivers.postgis._connections.PostgresDb :type metadata_type_resource: datacube.index._metadata_types.MetadataTypeResource """ - def __init__(self, db, metadata_type_resource): + def __init__(self, db, index): """ :type db: datacube.drivers.postgis._connections.PostgresDb :type metadata_type_resource: datacube.index._metadata_types.MetadataTypeResource """ self._db = db - self.metadata_type_resource = metadata_type_resource + self._index = index + self.metadata_type_resource = self._index.metadata_types self.get_unsafe = lru_cache()(self.get_unsafe) self.get_by_name_unsafe = lru_cache()(self.get_by_name_unsafe) @@ -55,13 +57,14 @@ This will halt other user's requests until completed. If false, creation will be slightly slower and cannot be done in a transaction. - :param DatasetType product: Product to add - :rtype: DatasetType + :param Product product: Product to add + :rtype: Product """ - DatasetType.validate(product.definition) + Product.validate(product.definition) existing = self.get_by_name(product.name) if existing: + _LOG.warning(f"Product {product.name} is already in the database, checking for differences") check_doc_unchanged( existing.definition, jsonify_document(product.definition), @@ -73,7 +76,7 @@ _LOG.warning('Adding metadata_type "%s" as it doesn\'t exist.', product.metadata_type.name) metadata_type = self.metadata_type_resource.add(product.metadata_type, allow_table_lock=allow_table_lock) - with self._db.connect() as connection: + with self._db_connection() as connection: connection.insert_product( name=product.name, metadata=product.metadata_doc, @@ -91,11 +94,11 @@ (An unsafe change is anything that may potentially make the product incompatible with existing datasets of that type) - :param DatasetType product: Product to update + :param Product product: Product to update :param bool allow_unsafe_updates: Allow unsafe changes. Use with caution. 
:rtype: bool,list[change],list[change] """ - DatasetType.validate(product.definition) + Product.validate(product.definition) existing = self.get_by_name(product.name) if not existing: @@ -128,27 +131,27 @@ return allow_unsafe_updates or not bad_changes, good_changes, bad_changes - def update(self, product: DatasetType, allow_unsafe_updates=False, allow_table_lock=False): + def update(self, product: Product, allow_unsafe_updates=False, allow_table_lock=False): """ Update a product. Unsafe changes will throw a ValueError by default. (An unsafe change is anything that may potentially make the product incompatible with existing datasets of that type) - :param DatasetType product: Product to update + :param Product product: Product to update :param bool allow_unsafe_updates: Allow unsafe changes. Use with caution. :param allow_table_lock: Allow an exclusive lock to be taken on the table while creating the indexes. This will halt other user's requests until completed. If false, creation will be slower and cannot be done in a transaction. - :rtype: DatasetType + :rtype: Product """ can_update, safe_changes, unsafe_changes = self.can_update(product, allow_unsafe_updates) if not safe_changes and not unsafe_changes: - _LOG.info("No changes detected for product %s", product.name) + _LOG.warning("No changes detected for product %s", product.name) return self.get_by_name(product.name) if not can_update: @@ -161,7 +164,7 @@ _LOG.info("Updating product %s", product.name) - existing = cast(DatasetType, self.get_by_name(product.name)) + existing = cast(Product, self.get_by_name(product.name)) changing_metadata_type = product.metadata_type.name != existing.metadata_type.name if changing_metadata_type: raise ValueError("Unsafe change: cannot (currently) switch metadata types for a product") @@ -182,7 +185,7 @@ metadata_type = self.metadata_type_resource.get_by_name(product.metadata_type.name) # TODO: should we add metadata type here? assert metadata_type, "TODO: should we add metadata type here?" - with self._db.connect() as conn: + with self._db_connection() as conn: conn.update_product( name=product.name, metadata=product.metadata_doc, @@ -208,7 +211,7 @@ This will halt other user's requests until completed. If false, creation will be slower and cannot be done in a transaction. - :rtype: DatasetType + :rtype: Product """ type_ = self.from_doc(definition) return self.update( @@ -220,7 +223,7 @@ # This is memoized in the constructor # pylint: disable=method-hidden def get_unsafe(self, id_): # type: ignore - with self._db.connect() as connection: + with self._db_connection() as connection: result = connection.get_product(id_) if not result: raise KeyError('"%s" is not a valid Product id' % id_) @@ -229,7 +232,7 @@ # This is memoized in the constructor # pylint: disable=method-hidden def get_by_name_unsafe(self, name): # type: ignore - with self._db.connect() as connection: + with self._db_connection() as connection: result = connection.get_product_by_name(name) if not result: raise KeyError('"%s" is not a valid Product name' % name) @@ -240,7 +243,7 @@ Return dataset types that have all the given fields. :param tuple[str] field_names: - :rtype: __generator[DatasetType] + :rtype: __generator[Product] """ for type_ in self.get_all(): for name in field_names: @@ -254,7 +257,7 @@ Return dataset types that match match-able fields and dict of remaining un-matchable fields. 
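[Editor's note] can_update(), shown above, reports a (can_update, safe_changes, unsafe_changes) triple rather than raising. A hedged sketch of how a caller might drive update() with it; the edited product document used here is an assumption, not part of this patch:

from datacube import Datacube

dc = Datacube()
product = dc.index.products.from_doc(updated_definition)   # updated_definition: an edited product document (assumed)

can, safe, unsafe = dc.index.products.can_update(product)
if can:
    dc.index.products.update(product)
else:
    # each change is an (offset, old_value, new_value) tuple, per datacube.utils.changes
    for offset, old, new in unsafe:
        print("unsafe change at", offset, ":", old, "->", new)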
:param dict query: - :rtype: __generator[(DatasetType, dict)] + :rtype: __generator[(Product, dict)] """ def _listify(v): @@ -276,6 +279,9 @@ # Check that all the keys they specified match this product. for key, value in list(remaining_matchable.items()): + if key == "geometry": + # Geometry field is handled elsewhere by index drivers that support spatial indexes. + continue field = type_.metadata_type.dataset_fields.get(key) if not field: # This type doesn't have that field, so it cannot match. @@ -297,18 +303,18 @@ else: yield type_, remaining_matchable - def get_all(self) -> Iterable[DatasetType]: + def get_all(self) -> Iterable[Product]: """ Retrieve all Products """ - with self._db.connect() as connection: + with self._db_connection() as connection: return (self._make(record) for record in connection.get_all_products()) def _make_many(self, query_rows): return (self._make(c) for c in query_rows) - def _make(self, query_row) -> DatasetType: - return DatasetType( + def _make(self, query_row) -> Product: + return Product( definition=query_row['definition'], metadata_type=cast(MetadataType, self.metadata_type_resource.get(query_row['metadata_type_ref'])), id_=query_row['id'], diff -Nru datacube-1.8.7/datacube/index/postgis/_transaction.py datacube-1.8.9/datacube/index/postgis/_transaction.py --- datacube-1.8.7/datacube/index/postgis/_transaction.py 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgis/_transaction.py 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,62 @@ +# This file is part of the Open Data Cube, see https://opendatacube.org for more information +# +# Copyright (c) 2015-2022 ODC Contributors +# SPDX-License-Identifier: Apache-2.0 + +from contextlib import contextmanager +from sqlalchemy import text +from typing import Any + +from datacube.drivers.postgis import PostGisDb +from datacube.drivers.postgis._api import PostgisDbAPI +from datacube.index.abstract import AbstractTransaction + + +class PostgisTransaction(AbstractTransaction): + def __init__(self, db: PostGisDb, idx_id: str) -> None: + super().__init__(idx_id) + self._db = db + + def _new_connection(self) -> Any: + dbconn = self._db.give_me_a_connection() + dbconn.execute(text('BEGIN')) + conn = PostgisDbAPI(self._db, dbconn) + return conn + + def _commit(self) -> None: + self._connection.commit() + + def _rollback(self) -> None: + self._connection.rollback() + + def _release_connection(self) -> None: + self._connection._connection.close() + self._connection._connection = None + + +class IndexResourceAddIn: + @contextmanager + def _db_connection(self, transaction: bool = False) -> PostgisDbAPI: + """ + Context manager representing a database connection. + + If there is an active transaction for this index in the current thread, the connection object from that + transaction is returned, with the active transaction remaining in control of commit and rollback. + + If there is no active transaction and the transaction argument is True, a new transactionised connection + is returned, with this context manager handling commit and rollback. + + If there is no active transaction and the transaction argument is False (the default), a new connection + is returned with autocommit semantics. + + Note that autocommit behaviour is NOT available if there is an active transaction for the index + and the active thread. + + In Resource Manager code replace self._db.connect() with self.db_connection(), and replace + self._db.begin() with self.db_connection(transaction=True). 
+ + :param transaction: Use a transaction if one is not already active for the thread. + :return: A PostgresDbAPI object, with the specified transaction semantics. + """ + with self._index._active_connection(transaction=transaction) as conn: + yield conn diff -Nru datacube-1.8.7/datacube/index/postgis/_users.py datacube-1.8.9/datacube/index/postgis/_users.py --- datacube-1.8.7/datacube/index/postgis/_users.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgis/_users.py 2022-11-17 00:47:28.000000000 +0000 @@ -4,20 +4,26 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Iterable, Optional, Tuple from datacube.index.abstract import AbstractUserResource +from datacube.index.postgis._transaction import IndexResourceAddIn from datacube.drivers.postgis import PostGisDb -class UserResource(AbstractUserResource): - def __init__(self, db: PostGisDb) -> None: + +class UserResource(AbstractUserResource, IndexResourceAddIn): + def __init__(self, + db: PostGisDb, + index: "datacube.index.postgis.index.Index" # noqa: F821 + ) -> None: """ :type db: datacube.drivers.postgis.PostGisDb """ self._db = db + self._index = index def grant_role(self, role: str, *usernames: str) -> None: """ Grant a role to users """ - with self._db.connect() as connection: + with self._db_connection() as connection: connection.grant_role(role, usernames) def create_user(self, username: str, password: str, @@ -25,14 +31,14 @@ """ Create a new user. """ - with self._db.connect() as connection: + with self._db_connection() as connection: connection.create_user(username, password, role, description=description) def delete_user(self, *usernames: str) -> None: """ Delete a user """ - with self._db.connect() as connection: + with self._db_connection() as connection: connection.drop_users(usernames) def list_users(self) -> Iterable[Tuple[str, str, Optional[str]]]: @@ -40,6 +46,6 @@ :return: list of (role, user, description) :rtype: list[(str, str, str)] """ - with self._db.connect() as connection: + with self._db_connection() as connection: for role, user, description in connection.list_users(): yield role, user, description diff -Nru datacube-1.8.7/datacube/index/postgres/_datasets.py datacube-1.8.9/datacube/index/postgres/_datasets.py --- datacube-1.8.7/datacube/index/postgres/_datasets.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgres/_datasets.py 2022-11-17 00:47:28.000000000 +0000 @@ -1,6 +1,6 @@ # This file is part of the Open Data Cube, see https://opendatacube.org for more information # -# Copyright (c) 2015-2020 ODC Contributors +# Copyright (c) 2015-2022 ODC Contributors # SPDX-License-Identifier: Apache-2.0 """ API for dataset indexing, access and search. @@ -9,7 +9,7 @@ import logging import warnings from collections import namedtuple -from typing import Iterable, Tuple, Union, List, Optional +from typing import Iterable, Union, List from uuid import UUID from sqlalchemy import select, func @@ -17,6 +17,7 @@ from datacube.drivers.postgres._fields import SimpleDocField, DateDocField from datacube.drivers.postgres._schema import DATASET from datacube.index.abstract import AbstractDatasetResource, DatasetSpatialMixin, DSID +from datacube.index.postgres._transaction import IndexResourceAddIn from datacube.model import Dataset, DatasetType from datacube.model.fields import Field from datacube.model.utils import flatten_datasets @@ -29,21 +30,20 @@ # It's a public api, so we can't reorganise old methods. 
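[Editor's note] The IndexResourceAddIn._db_connection() mix-in introduced above is what every resource hunk in this patch switches to. A hedged sketch of the pattern exactly as the docstring describes it; the resource class and method names here are illustrative only:

from datacube.index.postgis._transaction import IndexResourceAddIn

class SomeResource(IndexResourceAddIn):
    """Illustrative only -- mirrors how the real resources are rewired in this patch."""

    def __init__(self, db, index):
        self._db = db        # the PostGisDb/PostgresDb driver object
        self._index = index  # the owning Index, which supplies _active_connection()

    def read_something(self, id_):
        # was: with self._db.connect() as connection:
        with self._db_connection() as connection:   # autocommit, or joins the thread's active transaction
            return connection.get_dataset(id_)

    def change_many_things(self, ids):
        # was: with self._db.begin() as transaction:
        with self._db_connection(transaction=True) as transaction:   # commit/rollback handled by the context manager
            for id_ in ids:
                transaction.archive_dataset(id_)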
# pylint: disable=too-many-public-methods, too-many-lines - - -class DatasetResource(AbstractDatasetResource): +class DatasetResource(AbstractDatasetResource, IndexResourceAddIn): """ :type _db: datacube.drivers.postgres._connections.PostgresDb :type types: datacube.index._products.ProductResource """ - def __init__(self, db, dataset_type_resource): + def __init__(self, db, index): """ :type db: datacube.drivers.postgres._connections.PostgresDb :type dataset_type_resource: datacube.index._products.ProductResource """ self._db = db - self.types = dataset_type_resource + self._index = index + self.types = self._index.products def get(self, id_: Union[str, UUID], include_sources=False): """ @@ -56,7 +56,7 @@ if isinstance(id_, str): id_ = UUID(id_) - with self._db.connect() as connection: + with self._db_connection() as connection: if not include_sources: dataset = connection.get_dataset(id_) return self._make(dataset, full_info=True) if dataset else None @@ -85,7 +85,7 @@ ids = [to_uuid(i) for i in ids] - with self._db.connect() as connection: + with self._db_connection() as connection: rows = connection.get_datasets(ids) return [self._make(r, full_info=True) for r in rows] @@ -98,7 +98,7 @@ """ if not isinstance(id_, UUID): id_ = UUID(id_) - with self._db.connect() as connection: + with self._db_connection() as connection: return [ self._make(result, full_info=True) for result in connection.get_derived_datasets(id_) @@ -111,7 +111,7 @@ :param typing.Union[UUID, str] id_: dataset id :rtype: bool """ - with self._db.connect() as connection: + with self._db_connection() as connection: return connection.contains_dataset(id_) def bulk_has(self, ids_): @@ -124,26 +124,24 @@ :rtype: [bool] """ - with self._db.connect() as connection: + with self._db_connection() as connection: existing = set(connection.datasets_intersection(ids_)) return [x in existing for x in map((lambda x: UUID(x) if isinstance(x, str) else x), ids_)] def add(self, dataset: Dataset, - with_lineage: Optional[bool] = None, - **kwargs) -> Dataset: + with_lineage: bool = True) -> Dataset: """ Add ``dataset`` to the index. No-op if it is already present. 
:param dataset: dataset to add :param with_lineage: - - ``True|None`` attempt adding lineage datasets if missing + - ``True (default)`` attempt adding lineage datasets if missing - ``False`` record lineage relations, but do not attempt adding lineage datasets to the db - :param kwargs: only used to support deprecated behaviour :rtype: Dataset """ @@ -166,16 +164,6 @@ if main_ds.uris is not None: self._ensure_new_locations(main_ds, transaction=transaction) - if with_lineage is None: - policy = kwargs.pop('sources_policy', None) - if policy is not None: - _LOG.debug('Use of sources_policy is deprecated') - with_lineage = (policy != "skip") - if policy == 'verify': - _LOG.debug('Verify is no longer done inside add') - else: - with_lineage = True - _LOG.info('Indexing %s', dataset.id) if with_lineage: @@ -196,7 +184,7 @@ dss = [dataset] - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: process_bunch(dss, dataset, transaction) return dataset @@ -221,7 +209,7 @@ expressions = [product.metadata_type.dataset_fields.get('product') == product.name] - with self._db.connect() as connection: + with self._db_connection() as connection: for record in connection.get_duplicates(group_fields, expressions): dataset_ids = set(record[0]) grouped_fields = tuple(record[1:]) @@ -289,7 +277,7 @@ _LOG.info("Updating dataset %s", dataset.id) product = self.types.get_by_name(dataset.type.name) - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: if not transaction.update_dataset(dataset.metadata_doc_without_lineage(), dataset.id, product.id): raise ValueError("Failed to update dataset %s..." % dataset.id) @@ -308,7 +296,7 @@ # front of a stack for uri in new_uris[::-1]: if transaction is None: - with self._db.begin() as tr: + with self._db_connection(transaction=True) as tr: insert_one(uri, tr) else: insert_one(uri, transaction) @@ -319,7 +307,7 @@ :param Iterable[UUID] ids: list of dataset ids to archive """ - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: for id_ in ids: transaction.archive_dataset(id_) @@ -329,7 +317,7 @@ :param Iterable[UUID] ids: list of dataset ids to restore """ - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: for id_ in ids: transaction.restore_dataset(id_) @@ -339,7 +327,7 @@ :param ids: iterable of dataset ids to purge """ - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: for id_ in ids: transaction.delete_dataset(id_) @@ -353,7 +341,7 @@ :param archived: :rtype: list[UUID] """ - with self._db.begin() as transaction: + with self._db_connection(transaction=True) as transaction: return [dsid[0] for dsid in transaction.all_dataset_ids(archived)] def get_field_names(self, product_name=None): @@ -380,7 +368,7 @@ :param typing.Union[UUID, str] id_: dataset id :rtype: list[str] """ - with self._db.connect() as connection: + with self._db_connection() as connection: return connection.get_locations(id_) def get_archived_locations(self, id_): @@ -390,7 +378,7 @@ :param typing.Union[UUID, str] id_: dataset id :rtype: list[str] """ - with self._db.connect() as connection: + with self._db_connection() as connection: return [uri for uri, archived_dt in connection.get_archived_locations(id_)] def get_archived_location_times(self, id_): @@ -400,7 +388,7 @@ :param typing.Union[UUID, str] id_: dataset id :rtype: List[Tuple[str, datetime.datetime]] """ - with 
self._db.connect() as connection: + with self._db_connection() as connection: return list(connection.get_archived_locations(id_)) def add_location(self, id_, uri): @@ -415,7 +403,7 @@ warnings.warn("Cannot add empty uri. (dataset %s)" % id_) return False - with self._db.connect() as connection: + with self._db_connection() as connection: return connection.insert_dataset_location(id_, uri) def get_datasets_for_location(self, uri, mode=None): @@ -426,7 +414,7 @@ :param str mode: 'exact', 'prefix' or None (to guess) :return: """ - with self._db.connect() as connection: + with self._db_connection() as connection: return (self._make(row) for row in connection.get_datasets_for_location(uri, mode=mode)) def remove_location(self, id_, uri): @@ -437,7 +425,7 @@ :param str uri: fully qualified uri :returns bool: Was one removed? """ - with self._db.connect() as connection: + with self._db_connection() as connection: was_removed = connection.remove_location(id_, uri) return was_removed @@ -449,7 +437,7 @@ :param str uri: fully qualified uri :return bool: location was able to be archived """ - with self._db.connect() as connection: + with self._db_connection() as connection: was_archived = connection.archive_location(id_, uri) return was_archived @@ -461,7 +449,7 @@ :param str uri: fully qualified uri :return bool: location was able to be restored """ - with self._db.connect() as connection: + with self._db_connection() as connection: was_restored = connection.restore_location(id_, uri) return was_restored @@ -502,7 +490,7 @@ :param dict metadata: :rtype: list[Dataset] """ - with self._db.connect() as connection: + with self._db_connection() as connection: for dataset in self._make_many(connection.search_datasets_by_metadata(metadata)): yield dataset @@ -658,7 +646,7 @@ else: select_fields = tuple(dataset_fields[field_name] for field_name in select_field_names) - with self._db.connect() as connection: + with self._db_connection() as connection: yield (product, connection.search_datasets( query_exprs, @@ -674,7 +662,7 @@ for q, product in product_queries: dataset_fields = product.metadata_type.dataset_fields query_exprs = tuple(fields.to_expressions(dataset_fields.get, **q)) - with self._db.connect() as connection: + with self._db_connection() as connection: count = connection.count_datasets(query_exprs) if count > 0: yield product, count @@ -699,7 +687,7 @@ for q, product in product_queries: dataset_fields = product.metadata_type.dataset_fields query_exprs = tuple(fields.to_expressions(dataset_fields.get, **q)) - with self._db.connect() as connection: + with self._db_connection() as connection: yield product, list(connection.count_datasets_through_time( start, end, @@ -744,7 +732,7 @@ offset=max_offset, selection='greatest') - with self._db.connect() as connection: + with self._db_connection() as connection: result = connection.execute( select( [func.min(time_min.alchemy_expression), func.max(time_max.alchemy_expression)] @@ -798,7 +786,7 @@ class DatasetLight(result_type): # type: ignore __slots__ = () - with self._db.connect() as connection: + with self._db_connection() as connection: results = connection.search_unique_datasets( query_exprs, select_fields=select_fields, @@ -895,3 +883,6 @@ custom_exprs.append(fields.as_expression(custom_field, custom_query[key])) return custom_exprs + + def spatial_extent(self, ids, crs=None): + return None diff -Nru datacube-1.8.7/datacube/index/postgres/index.py datacube-1.8.9/datacube/index/postgres/index.py --- datacube-1.8.7/datacube/index/postgres/index.py 
2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgres/index.py 2022-11-17 00:47:28.000000000 +0000 @@ -3,14 +3,17 @@ # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import logging +from contextlib import contextmanager -from datacube.drivers.postgres import PostgresDb +from datacube.drivers.postgres import PostgresDb, PostgresDbAPI +from datacube.index.postgres._transaction import PostgresTransaction from datacube.index.postgres._datasets import DatasetResource # type: ignore from datacube.index.postgres._metadata_types import MetadataTypeResource from datacube.index.postgres._products import ProductResource from datacube.index.postgres._users import UserResource -from datacube.index.abstract import AbstractIndex, AbstractIndexDriver, default_metadata_type_docs +from datacube.index.abstract import AbstractIndex, AbstractIndexDriver, default_metadata_type_docs, AbstractTransaction from datacube.model import MetadataType +from datacube.utils.geometry import CRS _LOG = logging.getLogger(__name__) @@ -39,13 +42,15 @@ :type metadata_types: datacube.index._metadata_types.MetadataTypeResource """ + supports_transactions = True + def __init__(self, db: PostgresDb) -> None: self._db = db - self._users = UserResource(db) - self._metadata_types = MetadataTypeResource(db) - self._products = ProductResource(db, self.metadata_types) - self._datasets = DatasetResource(db, self.products) + self._users = UserResource(db, self) + self._metadata_types = MetadataTypeResource(db, self) + self._products = ProductResource(db, self) + self._datasets = DatasetResource(db, self) @property def users(self) -> UserResource: @@ -98,9 +103,60 @@ """ self._db.close() + @property + def index_id(self) -> str: + return f"legacy_{self.url}" + + def transaction(self) -> AbstractTransaction: + return PostgresTransaction(self._db, self.index_id) + + def create_spatial_index(self, crs: CRS) -> None: + _LOG.warning("postgres driver does not support spatio-temporal indexes") + def __repr__(self): return "Index".format(self._db) + @contextmanager + def _active_connection(self, transaction: bool = False) -> PostgresDbAPI: + """ + Context manager representing a database connection. + + If there is an active transaction for this index in the current thread, the connection object from that + transaction is returned, with the active transaction remaining in control of commit and rollback. + + If there is no active transaction and the transaction argument is True, a new transactionised connection + is returned, with this context manager handling commit and rollback. + + If there is no active transaction and the transaction argument is False (the default), a new connection + is returned with autocommit semantics. + + Note that autocommit behaviour is NOT available if there is an active transaction for the index + and the active thread. + + :param transaction: Use a transaction if one is not already active for the thread. + :return: A PostgresDbAPI object, with the specified transaction semantics. 
+ """ + trans = self.thread_transaction() + closing = False + if trans is not None: + # Use active transaction + yield trans._connection + elif transaction: + closing = True + with self._db._connect() as conn: + conn.begin() + try: + yield conn + conn.commit() + except Exception: # pylint: disable=broad-except + conn.rollback() + raise + else: + closing = True + # Autocommit behaviour: + with self._db._connect() as conn: + yield conn + class DefaultIndexDriver(AbstractIndexDriver): aliases = ['postgres'] diff -Nru datacube-1.8.7/datacube/index/postgres/_metadata_types.py datacube-1.8.9/datacube/index/postgres/_metadata_types.py --- datacube-1.8.7/datacube/index/postgres/_metadata_types.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgres/_metadata_types.py 2022-11-17 00:47:28.000000000 +0000 @@ -3,23 +3,25 @@ # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import logging -import warnings from cachetools.func import lru_cache from datacube.index.abstract import AbstractMetadataTypeResource +from datacube.index.postgres._transaction import IndexResourceAddIn from datacube.model import MetadataType from datacube.utils import jsonify_document, changes, _readable_offset from datacube.utils.changes import check_doc_unchanged, get_doc_changes _LOG = logging.getLogger(__name__) -class MetadataTypeResource(AbstractMetadataTypeResource): - def __init__(self, db): + +class MetadataTypeResource(AbstractMetadataTypeResource, IndexResourceAddIn): + def __init__(self, db, index): """ :type db: datacube.drivers.postgres._connections.PostgresDb """ self._db = db + self._index = index self.get_unsafe = lru_cache()(self.get_unsafe) self.get_by_name_unsafe = lru_cache()(self.get_by_name_unsafe) @@ -51,7 +53,8 @@ Allow an exclusive lock to be taken on the table while creating the indexes. This will halt other user's requests until completed. - If false, creation will be slightly slower and cannot be done in a transaction. + If false (and a transaction is not already active), creation will be slightly slower + and cannot be done in a transaction. :rtype: datacube.model.MetadataType """ # This column duplication is getting out of hand: @@ -60,13 +63,14 @@ existing = self.get_by_name(metadata_type.name) if existing: # They've passed us the same one again. Make sure it matches what is stored. 
+ _LOG.warning(f"Metadata Type {metadata_type.name} is already in the database, checking for differences") check_doc_unchanged( existing.definition, jsonify_document(metadata_type.definition), 'Metadata Type {}'.format(metadata_type.name) ) else: - with self._db.connect() as connection: + with self._db_connection(transaction=allow_table_lock) as connection: connection.insert_metadata_type( name=metadata_type.name, definition=metadata_type.definition, @@ -127,7 +131,7 @@ can_update, safe_changes, unsafe_changes = self.can_update(metadata_type, allow_unsafe_updates) if not safe_changes and not unsafe_changes: - _LOG.info("No changes detected for metadata type %s", metadata_type.name) + _LOG.warning("No changes detected for metadata type %s", metadata_type.name) return self.get_by_name(metadata_type.name) if not can_update: @@ -140,7 +144,7 @@ _LOG.info("Updating metadata type %s", metadata_type.name) - with self._db.connect() as connection: + with self._db_connection(transaction=allow_table_lock) as connection: connection.update_metadata_type( name=metadata_type.name, definition=metadata_type.definition, @@ -166,7 +170,7 @@ # This is memoized in the constructor # pylint: disable=method-hidden def get_unsafe(self, id_): # type: ignore - with self._db.connect() as connection: + with self._db_connection() as connection: record = connection.get_metadata_type(id_) if record is None: raise KeyError('%s is not a valid MetadataType id') @@ -175,13 +179,13 @@ # This is memoized in the constructor # pylint: disable=method-hidden def get_by_name_unsafe(self, name): # type: ignore - with self._db.connect() as connection: + with self._db_connection() as connection: record = connection.get_metadata_type_by_name(name) if not record: raise KeyError('%s is not a valid MetadataType name' % name) return self._make_from_query_row(record) - def check_field_indexes(self, allow_table_lock=False, rebuild_all=None, + def check_field_indexes(self, allow_table_lock=False, rebuild_views=False, rebuild_indexes=False): """ Create or replace per-field indexes and views. @@ -191,14 +195,7 @@ If false, creation will be slightly slower and cannot be done in a transaction. 
""" - if rebuild_all is not None: - warnings.warn( - "The rebuild_all option of check_field_indexes() is deprecated.", - "Instead, use rebuild_views=True or rebuild_indexes=True as needed.", - DeprecationWarning) - rebuild_views = rebuild_indexes = rebuild_all - - with self._db.connect() as connection: + with self._db_connection(transaction=allow_table_lock) as connection: connection.check_dynamic_fields( concurrently=not allow_table_lock, rebuild_indexes=rebuild_indexes, @@ -211,7 +208,7 @@ :rtype: iter[datacube.model.MetadataType] """ - with self._db.connect() as connection: + with self._db_connection() as connection: return self._make_many(connection.get_all_metadata_types()) def _make_many(self, query_rows): diff -Nru datacube-1.8.7/datacube/index/postgres/_products.py datacube-1.8.9/datacube/index/postgres/_products.py --- datacube-1.8.7/datacube/index/postgres/_products.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgres/_products.py 2022-11-17 00:47:28.000000000 +0000 @@ -8,28 +8,31 @@ from datacube.index import fields from datacube.index.abstract import AbstractProductResource +from datacube.index.postgres._transaction import IndexResourceAddIn from datacube.model import DatasetType, MetadataType from datacube.utils import jsonify_document, changes, _readable_offset from datacube.utils.changes import check_doc_unchanged, get_doc_changes from typing import Iterable, cast + _LOG = logging.getLogger(__name__) -class ProductResource(AbstractProductResource): +class ProductResource(AbstractProductResource, IndexResourceAddIn): """ :type _db: datacube.drivers.postgres._connections.PostgresDb :type metadata_type_resource: datacube.index._metadata_types.MetadataTypeResource """ - def __init__(self, db, metadata_type_resource): + def __init__(self, db, index): """ :type db: datacube.drivers.postgres._connections.PostgresDb :type metadata_type_resource: datacube.index._metadata_types.MetadataTypeResource """ self._db = db - self.metadata_type_resource = metadata_type_resource + self._index = index + self.metadata_type_resource = self._index.metadata_types self.get_unsafe = lru_cache()(self.get_unsafe) self.get_by_name_unsafe = lru_cache()(self.get_by_name_unsafe) @@ -54,7 +57,8 @@ Allow an exclusive lock to be taken on the table while creating the indexes. This will halt other user's requests until completed. - If false, creation will be slightly slower and cannot be done in a transaction. + If false (and there is no already active transaction), creation will be slightly slower + and cannot be done in a transaction. 
:param DatasetType product: Product to add :rtype: DatasetType """ @@ -62,6 +66,7 @@ existing = self.get_by_name(product.name) if existing: + _LOG.warning(f"Product {product.name} is already in the database, checking for differences") check_doc_unchanged( existing.definition, jsonify_document(product.definition), @@ -73,7 +78,7 @@ _LOG.warning('Adding metadata_type "%s" as it doesn\'t exist.', product.metadata_type.name) metadata_type = self.metadata_type_resource.add(product.metadata_type, allow_table_lock=allow_table_lock) - with self._db.connect() as connection: + with self._db_connection(transaction=allow_table_lock) as connection: connection.insert_product( name=product.name, metadata=product.metadata_doc, @@ -148,7 +153,7 @@ can_update, safe_changes, unsafe_changes = self.can_update(product, allow_unsafe_updates) if not safe_changes and not unsafe_changes: - _LOG.info("No changes detected for product %s", product.name) + _LOG.warning("No changes detected for product %s", product.name) return self.get_by_name(product.name) if not can_update: @@ -185,7 +190,7 @@ metadata_type = cast(MetadataType, self.metadata_type_resource.get_by_name(product.metadata_type.name)) # Given we cannot change metadata type because of the check above, and this is an # update method, the metadata type is guaranteed to already exist. - with self._db.connect() as conn: + with self._db_connection(transaction=allow_table_lock) as conn: conn.update_product( name=product.name, metadata=product.metadata_doc, @@ -223,7 +228,7 @@ # This is memoized in the constructor # pylint: disable=method-hidden def get_unsafe(self, id_): # type: ignore - with self._db.connect() as connection: + with self._db_connection() as connection: result = connection.get_product(id_) if not result: raise KeyError('"%s" is not a valid Product id' % id_) @@ -232,7 +237,7 @@ # This is memoized in the constructor # pylint: disable=method-hidden def get_by_name_unsafe(self, name): # type: ignore - with self._db.connect() as connection: + with self._db_connection() as connection: result = connection.get_product_by_name(name) if not result: raise KeyError('"%s" is not a valid Product name' % name) @@ -304,7 +309,7 @@ """ Retrieve all Products """ - with self._db.connect() as connection: + with self._db_connection() as connection: return (self._make(record) for record in connection.get_all_products()) def _make_many(self, query_rows): diff -Nru datacube-1.8.7/datacube/index/postgres/_transaction.py datacube-1.8.9/datacube/index/postgres/_transaction.py --- datacube-1.8.7/datacube/index/postgres/_transaction.py 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgres/_transaction.py 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,62 @@ +# This file is part of the Open Data Cube, see https://opendatacube.org for more information +# +# Copyright (c) 2015-2022 ODC Contributors +# SPDX-License-Identifier: Apache-2.0 + +from contextlib import contextmanager +from sqlalchemy import text +from typing import Any + +from datacube.drivers.postgres import PostgresDb +from datacube.drivers.postgres._api import PostgresDbAPI +from datacube.index.abstract import AbstractTransaction + + +class PostgresTransaction(AbstractTransaction): + def __init__(self, db: PostgresDb, idx_id: str) -> None: + super().__init__(idx_id) + self._db = db + + def _new_connection(self) -> Any: + dbconn = self._db.give_me_a_connection() + dbconn.execute(text('BEGIN')) + conn = PostgresDbAPI(dbconn) + return conn + + def _commit(self) -> None: + 
self._connection.commit() + + def _rollback(self) -> None: + self._connection.rollback() + + def _release_connection(self) -> None: + self._connection._connection.close() + self._connection._connection = None + + +class IndexResourceAddIn: + @contextmanager + def _db_connection(self, transaction: bool = False) -> PostgresDbAPI: + """ + Context manager representing a database connection. + + If there is an active transaction for this index in the current thread, the connection object from that + transaction is returned, with the active transaction remaining in control of commit and rollback. + + If there is no active transaction and the transaction argument is True, a new transactionised connection + is returned, with this context manager handling commit and rollback. + + If there is no active transaction and the transaction argument is False (the default), a new connection + is returned with autocommit semantics. + + Note that autocommit behaviour is NOT available if there is an active transaction for the index + and the active thread. + + In Resource Manager code replace self._db.connect() with self.db_connection(), and replace + self._db.begin() with self.db_connection(transaction=True). + + :param transaction: Use a transaction if one is not already active for the thread. + :return: A PostgresDbAPI object, with the specified transaction semantics. + """ + with self._index._active_connection(transaction=transaction) as conn: + yield conn diff -Nru datacube-1.8.7/datacube/index/postgres/_users.py datacube-1.8.9/datacube/index/postgres/_users.py --- datacube-1.8.7/datacube/index/postgres/_users.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/index/postgres/_users.py 2022-11-17 00:47:28.000000000 +0000 @@ -1,23 +1,29 @@ # This file is part of the Open Data Cube, see https://opendatacube.org for more information # -# Copyright (c) 2015-2020 ODC Contributors +# Copyright (c) 2015-2022 ODC Contributors # SPDX-License-Identifier: Apache-2.0 from typing import Iterable, Optional, Tuple from datacube.index.abstract import AbstractUserResource +from datacube.index.postgres._transaction import IndexResourceAddIn from datacube.drivers.postgres import PostgresDb -class UserResource(AbstractUserResource): - def __init__(self, db: PostgresDb) -> None: + +class UserResource(AbstractUserResource, IndexResourceAddIn): + def __init__(self, + db: PostgresDb, + index: "datacube.index.postgres.index.Index" # noqa: F821 + ) -> None: """ :type db: datacube.drivers.postgres._connections.PostgresDb """ self._db = db + self._index = index def grant_role(self, role: str, *usernames: str) -> None: """ Grant a role to users """ - with self._db.connect() as connection: + with self._db_connection() as connection: connection.grant_role(role, usernames) def create_user(self, username: str, password: str, @@ -25,14 +31,14 @@ """ Create a new user. 
""" - with self._db.connect() as connection: + with self._db_connection() as connection: connection.create_user(username, password, role, description=description) def delete_user(self, *usernames: str) -> None: """ Delete a user """ - with self._db.connect() as connection: + with self._db_connection() as connection: connection.drop_users(usernames) def list_users(self) -> Iterable[Tuple[str, str, Optional[str]]]: @@ -40,6 +46,6 @@ :return: list of (role, user, description) :rtype: list[(str, str, str)] """ - with self._db.connect() as connection: + with self._db_connection() as connection: for role, user, description in connection.list_users(): yield role, user, description diff -Nru datacube-1.8.7/datacube/model/__init__.py datacube-1.8.9/datacube/model/__init__.py --- datacube-1.8.7/datacube/model/__init__.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/model/__init__.py 2022-11-17 00:47:28.000000000 +0000 @@ -8,7 +8,6 @@ import logging import math -import warnings from collections import OrderedDict from datetime import datetime from pathlib import Path @@ -20,8 +19,9 @@ from urllib.parse import urlparse from datacube.utils import geometry, without_lineage_sources, parse_time, cached_property, uri_to_local_path, \ schema_validated, DocReader +from datacube.index.eo3 import is_doc_eo3 from .fields import Field, get_dataset_fields -from ._base import Range, ranges_overlap +from ._base import Range, ranges_overlap # noqa: F401 _LOG = logging.getLogger(__name__) @@ -43,14 +43,14 @@ """ def __init__(self, - type_: 'DatasetType', + type_: 'Product', metadata_doc: Dict[str, Any], uris: Optional[List[str]] = None, sources: Optional[Mapping[str, 'Dataset']] = None, indexed_by: Optional[str] = None, indexed_time: Optional[datetime] = None, archived_time: Optional[datetime] = None): - assert isinstance(type_, DatasetType) + assert isinstance(type_, Product) self.type = type_ @@ -74,6 +74,10 @@ self.archived_time = archived_time @property + def is_eo3(self) -> bool: + return is_doc_eo3(self.metadata_doc) + + @property def metadata_type(self) -> 'MetadataType': return self.type.metadata_type @@ -231,28 +235,6 @@ crs = projection.get('spatial_reference', None) if crs: return geometry.CRS(str(crs)) - - # Try to infer CRS - zone_ = projection.get('zone') - datum_ = projection.get('datum') - if zone_ and datum_: - warnings.warn("Using zone/datum to specify CRS is deprecated", - category=DeprecationWarning) - try: - # TODO: really need CRS specified properly in agdc-metadata.yaml - if datum_ == 'GDA94': - return geometry.CRS('EPSG:283' + str(abs(zone_))) - if datum_ == 'WGS84': - if zone_[-1] == 'S': - return geometry.CRS('EPSG:327' + str(abs(int(zone_[:-1])))) - else: - return geometry.CRS('EPSG:326' + str(abs(int(zone_[:-1])))) - except geometry.CRSError: - # We still return None, as they didn't specify a CRS explicitly... 
- _LOG.warning( - "Can't figure out projection: possibly invalid zone (%r) for datum (%r).", zone_, datum_ - ) - return None @cached_property @@ -392,7 +374,7 @@ @schema_validated(SCHEMA_PATH / 'dataset-type-schema.yaml') -class DatasetType: +class Product: """ Product definition @@ -696,7 +678,7 @@ return row def __str__(self) -> str: - return "DatasetType(name={name!r}, id_={id!r})".format(id=self.id, name=self.name) + return "Product(name={name!r}, id_={id!r})".format(id=self.id, name=self.name) def __repr__(self) -> str: return self.__str__() @@ -716,6 +698,10 @@ return hash(self.name) +# Type alias for backwards compatibility +DatasetType = Product + + @schema_validated(SCHEMA_PATH / 'ingestor-config-type-schema.yaml') class IngestorConfig: """ diff -Nru datacube-1.8.7/datacube/model/schema/ingestor-config-type-schema.yaml datacube-1.8.9/datacube/model/schema/ingestor-config-type-schema.yaml --- datacube-1.8.7/datacube/model/schema/ingestor-config-type-schema.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/model/schema/ingestor-config-type-schema.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -57,8 +57,6 @@ type: string acknowkledgment: type: string - references: - type: string ingestion_bounds: type: object properties: diff -Nru datacube-1.8.7/datacube/model/schema/metadata-type-schema.yaml datacube-1.8.9/datacube/model/schema/metadata-type-schema.yaml --- datacube-1.8.7/datacube/model/schema/metadata-type-schema.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/model/schema/metadata-type-schema.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -53,6 +53,7 @@ - creation_dt - label - sources + - search_fields additionalProperties: false required: - name diff -Nru datacube-1.8.7/datacube/model/utils.py datacube-1.8.9/datacube/model/utils.py --- datacube-1.8.7/datacube/model/utils.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/model/utils.py 2022-11-17 00:47:28.000000000 +0000 @@ -25,7 +25,7 @@ from yaml import SafeDumper # type: ignore -class BadMatch(Exception): +class BadMatch(Exception): # noqa: N818 pass @@ -353,7 +353,7 @@ try: return visit(root) except BadMatch as e: - if root.id not in str(e): + if str(root.id) not in str(e): raise BadMatch(f"Error loading lineage dataset: {e}") from None else: raise diff -Nru datacube-1.8.7/datacube/scripts/cli_app.py datacube-1.8.9/datacube/scripts/cli_app.py --- datacube-1.8.7/datacube/scripts/cli_app.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/scripts/cli_app.py 2022-11-17 00:47:28.000000000 +0000 @@ -10,12 +10,12 @@ from datacube.ui.click import cli -import datacube.scripts.dataset -import datacube.scripts.ingest -import datacube.scripts.product -import datacube.scripts.metadata -import datacube.scripts.system -import datacube.scripts.user +import datacube.scripts.dataset # noqa: F401 +import datacube.scripts.ingest # noqa: F401 +import datacube.scripts.product # noqa: F401 +import datacube.scripts.metadata # noqa: F401 +import datacube.scripts.system # noqa: F401 +import datacube.scripts.user # noqa: F401 if __name__ == '__main__': diff -Nru datacube-1.8.7/datacube/scripts/dataset.py datacube-1.8.9/datacube/scripts/dataset.py --- datacube-1.8.7/datacube/scripts/dataset.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/scripts/dataset.py 2022-11-17 00:47:28.000000000 +0000 @@ -22,7 +22,7 @@ from datacube.index import Index from datacube.model import Dataset from datacube.ui import click as ui -from datacube.ui.click import cli +from datacube.ui.click 
import cli, print_help_msg from datacube.ui.common import ui_path_doc_stream from datacube.utils import changes, SimpleDocNav from datacube.utils.serialise import SafeDatacubeDumper @@ -143,8 +143,6 @@ 'specified with this option, ' 'you can supply several by repeating this option with a new product name'), multiple=True) -@click.option('--auto-match', '-a', help="Deprecated don't use it, it's a no-op", - is_flag=True, default=False, hidden=True) @click.option('--auto-add-lineage/--no-auto-add-lineage', is_flag=True, default=True, help=('Default behaviour is to automatically add lineage datasets if they are missing from the database, ' 'but this can be disabled if lineage is expected to be present in the DB, ' @@ -164,13 +162,17 @@ @ui.pass_index() def index_cmd(index, product_names, exclude_product_names, - auto_match, auto_add_lineage, verify_lineage, dry_run, ignore_lineage, confirm_ignore_lineage, dataset_paths): + + if not dataset_paths: + print_help_msg(index_cmd) + sys.exit(1) + if confirm_ignore_lineage is False and ignore_lineage is True: if sys.stdin.isatty(): confirmed = click.confirm("Requested to skip lineage information, Are you sure?", default=False) @@ -183,9 +185,6 @@ confirm_ignore_lineage = True - if auto_match is True: - _LOG.warning("--auto-match option is deprecated, update your scripts, behaviour is the same without it") - try: ds_resolve = Doc2Dataset(index, product_names, @@ -241,7 +240,7 @@ default='keep', help=dedent(''' What to do with previously recorded dataset location(s) - + \b - 'keep': keep as alternative location [default] - 'archive': mark as archived @@ -249,6 +248,10 @@ @click.argument('dataset-paths', nargs=-1) @ui.pass_index() def update_cmd(index, keys_that_can_change, dry_run, location_policy, dataset_paths): + if not dataset_paths: + print_help_msg(update_cmd) + sys.exit(1) + def loc_action(action, new_ds, existing_ds, action_name): if len(existing_ds.uris) == 0: return None @@ -412,6 +415,10 @@ f: str, max_depth: int, ids: Iterable[str]) -> None: + if not ids: + print_help_msg(info_cmd) + sys.exit(1) + # Using an array wrapper to get around the lack of "nonlocal" in py2 missing_datasets = [0] @@ -478,6 +485,10 @@ PATHS may be either file paths or URIs """ + if not paths: + print_help_msg(uri_search_cmd) + sys.exit(1) + if search_mode == 'guess': # This is what the API expects. I think it should be changed. search_mode = None @@ -493,16 +504,22 @@ @click.option('--archive-derived', '-d', help='Also recursively archive derived datasets', is_flag=True, default=False) @click.option('--dry-run', help="Don't archive. 
Display datasets that would get archived", is_flag=True, default=False) -@click.option('--all', "all_ds", help="Ignore id list - archive ALL non-archived datasets (warning: may be slow on large databases)", +@click.option('--all', "all_ds", + help="Ignore id list - archive ALL non-archived datasets (warning: may be slow on large databases)", is_flag=True, default=False) @click.argument('ids', nargs=-1) @ui.pass_index() def archive_cmd(index: Index, archive_derived: bool, dry_run: bool, all_ds: bool, ids: List[str]): + if not ids and not all_ds: + print_help_msg(archive_cmd) + sys.exit(1) + derived_dataset_ids: List[UUID] = [] if all_ds: datasets_for_archive = {dsid: True for dsid in index.datasets.get_all_dataset_ids(archived=False)} else: - datasets_for_archive = {UUID(dataset_id): exists for dataset_id, exists in zip(ids, index.datasets.bulk_has(ids))} + datasets_for_archive = {UUID(dataset_id): exists + for dataset_id, exists in zip(ids, index.datasets.bulk_has(ids))} if False in datasets_for_archive.values(): for dataset_id, exists in datasets_for_archive.items(): @@ -534,11 +551,17 @@ help="Only restore derived datasets that were archived " "this recently to the original dataset", default=10 * 60) -@click.option('--all', "all_ds", help="Ignore id list - restore ALL archived datasets (warning: may be slow on large databases)", +@click.option('--all', "all_ds", + help="Ignore id list - restore ALL archived datasets (warning: may be slow on large databases)", is_flag=True, default=False) @click.argument('ids', nargs=-1) @ui.pass_index() -def restore_cmd(index: Index, restore_derived: bool, derived_tolerance_seconds: int, dry_run: bool, all_ds: bool, ids: List[str]): +def restore_cmd(index: Index, restore_derived: bool, derived_tolerance_seconds: int, + dry_run: bool, all_ds: bool, ids: List[str]): + if not ids and not all_ds: + print_help_msg(restore_cmd) + sys.exit(1) + tolerance = datetime.timedelta(seconds=derived_tolerance_seconds) if all_ds: ids = index.datasets.get_all_dataset_ids(archived=True) # type: ignore[assignment] @@ -576,15 +599,21 @@ @dataset_cmd.command('purge', help="Purge archived datasets") @click.option('--dry-run', help="Don't archive. 
Display datasets that would get archived", is_flag=True, default=False) -@click.option('--all', "all_ds", help="Ignore id list - purge ALL archived datasets (warning: may be slow on large databases)", +@click.option('--all', "all_ds", + help="Ignore id list - purge ALL archived datasets (warning: may be slow on large databases)", is_flag=True, default=False) @click.argument('ids', nargs=-1) @ui.pass_index() def purge_cmd(index: Index, dry_run: bool, all_ds: bool, ids: List[str]): + if not ids and not all_ds: + print_help_msg(purge_cmd) + sys.exit(1) + if all_ds: datasets_for_archive = {dsid: True for dsid in index.datasets.get_all_dataset_ids(archived=True)} else: - datasets_for_archive = {UUID(dataset_id): exists for dataset_id, exists in zip(ids, index.datasets.bulk_has(ids))} + datasets_for_archive = {UUID(dataset_id): exists + for dataset_id, exists in zip(ids, index.datasets.bulk_has(ids))} # Check for non-existent datasets if False in datasets_for_archive.values(): diff -Nru datacube-1.8.7/datacube/scripts/metadata.py datacube-1.8.9/datacube/scripts/metadata.py --- datacube-1.8.7/datacube/scripts/metadata.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/scripts/metadata.py 2022-11-17 00:47:28.000000000 +0000 @@ -14,7 +14,7 @@ from datacube.index import Index from datacube.ui import click as ui -from datacube.ui.click import cli +from datacube.ui.click import cli, print_help_msg, exit_on_empty_file from datacube.utils import read_documents, InvalidDocException from datacube.utils.serialise import SafeDatacubeDumper @@ -38,6 +38,12 @@ """ Add or update metadata types in the index """ + if not files: + print_help_msg(add_metadata_types) + sys.exit(1) + + exit_on_empty_file(list(read_documents(*files))) + for descriptor_path, parsed_doc in read_documents(*files): try: type_ = index.metadata_types.from_doc(parsed_doc) @@ -68,6 +74,12 @@ (An unsafe change is anything that may potentially make the metadata type incompatible with existing types of the same name) """ + if not files: + print_help_msg(update_metadata_types) + sys.exit(1) + + exit_on_empty_file(list(read_documents(*files))) + for descriptor_path, parsed_doc in read_documents(*files): try: type_ = index.metadata_types.from_doc(parsed_doc) diff -Nru datacube-1.8.7/datacube/scripts/product.py datacube-1.8.9/datacube/scripts/product.py --- datacube-1.8.7/datacube/scripts/product.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/scripts/product.py 2022-11-17 00:47:28.000000000 +0000 @@ -17,7 +17,7 @@ from datacube.index import Index from datacube.ui import click as ui -from datacube.ui.click import cli +from datacube.ui.click import cli, print_help_msg, exit_on_empty_file from datacube.utils import read_documents, InvalidDocException from datacube.utils.serialise import SafeDatacubeDumper @@ -41,14 +41,20 @@ """ Add or update products in the generic index. """ + if not files: + print_help_msg(add_products) + sys.exit(1) + def on_ctrlc(sig, frame): - echo(f'''Can not abort `product add` without leaving database in bad state. + echo('''Can not abort `product add` without leaving database in bad state. This operation requires constructing a bunch of indexes and this takes time, the bigger your database the longer it will take. 
Just wait a bit.''') signal.signal(signal.SIGINT, on_ctrlc) + exit_on_empty_file(list(read_documents(*files))) + for descriptor_path, parsed_doc in read_documents(*files): try: type_ = index.products.from_doc(parsed_doc) @@ -81,6 +87,12 @@ (An unsafe change is anything that may potentially make the product incompatible with existing datasets of that type) """ + if not files: + print_help_msg(update_products) + sys.exit(1) + + exit_on_empty_file(list(read_documents(*files))) + failures = 0 for descriptor_path, parsed_doc in read_documents(*files): try: @@ -151,15 +163,15 @@ echo('No products discovered :(') return - output_columns=('id', 'name', 'description', 'ancillary_quality', - 'product_type', 'gqa_abs_iterative_mean_xy', - 'gqa_ref_source', 'sat_path', - 'gqa_iterative_stddev_xy', 'time', 'sat_row', - 'orbit', 'gqa', 'instrument', 'gqa_abs_xy', 'crs', - 'resolution', 'tile_size', 'spatial_dimensions') + output_columns = ('id', 'name', 'description', 'ancillary_quality', + 'product_type', 'gqa_abs_iterative_mean_xy', + 'gqa_ref_source', 'sat_path', + 'gqa_iterative_stddev_xy', 'time', 'sat_row', + 'orbit', 'gqa', 'instrument', 'gqa_abs_xy', 'crs', + 'resolution', 'tile_size', 'spatial_dimensions') # If the intersection of desired columns with available columns is empty, just use whatever IS in df - output_columns=tuple(col for col in output_columns if col in df.columns) or df.columns - echo(df.to_string(columns=output_columns,justify='left',index=False)) + output_columns = tuple(col for col in output_columns if col in df.columns) or df.columns + echo(df.to_string(columns=output_columns, justify='left', index=False)) def _default_lister(products): diff -Nru datacube-1.8.7/datacube/scripts/search_tool.py datacube-1.8.9/datacube/scripts/search_tool.py --- datacube-1.8.7/datacube/scripts/search_tool.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/scripts/search_tool.py 2022-11-17 00:47:28.000000000 +0000 @@ -56,6 +56,7 @@ """ Output as a CSV. 
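[Editor's note] The dataset, metadata and product commands above all gain the same guard: called with no arguments they print their help text and exit non-zero instead of silently doing nothing. The helper itself is added to datacube/ui/click.py later in this diff; a self-contained sketch of the pattern with an illustrative command name:

import sys
import click

def print_help_msg(command):
    # same helper this patch adds to datacube.ui.click
    with click.Context(command) as ctx:
        click.echo(command.get_help(ctx))

@click.command("frobnicate")
@click.argument("paths", nargs=-1)
def frobnicate_cmd(paths):
    if not paths:
        print_help_msg(frobnicate_cmd)
        sys.exit(1)
    for path in paths:
        click.echo(f"processing {path}")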
""" + search_results = list(search_results) writer = csv.DictWriter(out_f, tuple(sorted(field_names))) writer.writeheader() writer.writerows( diff -Nru datacube-1.8.7/datacube/storage/_base.py datacube-1.8.9/datacube/storage/_base.py --- datacube-1.8.7/datacube/storage/_base.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/storage/_base.py 2022-11-17 00:47:28.000000000 +0000 @@ -2,7 +2,7 @@ # # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 -from typing import Optional, Dict, Any, Tuple +from typing import Optional, Dict, Any, Tuple, Callable from urllib.parse import urlparse from datacube.model import Dataset @@ -82,7 +82,8 @@ ds: Dataset, band: str, uri_scheme: Optional[str] = None, - extra_dim_index: Optional[int] = None): + extra_dim_index: Optional[int] = None, + patch_url: Optional[Callable[[str], str]] = None): try: mp, = ds.type.lookup_measurements([band]).values() except KeyError: @@ -97,11 +98,13 @@ raise ValueError('No uris defined on a dataset') base_uri = pick_uri(ds.uris, uri_scheme) + uri = uri_resolve(base_uri, mm.get('path')) + if patch_url is not None: + uri = patch_url(uri) bint, layer = _get_band_and_layer(mm) - + self.uri = uri self.name = band - self.uri = uri_resolve(base_uri, mm.get('path')) self.band = bint self.layer = layer self.dtype = mp.dtype diff -Nru datacube-1.8.7/datacube/storage/masking.py datacube-1.8.9/datacube/storage/masking.py --- datacube-1.8.7/datacube/storage/masking.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/storage/masking.py 2022-11-17 00:47:28.000000000 +0000 @@ -2,9 +2,9 @@ # # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 +from datacube.utils.masking import * # noqa: F401, F403 + import warnings warnings.warn("datacube.storage.masking has moved to datacube.utils.masking", category=DeprecationWarning) - -from datacube.utils.masking import * diff -Nru datacube-1.8.7/datacube/storage/_rio.py datacube-1.8.9/datacube/storage/_rio.py --- datacube-1.8.7/datacube/storage/_rio.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/storage/_rio.py 2022-11-17 00:47:28.000000000 +0000 @@ -6,7 +6,6 @@ Driver implementation for Rasterio based reader. """ import logging -import warnings import contextlib from contextlib import contextmanager from threading import RLock @@ -84,55 +83,6 @@ return self.source.ds.read(indexes=self.source.bidx, window=window, out_shape=out_shape) -class OverrideBandDataSource(GeoRasterReader): - """Wrapper for a rasterio.Band object that overrides nodata, CRS and transform - - This is useful for files with malformed or missing properties. 
- - - :type source: rasterio.Band - """ - - def __init__(self, - source: rasterio.Band, - nodata, - crs: geometry.CRS, - transform: Affine, - lock: Optional[RLock] = None): - self.source = source - self._nodata = num2numpy(nodata, source.dtype) - self._crs = crs - self._transform = transform - self._lock = lock - - @property - def crs(self) -> geometry.CRS: - return self._crs - - @property - def transform(self) -> Affine: - return self._transform - - @property - def nodata(self): - return self._nodata - - @property - def dtype(self) -> np.dtype: - return np.dtype(self.source.dtype) - - @property - def shape(self) -> RasterShape: - return self.source.shape - - def read(self, window: Optional[RasterWindow] = None, - out_shape: Optional[RasterShape] = None) -> Optional[np.ndarray]: - """Read data in the native format, returning a native array - """ - with maybe_lock(self._lock): - return self.source.ds.read(indexes=self.source.bidx, window=window, out_shape=out_shape) - - class RasterioDataSource(DataSource): """ Abstract class used by fuse_sources and :func:`read_from_source` @@ -188,14 +138,8 @@ lock.release() if override: - warnings.warn(f"""Broken/missing geospatial data was found in file: -"{self.filename}" -Will use approximate metadata for backwards compatibility reasons (#673). -This behaviour is deprecated. Future versions will raise an error.""", - category=DeprecationWarning) - yield OverrideBandDataSource(band, nodata=nodata, crs=crs, transform=transform, lock=lock) - else: - yield BandDataSource(band, nodata=nodata, lock=lock) + raise RuntimeError(f'Broken/missing geospatial data was found in file "{self.filename}"') + yield BandDataSource(band, nodata=nodata, lock=lock) except Exception as e: _LOG.error("Error opening source dataset: %s", self.filename) diff -Nru datacube-1.8.7/datacube/testutils/geom.py datacube-1.8.9/datacube/testutils/geom.py --- datacube-1.8.7/datacube/testutils/geom.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/testutils/geom.py 2022-11-17 00:47:28.000000000 +0000 @@ -41,7 +41,7 @@ ''' -def mkA(rot=0, scale=(1, 1), shear=0, translation=(0, 0)): +def mkA(rot=0, scale=(1, 1), shear=0, translation=(0, 0)): # noqa: N802 return Affine.translation(*translation)*Affine.rotation(rot)*Affine.shear(shear)*Affine.scale(*scale) @@ -85,7 +85,7 @@ return (s, -vmin*s) - A_rot = Affine.rotation(deg) + A_rot = Affine.rotation(deg) # noqa: N806 x, y = apply_affine(A_rot, x, y) sx, tx = norm_v(x) diff -Nru datacube-1.8.7/datacube/testutils/__init__.py datacube-1.8.9/datacube/testutils/__init__.py --- datacube-1.8.7/datacube/testutils/__init__.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/testutils/__init__.py 2022-11-17 00:47:28.000000000 +0000 @@ -27,8 +27,6 @@ from datacube.utils import read_documents, SimpleDocNav from datacube.utils.geometry import GeoBox, CRS -from datacube.model.fields import parse_search_field - _DEFAULT = object() @@ -385,6 +383,7 @@ base_folder, prefix='', timestamp='2018-07-19', + base_folder_of_record=None, **kwargs): """ each band: @@ -397,6 +396,9 @@ from .io import write_gtiff from pathlib import Path + if base_folder_of_record is None: + base_folder_of_record = base_folder + if not isinstance(bands, Sequence): bands = (bands,) @@ -418,7 +420,7 @@ layer=1, dtype=meta.dtype)) - uri = Path(base_folder/'metadata.yaml').absolute().as_uri() + uri = Path(base_folder_of_record/'metadata.yaml').absolute().as_uri() ds = mk_sample_dataset(mm, uri=uri, timestamp=timestamp, diff -Nru 
datacube-1.8.7/datacube/testutils/io.py datacube-1.8.9/datacube/testutils/io.py --- datacube-1.8.7/datacube/testutils/io.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/testutils/io.py 2022-11-17 00:47:28.000000000 +0000 @@ -12,7 +12,7 @@ from ..storage._read import rdr_geobox from ..utils.geometry import GeoBox from ..utils.geometry import gbox as gbx -from ..index.eo3 import is_doc_eo3, _norm_grid # type: ignore[attr-defined] +from ..index.eo3 import is_doc_eo3, EO3Grid # type: ignore[attr-defined] from types import SimpleNamespace @@ -68,11 +68,11 @@ crs = ds.crs doc_path = ('grids', mm.get('grid', 'default')) - grid = toolz.get_in(doc_path, ds.metadata_doc) - if crs is None or grid is None: + grid_spec = toolz.get_in(doc_path, ds.metadata_doc) + if crs is None or grid_spec is None: raise ValueError('Not a valid EO3 dataset') - grid = _norm_grid(grid) + grid = EO3Grid(grid_spec) h, w = grid.shape return GeoBox(w, h, grid.transform, crs) diff -Nru datacube-1.8.7/datacube/ui/click.py datacube-1.8.9/datacube/ui/click.py --- datacube-1.8.7/datacube/ui/click.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/ui/click.py 2022-11-17 00:47:28.000000000 +0000 @@ -16,7 +16,7 @@ from datacube import config, __version__ from datacube.api.core import Datacube -from datacube.executor import get_executor, mk_celery_executor # type: ignore[attr-defined] +from datacube.executor import get_executor # type: ignore[attr-defined] from datacube.index import index_connect from datacube.ui.expression import parse_expressions @@ -77,7 +77,7 @@ try: msg = self.format(record) click.echo(msg, err=True) - except: # noqa: E772 pylint: disable=bare-except + except: # pylint: disable=bare-except # noqa: E722 self.handleError(record) @@ -270,7 +270,6 @@ 'serial': lambda _: get_executor(None, None), 'multiproc': lambda workers: get_executor(None, int(workers)), 'distributed': lambda addr: get_executor(addr, True), - 'celery': lambda addr: mk_celery_executor(*parse_endpoint(addr)) } EXECUTOR_TYPES['dask'] = EXECUTOR_TYPES['distributed'] # Add alias "dask" for distributed @@ -361,3 +360,14 @@ f = click.argument('expressions', callback=my_parse, nargs=-1)(f) return f + + +def print_help_msg(command): + with click.Context(command) as ctx: + click.echo(command.get_help(ctx)) + + +def exit_on_empty_file(read_files_list): + if len(read_files_list) == 0: + click.echo("All files are empty, exit") + sys.exit(1) diff -Nru datacube-1.8.7/datacube/utils/changes.py datacube-1.8.9/datacube/utils/changes.py --- datacube-1.8.7/datacube/utils/changes.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/utils/changes.py 2022-11-17 00:47:28.000000000 +0000 @@ -5,8 +5,10 @@ """ Validation of document/dictionary changes. """ +import numpy + from itertools import zip_longest -from typing import Any, Callable, List, Mapping, Sequence, Tuple, Union +from typing import cast, Any, Callable, List, Mapping, Sequence, Tuple, Union # Type that can be checked for changes. # (MyPy approximation without recursive references) @@ -14,6 +16,7 @@ # More accurate recursive definition: # Changable = Union[str, int, None, Sequence["Changable"], Mapping[str, "Changable"]] + def contains(v1: Changable, v2: Changable, case_sensitive: bool = False) -> bool: """ Check that v1 is a superset of v2. @@ -59,7 +62,7 @@ def get_doc_changes(original: Changable, new: Changable, base_prefix: Offset = () - ) -> List[Change]: + ) -> List[Change]: """ Return a list of `changed fields` between two dict structures. 
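
The next ``datacube/utils/changes.py`` hunk makes ``get_doc_changes`` compare tuples element-wise (via ``numpy.array_equal``) instead of reporting every tuple/list pair as a change. A small sketch of the resulting behaviour, assuming the usual recursion over dict keys and the existing short-circuit on exact equality earlier in the function::

    from datacube.utils.changes import get_doc_changes

    # Equal values held in different sequence types no longer register as a change.
    assert get_doc_changes({'resolution': (10, -10)},
                           {'resolution': [10, -10]}) == []

    # Genuinely different values still do, reported as (offset, old, new).
    changed = get_doc_changes({'resolution': (10, -10)},
                              {'resolution': (10, -20)})
    print(changed)   # expected: [(('resolution',), (10, -10), (10, -20))]
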
@@ -89,6 +92,9 @@ elif isinstance(original, list) and isinstance(new, list): for idx, (orig_item, new_item) in enumerate(zip_longest(original, new)): changed_fields.extend(get_doc_changes(orig_item, new_item, base_prefix + (idx, ))) + elif isinstance(original, tuple) or isinstance(new, tuple): + if not numpy.array_equal(cast(Sequence[Any], original), cast(Sequence[Any], new)): + changed_fields.append((base_prefix, original, new)) else: changed_fields.append((base_prefix, original, new)) @@ -120,6 +126,7 @@ AllowPolicy = Callable[[Offset, Offset, ChangedValue, ChangedValue], bool] + def allow_truncation(key: Offset, offset: Offset, old_value: ChangedValue, new_value: ChangedValue) -> bool: return bool(offset) and key == offset[:-1] and new_value == MISSING @@ -146,7 +153,7 @@ def classify_changes(changes: List[Change], allowed_changes: Mapping[Offset, AllowPolicy] - ) -> Tuple[List[Change], List[Change]]: + ) -> Tuple[List[Change], List[Change]]: """ Classify list of changes into good(allowed) and bad(not allowed) based on allowed changes. diff -Nru datacube-1.8.7/datacube/utils/dask.py datacube-1.8.9/datacube/utils/dask.py --- datacube-1.8.7/datacube/utils/dask.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/utils/dask.py 2022-11-17 00:47:28.000000000 +0000 @@ -257,12 +257,11 @@ def _save_blob_to_s3(data: Union[bytes, str], url: str, profile: Optional[str] = None, - creds = None, + creds=None, region_name: Optional[str] = None, with_deps=None, **kw) -> Tuple[str, bool]: from botocore.errorfactory import ClientError - from botocore.credentials import ReadOnlyCredentials from botocore.exceptions import BotoCoreError from .aws import s3_dump, s3_client diff -Nru datacube-1.8.7/datacube/utils/dates.py datacube-1.8.9/datacube/utils/dates.py --- datacube-1.8.7/datacube/utils/dates.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/utils/dates.py 2022-11-17 00:47:28.000000000 +0000 @@ -135,7 +135,7 @@ def _mk_parse_time() -> Callable[[Union[str, datetime]], datetime]: try: - import ciso8601 # pylint: disable=wrong-import-position + import ciso8601 # pylint: disable=wrong-import-position # noqa: F401 return _parse_time_ciso8601 except ImportError: # pragma: no cover return _parse_time_generic # pragma: no cover diff -Nru datacube-1.8.7/datacube/utils/documents.py datacube-1.8.9/datacube/utils/documents.py --- datacube-1.8.7/datacube/utils/documents.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/utils/documents.py 2022-11-17 00:47:28.000000000 +0000 @@ -18,6 +18,7 @@ from urllib.request import urlopen from typing import Dict, Any, Mapping from copy import deepcopy +from uuid import UUID import numpy import toolz # type: ignore[import] @@ -262,7 +263,7 @@ NoDatesSafeLoader.remove_implicit_resolver('tag:yaml.org,2002:timestamp') -class InvalidDocException(Exception): +class InvalidDocException(Exception): # noqa: N818 pass @@ -372,6 +373,7 @@ self._doc_without = None self._sources_path = ('lineage', 'source_datasets') self._sources = None + self._doc_uuid = None @property def doc(self): @@ -386,7 +388,11 @@ @property def id(self): - return self._doc.get('id', None) + if not self._doc_uuid: + doc_id = self._doc.get('id', None) + if doc_id: + self._doc_uuid = doc_id if isinstance(doc_id, UUID) else UUID(doc_id) + return self._doc_uuid @property def sources(self): diff -Nru datacube-1.8.7/datacube/utils/geometry/_base.py datacube-1.8.9/datacube/utils/geometry/_base.py --- datacube-1.8.7/datacube/utils/geometry/_base.py 2022-06-07 00:39:59.000000000 
+0000 +++ datacube-1.8.9/datacube/utils/geometry/_base.py 2022-11-17 00:47:28.000000000 +0000 @@ -116,25 +116,25 @@ (p1[1], p2[1])) -def _make_crs_key(crs_spec: Union[str, _CRS]) -> str: +def _make_crs_key(crs_spec: Union[str, int, _CRS]) -> str: if isinstance(crs_spec, str): normed_epsg = crs_spec.upper() if normed_epsg.startswith("EPSG:"): return normed_epsg return crs_spec + if isinstance(crs_spec, int): + return f"EPSG:{crs_spec}" return crs_spec.to_wkt() @cachetools.cached({}, key=_make_crs_key) # type: ignore[misc] -def _make_crs(crs: Union[str, _CRS]) -> Tuple[_CRS, str, Optional[int]]: - if isinstance(crs, str): - crs = _CRS.from_user_input(crs) - epsg = crs.to_epsg() - if epsg is not None: - crs_str = f"EPSG:{epsg}" - else: - crs_str = crs.to_wkt() - return (crs, crs_str, crs.to_epsg()) +def _make_crs(crs: Union[str, int, _CRS]) -> Tuple[_CRS, str, Optional[int]]: + epsg = False + crs = _CRS.from_user_input(crs) + crs_str = crs.srs + if crs_str.upper().startswith("EPSG:"): + epsg = int(crs_str.split(":", maxsplit=1)[-1]) + return (crs, crs_str, epsg) def _make_crs_transform_key(from_crs, to_crs, always_xy): @@ -172,19 +172,19 @@ self._crs, self._str, self._epsg = _make_crs(_CRS.from_dict(crs_spec)) else: try: - epsg = crs_spec.to_epsg() - except AttributeError: - epsg = None - if epsg is not None: - self._crs, self._str, self._epsg = _make_crs(f"EPSG:{epsg}") - return - try: wkt = crs_spec.to_wkt() except AttributeError: wkt = None if wkt is not None: self._crs, self._str, self._epsg = _make_crs(wkt) return + try: + epsg = crs_spec.to_epsg() + except AttributeError: + epsg = None + if epsg is not None: + self._crs, self._str, self._epsg = _make_crs(epsg) + return raise CRSError( "Expect string or any object with `.to_epsg()` or `.to_wkt()` methods" @@ -213,11 +213,14 @@ """ EPSG Code of the CRS or None """ + if self._epsg is not False: + return self._epsg + self._epsg = self._crs.to_epsg() return self._epsg @property def epsg(self) -> Optional[int]: - return self._epsg + return self.to_epsg() @property def semi_major_axis(self): diff -Nru datacube-1.8.7/datacube/utils/geometry/tools.py datacube-1.8.9/datacube/utils/geometry/tools.py --- datacube-1.8.7/datacube/utils/geometry/tools.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/utils/geometry/tools.py 2022-11-17 00:47:28.000000000 +0000 @@ -380,7 +380,7 @@ return pt_tr -def compute_axis_overlap(Ns: int, Nd: int, s: float, t: float) -> Tuple[slice, slice]: +def compute_axis_overlap(Ns: int, Nd: int, s: float, t: float) -> Tuple[slice, slice]: # noqa: N803 """ s, t define linear transform from destination coordinate space to source >> x_s = s * x_d + t @@ -435,7 +435,7 @@ return (src, dst) -def box_overlap(src_shape, dst_shape, ST, tol): +def box_overlap(src_shape, dst_shape, ST, tol): # noqa: N803 """ Given two image planes whose coordinate systems are related via scale and translation only, find overlapping regions within both. 
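
The ``datacube/utils/geometry/_base.py`` changes above defer EPSG resolution: ``_make_crs`` no longer calls ``to_epsg()`` eagerly (which can trigger a slow PROJ database lookup), the cache key now accepts plain integer EPSG codes, and ``CRS.epsg`` / ``CRS.to_epsg()`` resolve and memoise the code on first access. A small sketch of the resulting behaviour, assuming datacube 1.8.9 and pyproj are available::

    from pyproj import CRS as PyprojCRS
    from datacube.utils.geometry import CRS

    crs = CRS("EPSG:3577")
    # The EPSG code is read straight off the "EPSG:" prefix, no PROJ lookup required.
    print(crs.epsg)            # 3577

    wrapped = CRS(PyprojCRS.from_epsg(4326))
    # CRS-like objects are now taken via .to_wkt() first; the EPSG code is only
    # computed (and then cached) when .epsg / .to_epsg() is actually accessed.
    print(wrapped.epsg)        # 4326
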
diff -Nru datacube-1.8.7/datacube/utils/_misc.py datacube-1.8.9/datacube/utils/_misc.py --- datacube-1.8.7/datacube/utils/_misc.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/utils/_misc.py 2022-11-17 00:47:28.000000000 +0000 @@ -8,7 +8,7 @@ import os -class DatacubeException(Exception): +class DatacubeException(Exception): # noqa: N818 """Your Data Cube has malfunctioned""" pass diff -Nru datacube-1.8.7/datacube/utils/py.py datacube-1.8.9/datacube/utils/py.py --- datacube-1.8.7/datacube/utils/py.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/utils/py.py 2022-11-17 00:47:28.000000000 +0000 @@ -43,7 +43,7 @@ yield -class cached_property(object): # pylint: disable=invalid-name +class cached_property(object): # pylint: disable=invalid-name # noqa: N801 """ A property that is only computed once per instance and then replaces itself with an ordinary attribute. Deleting the attribute resets the diff -Nru datacube-1.8.7/datacube/_version.py datacube-1.8.9/datacube/_version.py --- datacube-1.8.7/datacube/_version.py 2022-06-07 00:40:30.000000000 +0000 +++ datacube-1.8.9/datacube/_version.py 2022-11-17 00:48:13.000000000 +0000 @@ -1,5 +1,5 @@ # coding: utf-8 # file generated by setuptools_scm # don't change, don't track in version control -version = '1.8.7' -version_tuple = (1, 8, 7) +__version__ = version = '1.8.9' +__version_tuple__ = version_tuple = (1, 8, 9) diff -Nru datacube-1.8.7/datacube/virtual/expr.py datacube-1.8.9/datacube/virtual/expr.py --- datacube-1.8.7/datacube/virtual/expr.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/virtual/expr.py 2022-11-17 00:47:28.000000000 +0000 @@ -67,11 +67,11 @@ @lark.v_args(inline=True) class FormulaEvaluator(lark.Transformer): - from operator import not_, or_, and_, xor # type: ignore[misc] - from operator import eq, ne, le, ge, lt, gt # type: ignore[misc] - from operator import add, sub, mul, truediv, floordiv # type: ignore[misc] - from operator import neg, pos, inv # type: ignore[misc] - from operator import mod, pow, lshift, rshift # type: ignore[misc] + from operator import not_, or_, and_, xor # type: ignore[misc] + from operator import eq, ne, le, ge, lt, gt # type: ignore[misc] + from operator import add, sub, mul, truediv, floordiv # type: ignore[misc] + from operator import neg, pos, inv # type: ignore[misc] + from operator import mod, pow, lshift, rshift # type: ignore[misc] float_literal = float int_literal = int @@ -114,6 +114,7 @@ return TypeEvaluator().transform(parser.parse(formula)) + def evaluate_data(formula, env, parser, evaluator): """ Evaluates a formula given a parser, a corresponding evaluator class, and an environment. 
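
``FormulaEvaluator`` above works by letting ``lark`` map grammar rule names straight onto ``operator`` functions: with ``@lark.v_args(inline=True)`` each rule's already-transformed children are passed positionally, so ``add = operator.add`` is all a rule handler needs to be. A self-contained sketch of the same pattern on a toy arithmetic grammar (not the virtual-product grammar itself)::

    import operator
    import lark

    GRAMMAR = """
        ?expr: expr "+" term   -> add
             | expr "-" term   -> sub
             | term
        ?term: term "*" atom   -> mul
             | term "/" atom   -> truediv
             | atom
        ?atom: NUMBER          -> float_literal
             | "(" expr ")"
        %import common.NUMBER
        %import common.WS
        %ignore WS
    """

    @lark.v_args(inline=True)
    class Evaluator(lark.Transformer):
        # Rule names resolve directly to operator functions, as FormulaEvaluator does.
        add = operator.add
        sub = operator.sub
        mul = operator.mul
        truediv = operator.truediv
        float_literal = float

    parser = lark.Lark(GRAMMAR, start="expr")
    print(Evaluator().transform(parser.parse("2 * (3 + 4)")))   # 14.0
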
diff -Nru datacube-1.8.7/datacube/virtual/impl.py datacube-1.8.9/datacube/virtual/impl.py --- datacube-1.8.7/datacube/virtual/impl.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/virtual/impl.py 2022-11-17 00:47:28.000000000 +0000 @@ -10,7 +10,7 @@ from abc import ABC, abstractmethod from collections.abc import Mapping, Sequence -from functools import reduce + from typing import Any, Dict, List, Optional, cast from typing import Mapping as TypeMapping @@ -22,7 +22,7 @@ import yaml from datacube import Datacube -from datacube.api.core import select_datasets_inside_polygon, output_geobox +from datacube.api.core import output_geobox from datacube.api.grid_workflow import _fast_slice from datacube.api.query import Query, query_group_by from datacube.model import Measurement, DatasetType @@ -38,7 +38,7 @@ from .utils import select_unique, select_keys, reject_keys, merge_search_terms -class VirtualProductException(Exception): +class VirtualProductException(Exception): # noqa: N818 """ Raised if the construction of the virtual product cannot be validated. """ diff -Nru datacube-1.8.7/datacube/virtual/transformations.py datacube-1.8.9/datacube/virtual/transformations.py --- datacube-1.8.7/datacube/virtual/transformations.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/virtual/transformations.py 2022-11-17 00:47:28.000000000 +0000 @@ -439,6 +439,7 @@ def year(time): return time.astype('datetime64[Y]') + def fiscal_year(time): """" This function supports group-by financial years @@ -448,10 +449,10 @@ return df.apply(lambda x: numpy.datetime64(str(x.to_period('Q-JUN').qyear))).values ds = xarray.apply_ufunc(convert_to_quarters, - time, - input_core_dims=[["time"]], - output_core_dims=[["time"]], - vectorize=True) + time, + input_core_dims=[["time"]], + output_core_dims=[["time"]], + vectorize=True) df = time['time'].to_series() years = df.apply(lambda x: numpy.datetime64(str(x.to_period('Q-JUN').qyear))).values diff -Nru datacube-1.8.7/datacube/virtual/utils.py datacube-1.8.9/datacube/virtual/utils.py --- datacube-1.8.7/datacube/virtual/utils.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/datacube/virtual/utils.py 2022-11-17 00:47:28.000000000 +0000 @@ -5,7 +5,6 @@ """ Utilities to facilitate virtual product implementation. """ import warnings -import math def select_unique(things): diff -Nru datacube-1.8.7/datacube.egg-info/PKG-INFO datacube-1.8.9/datacube.egg-info/PKG-INFO --- datacube-1.8.7/datacube.egg-info/PKG-INFO 2022-06-07 00:40:30.000000000 +0000 +++ datacube-1.8.9/datacube.egg-info/PKG-INFO 2022-11-17 00:48:13.000000000 +0000 @@ -1,148 +1,12 @@ Metadata-Version: 2.1 Name: datacube -Version: 1.8.7 +Version: 1.8.9 Summary: An analysis environment for satellite and other earth observation data Home-page: https://github.com/opendatacube/datacube-core Author: Open Data Cube Maintainer: Open Data Cube Maintainer-email: License: Apache License 2.0 -Description: Open Data Cube Core - =================== - - |Build Status| |Coverage Status| |Documentation Status| - - Overview - ======== - - The Open Data Cube Core provides an integrated gridded data - analysis environment for decades of analysis ready earth observation - satellite and related data from multiple satellite and other acquisition - systems. - - Documentation - ============= - - See the `user guide `__ for - installation and usage of the datacube, and for documentation of the API. - - `Join our Slack `__ if you need help - setting up or using the Open Data Cube. 
- - Please help us to keep the Open Data Cube community open and inclusive by - reading and following our `Code of Conduct `__. - - Requirements - ============ - - System - ~~~~~~ - - - PostgreSQL 10+ - - Python 3.8+ - - Developer setup - =============== - - 1. Clone: - - - ``git clone https://github.com/opendatacube/datacube-core.git`` - - 2. Create a Python environment for using the ODC. We recommend `conda `__ as the - easiest way to handle Python dependencies. - - :: - - conda create -n odc -c conda-forge python=3.8 datacube pre_commit - conda activate odc - - 3. Install a develop version of datacube-core. - - :: - - cd datacube-core - pip install --upgrade -e . - - 4. Install the `pre-commit `__ hooks to help follow ODC coding - conventions when committing with git. - - :: - - pre-commit install - - 5. Run unit tests + PyLint - ``./check-code.sh`` - - (this script approximates what is run by Travis. You can - alternatively run ``pytest`` yourself). Some test dependencies may need to be installed, attempt to install these using: - - ``pip install --upgrade -e '.[test]'`` - - If install for these fails please lodge them as issues. - - 6. **(or)** Run all tests, including integration tests. - - ``./check-code.sh integration_tests`` - - - Assumes a password-less Postgres database running on localhost called - - ``agdcintegration`` - - - Otherwise copy ``integration_tests/agdcintegration.conf`` to - ``~/.datacube_integration.conf`` and edit to customise. - - - Alternatively one can use the ``opendatacube/datacube-tests`` docker image to run - tests. This docker includes database server pre-configured for running - integration tests. Add ``--with-docker`` command line option as a first argument - to ``./check-code.sh`` script. - - :: - - ./check-code.sh --with-docker integration_tests - - - Developer setup on Ubuntu - ~~~~~~~~~~~~~~~~~~~~~~~~~ - - Building a Python virtual environment on Ubuntu suitable for development work. - - Install dependencies: - - :: - - sudo apt-get update - sudo apt-get install -y \ - autoconf automake build-essential make cmake \ - graphviz \ - python3-venv \ - python3-dev \ - libpq-dev \ - libyaml-dev \ - libnetcdf-dev \ - libudunits2-dev - - - Build the python virtual environment: - - :: - - pyenv="${HOME}/.envs/odc" # Change to suit your needs - mkdir -p "${pyenv}" - python3 -m venv "${pyenv}" - source "${pyenv}/bin/activate" - pip install -U pip wheel cython numpy - pip install -e '.[dev]' - pip install flake8 mypy pylint autoflake black - - - .. |Build Status| image:: https://github.com/opendatacube/datacube-core/workflows/build/badge.svg - :target: https://github.com/opendatacube/datacube-core/actions - .. |Coverage Status| image:: https://codecov.io/gh/opendatacube/datacube-core/branch/develop/graph/badge.svg - :target: https://codecov.io/gh/opendatacube/datacube-core - .. 
|Documentation Status| image:: https://readthedocs.org/projects/datacube-core/badge/?version=latest - :target: http://datacube-core.readthedocs.org/en/latest/ - Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Intended Audience :: Developers @@ -158,12 +22,12 @@ Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 Classifier: Topic :: Scientific/Engineering :: GIS Classifier: Topic :: Scientific/Engineering :: Information Analysis Requires-Python: >=3.8.0 Description-Content-Type: text/x-rst Provides-Extra: all -Provides-Extra: celery Provides-Extra: cf Provides-Extra: dev Provides-Extra: distributed @@ -171,3 +35,151 @@ Provides-Extra: performance Provides-Extra: s3 Provides-Extra: test +License-File: LICENSE + +Open Data Cube Core +=================== + +.. image:: https://github.com/opendatacube/datacube-core/workflows/build/badge.svg + :alt: Build Status + :target: https://github.com/opendatacube/datacube-core/actions + +.. image:: https://codecov.io/gh/opendatacube/datacube-core/branch/develop/graph/badge.svg + :alt: Coverage Status + :target: https://codecov.io/gh/opendatacube/datacube-core + +.. image:: https://readthedocs.org/projects/datacube-core/badge/?version=latest + :alt: Documentation Status + :target: http://datacube-core.readthedocs.org/en/latest/ + +Overview +======== + +The Open Data Cube Core provides an integrated gridded data +analysis environment for decades of analysis ready earth observation +satellite and related data from multiple satellite and other acquisition +systems. + +Documentation +============= + +See the `user guide `__ for +installation and usage of the datacube, and for documentation of the API. + +`Join our Slack `__ if you need help +setting up or using the Open Data Cube. + +Please help us to keep the Open Data Cube community open and inclusive by +reading and following our `Code of Conduct `__. + +Requirements +============ + +System +~~~~~~ + +- PostgreSQL 10+ +- Python 3.8+ + +Developer setup +=============== + +1. Clone: + + - ``git clone https://github.com/opendatacube/datacube-core.git`` + +2. Create a Python environment for using the ODC. We recommend `conda `__ as the + easiest way to handle Python dependencies. + +:: + + conda create -n odc -c conda-forge python=3.8 datacube pre_commit + conda activate odc + +3. Install a develop version of datacube-core. + +:: + + cd datacube-core + pip install --upgrade -e . + +4. Install the `pre-commit `__ hooks to help follow ODC coding + conventions when committing with git. + +:: + + pre-commit install + +5. Run unit tests + PyLint + ``./check-code.sh`` + + (this script approximates what is run by Travis. You can + alternatively run ``pytest`` yourself). Some test dependencies may need to be installed, attempt to install these using: + + ``pip install --upgrade -e '.[test]'`` + + If install for these fails please lodge them as issues. + +6. **(or)** Run all tests, including integration tests. + + ``./check-code.sh integration_tests`` + + - Assumes a password-less Postgres database running on localhost called + + ``agdcintegration`` + + - Otherwise copy ``integration_tests/agdcintegration.conf`` to + ``~/.datacube_integration.conf`` and edit to customise. + + +Alternatively one can use the ``opendatacube/datacube-tests`` docker image to run +tests. This docker includes database server pre-configured for running +integration tests. 
Add ``--with-docker`` command line option as a first argument +to ``./check-code.sh`` script. + +:: + + ./check-code.sh --with-docker integration_tests + + +To run individual test in docker container + +:: + + docker run -ti -v /home/ubuntu/datacube-core:/code opendatacube/datacube-tests:latest pytest integration_tests/test_filename.py::test_function_name + + +Developer setup on Ubuntu +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Building a Python virtual environment on Ubuntu suitable for development work. + +Install dependencies: + +:: + + sudo apt-get update + sudo apt-get install -y \ + autoconf automake build-essential make cmake \ + graphviz \ + python3-venv \ + python3-dev \ + libpq-dev \ + libyaml-dev \ + libnetcdf-dev \ + libudunits2-dev + + +Build the python virtual environment: + +:: + + pyenv="${HOME}/.envs/odc" # Change to suit your needs + mkdir -p "${pyenv}" + python3 -m venv "${pyenv}" + source "${pyenv}/bin/activate" + pip install -U pip wheel cython numpy + pip install -e '.[dev]' + pip install flake8 mypy pylint autoflake black + + diff -Nru datacube-1.8.7/datacube.egg-info/requires.txt datacube-1.8.9/datacube.egg-info/requires.txt --- datacube-1.8.7/datacube.egg-info/requires.txt 2022-06-07 00:40:30.000000000 +0000 +++ datacube-1.8.9/datacube.egg-info/requires.txt 2022-11-17 00:48:13.000000000 +0000 @@ -1,3 +1,4 @@ +GeoAlchemy2 affine cachetools click>=5.0 @@ -5,7 +6,7 @@ dask[array] distributed jsonschema -lark-parser>=0.6.7 +lark netcdf4 numpy pandas @@ -13,24 +14,22 @@ pyproj>=2.5 python-dateutil pyyaml -rasterio>=1.0.2 +rasterio>=1.3.2 shapely>=1.6.4 sqlalchemy toolz -xarray>=0.9 +xarray!=2022.6.0,>=0.9 [all] Sphinx boto3 botocore bottleneck -celery<5,>=4 ciso8601 compliance-checker>=4.0.0 dask[distributed] distributed hypothesis -kombu moto pycodestyle pylint @@ -39,18 +38,12 @@ pytest-httpserver pytest-timeout recommonmark -redis setuptools setuptools_scm[toml] sphinx-click sphinx_autodoc_typehints sphinx_rtd_theme -[celery] -celery<5,>=4 -kombu -redis - [cf] compliance-checker>=4.0.0 diff -Nru datacube-1.8.7/datacube.egg-info/SOURCES.txt datacube-1.8.9/datacube.egg-info/SOURCES.txt --- datacube-1.8.7/datacube.egg-info/SOURCES.txt 2022-06-07 00:40:30.000000000 +0000 +++ datacube-1.8.9/datacube.egg-info/SOURCES.txt 2022-11-17 00:48:13.000000000 +0000 @@ -1,5 +1,6 @@ .coveragerc .dockerignore +.doctor-rst.yaml .editorconfig .gitattributes .gitignore @@ -22,13 +23,15 @@ readthedocs.yml setup.cfg setup.py +spellcheck.yaml +wordlist.txt datacube/__init__.py datacube/__main__.py -datacube/_celery_runner.py datacube/_version.py datacube/config.py datacube/executor.py datacube/helpers.py +datacube/py.typed datacube.egg-info/PKG-INFO datacube.egg-info/SOURCES.txt datacube.egg-info/dependency_links.txt @@ -59,6 +62,7 @@ datacube/drivers/postgis/_dynamic.py datacube/drivers/postgis/_fields.py datacube/drivers/postgis/_schema.py +datacube/drivers/postgis/_spatial.py datacube/drivers/postgis/sql.py datacube/drivers/postgis/samples/range-tests-explicit.sql datacube/drivers/postgis/samples/range-tests-scalar.sql @@ -103,12 +107,14 @@ datacube/index/postgis/_datasets.py datacube/index/postgis/_metadata_types.py datacube/index/postgis/_products.py +datacube/index/postgis/_transaction.py datacube/index/postgis/_users.py datacube/index/postgis/index.py datacube/index/postgres/__init__.py datacube/index/postgres/_datasets.py datacube/index/postgres/_metadata_types.py datacube/index/postgres/_products.py +datacube/index/postgres/_transaction.py datacube/index/postgres/_users.py 
datacube/index/postgres/index.py datacube/model/__init__.py @@ -189,7 +195,7 @@ integration_tests/example-ls5-nbar_606.yaml integration_tests/extensive-eo-metadata.yaml integration_tests/test_3d.py -integration_tests/test_celery_runner.py +integration_tests/test_cli_output.py integration_tests/test_config_tool.py integration_tests/test_dataset_add.py integration_tests/test_double_ingestion.py @@ -204,23 +210,33 @@ integration_tests/data/dataset_add/datasets.yml integration_tests/data/dataset_add/datasets_bad1.yml integration_tests/data/dataset_add/datasets_eo3.yml +integration_tests/data/dataset_add/datasets_no_id.yml +integration_tests/data/dataset_add/empty_file.yml integration_tests/data/dataset_add/metadata.yml integration_tests/data/dataset_add/products.yml +integration_tests/data/eo3/ard_ls8.odc-product.yaml +integration_tests/data/eo3/eo3_landsat_ard.odc-type.yaml +integration_tests/data/eo3/ga_ls_wo_3.odc-product.yaml +integration_tests/data/eo3/ls8_dataset.yaml +integration_tests/data/eo3/ls8_dataset2.yaml +integration_tests/data/eo3/ls8_dataset3.yaml +integration_tests/data/eo3/ls8_dataset4.yaml +integration_tests/data/eo3/s2_africa_dataset.yaml +integration_tests/data/eo3/s2_africa_product.yaml +integration_tests/data/eo3/wo_dataset.yaml +integration_tests/data/eo3/wo_ds_with_lineage.odc-metadata.yaml integration_tests/data/ingester/invalid_config.yaml integration_tests/data/ingester/invalid_src_name.yaml -integration_tests/data/memory/ard_ls8.odc-product.yaml -integration_tests/data/memory/eo3_landsat_ard.odc-type.yaml -integration_tests/data/memory/ga_ls_wo_3.odc-product.yaml -integration_tests/data/memory/ls8_dataset.yaml -integration_tests/data/memory/wo_dataset.yaml -integration_tests/data/memory/wo_ds_with_lineage.odc-metadata.yaml integration_tests/index/__init__.py +integration_tests/index/search_utils.py integration_tests/index/test_config_docs.py integration_tests/index/test_index_data.py integration_tests/index/test_memory_index.py integration_tests/index/test_null_index.py integration_tests/index/test_pluggable_indexes.py -integration_tests/index/test_search.py +integration_tests/index/test_postgis_index.py +integration_tests/index/test_search_eo3.py +integration_tests/index/test_search_legacy.py integration_tests/index/test_update_columns.py tests/__init__.py tests/conftest.py @@ -253,6 +269,9 @@ tests/api/test_masking.py tests/api/test_query.py tests/api/test_virtual.py +tests/data/ds_eo.yaml +tests/data/ds_eo3.yml +tests/data/ds_non-geo.yaml tests/data/eo3.yaml tests/data/ingest_config.yaml tests/data/multi_doc.nc @@ -309,6 +328,7 @@ tests/index/__init__.py tests/index/test_api_index_dataset.py tests/index/test_fields.py +tests/index/test_hl_index.py tests/index/test_query.py tests/index/test_validate_dataset_type.py tests/scripts/__init__.py diff -Nru datacube-1.8.7/debian/changelog datacube-1.8.9/debian/changelog --- datacube-1.8.7/debian/changelog 2022-07-31 10:00:00.000000000 +0000 +++ datacube-1.8.9/debian/changelog 2023-03-18 12:00:00.000000000 +0000 @@ -1,3 +1,15 @@ +datacube (1.8.9-0~jammy1) jammy; urgency=medium + + * Fix lark dependency. + + -- Angelos Tzotsos Sat, 18 Mar 2023 14:00:00 +0200 + +datacube (1.8.9-0~jammy0) jammy; urgency=medium + + * New upstream version. + + -- Angelos Tzotsos Sat, 18 Mar 2023 14:00:00 +0200 + datacube (1.8.7-0~jammy1) jammy; urgency=medium * Fix dependency to lark-parser. 
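
The tightened pins above (``rasterio>=1.3.2``, ``xarray!=2022.6.0``, the ``lark-parser`` to ``lark`` rename, and the removal of the celery extra) are easy to sanity-check in an existing environment. A small sketch, assuming the ``packaging`` library is installed::

    from importlib.metadata import version
    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    assert Version(version("rasterio")) in SpecifierSet(">=1.3.2")
    assert Version(version("xarray")) in SpecifierSet(">=0.9,!=2022.6.0")
    version("lark")   # raises PackageNotFoundError if only the old lark-parser is present
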
diff -Nru datacube-1.8.7/debian/control datacube-1.8.9/debian/control --- datacube-1.8.7/debian/control 2022-07-31 10:00:00.000000000 +0000 +++ datacube-1.8.9/debian/control 2023-03-18 12:00:00.000000000 +0000 @@ -25,7 +25,7 @@ python3-netcdf4, python3-numpy, python3-psycopg2, - python3-lark-parser | python3-lark, + python3-lark, python3-dateutil, python3-yaml, python3-rasterio, diff -Nru datacube-1.8.7/.dockerignore datacube-1.8.9/.dockerignore --- datacube-1.8.7/.dockerignore 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/.dockerignore 2022-11-17 00:47:28.000000000 +0000 @@ -79,4 +79,4 @@ #Local Visual Studio Code configurations .vscode/ -.env \ No newline at end of file +.env diff -Nru datacube-1.8.7/.doctor-rst.yaml datacube-1.8.9/.doctor-rst.yaml --- datacube-1.8.7/.doctor-rst.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/.doctor-rst.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,105 @@ +rules: + avoid_repetetive_words: ~ + blank_line_after_anchor: ~ + blank_line_after_directive: ~ + blank_line_before_directive: ~ + composer_dev_option_not_at_the_end: ~ + correct_code_block_directive_based_on_the_content: ~ + deprecated_directive_should_have_version: ~ + ensure_exactly_one_space_between_link_definition_and_link: ~ + ensure_link_definition_contains_valid_url: ~ + ensure_order_of_code_blocks_in_configuration_block: ~ + extend_abstract_controller: ~ + # extension_xlf_instead_of_xliff: ~ + indention: ~ + lowercase_as_in_use_statements: ~ + max_blank_lines: + max: 2 + max_colons: ~ + no_app_console: ~ + no_blank_line_after_filepath_in_php_code_block: ~ + no_blank_line_after_filepath_in_twig_code_block: ~ + no_blank_line_after_filepath_in_xml_code_block: ~ + no_blank_line_after_filepath_in_yaml_code_block: ~ + no_brackets_in_method_directive: ~ + no_composer_req: ~ + no_directive_after_shorthand: ~ + no_explicit_use_of_code_block_php: ~ + no_inheritdoc: ~ + no_namespace_after_use_statements: ~ + no_php_open_tag_in_code_block_php_directive: ~ + no_space_before_self_xml_closing_tag: ~ + only_backslashes_in_namespace_in_php_code_block: ~ + only_backslashes_in_use_statements_in_php_code_block: ~ + ordered_use_statements: ~ + php_prefix_before_bin_console: ~ + replace_code_block_types: ~ + short_array_syntax: ~ + space_between_label_and_link_in_doc: ~ + space_between_label_and_link_in_ref: ~ + string_replacement: ~ + typo: ~ + unused_links: ~ + use_deprecated_directive_instead_of_versionadded: ~ + use_https_xsd_urls: ~ + valid_inline_highlighted_namespaces: ~ + valid_use_statements: ~ + versionadded_directive_should_have_version: ~ + yaml_instead_of_yml_suffix: ~ + yarn_dev_option_at_the_end: ~ +# no_app_bundle: ~ + + # master + versionadded_directive_major_version: + major_version: 6 + + versionadded_directive_min_version: + min_version: '6.0' + + deprecated_directive_major_version: + major_version: 6 + + deprecated_directive_min_version: + min_version: '6.0' + +# do not report as violation +whitelist: + regex: + - '/FOSUserBundle(.*)\.yml/' + - '/``.yml``/' + - '/(.*)\.orm\.yml/' # currently DoctrineBundle only supports .yml + - '/rst-class/' + - /docker-compose\.yml/ + lines: + - 'in config files, so the old ``app/config/config_dev.yml`` goes to' + - '#. The most important config file is ``app/config/services.yml``, which now is' + - 'code in production without a proxy, it becomes trivially easy to abuse your' + - '.. 
_`EasyDeployBundle`: https://github.com/EasyCorp/easy-deploy-bundle' + - 'The bin/console Command' + - '# username is your full Gmail or Google Apps email address' + - '.. _`LDAP injection`: http://projects.webappsec.org/w/page/13246947/LDAP%20Injection' + - '.. versionadded:: 1.9.0' # Encore + - '.. versionadded:: 0.28.4' # Encore + - '.. versionadded:: 2.4.0' # SwiftMailer + - '.. versionadded:: 1.30' # Twig + - '.. versionadded:: 1.35' # Twig + - '.. versionadded:: 1.11' # Messenger (Middleware / DoctrineBundle) + - '.. versionadded:: 1.18' # Flex in setup/upgrade_minor.rst + - '.. versionadded:: 1.0.0' # Encore + - '0 => 123' # assertion for var_dumper - components/var_dumper.rst + - '1 => "foo"' # assertion for var_dumper - components/var_dumper.rst + - '123,' # assertion for var_dumper - components/var_dumper.rst + - '"foo",' # assertion for var_dumper - components/var_dumper.rst + - '$var .= "Because of this `\xE9` octet (\\xE9),\n";' + - "`Deploying Symfony 4 Apps on Heroku`_." + - ".. _`Deploying Symfony 4 Apps on Heroku`: https://devcenter.heroku.com/articles/deploying-symfony4" + - "// 224, 165, 141, 224, 164, 164, 224, 165, 135])" + - '.. versionadded:: 0.2' # MercureBundle + - 'provides a ``loginUser()`` method to simulate logging in in your functional' + - '.. code-block:: twig' + - '.. versionadded:: 3.6' # MonologBundle + - '// bin/console' + - 'End to End Tests (E2E)' + - '.. code-block:: php' + - '.. _`a feature to test applications using Mercure`: https://github.com/symfony/panther#creating-isolated-browsers-to-test-apps-using-mercure-or-websocket' + - '.. End to End Tests (E2E)' diff -Nru datacube-1.8.7/integration_tests/conftest.py datacube-1.8.9/integration_tests/conftest.py --- datacube-1.8.7/integration_tests/conftest.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/conftest.py 2022-11-17 00:47:28.000000000 +0000 @@ -56,28 +56,185 @@ ) settings.load_profile('opendatacube') -MEMORY_DRIVER_TESTDIR = INTEGRATION_TESTS_DIR / 'data' / 'memory' +EO3_TESTDIR = INTEGRATION_TESTS_DIR / 'data' / 'eo3' -def get_memory_test_data_doc(path): +def get_eo3_test_data_doc(path): from datacube.utils import read_documents - for path, doc in read_documents(MEMORY_DRIVER_TESTDIR / path): + for path, doc in read_documents(EO3_TESTDIR / path): return doc @pytest.fixture +def dataset_with_lineage_doc(): + return ( + get_eo3_test_data_doc("wo_ds_with_lineage.odc-metadata.yaml"), + 's3://dea-public-data/derivative/ga_ls_wo_3/1-6-0/090/086/2016/05/12/' + 'ga_ls_wo_3_090086_2016-05-12_final.stac-item.json' + ) + + +@pytest.fixture +def eo3_ls8_dataset_doc(): + return ( + get_eo3_test_data_doc("ls8_dataset.yaml"), + 's3://dea-public-data/baseline/ga_ls8c_ard_3/090/086/2016/05/12/' + 'ga_ls8c_ard_3-0-0_090086_2016-05-12_final.stac-item.json' + ) + + +@pytest.fixture +def eo3_ls8_dataset2_doc(): + return ( + get_eo3_test_data_doc("ls8_dataset2.yaml"), + 's3://dea-public-data/baseline/ga_ls8c_ard_3/090/086/2016/05/28/' + 'ga_ls8c_ard_3-0-0_090086_2016-05-28_final.stac-item.json' + ) + + +@pytest.fixture +def eo3_ls8_dataset3_doc(): + return ( + get_eo3_test_data_doc("ls8_dataset3.yaml"), + 's3://dea-public-data/baseline/ga_ls8c_ard_3/101/077/2013/04/04/' + 'ga_ls8c_ard_3-0-0_101077_2013-04-04_final.stac-item.json' + ) + + +@pytest.fixture +def eo3_ls8_dataset4_doc(): + return ( + get_eo3_test_data_doc("ls8_dataset4.yaml"), + 's3://dea-public-data/baseline/ga_ls8c_ard_3/101/077/2013/07/21/' + 'ga_ls8c_ard_3-0-0_101077_2013-07-21_final.stac-item.json' + ) + + 
+@pytest.fixture +def eo3_wo_dataset_doc(): + return ( + get_eo3_test_data_doc("wo_dataset.yaml"), + 's3://dea-public-data/derivative/ga_ls_wo_3/1-6-0/090/086/2016/05/12/' + 'ga_ls_wo_3_090086_2016-05-12_final.stac-item.json' + ) + + +@pytest.fixture +def eo3_africa_dataset_doc(): + return ( + get_eo3_test_data_doc("s2_africa_dataset.yaml"), + 's3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/37/M/CQ/' + '2022/8/S2A_37MCQ_20220808_0_L2A/S2A_37MCQ_20220808_0_L2A.json' + ) + + +@pytest.fixture +def datasets_with_unembedded_lineage_doc(): + return [ + ( + get_eo3_test_data_doc("ls8_dataset.yaml"), + 's3://dea-public-data/baseline/ga_ls8c_ard_3/090/086/2016/05/12/' + 'ga_ls8c_ard_3-0-0_090086_2016-05-12_final.stac-item.json' + ), + ( + get_eo3_test_data_doc("wo_dataset.yaml"), + 's3://dea-public-data/derivative/ga_ls_wo_3/1-6-0/090/086/2016/05/12/' + 'ga_ls_wo_3_090086_2016-05-12_final.stac-item.json' + ), + ] + + +@pytest.fixture def extended_eo3_metadata_type_doc(): - return get_memory_test_data_doc("eo3_landsat_ard.odc-type.yaml") + return get_eo3_test_data_doc("eo3_landsat_ard.odc-type.yaml") @pytest.fixture def extended_eo3_product_doc(): - return get_memory_test_data_doc("ard_ls8.odc-product.yaml") + return get_eo3_test_data_doc("ard_ls8.odc-product.yaml") @pytest.fixture def base_eo3_product_doc(): - return get_memory_test_data_doc("ga_ls_wo_3.odc-product.yaml") + return get_eo3_test_data_doc("ga_ls_wo_3.odc-product.yaml") + + +@pytest.fixture +def africa_s2_product_doc(): + return get_eo3_test_data_doc("s2_africa_product.yaml") + + +def doc_to_ds(index, product_name, ds_doc, ds_path): + from datacube.index.hl import Doc2Dataset + resolver = Doc2Dataset(index, products=[product_name], verify_lineage=False) + ds, err = resolver(ds_doc, ds_path) + assert err is None and ds is not None + index.datasets.add(ds) + return index.datasets.get(ds.id) + + +@pytest.fixture +def extended_eo3_metadata_type(index, extended_eo3_metadata_type_doc): + return index.metadata_types.add( + index.metadata_types.from_doc(extended_eo3_metadata_type_doc) + ) + + +@pytest.fixture +def ls8_eo3_product(index, extended_eo3_metadata_type, extended_eo3_product_doc): + return index.products.add_document(extended_eo3_product_doc) + + +@pytest.fixture +def wo_eo3_product(index, base_eo3_product_doc): + return index.products.add_document(base_eo3_product_doc) + + +@pytest.fixture +def africa_s2_eo3_product(index, africa_s2_product_doc): + return index.products.add_document(africa_s2_product_doc) + + +@pytest.fixture +def ls8_eo3_dataset(index, extended_eo3_metadata_type, ls8_eo3_product, eo3_ls8_dataset_doc): + return doc_to_ds(index, + ls8_eo3_product.name, + *eo3_ls8_dataset_doc) + + +@pytest.fixture +def ls8_eo3_dataset2(index, extended_eo3_metadata_type_doc, ls8_eo3_product, eo3_ls8_dataset2_doc): + return doc_to_ds(index, + ls8_eo3_product.name, + *eo3_ls8_dataset2_doc) + + +@pytest.fixture +def ls8_eo3_dataset3(index, extended_eo3_metadata_type_doc, ls8_eo3_product, eo3_ls8_dataset3_doc): + return doc_to_ds(index, + ls8_eo3_product.name, + *eo3_ls8_dataset3_doc) + + +@pytest.fixture +def ls8_eo3_dataset4(index, extended_eo3_metadata_type_doc, ls8_eo3_product, eo3_ls8_dataset4_doc): + return doc_to_ds(index, + ls8_eo3_product.name, + *eo3_ls8_dataset4_doc) + + +@pytest.fixture +def wo_eo3_dataset(index, wo_eo3_product, eo3_wo_dataset_doc, ls8_eo3_dataset): + return doc_to_ds(index, + wo_eo3_product.name, + *eo3_wo_dataset_doc) + + +@pytest.fixture +def africa_eo3_dataset(index, africa_s2_eo3_product, 
eo3_africa_dataset_doc): + return doc_to_ds(index, + africa_s2_eo3_product.name, + *eo3_africa_dataset_doc) @pytest.fixture @@ -113,31 +270,6 @@ @pytest.fixture -def dataset_with_lineage_doc(): - return ( - get_memory_test_data_doc("wo_ds_with_lineage.odc-metadata.yaml"), - 's3://dea-public-data/derivative/ga_ls_wo_3/1-6-0/090/086/2016/05/12/' - 'ga_ls_wo_3_090086_2016-05-12_final.stac-item.json' - ) - - -@pytest.fixture -def datasets_with_unembedded_lineage_doc(): - return [ - ( - get_memory_test_data_doc("ls8_dataset.yaml"), - 's3://dea-public-data/baseline/ga_ls8c_ard_3/090/086/2016/05/12/' - 'ga_ls8c_ard_3-0-0_090086_2016-05-12_final.stac-item.json' - ), - ( - get_memory_test_data_doc("wo_dataset.yaml"), - 's3://dea-public-data/derivative/ga_ls_wo_3/1-6-0/090/086/2016/05/12/' - 'ga_ls_wo_3_090086_2016-05-12_final.stac-item.json' - ), - ] - - -@pytest.fixture def global_integration_cli_args(): """ The first arguments to pass to a cli command for integration test configuration. @@ -244,15 +376,6 @@ del index -@pytest.fixture -def initialised_postgres_db(index): - """ - Return a connection to an PostgreSQL database, initialised with the default schema - and tables. - """ - return index._db - - def remove_postgres_dynamic_indexes(): """ Clear any dynamically created postgresql indexes from the schema. @@ -267,8 +390,9 @@ Clear any dynamically created postgis indexes from the schema. """ # Our normal indexes start with "ix_", dynamic indexes with "dix_" - for table in pgis_core.METADATA.tables.values(): - table.indexes.intersection_update([i for i in table.indexes if not i.name.startswith('dix_')]) + # for table in pgis_core.METADATA.tables.values(): + # table.indexes.intersection_update([i for i in table.indexes if not i.name.startswith('dix_')]) + # Dynamic indexes disabled. 
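
The ``doc_to_ds`` helper above is the whole indexing recipe for the new EO3 test documents: resolve the raw document with ``Doc2Dataset`` and add the result to the index. A standalone sketch of the same flow, assuming a configured index that already contains the ``ga_ls8c_ard_3`` product and metadata type, that the YAML holds a single document, and using a placeholder location URI::

    from datacube import Datacube
    from datacube.index.hl import Doc2Dataset
    from datacube.utils import read_documents

    dc = Datacube()
    (_, doc), = read_documents("integration_tests/data/eo3/ls8_dataset.yaml")

    resolver = Doc2Dataset(dc.index, products=["ga_ls8c_ard_3"], verify_lineage=False)
    ds, err = resolver(doc, "file:///some/location/ls8_dataset.yaml")
    assert err is None

    dc.index.datasets.add(ds)
    print(dc.index.datasets.get(ds.id))
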
@pytest.fixture @@ -433,7 +557,7 @@ @pytest.fixture def ga_metadata_type_doc(): - _FULL_EO_METADATA = Path(__file__).parent.joinpath('extensive-eo-metadata.yaml') + _FULL_EO_METADATA = Path(__file__).parent.joinpath('extensive-eo-metadata.yaml') # noqa: N806 [(path, eo_md_type)] = datacube.utils.read_documents(_FULL_EO_METADATA) return eo_md_type @@ -537,5 +661,7 @@ return SimpleNamespace(metadata=str(B / 'metadata.yml'), products=str(B / 'products.yml'), datasets_bad1=str(B / 'datasets_bad1.yml'), + datasets_no_id=str(B / 'datasets_no_id.yml'), datasets_eo3=str(B / 'datasets_eo3.yml'), - datasets=str(B / 'datasets.yml')) + datasets=str(B / 'datasets.yml'), + empty_file=str(B / 'empty_file.yml')) diff -Nru datacube-1.8.7/integration_tests/data/dataset_add/datasets_no_id.yml datacube-1.8.9/integration_tests/data/dataset_add/datasets_no_id.yml --- datacube-1.8.7/integration_tests/data/dataset_add/datasets_no_id.yml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/dataset_add/datasets_no_id.yml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,13 @@ +# Generated with `yaml.safe_dump_all(gen_dataset_test_dag(ii, force_tree=True) for ii in range(1,3))` +# A -> B +# | | +# | v +# +--> C -> D +# | +# +--> E +# +--- +creation_dt: '2017-11-05T00:00:00' +label: A0001 +n: 1 +product_type: A diff -Nru datacube-1.8.7/integration_tests/data/dataset_add/products.yml datacube-1.8.9/integration_tests/data/dataset_add/products.yml --- datacube-1.8.7/integration_tests/data/dataset_add/products.yml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/dataset_add/products.yml 2022-11-17 00:47:28.000000000 +0000 @@ -6,7 +6,7 @@ ## metadata: ## product_type: {{name}} ## {% endfor %}''').render(products='ABCDE') -## +## --- name: A description: test product A diff -Nru datacube-1.8.7/integration_tests/data/eo3/ard_ls8.odc-product.yaml datacube-1.8.9/integration_tests/data/eo3/ard_ls8.odc-product.yaml --- datacube-1.8.7/integration_tests/data/eo3/ard_ls8.odc-product.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/ard_ls8.odc-product.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,180 @@ +--- +name: ga_ls8c_ard_3 +description: Geoscience Australia Landsat 8 Operational Land Imager and Thermal Infra-Red Scanner Analysis Ready Data Collection 3 +metadata_type: eo3_landsat_ard + +license: CC-BY-4.0 + +metadata: + product: + name: ga_ls8c_ard_3 + properties: + eo:platform: landsat-8 + odc:producer: ga.gov.au + odc:product_family: ard + +measurements: + # NBART + - name: nbart_coastal_aerosol + aliases: + - nbart_band01 + - coastal_aerosol + dtype: int16 + nodata: -999 + units: '1' + - name: nbart_blue + aliases: + - nbart_band02 + - blue + dtype: int16 + nodata: -999 + units: '1' + - name: nbart_green + aliases: + - nbart_band03 + - green + dtype: int16 + nodata: -999 + units: '1' + - name: nbart_red + aliases: + - nbart_band04 + - red + dtype: int16 + nodata: -999 + units: '1' + - name: nbart_nir + aliases: + - nbart_band05 + - nir + dtype: int16 + nodata: -999 + units: '1' + - name: nbart_swir_1 + aliases: + - nbart_band06 + - swir_1 + # Requested for backwards compatibility with previous collection + - swir1 + dtype: int16 + nodata: -999 + units: '1' + - name: nbart_swir_2 + aliases: + - nbart_band07 + - swir_2 + # Requested for backwards compatibility with previous collection + - swir2 + dtype: int16 + nodata: -999 + units: '1' + - name: nbart_panchromatic + aliases: + - nbart_band08 + - panchromatic + dtype: int16 + 
nodata: -999 + units: '1' + + # Observation Attributes + - name: oa_fmask + aliases: + - fmask + dtype: uint8 + nodata: 0 + units: '1' + flags_definition: + fmask: + bits: [0, 1, 2, 3, 4, 5, 6, 7] + description: Fmask + values: + '0': nodata + '1': valid + '2': cloud + '3': shadow + '4': snow + '5': water + - name: oa_nbart_contiguity + aliases: + - nbart_contiguity + dtype: uint8 + nodata: 255 + units: '1' + flags_definition: + contiguous: + bits: [0] + values: + '1': true + '0': false + - name: oa_azimuthal_exiting + aliases: + - azimuthal_exiting + dtype: float32 + nodata: .nan + units: '1' + - name: oa_azimuthal_incident + aliases: + - azimuthal_incident + dtype: float32 + nodata: .nan + units: '1' + - name: oa_combined_terrain_shadow + aliases: + - combined_terrain_shadow + dtype: uint8 + nodata: 255 + units: '1' + - name: oa_exiting_angle + aliases: + - exiting_angle + dtype: float32 + nodata: .nan + units: '1' + - name: oa_incident_angle + aliases: + - incident_angle + dtype: float32 + nodata: .nan + units: '1' + - name: oa_relative_azimuth + aliases: + - relative_azimuth + dtype: float32 + nodata: .nan + units: '1' + - name: oa_relative_slope + aliases: + - relative_slope + dtype: float32 + nodata: .nan + units: '1' + - name: oa_satellite_azimuth + aliases: + - satellite_azimuth + dtype: float32 + nodata: .nan + units: '1' + - name: oa_satellite_view + aliases: + - satellite_view + dtype: float32 + nodata: .nan + units: '1' + - name: oa_solar_azimuth + aliases: + - solar_azimuth + dtype: float32 + nodata: .nan + units: '1' + - name: oa_solar_zenith + aliases: + - solar_zenith + dtype: float32 + nodata: .nan + units: '1' + - name: oa_time_delta + aliases: + - time_delta + dtype: float32 + nodata: .nan + units: '1' diff -Nru datacube-1.8.7/integration_tests/data/eo3/eo3_landsat_ard.odc-type.yaml datacube-1.8.9/integration_tests/data/eo3/eo3_landsat_ard.odc-type.yaml --- datacube-1.8.7/integration_tests/data/eo3/eo3_landsat_ard.odc-type.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/eo3_landsat_ard.odc-type.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,298 @@ +--- +name: eo3_landsat_ard +description: EO3 for ARD Landsat Collection 3 +dataset: + id: [id] # No longer configurable in newer ODCs. + sources: [lineage, source_datasets] # No longer configurable in newer ODCs. + + grid_spatial: [grid_spatial, projection] + measurements: [measurements] + creation_dt: [properties, 'odc:processing_datetime'] + label: [label] + format: [properties, 'odc:file_format'] + + search_fields: + platform: + description: Platform code + offset: [properties, 'eo:platform'] + indexed: false + + instrument: + description: Instrument name + offset: [properties, 'eo:instrument'] + indexed: false + + product_family: + description: Product family code + offset: [properties, 'odc:product_family'] + indexed: false + + region_code: + description: > + Spatial reference code from the provider. + For Landsat region_code is a scene path row: + '{:03d}{:03d}.format(path,row)' + For Sentinel it is MGRS code. + In general it is a unique string identifier + that datasets covering roughly the same spatial + region share. 
+ + offset: [properties, 'odc:region_code'] + + crs_raw: + description: The raw CRS string as it appears in metadata + offset: ['crs'] + indexed: false + + dataset_maturity: + description: One of - final|interim|nrt (near real time) + offset: [properties, 'dea:dataset_maturity'] + + gqa: + description: GQA Circular error probable (90%) + type: double + offset: [properties, 'gqa:cep90'] + + cloud_cover: + description: Cloud cover percentage [0, 100] + type: double + offset: [properties, 'eo:cloud_cover'] + + time: + description: Acquisition time range + type: datetime-range + min_offset: + - [properties, 'dtr:start_datetime'] + - [properties, datetime] + max_offset: + - [properties, 'dtr:end_datetime'] + - [properties, datetime] + + # LonLat bounding box, generated on the fly from: + # `grids`, `crs` and `geometry` of the new metadata format + # + # Bounding box is defined by two ranges: + # [lon.begin, lon.end] -- Longitude + # [lat.begin, lat.end] -- Latitude + # + # Note that STAC is using `bbox` for the same thing as following: + # + # bbox: [left, bottom, right, top] + # 0 1 2 3 + # lon lat lon lat + # + # But MetadataType does not support integer index keys, so... + # BoundingBox: [lon.begin, lat.begin, lon.end, lat.end] + + lon: + description: Longitude range + type: double-range + min_offset: + - [extent, lon, begin] + max_offset: + - [extent, lon, end] + + lat: + description: Latitude range + type: double-range + min_offset: + - [extent, lat, begin] + max_offset: + - [extent, lat, end] + + landsat_product_id: + description: Landsat Product ID + indexed: false + offset: + - properties + - landsat:landsat_product_id + + # semi-auto generated below + eo_gsd: + description: Ground sample distance, meters + indexed: false + offset: + - properties + - eo:gsd + type: double + eo_sun_azimuth: + description: 'TODO: ' + indexed: false + offset: + - properties + - eo:sun_azimuth + type: double + eo_sun_elevation: + description: 'TODO: ' + indexed: false + offset: + - properties + - eo:sun_elevation + type: double + fmask_clear: + description: 'TODO: ' + indexed: false + offset: + - properties + - fmask:clear + type: double + fmask_cloud_shadow: + description: 'TODO: ' + indexed: false + offset: + - properties + - fmask:cloud_shadow + type: double + fmask_snow: + description: 'TODO: ' + indexed: false + offset: + - properties + - fmask:snow + type: double + fmask_water: + description: 'TODO: ' + indexed: false + offset: + - properties + - fmask:water + type: double + gqa_abs_iterative_mean_x: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:abs_iterative_mean_x + type: double + gqa_abs_iterative_mean_xy: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:abs_iterative_mean_xy + type: double + gqa_abs_iterative_mean_y: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:abs_iterative_mean_y + type: double + gqa_abs_x: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:abs_x + type: double + gqa_abs_xy: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:abs_xy + type: double + gqa_abs_y: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:abs_y + type: double + gqa_cep90: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:cep90 + type: double + gqa_iterative_mean_x: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:iterative_mean_x + type: double + gqa_iterative_mean_xy: + description: 'TODO: ' + indexed: false + 
offset: + - properties + - gqa:iterative_mean_xy + type: double + gqa_iterative_mean_y: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:iterative_mean_y + type: double + gqa_iterative_stddev_x: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:iterative_stddev_x + type: double + gqa_iterative_stddev_xy: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:iterative_stddev_xy + type: double + gqa_iterative_stddev_y: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:iterative_stddev_y + type: double + gqa_mean_x: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:mean_x + type: double + gqa_mean_xy: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:mean_xy + type: double + gqa_mean_y: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:mean_y + type: double + gqa_stddev_x: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:stddev_x + type: double + gqa_stddev_xy: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:stddev_xy + type: double + gqa_stddev_y: + description: 'TODO: ' + indexed: false + offset: + - properties + - gqa:stddev_y + type: double + landsat_scene_id: + description: Landsat Scene ID + indexed: false + offset: + - properties + - landsat:landsat_scene_id diff -Nru datacube-1.8.7/integration_tests/data/eo3/ga_ls_wo_3.odc-product.yaml datacube-1.8.9/integration_tests/data/eo3/ga_ls_wo_3.odc-product.yaml --- datacube-1.8.7/integration_tests/data/eo3/ga_ls_wo_3.odc-product.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/ga_ls_wo_3.odc-product.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,59 @@ +name: ga_ls_wo_3 +description: Geoscience Australia Landsat Water Observations Collection 3 +metadata_type: eo3 + +license: CC-BY-4.0 + +metadata: + properties: + odc:file_format: GeoTIFF + odc:product_family: wo + product: + name: ga_ls_wo_3 + +measurements: + - name: water + dtype: uint8 + nodata: 1 + units: '1' + flags_definition: + dry: + bits: [7, 6, 5, 4, 3, 2, 1, 0] + description: No water detected + values: {0: true} + nodata: + bits: 0 + description: No data + values: {0: false, 1: true} + noncontiguous: + bits: 1 + description: At least one EO band is missing or saturated + values: {0: false, 1: true} + low_solar_angle: + bits: 2 + description: Low solar incidence angle + values: {0: false, 1: true} + terrain_shadow: + bits: 3 + description: Terrain shadow + values: {0: false, 1: true} + high_slope: + bits: 4 + description: High slope + values: {0: false, 1: true} + cloud_shadow: + bits: 5 + description: Cloud shadow + values: {0: false, 1: true} + cloud: + bits: 6 + description: Cloudy + values: {0: false, 1: true} + water_observed: + bits: 7 + description: Classified as water by the decision tree + values: {0: false, 1: true} + wet: + bits: [7, 6, 5, 4, 3, 2, 1, 0] + description: Clear and Wet + values: {128: true} diff -Nru datacube-1.8.7/integration_tests/data/eo3/ls8_dataset2.yaml datacube-1.8.9/integration_tests/data/eo3/ls8_dataset2.yaml --- datacube-1.8.7/integration_tests/data/eo3/ls8_dataset2.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/ls8_dataset2.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,130 @@ +--- +# Dataset +$schema: https://schemas.opendatacube.org/dataset +id: 4a30d008-4e82-4d67-99af-28bc1629f766 + +label: ga_ls8c_ard_3-0-0_090086_2016-05-28_final +product: + 
name: ga_ls8c_ard_3 + +crs: epsg:32655 +geometry: + type: Polygon + coordinates: [[[557897.0, -4219717.0], [557843.0, -4219702.0], [557903.0, -4219399.0], [558668.0, -4216459.0], + [570053.0, -4172809.0], [603128.0, -4046644.0], [607268.0, -4030879.0], [607313.0, -4030777.0], + [607383.0, -4030796.0], [607395.0, -4030785.0], [792773.0, -4079206.0], [794286.0, -4079634.0], + [794286.0, -4079636.0], [794403.0, -4079667.0], [794377.0, -4079831.0], [745188.0, -4268613.0], + [745114.0, -4268617.0], [745011.0, -4268590.0], [744938.0, -4268594.0], [740677.0, -4267484.0], + [557895.0, -4219725.0], [557897.0, -4219717.0]]] + +grids: + default: + shape: [7941, 7901] + transform: [30.0, 0.0, 557685.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] + g15m: + shape: [15881, 15801] + transform: [15.0, 0.0, 557692.5, 0.0, -15.0, -4030492.5, 0.0, 0.0, 1.0] + +properties: + datetime: 2016-05-28T23:50:44.734600Z + dea:dataset_maturity: final + dtr:start_datetime: 2016-05-28T23:50:30.172583Z + dtr:end_datetime: 2016-05-28T23:50:59.149573Z + eo:cloud_cover: 50.73881439129734 + eo:gsd: 15.0 # Ground sample distance (m) + eo:instrument: OLI_TIRS2 + eo:platform: landsat-8 + eo:sun_azimuth: 33.12639988 + eo:sun_elevation: 23.36044073 + fmask:clear: 28.783662620590533 + fmask:cloud: 50.73881439129734 + fmask:cloud_shadow: 7.393403159317298 + fmask:snow: 0.6559769944911529 + fmask:water: 12.428142834303676 + gqa:abs_iterative_mean_x: 0.26 + gqa:abs_iterative_mean_xy: 0.33 + gqa:abs_iterative_mean_y: 0.21 + gqa:abs_x: 0.56 + gqa:abs_xy: 0.69 + gqa:abs_y: 0.4 + gqa:cep90: 0.79 + gqa:iterative_mean_x: -0.04 + gqa:iterative_mean_xy: 0.08 + gqa:iterative_mean_y: 0.07 + gqa:iterative_stddev_x: 0.42 + gqa:iterative_stddev_xy: 0.5 + gqa:iterative_stddev_y: 0.27 + gqa:mean_x: -0.25 + gqa:mean_xy: 0.25 + gqa:mean_y: -0.06 + gqa:stddev_x: 2.15 + gqa:stddev_xy: 2.39 + gqa:stddev_y: 1.04 + landsat:collection_category: T1 + landsat:collection_number: 1 + landsat:landsat_product_id: LC08_L1TP_090086_20160528_20170324_01_T1 + landsat:landsat_scene_id: LC80900862016149LGN01 + landsat:wrs_path: 90 + landsat:wrs_row: 86 + odc:dataset_version: 3.0.0 + odc:file_format: GeoTIFF + odc:processing_datetime: 2019-10-07T18:26:31.599312Z + odc:producer: ga.gov.au + odc:product_family: ard + odc:region_code: '090086' + +measurements: + nbart_blue: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_band02.tif + nbart_coastal_aerosol: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_band01.tif + nbart_green: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_band03.tif + nbart_nir: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_band05.tif + nbart_panchromatic: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_band08.tif + grid: panchromatic + nbart_red: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_band04.tif + nbart_swir_1: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_band06.tif + nbart_swir_2: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_band07.tif + oa_azimuthal_exiting: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_azimuthal-exiting.tif + oa_azimuthal_incident: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_azimuthal-incident.tif + oa_combined_terrain_shadow: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_combined-terrain-shadow.tif + oa_exiting_angle: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_exiting-angle.tif + oa_fmask: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_fmask.tif + oa_incident_angle: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_incident-angle.tif + oa_nbart_contiguity: + path: 
ga_ls8c_oa_3-0-0_090086_2016-05-28_final_nbart-contiguity.tif + oa_relative_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_relative-azimuth.tif + oa_relative_slope: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_relative-slope.tif + oa_satellite_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_satellite-azimuth.tif + oa_satellite_view: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_satellite-view.tif + oa_solar_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_solar-azimuth.tif + oa_solar_zenith: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_solar-zenith.tif + oa_time_delta: + path: ga_ls8c_oa_3-0-0_090086_2016-05-28_final_time-delta.tif + +accessories: + thumbnail:nbart: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-28_final_thumbnail.jpg + checksum:sha1: + path: ga_ls8c_ard_3-0-0_090086_2016-05-28_final.sha1 + metadata:processor: + path: ga_ls8c_ard_3-0-0_090086_2016-05-28_final.proc-info.yaml +... diff -Nru datacube-1.8.7/integration_tests/data/eo3/ls8_dataset3.yaml datacube-1.8.9/integration_tests/data/eo3/ls8_dataset3.yaml --- datacube-1.8.7/integration_tests/data/eo3/ls8_dataset3.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/ls8_dataset3.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,146 @@ +--- +# Dataset +# url: https://explorer.dev.dea.ga.gov.au/dataset/1154087c-211c-4834-a1f8-b4b59101b644.odc-metadata.yaml +$schema: https://schemas.opendatacube.org/dataset +id: 1154087c-211c-4834-a1f8-b4b59101b644 + +label: ga_ls8c_ard_3-0-0_101077_2013-04-04_final +product: + name: ga_ls8c_ard_3 + +location: s3://dea-public-data/baseline/ga_ls8c_ard_3/101/077/2013/04/04/ga_ls8c_ard_3-0-0_101077_2013-04-04_final.stac-item.json + +crs: epsg:32653 +geometry: + type: Polygon + coordinates: [[[3.88119e+05, -2.784957e+06], [3.88027e+05, -2.784922e+06], [4.00103e+05, + -2.730604e+06], [4.13333e+05, -2.671339e+06], [4.27838e+05, -2.606539e+06], + [4.27943e+05, -2.606103e+06], [4.28003e+05, -2.605908e+06], [4.28033e+05, -2.605883e+06], + [4.28078e+05, -2.605893e+06], [4.28085e+05, -2.605875e+06], [6.06082e+05, -2.645506e+06], + [6.0922e+05, -2.646226e+06], [6.09521e+05, -2.646293e+06], [6.09887e+05, -2.646379e+06], + [6.10785e+05, -2.646585e+06], [6.10784e+05, -2.64659e+06], [6.10878e+05, -2.646612e+06], + [6.10867e+05, -2.646731e+06], [5.71161e+05, -2.825714e+06], [5.71039e+05, -2.825707e+06], + [5.41783e+05, -2.81919e+06], [3.89678e+05, -2.785334e+06], [3.88208e+05, -2.785004e+06], + [3.88119e+05, -2.784957e+06]]] +grids: + g15m: + shape: [14681, 14881] + transform: [1.5e+01, 0.e+00, 3.878925e+05, 0.e+00, -1.5e+01, -2.6057925e+06, 0.e+00, + 0.e+00, 1.e+00] + default: + shape: [7341, 7441] + transform: [3.e+01, 0.e+00, 3.87885e+05, 0.e+00, -3.e+01, -2.605785e+06, 0.e+00, + 0.e+00, 1.e+00] + +properties: + datetime: '2013-04-04T00:58:48.849536Z' + dea:dataset_maturity: final + dtr:end_datetime: '2013-04-04T00:59:02.900824Z' + dtr:start_datetime: '2013-04-04T00:58:34.682275Z' + eo:cloud_cover: 2.513757420052948e+01 + eo:gsd: 1.5e+01 # Ground sample distance (m) + eo:instrument: OLI_TIRS + eo:platform: landsat-8 + eo:sun_azimuth: 4.912672093e+01 + eo:sun_elevation: 4.723487021e+01 + fmask:clear: 7.140783727458025e+01 + fmask:cloud: 2.513757420052948e+01 + fmask:cloud_shadow: 3.453619235226707e+00 + fmask:snow: 0.e+00 + fmask:water: 9.692896635673229e-04 + gqa:abs_iterative_mean_x: 1.1e-01 + gqa:abs_iterative_mean_xy: 2.1e-01 + gqa:abs_iterative_mean_y: 1.8e-01 + gqa:abs_x: 1.7e-01 + gqa:abs_xy: 3.4e-01 + gqa:abs_y: 2.9e-01 + 
gqa:cep90: 4.5e-01 + gqa:iterative_mean_x: -7.e-02 + gqa:iterative_mean_xy: 1.4e-01 + gqa:iterative_mean_y: 1.2e-01 + gqa:iterative_stddev_x: 1.2e-01 + gqa:iterative_stddev_xy: 2.4e-01 + gqa:iterative_stddev_y: 2.e-01 + gqa:mean_x: -7.e-02 + gqa:mean_xy: 1.4e-01 + gqa:mean_y: 1.2e-01 + gqa:stddev_x: 4.7e-01 + gqa:stddev_xy: 9.9e-01 + gqa:stddev_y: 8.7e-01 + landsat:collection_category: T1 + landsat:collection_number: 1 + landsat:landsat_product_id: LC08_L1TP_101077_20130404_20170505_01_T1 + landsat:landsat_scene_id: LC81010772013094LGN02 + landsat:wrs_path: 101 + landsat:wrs_row: 77 + odc:dataset_version: 3.0.0 + odc:file_format: GeoTIFF + odc:processing_datetime: '2019-10-10T11:15:01.412058Z' + odc:producer: ga.gov.au + odc:product: ga_ls8c_ard_3 + odc:product_family: ard + odc:region_code: '101077' + proj:epsg: 32653 + proj:shape: + - 7341 + - 7441 + proj:transform: + - 3.e+01 + - 0.e+00 + - 3.87885e+05 + - 0.e+00 + - -3.e+01 + - -2.605785e+06 + - 0.e+00 + - 0.e+00 + - 1.e+00 + +measurements: + oa_fmask: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_fmask.tif + nbart_nir: + path: ga_ls8c_nbart_3-0-0_101077_2013-04-04_final_band05.tif + nbart_red: + path: ga_ls8c_nbart_3-0-0_101077_2013-04-04_final_band04.tif + nbart_blue: + path: ga_ls8c_nbart_3-0-0_101077_2013-04-04_final_band02.tif + nbart_green: + path: ga_ls8c_nbart_3-0-0_101077_2013-04-04_final_band03.tif + nbart_swir_1: + path: ga_ls8c_nbart_3-0-0_101077_2013-04-04_final_band06.tif + nbart_swir_2: + path: ga_ls8c_nbart_3-0-0_101077_2013-04-04_final_band07.tif + oa_time_delta: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_time-delta.tif + oa_solar_zenith: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_solar-zenith.tif + oa_exiting_angle: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_exiting-angle.tif + oa_solar_azimuth: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_solar-azimuth.tif + oa_incident_angle: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_incident-angle.tif + oa_relative_slope: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_relative-slope.tif + oa_satellite_view: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_satellite-view.tif + nbart_panchromatic: + grid: g15m + path: ga_ls8c_nbart_3-0-0_101077_2013-04-04_final_band08.tif + oa_nbart_contiguity: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_nbart-contiguity.tif + oa_relative_azimuth: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_relative-azimuth.tif + oa_azimuthal_exiting: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_azimuthal-exiting.tif + oa_satellite_azimuth: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_satellite-azimuth.tif + nbart_coastal_aerosol: + path: ga_ls8c_nbart_3-0-0_101077_2013-04-04_final_band01.tif + oa_azimuthal_incident: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_azimuthal-incident.tif + oa_combined_terrain_shadow: + path: ga_ls8c_oa_3-0-0_101077_2013-04-04_final_combined-terrain-shadow.tif + +lineage: {} +... 
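
For reference, a minimal sketch (editor-added, not part of this patch) of how an EO3 dataset document such as ls8_dataset3.yaml above can be resolved and indexed from Python. It assumes a configured datacube database whose index already holds the eo3 metadata type and the ga_ls8c_ard_3 product; the file path and URI are placeholders:

    import yaml
    from datacube import Datacube
    from datacube.index.hl import Doc2Dataset

    dc = Datacube(app="index-eo3-fixture")
    with open("integration_tests/data/eo3/ls8_dataset3.yaml") as f:
        doc = yaml.safe_load(f)

    # Doc2Dataset resolves a raw metadata document against the indexed products.
    resolver = Doc2Dataset(dc.index)
    dataset, err = resolver(doc, "file:///tmp/ls8_dataset3.yaml")  # placeholder URI
    if err is None:
        dc.index.datasets.add(dataset)
    else:
        print("could not resolve dataset:", err)
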
diff -Nru datacube-1.8.7/integration_tests/data/eo3/ls8_dataset4.yaml datacube-1.8.9/integration_tests/data/eo3/ls8_dataset4.yaml --- datacube-1.8.7/integration_tests/data/eo3/ls8_dataset4.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/ls8_dataset4.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,145 @@ +--- +# Dataset +# url: https://explorer.dev.dea.ga.gov.au/dataset/0ee5fe0a-6acd-4583-8554-36ad963bf40b.odc-metadata.yaml +$schema: https://schemas.opendatacube.org/dataset +id: 0ee5fe0a-6acd-4583-8554-36ad963bf40b + +label: ga_ls8c_ard_3-0-0_101077_2013-07-21_final +product: + name: ga_ls8c_ard_3 + +location: s3://dea-public-data/baseline/ga_ls8c_ard_3/101/077/2013/07/21/ga_ls8c_ard_3-0-0_101077_2013-07-21_final.stac-item.json + +crs: epsg:32653 +geometry: + type: Polygon + coordinates: [[[4.76439e+05, -2.59925e+06], [4.76445e+05, -2.599245e+06], [4.76548e+05, + -2.599268e+06], [4.76636e+05, -2.599283e+06], [5.13608e+05, -2.607641e+06], + [6.62362e+05, -2.641246e+06], [6.63262e+05, -2.641456e+06], [6.63296e+05, -2.64148e+06], + [6.63416e+05, -2.641508e+06], [6.63457e+05, -2.641538e+06], [6.20602e+05, -2.831771e+06], + [6.20452e+05, -2.832431e+06], [6.20423e+05, -2.832472e+06], [6.20327e+05, -2.832456e+06], + [6.20325e+05, -2.832465e+06], [6.20147e+05, -2.832425e+06], [6.20074e+05, -2.832412e+06], + [4.33669e+05, -2.790247e+06], [4.33144e+05, -2.790127e+06], [4.33103e+05, -2.790098e+06], + [4.40423e+05, -2.757619e+06], [4.56563e+05, -2.686279e+06], [4.69658e+05, -2.628544e+06], + [4.76288e+05, -2.599459e+06], [4.76333e+05, -2.599264e+06], [4.76363e+05, -2.599237e+06], + [4.76439e+05, -2.59925e+06]]] +grids: + g15m: + shape: [15561, 15381] + transform: [1.5e+01, 0.e+00, 4.328925e+05, 0.e+00, -1.5e+01, -2.5991925e+06, 0.e+00, + 0.e+00, 1.e+00] + default: + shape: [7781, 7691] + transform: [3.e+01, 0.e+00, 4.32885e+05, 0.e+00, -3.e+01, -2.599185e+06, 0.e+00, + 0.e+00, 1.e+00] + +properties: + datetime: '2013-07-21T00:57:26.432563Z' + dea:dataset_maturity: final + eo:cloud_cover: 1.828773330949106e+01 + eo:gsd: 1.5e+01 # Ground sample distance (m) + eo:instrument: OLI_TIRS + eo:platform: landsat-8 + eo:sun_azimuth: 3.722609952e+01 + eo:sun_elevation: 3.539504069e+01 + fmask:clear: 7.716319419426578e+01 + fmask:cloud: 1.828773330949106e+01 + fmask:cloud_shadow: 4.387244779045838e+00 + fmask:snow: 0.e+00 + fmask:water: 1.618277171973244e-01 + gqa:abs_iterative_mean_x: 1.3e-01 + gqa:abs_iterative_mean_xy: 2.9e-01 + gqa:abs_iterative_mean_y: 2.6e-01 + gqa:abs_x: 2.8e-01 + gqa:abs_xy: 6.2e-01 + gqa:abs_y: 5.5e-01 + gqa:cep90: 5.8e-01 + gqa:iterative_mean_x: -2.e-02 + gqa:iterative_mean_xy: 6.e-02 + gqa:iterative_mean_y: 6.e-02 + gqa:iterative_stddev_x: 1.8e-01 + gqa:iterative_stddev_xy: 4.e-01 + gqa:iterative_stddev_y: 3.6e-01 + gqa:mean_x: 1.e-01 + gqa:mean_xy: 1.8e-01 + gqa:mean_y: -1.4e-01 + gqa:stddev_x: 9.6e-01 + gqa:stddev_xy: 1.93e+00 + gqa:stddev_y: 1.67e+00 + landsat:collection_category: T1 + landsat:collection_number: 1 + landsat:landsat_product_id: LC08_L1TP_101077_20130721_20170503_01_T1 + landsat:landsat_scene_id: LC81010772013202LGN01 + landsat:wrs_path: 101 + landsat:wrs_row: 77 + odc:dataset_version: 3.0.0 + odc:file_format: GeoTIFF + odc:processing_datetime: '2019-10-10T11:54:36.165277Z' + odc:producer: ga.gov.au + odc:product: ga_ls8c_ard_3 + odc:product_family: ard + odc:region_code: '101077' + proj:epsg: 32653 + proj:shape: + - 7781 + - 7691 + proj:transform: + - 3.e+01 + - 0.e+00 + - 4.32885e+05 + - 0.e+00 + - -3.e+01 + - -2.599185e+06 
+ - 0.e+00 + - 0.e+00 + - 1.e+00 + +measurements: + oa_fmask: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_fmask.tif + nbart_nir: + path: ga_ls8c_nbart_3-0-0_101077_2013-07-21_final_band05.tif + nbart_red: + path: ga_ls8c_nbart_3-0-0_101077_2013-07-21_final_band04.tif + nbart_blue: + path: ga_ls8c_nbart_3-0-0_101077_2013-07-21_final_band02.tif + nbart_green: + path: ga_ls8c_nbart_3-0-0_101077_2013-07-21_final_band03.tif + nbart_swir_1: + path: ga_ls8c_nbart_3-0-0_101077_2013-07-21_final_band06.tif + nbart_swir_2: + path: ga_ls8c_nbart_3-0-0_101077_2013-07-21_final_band07.tif + oa_time_delta: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_time-delta.tif + oa_solar_zenith: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_solar-zenith.tif + oa_exiting_angle: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_exiting-angle.tif + oa_solar_azimuth: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_solar-azimuth.tif + oa_incident_angle: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_incident-angle.tif + oa_relative_slope: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_relative-slope.tif + oa_satellite_view: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_satellite-view.tif + nbart_panchromatic: + grid: g15m + path: ga_ls8c_nbart_3-0-0_101077_2013-07-21_final_band08.tif + oa_nbart_contiguity: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_nbart-contiguity.tif + oa_relative_azimuth: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_relative-azimuth.tif + oa_azimuthal_exiting: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_azimuthal-exiting.tif + oa_satellite_azimuth: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_satellite-azimuth.tif + nbart_coastal_aerosol: + path: ga_ls8c_nbart_3-0-0_101077_2013-07-21_final_band01.tif + oa_azimuthal_incident: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_azimuthal-incident.tif + oa_combined_terrain_shadow: + path: ga_ls8c_oa_3-0-0_101077_2013-07-21_final_combined-terrain-shadow.tif + +lineage: {} +... 
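
The grids entries in these documents pair a pixel shape with a 3x3 affine transform. A small editor-added sketch of how the default grid of ls8_dataset4.yaml above maps pixel indices to EPSG:32653 coordinates, using the affine package that datacube already depends on:

    from affine import Affine

    shape = (7781, 7691)                         # (rows, columns) of the default grid
    transform = Affine(30.0, 0.0, 432885.0,
                       0.0, -30.0, -2599185.0)   # first two rows of the 3x3 transform
    upper_left = transform * (0, 0)              # -> (432885.0, -2599185.0)
    lower_right = transform * (shape[1], shape[0])
    print(upper_left, lower_right)               # scene extent in projected metres
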
diff -Nru datacube-1.8.7/integration_tests/data/eo3/ls8_dataset.yaml datacube-1.8.9/integration_tests/data/eo3/ls8_dataset.yaml --- datacube-1.8.7/integration_tests/data/eo3/ls8_dataset.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/ls8_dataset.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,130 @@ +--- +# Dataset +$schema: https://schemas.opendatacube.org/dataset +id: c21648b1-a6fa-4de0-9dc3-9c445d8b295a + +label: ga_ls8c_ard_3-0-0_090086_2016-05-12_final +product: + name: ga_ls8c_ard_3 + +crs: epsg:32655 +geometry: + type: Polygon + coordinates: [[[557897.0, -4219717.0], [557843.0, -4219702.0], [557903.0, -4219399.0], [558668.0, -4216459.0], + [570053.0, -4172809.0], [603128.0, -4046644.0], [607268.0, -4030879.0], [607313.0, -4030777.0], + [607383.0, -4030796.0], [607395.0, -4030785.0], [792773.0, -4079206.0], [794286.0, -4079634.0], + [794286.0, -4079636.0], [794403.0, -4079667.0], [794377.0, -4079831.0], [745188.0, -4268613.0], + [745114.0, -4268617.0], [745011.0, -4268590.0], [744938.0, -4268594.0], [740677.0, -4267484.0], + [557895.0, -4219725.0], [557897.0, -4219717.0]]] + +grids: + default: + shape: [7941, 7901] + transform: [30.0, 0.0, 557385.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] + panchromatic: + shape: [15881, 15801] + transform: [15.0, 0.0, 557392.5, 0.0, -15.0, -4030492.5, 0.0, 0.0, 1.0] + +properties: + datetime: 2016-05-12 23:50:37.621730Z + dea:dataset_maturity: final + dtr:end_datetime: 2016-05-12 23:50:52.031499Z + dtr:start_datetime: 2016-05-12 23:50:23.054165Z + eo:cloud_cover: 58.910716655901616 + eo:gsd: 15.0 # Ground sample distance (m) + eo:instrument: OLI_TIRS + eo:platform: landsat-8 + eo:sun_azimuth: 34.58516815 + eo:sun_elevation: 26.29614366 + fmask:clear: 22.3973998672305 + fmask:cloud: 58.910716655901616 + fmask:cloud_shadow: 1.3150997463296996 + fmask:snow: 0.0006217170293219306 + fmask:water: 17.376162013508864 + gqa:abs_iterative_mean_x: 0.19 + gqa:abs_iterative_mean_xy: 0.25 + gqa:abs_iterative_mean_y: 0.16 + gqa:abs_x: 0.43 + gqa:abs_xy: 0.51 + gqa:abs_y: 0.28 + gqa:cep90: 0.49 + gqa:iterative_mean_x: -0.1 + gqa:iterative_mean_xy: 0.15 + gqa:iterative_mean_y: 0.11 + gqa:iterative_stddev_x: 0.24 + gqa:iterative_stddev_xy: 0.3 + gqa:iterative_stddev_y: 0.17 + gqa:mean_x: -0.1 + gqa:mean_xy: 0.11 + gqa:mean_y: 0.05 + gqa:stddev_x: 1.15 + gqa:stddev_xy: 1.28 + gqa:stddev_y: 0.56 + landsat:collection_category: T1 + landsat:collection_number: 1 + landsat:landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1 + landsat:landsat_scene_id: LC80900862016133LGN02 + landsat:wrs_path: 90 + landsat:wrs_row: 86 + odc:dataset_version: 3.0.0 + odc:file_format: GeoTIFF + odc:processing_datetime: 2019-10-07T20:19:19.218290 + odc:producer: ga.gov.au + odc:product_family: ard + odc:region_code: '090086' + +measurements: + nbart_blue: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band02.tif + nbart_coastal_aerosol: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band01.tif + nbart_green: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band03.tif + nbart_nir: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band05.tif + nbart_panchromatic: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band08.tif + grid: panchromatic + nbart_red: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band04.tif + nbart_swir_1: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band06.tif + nbart_swir_2: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band07.tif + oa_azimuthal_exiting: + path: 
ga_ls8c_oa_3-0-0_090086_2016-05-12_final_azimuthal-exiting.tif + oa_azimuthal_incident: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_azimuthal-incident.tif + oa_combined_terrain_shadow: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_combined-terrain-shadow.tif + oa_exiting_angle: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_exiting-angle.tif + oa_fmask: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_fmask.tif + oa_incident_angle: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_incident-angle.tif + oa_nbart_contiguity: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_nbart-contiguity.tif + oa_relative_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_relative-azimuth.tif + oa_relative_slope: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_relative-slope.tif + oa_satellite_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_satellite-azimuth.tif + oa_satellite_view: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_satellite-view.tif + oa_solar_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_solar-azimuth.tif + oa_solar_zenith: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_solar-zenith.tif + oa_time_delta: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_time-delta.tif + +accessories: + thumbnail:nbart: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_thumbnail.jpg + checksum:sha1: + path: ga_ls8c_ard_3-0-0_090086_2016-05-12_final.sha1 + metadata:processor: + path: ga_ls8c_ard_3-0-0_090086_2016-05-12_final.proc-info.yaml +... diff -Nru datacube-1.8.7/integration_tests/data/eo3/s2_africa_dataset.yaml datacube-1.8.9/integration_tests/data/eo3/s2_africa_dataset.yaml --- datacube-1.8.7/integration_tests/data/eo3/s2_africa_dataset.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/s2_africa_dataset.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,114 @@ +--- +# Dataset +# url: https://explorer.digitalearth.africa/dataset/324fd220-388a-5501-bf2d-de00e78c93e6.odc-metadata.yaml +$schema: https://schemas.opendatacube.org/dataset +id: 324fd220-388a-5501-bf2d-de00e78c93e6 + +label: S2A_MSIL2A_20220808T072631_N0400_R049_T37MCQ_20220808T111058 +product: + name: s2_l2a + +location: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/37/M/CQ/2022/8/S2A_37MCQ_20220808_0_L2A/S2A_37MCQ_20220808_0_L2A.json + +crs: epsg:32737 +geometry: + type: Polygon + coordinates: [[[4.09799e+05, 9.390221e+06], [3.29704e+05, 9.390221e+06], [3.53943e+05, + 9.500019e+06], [4.09799e+05, 9.500019e+06], [4.09799e+05, 9.390221e+06]]] +grids: + g20m: + shape: [5490, 5490] + transform: [2.e+01, 0.e+00, 3.e+05, 0.e+00, -2.e+01, 9.50002e+06, 0.e+00, 0.e+00, + 1.e+00] + g60m: + shape: [1830, 1830] + transform: [6.e+01, 0.e+00, 3.e+05, 0.e+00, -6.e+01, 9.50002e+06, 0.e+00, 0.e+00, + 1.e+00] + g320m: + shape: [343, 343] + transform: [3.2e+02, 0.e+00, 3.e+05, 0.e+00, -3.2e+02, 9.50002e+06, 0.e+00, 0.e+00, + 1.e+00] + default: + shape: [10980, 10980] + transform: [1.e+01, 0.e+00, 3.e+05, 0.e+00, -1.e+01, 9.50002e+06, 0.e+00, 0.e+00, + 1.e+00] + +properties: + datetime: '2022-08-08T07:51:36Z' + eo:cloud_cover: 5.028e+01 + eo:constellation: sentinel-2 + eo:gsd: 10 # Ground sample distance (m) + eo:instrument: MSI + eo:off_nadir: 0 + eo:platform: sentinel-2a + odc:file_format: GeoTIFF + odc:processing_datetime: '2022-08-08T07:51:36Z' + odc:region_code: 37MCQ + proj:epsg: 32737 + sentinel:boa_offset_applied: true + sentinel:data_coverage: 6.191e+01 + sentinel:grid_square: CQ + sentinel:latitude_band: M + sentinel:processing_baseline: '04.00' + sentinel:product_id: 
S2A_MSIL2A_20220808T072631_N0400_R049_T37MCQ_20220808T111058 + sentinel:sequence: '0' + sentinel:utm_zone: 37 + sentinel:valid_cloud_cover: true + +measurements: + AOT: + grid: g60m + path: AOT.tif + B01: + grid: g60m + path: B01.tif + B02: + path: B02.tif + B03: + path: B03.tif + B04: + path: B04.tif + B05: + grid: g20m + path: B05.tif + B06: + grid: g20m + path: B06.tif + B07: + grid: g20m + path: B07.tif + B08: + path: B08.tif + B09: + grid: g60m + path: B09.tif + B11: + grid: g20m + path: B11.tif + B12: + grid: g20m + path: B12.tif + B8A: + grid: g20m + path: B8A.tif + SCL: + grid: g20m + path: SCL.tif + WVP: + path: WVP.tif + visual: + path: TCI.tif + overview: + grid: g320m + path: L2A_PVI.tif + +accessories: + info: + path: https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/37/M/CQ/2022/8/8/0/tileInfo.json + metadata: + path: https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/37/M/CQ/2022/8/8/0/metadata.xml + thumbnail: + path: https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/37/M/CQ/2022/8/8/0/preview.jpg + +lineage: {} +... diff -Nru datacube-1.8.7/integration_tests/data/eo3/s2_africa_product.yaml datacube-1.8.9/integration_tests/data/eo3/s2_africa_product.yaml --- datacube-1.8.7/integration_tests/data/eo3/s2_africa_product.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/s2_africa_product.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,144 @@ +--- +# Product +# url: https://explorer.digitalearth.africa/products/s2_l2a.odc-product.yaml +name: s2_l2a +metadata_type: eo3 +description: Sentinel-2a and Sentinel-2b imagery, processed to Level 2A (Surface Reflectance) + and converted to Cloud Optimized GeoTIFFs +metadata: + product: + name: s2_l2a +measurements: +- name: B01 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_01 + - coastal_aerosol +- name: B02 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_02 + - blue +- name: B03 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_03 + - green +- name: B04 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_04 + - red +- name: B05 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_05 + - red_edge_1 +- name: B06 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_06 + - red_edge_2 +- name: B07 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_07 + - red_edge_3 +- name: B08 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_08 + - nir + - nir_1 +- name: B8A + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_8a + - nir_narrow + - nir_2 +- name: B09 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_09 + - water_vapour +- name: B11 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_11 + - swir_1 + - swir_16 +- name: B12 + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - band_12 + - swir_2 + - swir_22 +- name: SCL + dtype: uint8 + units: '1' + nodata: 0 + aliases: + - mask + - qa + flags_definition: + qa: + bits: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + values: + '0': no data + '1': saturated or defective + '2': dark area pixels + '3': cloud shadows + '4': vegetation + '5': bare soils + '6': water + '7': unclassified + '8': cloud medium probability + '9': cloud high probability + '10': thin cirrus + '11': snow or ice + description: Sen2Cor Scene Classification +- name: AOT + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - aerosol_optical_thickness +- name: WVP + dtype: uint16 + units: '1' + nodata: 0 + aliases: + - scene_average_water_vapour +... 
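
A short, hedged sketch (editor-added) of how the s2_l2a product defined above is typically consumed once indexed: bands can be requested by their canonical names (or, per the measurement definitions, by their aliases), and the uint16 nodata value of 0 should be masked before band arithmetic. The spatial extents below are placeholders:

    from datacube import Datacube

    dc = Datacube(app="s2-l2a-example")
    ds = dc.load(product="s2_l2a",
                 measurements=["B04", "B08"],     # also reachable via the aliases "red" / "nir"
                 x=(36.9, 37.0), y=(-5.6, -5.5),  # placeholder lon/lat query
                 output_crs="EPSG:32737", resolution=(-10, 10))

    red = ds.B04.where(ds.B04 != 0)               # nodata is 0 for these bands
    nir = ds.B08.where(ds.B08 != 0)
    ndvi = (nir - red) / (nir + red)
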
diff -Nru datacube-1.8.7/integration_tests/data/eo3/wo_dataset.yaml datacube-1.8.9/integration_tests/data/eo3/wo_dataset.yaml --- datacube-1.8.7/integration_tests/data/eo3/wo_dataset.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/wo_dataset.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,94 @@ +--- +# Dataset +$schema: https://schemas.opendatacube.org/dataset +id: 8ab655e0-2269-552d-9990-1a19ffbc4933 + +label: ga_ls_wo_3_090086_2016-05-12_final +product: + name: ga_ls_wo_3 + +crs: epsg:32655 +geometry: + type: Polygon + coordinates: [[[744887.1641292616, -4268586.69296148], [744884.9999999991, -4268595.0], + [558427.4207787119, -4219934.026804932], [557855.5131670194, -4219783.46049894], + [557709.1155350873, -4219707.51089463], [557632.5000000001, -4219687.5], [557812.9841364851, + -4218918.719827209], [565387.986437303, -4189848.7110030535], [575002.9904772384, + -4153068.6955619035], [602962.9904772406, -4046433.6955619026], [606937.9957531227, + -4031358.6754970923], [607088.0285426822, -4030803.553238912], [607117.4999999999, + -4030762.5000000005], [687286.8455741361, -4051713.5515318085], [794114.9999999988, + -4079624.9999999995], [794113.4191087743, -4079631.06827168], [794193.1066017186, + -4079651.8933982817], [794152.0157050926, -4079891.280781102], [745042.0157050904, + -4268441.280781103], [744982.499999999, -4268602.499999999], [744887.1641292616, + -4268586.69296148]]] +grids: + default: + shape: [7941, 7901] + transform: [30.0, 0.0, 557385.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] + +properties: + datetime: 2016-05-12 23:50:37.621730Z + dea:dataset_maturity: final + dtr:end_datetime: 2016-05-12 23:50:52.031499Z + dtr:start_datetime: 2016-05-12 23:50:23.054165Z + eo:cloud_cover: 58.910716655901616 + eo:gsd: 30.0 # Ground sample distance (m) + eo:instrument: WOOLI_TIRS + eo:platform: landsat-8 + eo:sun_azimuth: 34.58516815 + eo:sun_elevation: 26.29614366 + fmask:clear: 22.3973998672305 + fmask:cloud: 58.910716655901616 + fmask:cloud_shadow: 1.3150997463296996 + fmask:snow: 0.0006217170293219306 + fmask:water: 17.376162013508864 + gqa:abs_iterative_mean_x: 0.19 + gqa:abs_iterative_mean_xy: 0.25 + gqa:abs_iterative_mean_y: 0.16 + gqa:abs_x: 0.43 + gqa:abs_xy: 0.51 + gqa:abs_y: 0.28 + gqa:cep90: 0.49 + gqa:iterative_mean_x: -0.1 + gqa:iterative_mean_xy: 0.15 + gqa:iterative_mean_y: 0.11 + gqa:iterative_stddev_x: 0.24 + gqa:iterative_stddev_xy: 0.3 + gqa:iterative_stddev_y: 0.17 + gqa:mean_x: -0.1 + gqa:mean_xy: 0.11 + gqa:mean_y: 0.05 + gqa:stddev_x: 1.15 + gqa:stddev_xy: 1.28 + gqa:stddev_y: 0.56 + landsat:collection_category: T1 + landsat:collection_number: 1 + landsat:landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1 + landsat:landsat_scene_id: LC80900862016133LGN02 + landsat:wrs_path: 90 + landsat:wrs_row: 86 + odc:collection_number: 3 + odc:dataset_version: 1.6.0 + odc:file_format: GeoTIFF + odc:naming_conventions: dea_c3 + odc:processing_datetime: 2021-05-19 13:50:41.632832Z + odc:producer: ga.gov.au + odc:product_family: wo + odc:region_code: '090086' + +measurements: + water: + path: ga_ls_wo_3_090086_2016-05-12_final_water.tif + +accessories: + thumbnail: + path: ga_ls_wo_3_090086_2016-05-12_final_thumbnail.jpg + checksum:sha1: + path: ga_ls_wo_3_090086_2016-05-12_final.sha1 + metadata:processor: + path: ga_ls_wo_3_090086_2016-05-12_final.proc-info.yaml + +lineage: + ard: + - c21648b1-a6fa-4de0-9dc3-9c445d8b295a +... 
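
One more editor-added sketch, with placeholder query values, of how the bit-level flags_definition of the ga_ls_wo_3 water measurement introduced earlier in this patch could be applied with datacube.utils.masking once a dataset like wo_dataset.yaml is indexed:

    from datacube import Datacube
    from datacube.utils import masking

    dc = Datacube(app="wo-flags-example")
    wo = dc.load(product="ga_ls_wo_3",
                 x=(148.0, 148.2), y=(-36.0, -35.8),   # placeholder extents
                 output_crs="EPSG:32655", resolution=(-30, 30))

    # "wet" is the whole-byte flag {128: true}: bit 7 set, all other bits clear.
    wet = masking.make_mask(wo.water, wet=True)

    # The same selection expressed through the individual single-bit flags.
    clear_wet = masking.make_mask(wo.water,
                                  water_observed=True,
                                  nodata=False, noncontiguous=False,
                                  low_solar_angle=False, terrain_shadow=False,
                                  high_slope=False, cloud_shadow=False, cloud=False)
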
diff -Nru datacube-1.8.7/integration_tests/data/eo3/wo_ds_with_lineage.odc-metadata.yaml datacube-1.8.9/integration_tests/data/eo3/wo_ds_with_lineage.odc-metadata.yaml --- datacube-1.8.7/integration_tests/data/eo3/wo_ds_with_lineage.odc-metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/eo3/wo_ds_with_lineage.odc-metadata.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,222 @@ +--- +# Dataset +$schema: https://schemas.opendatacube.org/dataset +id: 8ab655e0-2269-552d-9990-1a19ffbc4933 + +label: ga_ls_wo_3_090086_2016-05-12_final +product: + name: ga_ls_wo_3 + +crs: epsg:32655 +geometry: + type: Polygon + coordinates: [[[744887.1641292616, -4268586.69296148], [744884.9999999991, -4268595.0], + [558427.4207787119, -4219934.026804932], [557855.5131670194, -4219783.46049894], + [557709.1155350873, -4219707.51089463], [557632.5000000001, -4219687.5], [557812.9841364851, + -4218918.719827209], [565387.986437303, -4189848.7110030535], [575002.9904772384, + -4153068.6955619035], [602962.9904772406, -4046433.6955619026], [606937.9957531227, + -4031358.6754970923], [607088.0285426822, -4030803.553238912], [607117.4999999999, + -4030762.5000000005], [687286.8455741361, -4051713.5515318085], [794114.9999999988, + -4079624.9999999995], [794113.4191087743, -4079631.06827168], [794193.1066017186, + -4079651.8933982817], [794152.0157050926, -4079891.280781102], [745042.0157050904, + -4268441.280781103], [744982.499999999, -4268602.499999999], [744887.1641292616, + -4268586.69296148]]] +grids: + default: + shape: [7941, 7901] + transform: [30.0, 0.0, 557385.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] + +properties: + datetime: 2016-05-12 23:50:37.621730Z + dea:dataset_maturity: final + dtr:end_datetime: 2016-05-12 23:50:52.031499Z + dtr:start_datetime: 2016-05-12 23:50:23.054165Z + eo:cloud_cover: 58.910716655901616 + eo:gsd: 30.0 # Ground sample distance (m) + eo:instrument: OLI_TIRS + eo:platform: landsat-8 + eo:sun_azimuth: 34.58516815 + eo:sun_elevation: 26.29614366 + fmask:clear: 22.3973998672305 + fmask:cloud: 58.910716655901616 + fmask:cloud_shadow: 1.3150997463296996 + fmask:snow: 0.0006217170293219306 + fmask:water: 17.376162013508864 + gqa:abs_iterative_mean_x: 0.19 + gqa:abs_iterative_mean_xy: 0.25 + gqa:abs_iterative_mean_y: 0.16 + gqa:abs_x: 0.43 + gqa:abs_xy: 0.51 + gqa:abs_y: 0.28 + gqa:cep90: 0.49 + gqa:iterative_mean_x: -0.1 + gqa:iterative_mean_xy: 0.15 + gqa:iterative_mean_y: 0.11 + gqa:iterative_stddev_x: 0.24 + gqa:iterative_stddev_xy: 0.3 + gqa:iterative_stddev_y: 0.17 + gqa:mean_x: -0.1 + gqa:mean_xy: 0.11 + gqa:mean_y: 0.05 + gqa:stddev_x: 1.15 + gqa:stddev_xy: 1.28 + gqa:stddev_y: 0.56 + landsat:collection_category: T1 + landsat:collection_number: 1 + landsat:landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1 + landsat:landsat_scene_id: LC80900862016133LGN02 + landsat:wrs_path: 90 + landsat:wrs_row: 86 + odc:collection_number: 3 + odc:dataset_version: 1.6.0 + odc:file_format: GeoTIFF + odc:naming_conventions: dea_c3 + odc:processing_datetime: 2021-05-19 13:50:41.632832Z + odc:producer: ga.gov.au + odc:product_family: wo + odc:region_code: '090086' + +measurements: + water: + path: ga_ls_wo_3_090086_2016-05-12_final_water.tif + +accessories: + thumbnail: + path: ga_ls_wo_3_090086_2016-05-12_final_thumbnail.jpg + checksum:sha1: + path: ga_ls_wo_3_090086_2016-05-12_final.sha1 + metadata:processor: + path: ga_ls_wo_3_090086_2016-05-12_final.proc-info.yaml + +lineage: + ard: + id: c21648b1-a6fa-4de0-9dc3-9c445d8b295a + + label: 
ga_ls8c_ard_3-0-0_090086_2016-05-12_final + product: + name: ga_ls8c_ard_3 + + crs: epsg:32655 + geometry: + type: Polygon + coordinates: [[[744887.1641292607, -4268586.692961479], [744885.0, -4268595.0], + [558427.4207787118, -4219934.026804933], [557855.5131670195, -4219783.460498941], + [557709.1155350845, -4219707.510894631], [557632.5, -4219687.5], [557812.9841364821, + -4218918.719827209], [565387.9864373036, -4189848.711003054], [575002.9904772383, + -4153068.6955619035], [602962.9904772383, -4046433.6955619035], [606937.9957531218, + -4031358.675497092], [607088.0285426815, -4030803.553238913], [607117.5, -4030762.5], + [687286.8455741366, -4051713.551531808], [794115.0, -4079625.0], [794113.4191087746, + -4079631.068271681], [794193.1066017178, -4079651.893398282], [794152.0157050898, + -4079891.280781103], [745042.0157050898, -4268441.280781103], [744982.5, -4268602.5], + [744887.1641292607, -4268586.692961479]]] + grids: + default: + shape: [7941, 7901] + transform: [30.0, 0.0, 557385.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] + panchromatic: + shape: [15881, 15801] + transform: [15.0, 0.0, 557392.5, 0.0, -15.0, -4030492.5, 0.0, 0.0, 1.0] + + properties: + datetime: 2016-05-12 23:50:37.621730Z + dea:dataset_maturity: final + dtr:end_datetime: 2016-05-12 23:50:52.031499Z + dtr:start_datetime: 2016-05-12 23:50:23.054165Z + eo:cloud_cover: 58.910716655901616 + eo:gsd: 15.0 # Ground sample distance (m) + eo:instrument: OLI_TIRS + eo:platform: landsat-8 + eo:sun_azimuth: 34.58516815 + eo:sun_elevation: 26.29614366 + fmask:clear: 22.3973998672305 + fmask:cloud: 58.910716655901616 + fmask:cloud_shadow: 1.3150997463296996 + fmask:snow: 0.0006217170293219306 + fmask:water: 17.376162013508864 + gqa:abs_iterative_mean_x: 0.19 + gqa:abs_iterative_mean_xy: 0.25 + gqa:abs_iterative_mean_y: 0.16 + gqa:abs_x: 0.43 + gqa:abs_xy: 0.51 + gqa:abs_y: 0.28 + gqa:cep90: 0.49 + gqa:iterative_mean_x: -0.1 + gqa:iterative_mean_xy: 0.15 + gqa:iterative_mean_y: 0.11 + gqa:iterative_stddev_x: 0.24 + gqa:iterative_stddev_xy: 0.3 + gqa:iterative_stddev_y: 0.17 + gqa:mean_x: -0.1 + gqa:mean_xy: 0.11 + gqa:mean_y: 0.05 + gqa:stddev_x: 1.15 + gqa:stddev_xy: 1.28 + gqa:stddev_y: 0.56 + landsat:collection_category: T1 + landsat:collection_number: 1 + landsat:landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1 + landsat:landsat_scene_id: LC80900862016133LGN02 + landsat:wrs_path: 90 + landsat:wrs_row: 86 + odc:dataset_version: 3.0.0 + odc:file_format: GeoTIFF + odc:processing_datetime: 2019-10-07 20:19:19.218290Z + odc:producer: ga.gov.au + odc:product_family: ard + odc:region_code: '090086' + + measurements: + nbart_blue: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band02.tif + nbart_coastal_aerosol: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band01.tif + nbart_green: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band03.tif + nbart_nir: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band05.tif + nbart_panchromatic: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band08.tif + grid: panchromatic + nbart_red: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band04.tif + nbart_swir_1: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band06.tif + nbart_swir_2: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band07.tif + oa_azimuthal_exiting: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_azimuthal-exiting.tif + oa_azimuthal_incident: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_azimuthal-incident.tif + oa_combined_terrain_shadow: + path: 
ga_ls8c_oa_3-0-0_090086_2016-05-12_final_combined-terrain-shadow.tif + oa_exiting_angle: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_exiting-angle.tif + oa_fmask: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_fmask.tif + oa_incident_angle: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_incident-angle.tif + oa_nbart_contiguity: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_nbart-contiguity.tif + oa_relative_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_relative-azimuth.tif + oa_relative_slope: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_relative-slope.tif + oa_satellite_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_satellite-azimuth.tif + oa_satellite_view: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_satellite-view.tif + oa_solar_azimuth: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_solar-azimuth.tif + oa_solar_zenith: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_solar-zenith.tif + oa_time_delta: + path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_time-delta.tif + + accessories: + thumbnail:nbart: + path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_thumbnail.jpg + checksum:sha1: + path: ga_ls8c_ard_3-0-0_090086_2016-05-12_final.sha1 + metadata:processor: + path: ga_ls8c_ard_3-0-0_090086_2016-05-12_final.proc-info.yaml +... diff -Nru datacube-1.8.7/integration_tests/data/memory/ard_ls8.odc-product.yaml datacube-1.8.9/integration_tests/data/memory/ard_ls8.odc-product.yaml --- datacube-1.8.7/integration_tests/data/memory/ard_ls8.odc-product.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/memory/ard_ls8.odc-product.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,181 +0,0 @@ ---- -name: ga_ls8c_ard_3 -description: Geoscience Australia Landsat 8 Operational Land Imager and Thermal Infra-Red Scanner Analysis Ready Data Collection 3 -metadata_type: eo3_landsat_ard - -license: CC-BY-4.0 - -metadata: - product: - name: ga_ls8c_ard_3 - properties: - eo:platform: landsat-8 - eo:instrument: OLI_TIRS - odc:product_family: ard - odc:producer: ga.gov.au - -measurements: - # NBART - - name: nbart_coastal_aerosol - aliases: - - nbart_band01 - - coastal_aerosol - dtype: int16 - nodata: -999 - units: '1' - - name: nbart_blue - aliases: - - nbart_band02 - - blue - dtype: int16 - nodata: -999 - units: '1' - - name: nbart_green - aliases: - - nbart_band03 - - green - dtype: int16 - nodata: -999 - units: '1' - - name: nbart_red - aliases: - - nbart_band04 - - red - dtype: int16 - nodata: -999 - units: '1' - - name: nbart_nir - aliases: - - nbart_band05 - - nir - dtype: int16 - nodata: -999 - units: '1' - - name: nbart_swir_1 - aliases: - - nbart_band06 - - swir_1 - # Requested for backwards compatibility with previous collection - - swir1 - dtype: int16 - nodata: -999 - units: '1' - - name: nbart_swir_2 - aliases: - - nbart_band07 - - swir_2 - # Requested for backwards compatibility with previous collection - - swir2 - dtype: int16 - nodata: -999 - units: '1' - - name: nbart_panchromatic - aliases: - - nbart_band08 - - panchromatic - dtype: int16 - nodata: -999 - units: '1' - - # Observation Attributes - - name: oa_fmask - aliases: - - fmask - dtype: uint8 - nodata: 0 - units: '1' - flags_definition: - fmask: - bits: [0, 1, 2, 3, 4, 5, 6, 7] - description: Fmask - values: - '0': nodata - '1': valid - '2': cloud - '3': shadow - '4': snow - '5': water - - name: oa_nbart_contiguity - aliases: - - nbart_contiguity - dtype: uint8 - nodata: 255 - units: '1' - flags_definition: - contiguous: - bits: [0] - values: - '1': true - '0': 
false - - name: oa_azimuthal_exiting - aliases: - - azimuthal_exiting - dtype: float32 - nodata: .nan - units: '1' - - name: oa_azimuthal_incident - aliases: - - azimuthal_incident - dtype: float32 - nodata: .nan - units: '1' - - name: oa_combined_terrain_shadow - aliases: - - combined_terrain_shadow - dtype: uint8 - nodata: 255 - units: '1' - - name: oa_exiting_angle - aliases: - - exiting_angle - dtype: float32 - nodata: .nan - units: '1' - - name: oa_incident_angle - aliases: - - incident_angle - dtype: float32 - nodata: .nan - units: '1' - - name: oa_relative_azimuth - aliases: - - relative_azimuth - dtype: float32 - nodata: .nan - units: '1' - - name: oa_relative_slope - aliases: - - relative_slope - dtype: float32 - nodata: .nan - units: '1' - - name: oa_satellite_azimuth - aliases: - - satellite_azimuth - dtype: float32 - nodata: .nan - units: '1' - - name: oa_satellite_view - aliases: - - satellite_view - dtype: float32 - nodata: .nan - units: '1' - - name: oa_solar_azimuth - aliases: - - solar_azimuth - dtype: float32 - nodata: .nan - units: '1' - - name: oa_solar_zenith - aliases: - - solar_zenith - dtype: float32 - nodata: .nan - units: '1' - - name: oa_time_delta - aliases: - - time_delta - dtype: float32 - nodata: .nan - units: '1' diff -Nru datacube-1.8.7/integration_tests/data/memory/eo3_landsat_ard.odc-type.yaml datacube-1.8.9/integration_tests/data/memory/eo3_landsat_ard.odc-type.yaml --- datacube-1.8.7/integration_tests/data/memory/eo3_landsat_ard.odc-type.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/memory/eo3_landsat_ard.odc-type.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,298 +0,0 @@ ---- -name: eo3_landsat_ard -description: EO3 for ARD Landsat Collection 3 -dataset: - id: [id] # No longer configurable in newer ODCs. - sources: [lineage, source_datasets] # No longer configurable in newer ODCs. - - grid_spatial: [grid_spatial, projection] - measurements: [measurements] - creation_dt: [properties, 'odc:processing_datetime'] - label: [label] - format: [properties, 'odc:file_format'] - - search_fields: - platform: - description: Platform code - offset: [properties, 'eo:platform'] - indexed: false - - instrument: - description: Instrument name - offset: [properties, 'eo:instrument'] - indexed: false - - product_family: - description: Product family code - offset: [properties, 'odc:product_family'] - indexed: false - - region_code: - description: > - Spatial reference code from the provider. - For Landsat region_code is a scene path row: - '{:03d}{:03d}.format(path,row)' - For Sentinel it is MGRS code. - In general it is a unique string identifier - that datasets covering roughly the same spatial - region share. 
- - offset: [properties, 'odc:region_code'] - - crs_raw: - description: The raw CRS string as it appears in metadata - offset: ['crs'] - indexed: false - - dataset_maturity: - description: One of - final|interim|nrt (near real time) - offset: [properties, 'dea:dataset_maturity'] - - gqa: - description: GQA Circular error probable (90%) - type: double - offset: [properties, 'gqa:cep90'] - - cloud_cover: - description: Cloud cover percentage [0, 100] - type: double - offset: [properties, 'eo:cloud_cover'] - - time: - description: Acquisition time range - type: datetime-range - min_offset: - - [properties, 'dtr:start_datetime'] - - [properties, datetime] - max_offset: - - [properties, 'dtr:end_datetime'] - - [properties, datetime] - - # LonLat bounding box, generated on the fly from: - # `grids`, `crs` and `geometry` of the new metadata format - # - # Bounding box is defined by two ranges: - # [lon.begin, lon.end] -- Longitude - # [lat.begin, lat.end] -- Latitude - # - # Note that STAC is using `bbox` for the same thing as following: - # - # bbox: [left, bottom, right, top] - # 0 1 2 3 - # lon lat lon lat - # - # But MetadataType does not support integer index keys, so... - # BoundingBox: [lon.begin, lat.begin, lon.end, lat.end] - - lon: - description: Longitude range - type: double-range - min_offset: - - [extent, lon, begin] - max_offset: - - [extent, lon, end] - - lat: - description: Latitude range - type: double-range - min_offset: - - [extent, lat, begin] - max_offset: - - [extent, lat, end] - - landsat_product_id: - description: Landsat Product ID - indexed: false - offset: - - properties - - landsat:landsat_product_id - - # semi-auto generated below - eo_gsd: - description: Ground sample distance, meters - indexed: false - offset: - - properties - - eo:gsd - type: double - eo_sun_azimuth: - description: 'TODO: ' - indexed: false - offset: - - properties - - eo:sun_azimuth - type: double - eo_sun_elevation: - description: 'TODO: ' - indexed: false - offset: - - properties - - eo:sun_elevation - type: double - fmask_clear: - description: 'TODO: ' - indexed: false - offset: - - properties - - fmask:clear - type: double - fmask_cloud_shadow: - description: 'TODO: ' - indexed: false - offset: - - properties - - fmask:cloud_shadow - type: double - fmask_snow: - description: 'TODO: ' - indexed: false - offset: - - properties - - fmask:snow - type: double - fmask_water: - description: 'TODO: ' - indexed: false - offset: - - properties - - fmask:water - type: double - gqa_abs_iterative_mean_x: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:abs_iterative_mean_x - type: double - gqa_abs_iterative_mean_xy: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:abs_iterative_mean_xy - type: double - gqa_abs_iterative_mean_y: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:abs_iterative_mean_y - type: double - gqa_abs_x: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:abs_x - type: double - gqa_abs_xy: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:abs_xy - type: double - gqa_abs_y: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:abs_y - type: double - gqa_cep90: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:cep90 - type: double - gqa_iterative_mean_x: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:iterative_mean_x - type: double - gqa_iterative_mean_xy: - description: 'TODO: ' - indexed: false - 
offset: - - properties - - gqa:iterative_mean_xy - type: double - gqa_iterative_mean_y: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:iterative_mean_y - type: double - gqa_iterative_stddev_x: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:iterative_stddev_x - type: double - gqa_iterative_stddev_xy: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:iterative_stddev_xy - type: double - gqa_iterative_stddev_y: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:iterative_stddev_y - type: double - gqa_mean_x: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:mean_x - type: double - gqa_mean_xy: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:mean_xy - type: double - gqa_mean_y: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:mean_y - type: double - gqa_stddev_x: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:stddev_x - type: double - gqa_stddev_xy: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:stddev_xy - type: double - gqa_stddev_y: - description: 'TODO: ' - indexed: false - offset: - - properties - - gqa:stddev_y - type: double - landsat_scene_id: - description: Landsat Scene ID - indexed: false - offset: - - properties - - landsat:landsat_scene_id diff -Nru datacube-1.8.7/integration_tests/data/memory/ga_ls_wo_3.odc-product.yaml datacube-1.8.9/integration_tests/data/memory/ga_ls_wo_3.odc-product.yaml --- datacube-1.8.7/integration_tests/data/memory/ga_ls_wo_3.odc-product.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/memory/ga_ls_wo_3.odc-product.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,59 +0,0 @@ -name: ga_ls_wo_3 -description: Geoscience Australia Landsat Water Observations Collection 3 -metadata_type: eo3 - -license: CC-BY-4.0 - -metadata: - properties: - odc:file_format: GeoTIFF - odc:product_family: wo - product: - name: ga_ls_wo_3 - -measurements: - - name: water - dtype: uint8 - nodata: 1 - units: '1' - flags_definition: - dry: - bits: [7, 6, 5, 4, 3, 2, 1, 0] - description: No water detected - values: {0: true} - nodata: - bits: 0 - description: No data - values: {0: false, 1: true} - noncontiguous: - bits: 1 - description: At least one EO band is missing or saturated - values: {0: false, 1: true} - low_solar_angle: - bits: 2 - description: Low solar incidence angle - values: {0: false, 1: true} - terrain_shadow: - bits: 3 - description: Terrain shadow - values: {0: false, 1: true} - high_slope: - bits: 4 - description: High slope - values: {0: false, 1: true} - cloud_shadow: - bits: 5 - description: Cloud shadow - values: {0: false, 1: true} - cloud: - bits: 6 - description: Cloudy - values: {0: false, 1: true} - water_observed: - bits: 7 - description: Classified as water by the decision tree - values: {0: false, 1: true} - wet: - bits: [7, 6, 5, 4, 3, 2, 1, 0] - description: Clear and Wet - values: {128: true} diff -Nru datacube-1.8.7/integration_tests/data/memory/ls8_dataset.yaml datacube-1.8.9/integration_tests/data/memory/ls8_dataset.yaml --- datacube-1.8.7/integration_tests/data/memory/ls8_dataset.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/memory/ls8_dataset.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,133 +0,0 @@ ---- -# Dataset -$schema: https://schemas.opendatacube.org/dataset -id: c21648b1-a6fa-4de0-9dc3-9c445d8b295a - -label: 
ga_ls8c_ard_3-0-0_090086_2016-05-12_final -product: - name: ga_ls8c_ard_3 - -crs: epsg:32655 -geometry: - type: Polygon - coordinates: [[[744887.1641292607, -4268586.692961479], [744885.0, -4268595.0], - [558427.4207787118, -4219934.026804933], [557855.5131670195, -4219783.460498941], - [557709.1155350845, -4219707.510894631], [557632.5, -4219687.5], [557812.9841364821, - -4218918.719827209], [565387.9864373036, -4189848.711003054], [575002.9904772383, - -4153068.6955619035], [602962.9904772383, -4046433.6955619035], [606937.9957531218, - -4031358.675497092], [607088.0285426815, -4030803.553238913], [607117.5, -4030762.5], - [687286.8455741366, -4051713.551531808], [794115.0, -4079625.0], [794113.4191087746, - -4079631.068271681], [794193.1066017178, -4079651.893398282], [794152.0157050898, - -4079891.280781103], [745042.0157050898, -4268441.280781103], [744982.5, -4268602.5], - [744887.1641292607, -4268586.692961479]]] -grids: - default: - shape: [7941, 7901] - transform: [30.0, 0.0, 557385.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] - panchromatic: - shape: [15881, 15801] - transform: [15.0, 0.0, 557392.5, 0.0, -15.0, -4030492.5, 0.0, 0.0, 1.0] - -properties: - datetime: 2016-05-12 23:50:37.621730Z - dea:dataset_maturity: final - dtr:end_datetime: 2016-05-12 23:50:52.031499Z - dtr:start_datetime: 2016-05-12 23:50:23.054165Z - eo:cloud_cover: 58.910716655901616 - eo:gsd: 15.0 # Ground sample distance (m) - eo:instrument: OLI_TIRS - eo:platform: landsat-8 - eo:sun_azimuth: 34.58516815 - eo:sun_elevation: 26.29614366 - fmask:clear: 22.3973998672305 - fmask:cloud: 58.910716655901616 - fmask:cloud_shadow: 1.3150997463296996 - fmask:snow: 0.0006217170293219306 - fmask:water: 17.376162013508864 - gqa:abs_iterative_mean_x: 0.19 - gqa:abs_iterative_mean_xy: 0.25 - gqa:abs_iterative_mean_y: 0.16 - gqa:abs_x: 0.43 - gqa:abs_xy: 0.51 - gqa:abs_y: 0.28 - gqa:cep90: 0.49 - gqa:iterative_mean_x: -0.1 - gqa:iterative_mean_xy: 0.15 - gqa:iterative_mean_y: 0.11 - gqa:iterative_stddev_x: 0.24 - gqa:iterative_stddev_xy: 0.3 - gqa:iterative_stddev_y: 0.17 - gqa:mean_x: -0.1 - gqa:mean_xy: 0.11 - gqa:mean_y: 0.05 - gqa:stddev_x: 1.15 - gqa:stddev_xy: 1.28 - gqa:stddev_y: 0.56 - landsat:collection_category: T1 - landsat:collection_number: 1 - landsat:landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1 - landsat:landsat_scene_id: LC80900862016133LGN02 - landsat:wrs_path: 90 - landsat:wrs_row: 86 - odc:dataset_version: 3.0.0 - odc:file_format: GeoTIFF - odc:processing_datetime: 2019-10-07 20:19:19.218290Z - odc:producer: ga.gov.au - odc:product_family: ard - odc:region_code: '090086' - -measurements: - nbart_blue: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band02.tif - nbart_coastal_aerosol: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band01.tif - nbart_green: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band03.tif - nbart_nir: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band05.tif - nbart_panchromatic: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band08.tif - grid: panchromatic - nbart_red: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band04.tif - nbart_swir_1: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band06.tif - nbart_swir_2: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band07.tif - oa_azimuthal_exiting: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_azimuthal-exiting.tif - oa_azimuthal_incident: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_azimuthal-incident.tif - oa_combined_terrain_shadow: - path: 
ga_ls8c_oa_3-0-0_090086_2016-05-12_final_combined-terrain-shadow.tif - oa_exiting_angle: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_exiting-angle.tif - oa_fmask: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_fmask.tif - oa_incident_angle: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_incident-angle.tif - oa_nbart_contiguity: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_nbart-contiguity.tif - oa_relative_azimuth: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_relative-azimuth.tif - oa_relative_slope: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_relative-slope.tif - oa_satellite_azimuth: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_satellite-azimuth.tif - oa_satellite_view: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_satellite-view.tif - oa_solar_azimuth: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_solar-azimuth.tif - oa_solar_zenith: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_solar-zenith.tif - oa_time_delta: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_time-delta.tif - -accessories: - thumbnail:nbart: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_thumbnail.jpg - checksum:sha1: - path: ga_ls8c_ard_3-0-0_090086_2016-05-12_final.sha1 - metadata:processor: - path: ga_ls8c_ard_3-0-0_090086_2016-05-12_final.proc-info.yaml -... diff -Nru datacube-1.8.7/integration_tests/data/memory/wo_dataset.yaml datacube-1.8.9/integration_tests/data/memory/wo_dataset.yaml --- datacube-1.8.7/integration_tests/data/memory/wo_dataset.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/memory/wo_dataset.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,94 +0,0 @@ ---- -# Dataset -$schema: https://schemas.opendatacube.org/dataset -id: 8ab655e0-2269-552d-9990-1a19ffbc4933 - -label: ga_ls_wo_3_090086_2016-05-12_final -product: - name: ga_ls_wo_3 - -crs: epsg:32655 -geometry: - type: Polygon - coordinates: [[[744887.1641292616, -4268586.69296148], [744884.9999999991, -4268595.0], - [558427.4207787119, -4219934.026804932], [557855.5131670194, -4219783.46049894], - [557709.1155350873, -4219707.51089463], [557632.5000000001, -4219687.5], [557812.9841364851, - -4218918.719827209], [565387.986437303, -4189848.7110030535], [575002.9904772384, - -4153068.6955619035], [602962.9904772406, -4046433.6955619026], [606937.9957531227, - -4031358.6754970923], [607088.0285426822, -4030803.553238912], [607117.4999999999, - -4030762.5000000005], [687286.8455741361, -4051713.5515318085], [794114.9999999988, - -4079624.9999999995], [794113.4191087743, -4079631.06827168], [794193.1066017186, - -4079651.8933982817], [794152.0157050926, -4079891.280781102], [745042.0157050904, - -4268441.280781103], [744982.499999999, -4268602.499999999], [744887.1641292616, - -4268586.69296148]]] -grids: - default: - shape: [7941, 7901] - transform: [30.0, 0.0, 557385.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] - -properties: - datetime: 2016-05-12 23:50:37.621730Z - dea:dataset_maturity: final - dtr:end_datetime: 2016-05-12 23:50:52.031499Z - dtr:start_datetime: 2016-05-12 23:50:23.054165Z - eo:cloud_cover: 58.910716655901616 - eo:gsd: 30.0 # Ground sample distance (m) - eo:instrument: OLI_TIRS - eo:platform: landsat-8 - eo:sun_azimuth: 34.58516815 - eo:sun_elevation: 26.29614366 - fmask:clear: 22.3973998672305 - fmask:cloud: 58.910716655901616 - fmask:cloud_shadow: 1.3150997463296996 - fmask:snow: 0.0006217170293219306 - fmask:water: 17.376162013508864 - gqa:abs_iterative_mean_x: 0.19 - gqa:abs_iterative_mean_xy: 0.25 - gqa:abs_iterative_mean_y: 0.16 - gqa:abs_x: 0.43 - 
gqa:abs_xy: 0.51 - gqa:abs_y: 0.28 - gqa:cep90: 0.49 - gqa:iterative_mean_x: -0.1 - gqa:iterative_mean_xy: 0.15 - gqa:iterative_mean_y: 0.11 - gqa:iterative_stddev_x: 0.24 - gqa:iterative_stddev_xy: 0.3 - gqa:iterative_stddev_y: 0.17 - gqa:mean_x: -0.1 - gqa:mean_xy: 0.11 - gqa:mean_y: 0.05 - gqa:stddev_x: 1.15 - gqa:stddev_xy: 1.28 - gqa:stddev_y: 0.56 - landsat:collection_category: T1 - landsat:collection_number: 1 - landsat:landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1 - landsat:landsat_scene_id: LC80900862016133LGN02 - landsat:wrs_path: 90 - landsat:wrs_row: 86 - odc:collection_number: 3 - odc:dataset_version: 1.6.0 - odc:file_format: GeoTIFF - odc:naming_conventions: dea_c3 - odc:processing_datetime: 2021-05-19 13:50:41.632832Z - odc:producer: ga.gov.au - odc:product_family: wo - odc:region_code: '090086' - -measurements: - water: - path: ga_ls_wo_3_090086_2016-05-12_final_water.tif - -accessories: - thumbnail: - path: ga_ls_wo_3_090086_2016-05-12_final_thumbnail.jpg - checksum:sha1: - path: ga_ls_wo_3_090086_2016-05-12_final.sha1 - metadata:processor: - path: ga_ls_wo_3_090086_2016-05-12_final.proc-info.yaml - -lineage: - ard: - - c21648b1-a6fa-4de0-9dc3-9c445d8b295a -... diff -Nru datacube-1.8.7/integration_tests/data/memory/wo_ds_with_lineage.odc-metadata.yaml datacube-1.8.9/integration_tests/data/memory/wo_ds_with_lineage.odc-metadata.yaml --- datacube-1.8.7/integration_tests/data/memory/wo_ds_with_lineage.odc-metadata.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/data/memory/wo_ds_with_lineage.odc-metadata.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,222 +0,0 @@ ---- -# Dataset -$schema: https://schemas.opendatacube.org/dataset -id: 8ab655e0-2269-552d-9990-1a19ffbc4933 - -label: ga_ls_wo_3_090086_2016-05-12_final -product: - name: ga_ls_wo_3 - -crs: epsg:32655 -geometry: - type: Polygon - coordinates: [[[744887.1641292616, -4268586.69296148], [744884.9999999991, -4268595.0], - [558427.4207787119, -4219934.026804932], [557855.5131670194, -4219783.46049894], - [557709.1155350873, -4219707.51089463], [557632.5000000001, -4219687.5], [557812.9841364851, - -4218918.719827209], [565387.986437303, -4189848.7110030535], [575002.9904772384, - -4153068.6955619035], [602962.9904772406, -4046433.6955619026], [606937.9957531227, - -4031358.6754970923], [607088.0285426822, -4030803.553238912], [607117.4999999999, - -4030762.5000000005], [687286.8455741361, -4051713.5515318085], [794114.9999999988, - -4079624.9999999995], [794113.4191087743, -4079631.06827168], [794193.1066017186, - -4079651.8933982817], [794152.0157050926, -4079891.280781102], [745042.0157050904, - -4268441.280781103], [744982.499999999, -4268602.499999999], [744887.1641292616, - -4268586.69296148]]] -grids: - default: - shape: [7941, 7901] - transform: [30.0, 0.0, 557385.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] - -properties: - datetime: 2016-05-12 23:50:37.621730Z - dea:dataset_maturity: final - dtr:end_datetime: 2016-05-12 23:50:52.031499Z - dtr:start_datetime: 2016-05-12 23:50:23.054165Z - eo:cloud_cover: 58.910716655901616 - eo:gsd: 30.0 # Ground sample distance (m) - eo:instrument: OLI_TIRS - eo:platform: landsat-8 - eo:sun_azimuth: 34.58516815 - eo:sun_elevation: 26.29614366 - fmask:clear: 22.3973998672305 - fmask:cloud: 58.910716655901616 - fmask:cloud_shadow: 1.3150997463296996 - fmask:snow: 0.0006217170293219306 - fmask:water: 17.376162013508864 - gqa:abs_iterative_mean_x: 0.19 - gqa:abs_iterative_mean_xy: 0.25 - gqa:abs_iterative_mean_y: 0.16 - gqa:abs_x: 0.43 - 
gqa:abs_xy: 0.51 - gqa:abs_y: 0.28 - gqa:cep90: 0.49 - gqa:iterative_mean_x: -0.1 - gqa:iterative_mean_xy: 0.15 - gqa:iterative_mean_y: 0.11 - gqa:iterative_stddev_x: 0.24 - gqa:iterative_stddev_xy: 0.3 - gqa:iterative_stddev_y: 0.17 - gqa:mean_x: -0.1 - gqa:mean_xy: 0.11 - gqa:mean_y: 0.05 - gqa:stddev_x: 1.15 - gqa:stddev_xy: 1.28 - gqa:stddev_y: 0.56 - landsat:collection_category: T1 - landsat:collection_number: 1 - landsat:landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1 - landsat:landsat_scene_id: LC80900862016133LGN02 - landsat:wrs_path: 90 - landsat:wrs_row: 86 - odc:collection_number: 3 - odc:dataset_version: 1.6.0 - odc:file_format: GeoTIFF - odc:naming_conventions: dea_c3 - odc:processing_datetime: 2021-05-19 13:50:41.632832Z - odc:producer: ga.gov.au - odc:product_family: wo - odc:region_code: '090086' - -measurements: - water: - path: ga_ls_wo_3_090086_2016-05-12_final_water.tif - -accessories: - thumbnail: - path: ga_ls_wo_3_090086_2016-05-12_final_thumbnail.jpg - checksum:sha1: - path: ga_ls_wo_3_090086_2016-05-12_final.sha1 - metadata:processor: - path: ga_ls_wo_3_090086_2016-05-12_final.proc-info.yaml - -lineage: - ard: - id: c21648b1-a6fa-4de0-9dc3-9c445d8b295a - - label: ga_ls8c_ard_3-0-0_090086_2016-05-12_final - product: - name: ga_ls8c_ard_3 - - crs: epsg:32655 - geometry: - type: Polygon - coordinates: [[[744887.1641292607, -4268586.692961479], [744885.0, -4268595.0], - [558427.4207787118, -4219934.026804933], [557855.5131670195, -4219783.460498941], - [557709.1155350845, -4219707.510894631], [557632.5, -4219687.5], [557812.9841364821, - -4218918.719827209], [565387.9864373036, -4189848.711003054], [575002.9904772383, - -4153068.6955619035], [602962.9904772383, -4046433.6955619035], [606937.9957531218, - -4031358.675497092], [607088.0285426815, -4030803.553238913], [607117.5, -4030762.5], - [687286.8455741366, -4051713.551531808], [794115.0, -4079625.0], [794113.4191087746, - -4079631.068271681], [794193.1066017178, -4079651.893398282], [794152.0157050898, - -4079891.280781103], [745042.0157050898, -4268441.280781103], [744982.5, -4268602.5], - [744887.1641292607, -4268586.692961479]]] - grids: - default: - shape: [7941, 7901] - transform: [30.0, 0.0, 557385.0, 0.0, -30.0, -4030485.0, 0.0, 0.0, 1.0] - panchromatic: - shape: [15881, 15801] - transform: [15.0, 0.0, 557392.5, 0.0, -15.0, -4030492.5, 0.0, 0.0, 1.0] - - properties: - datetime: 2016-05-12 23:50:37.621730Z - dea:dataset_maturity: final - dtr:end_datetime: 2016-05-12 23:50:52.031499Z - dtr:start_datetime: 2016-05-12 23:50:23.054165Z - eo:cloud_cover: 58.910716655901616 - eo:gsd: 15.0 # Ground sample distance (m) - eo:instrument: OLI_TIRS - eo:platform: landsat-8 - eo:sun_azimuth: 34.58516815 - eo:sun_elevation: 26.29614366 - fmask:clear: 22.3973998672305 - fmask:cloud: 58.910716655901616 - fmask:cloud_shadow: 1.3150997463296996 - fmask:snow: 0.0006217170293219306 - fmask:water: 17.376162013508864 - gqa:abs_iterative_mean_x: 0.19 - gqa:abs_iterative_mean_xy: 0.25 - gqa:abs_iterative_mean_y: 0.16 - gqa:abs_x: 0.43 - gqa:abs_xy: 0.51 - gqa:abs_y: 0.28 - gqa:cep90: 0.49 - gqa:iterative_mean_x: -0.1 - gqa:iterative_mean_xy: 0.15 - gqa:iterative_mean_y: 0.11 - gqa:iterative_stddev_x: 0.24 - gqa:iterative_stddev_xy: 0.3 - gqa:iterative_stddev_y: 0.17 - gqa:mean_x: -0.1 - gqa:mean_xy: 0.11 - gqa:mean_y: 0.05 - gqa:stddev_x: 1.15 - gqa:stddev_xy: 1.28 - gqa:stddev_y: 0.56 - landsat:collection_category: T1 - landsat:collection_number: 1 - landsat:landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1 
- landsat:landsat_scene_id: LC80900862016133LGN02 - landsat:wrs_path: 90 - landsat:wrs_row: 86 - odc:dataset_version: 3.0.0 - odc:file_format: GeoTIFF - odc:processing_datetime: 2019-10-07 20:19:19.218290Z - odc:producer: ga.gov.au - odc:product_family: ard - odc:region_code: '090086' - - measurements: - nbart_blue: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band02.tif - nbart_coastal_aerosol: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band01.tif - nbart_green: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band03.tif - nbart_nir: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band05.tif - nbart_panchromatic: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band08.tif - grid: panchromatic - nbart_red: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band04.tif - nbart_swir_1: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band06.tif - nbart_swir_2: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_band07.tif - oa_azimuthal_exiting: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_azimuthal-exiting.tif - oa_azimuthal_incident: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_azimuthal-incident.tif - oa_combined_terrain_shadow: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_combined-terrain-shadow.tif - oa_exiting_angle: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_exiting-angle.tif - oa_fmask: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_fmask.tif - oa_incident_angle: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_incident-angle.tif - oa_nbart_contiguity: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_nbart-contiguity.tif - oa_relative_azimuth: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_relative-azimuth.tif - oa_relative_slope: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_relative-slope.tif - oa_satellite_azimuth: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_satellite-azimuth.tif - oa_satellite_view: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_satellite-view.tif - oa_solar_azimuth: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_solar-azimuth.tif - oa_solar_zenith: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_solar-zenith.tif - oa_time_delta: - path: ga_ls8c_oa_3-0-0_090086_2016-05-12_final_time-delta.tif - - accessories: - thumbnail:nbart: - path: ga_ls8c_nbart_3-0-0_090086_2016-05-12_final_thumbnail.jpg - checksum:sha1: - path: ga_ls8c_ard_3-0-0_090086_2016-05-12_final.sha1 - metadata:processor: - path: ga_ls8c_ard_3-0-0_090086_2016-05-12_final.proc-info.yaml -... \ No newline at end of file diff -Nru datacube-1.8.7/integration_tests/index/search_utils.py datacube-1.8.9/integration_tests/index/search_utils.py --- datacube-1.8.7/integration_tests/index/search_utils.py 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/search_utils.py 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,47 @@ +# This file is part of the Open Data Cube, see https://opendatacube.org for more information +# +# Copyright (c) 2015-2022 ODC Contributors +# SPDX-License-Identifier: Apache-2.0 +import csv +import io +from typing import Iterable, Tuple, Dict, List + +from dateutil import tz + +import datacube.scripts.search_tool +from datacube.model import Product, Dataset + + +def _load_product_query( + lazy_results: Iterable[Tuple[Product, Iterable[Dataset]]] +) -> Dict[str, List[Dataset]]: + """ + search_by_product() returns two levels of laziness. 
load them all into memory + for easy comparison/counts + """ + products = {} # type: Dict[str, List[Dataset]] + for product, datasets in lazy_results: + assert product.name not in products, "search_by_product() returned a product twice" + products[product.name] = list(datasets) + return products + + +def assume_utc(d): + if d.tzinfo is None: + return d.replace(tzinfo=tz.tzutc()) + else: + return d.astimezone(tz.tzutc()) + + +def _csv_search_raw(args, clirunner): + # Do a CSV search from the cli, returning output as a string + result = clirunner(['-f', 'csv'] + list(args), cli_method=datacube.scripts.search_tool.cli, verbose_flag=False) + output = result.output + output_lines = output.split("\n") + return "\n".join(line for line in output_lines if "WARNING" not in line) + + +def _cli_csv_search(args, clirunner): + # Do a CSV search from the cli, returning results as a list of dictionaries + output = _csv_search_raw(args, clirunner) + return list(csv.DictReader(io.StringIO(output))) diff -Nru datacube-1.8.7/integration_tests/index/test_config_docs.py datacube-1.8.9/integration_tests/index/test_config_docs.py --- datacube-1.8.7/integration_tests/index/test_config_docs.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/test_config_docs.py 2022-11-17 00:47:28.000000000 +0000 @@ -53,19 +53,21 @@ } -def test_metadata_indexes_views_exist(initialised_postgres_db, default_metadata_type): +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_metadata_indexes_views_exist(index, default_metadata_type): """ :type initialised_postgres_db: datacube.drivers.postgres._connections.PostgresDb :type default_metadata_type: datacube.model.MetadataType """ # Metadata indexes should no longer exist. - assert not _object_exists(initialised_postgres_db, 'dix_eo_platform') + assert not _object_exists(index, 'dix_eo_platform') # Ensure view was created (following naming conventions) - assert _object_exists(initialised_postgres_db, 'dv_eo_dataset') + assert _object_exists(index, 'dv_eo_dataset') -def test_dataset_indexes_views_exist(initialised_postgres_db, ls5_telem_type): +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_dataset_indexes_views_exist(index, ls5_telem_type): """ :type initialised_postgres_db: datacube.drivers.postgres._connections.PostgresDb :type ls5_telem_type: datacube.model.DatasetType @@ -73,29 +75,30 @@ assert ls5_telem_type.name == 'ls5_telem_test' # Ensure field indexes were created for the dataset type (following the naming conventions): - assert _object_exists(initialised_postgres_db, "dix_ls5_telem_test_orbit") + assert _object_exists(index, "dix_ls5_telem_test_orbit") # Ensure it does not create a 'platform' index, because that's a fixed field # (ie. 
identical in every dataset of the type) - assert not _object_exists(initialised_postgres_db, "dix_ls5_telem_test_platform") + assert not _object_exists(index, "dix_ls5_telem_test_platform") # Ensure view was created (following naming conventions) - assert _object_exists(initialised_postgres_db, 'dv_ls5_telem_test_dataset') + assert _object_exists(index, 'dv_ls5_telem_test_dataset') # Ensure view was created (following naming conventions) - assert not _object_exists(initialised_postgres_db, + assert not _object_exists(index, 'dix_ls5_telem_test_gsi'), "indexed=false field gsi shouldn't have an index" -def test_dataset_composite_indexes_exist(initialised_postgres_db, ls5_telem_type): +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_dataset_composite_indexes_exist(index, ls5_telem_type): # This type has fields named lat/lon/time, so composite indexes should now exist for them: # (following the naming conventions) - assert _object_exists(initialised_postgres_db, "dix_ls5_telem_test_sat_path_sat_row_time") + assert _object_exists(index, "dix_ls5_telem_test_sat_path_sat_row_time") # But no individual field indexes for these - assert not _object_exists(initialised_postgres_db, "dix_ls5_telem_test_sat_path") - assert not _object_exists(initialised_postgres_db, "dix_ls5_telem_test_sat_row") - assert not _object_exists(initialised_postgres_db, "dix_ls5_telem_test_time") + assert not _object_exists(index, "dix_ls5_telem_test_sat_path") + assert not _object_exists(index, "dix_ls5_telem_test_sat_row") + assert not _object_exists(index, "dix_ls5_telem_test_time") @pytest.mark.parametrize('datacube_env_name', ('datacube', )) @@ -156,6 +159,7 @@ default_metadata_type: MetadataType, telemetry_metadata_type: MetadataType) -> None: # We're checking for accidental changes here in our field-to-SQL code + # Dubious test as it uses non-EO3 metadata types # If we started outputting a different expression they would quietly no longer match the expression # indexes that exist in our DBs. @@ -206,12 +210,12 @@ ) -def _object_exists(db, index_name): - if db.driver_name == "postgis": +def _object_exists(index, index_name): + if index._db.driver_name == "postgis": schema_name = "odc" else: schema_name = "agdc" - with db.connect() as connection: + with index._active_connection() as connection: val = connection._connection.execute(f"SELECT to_regclass('{schema_name}.{index_name}')").scalar() return val in (index_name, f'{schema_name}.{index_name}') @@ -305,6 +309,7 @@ doc['product_type'] = 'foobar' +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_update_dataset_type(index, ls5_telem_type, ls5_telem_doc, ga_metadata_type_doc): """ :type ls5_telem_type: datacube.model.DatasetType @@ -333,11 +338,11 @@ index.products.update_document(full_doc) # Remove fixed field, forcing a new index to be created (as datasets can now differ for the field). 
- assert not _object_exists(index._db, 'dix_ls5_telem_test_product_type') + assert not _object_exists(index, 'dix_ls5_telem_test_product_type') del ls5_telem_doc['metadata']['product_type'] index.products.update_document(ls5_telem_doc) # Ensure was updated - assert _object_exists(index._db, 'dix_ls5_telem_test_product_type') + assert _object_exists(index, 'dix_ls5_telem_test_product_type') updated_type = index.products.get_by_name(ls5_telem_type.name) assert updated_type.definition['metadata'] == ls5_telem_doc['metadata'] @@ -474,8 +479,8 @@ index.metadata_types.update_document(different_mt_doc, allow_unsafe_updates=True) updated_type = index.metadata_types.get_by_name(mt_doc['name']) assert ( - isinstance(updated_type.dataset_fields['time'], PgrNumericRangeDocField) - or isinstance(updated_type.dataset_fields['time'], PgsNumericRangeDocField) + isinstance(updated_type.dataset_fields['time'], PgrNumericRangeDocField) + or isinstance(updated_type.dataset_fields['time'], PgsNumericRangeDocField) ) @@ -543,7 +548,8 @@ assert res == [] -def test_update_metadata_type_doc(initialised_postgres_db, index, ls5_telem_type): +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_update_metadata_type_doc(index, ls5_telem_type): type_doc = copy.deepcopy(ls5_telem_type.metadata_type.definition) type_doc['dataset']['search_fields']['test_indexed'] = { 'description': 'indexed test field', @@ -558,5 +564,5 @@ index.metadata_types.update_document(type_doc) assert ls5_telem_type.name == 'ls5_telem_test' - assert _object_exists(initialised_postgres_db, "dix_ls5_telem_test_test_indexed") - assert not _object_exists(initialised_postgres_db, "dix_ls5_telem_test_test_not_indexed") + assert _object_exists(index, "dix_ls5_telem_test_test_indexed") + assert not _object_exists(index, "dix_ls5_telem_test_test_not_indexed") diff -Nru datacube-1.8.7/integration_tests/index/test_index_data.py datacube-1.8.9/integration_tests/index/test_index_data.py --- datacube-1.8.7/integration_tests/index/test_index_data.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/test_index_data.py 2022-11-17 00:47:28.000000000 +0000 @@ -16,7 +16,6 @@ import pytest from dateutil import tz -from datacube.drivers.postgres import PostgresDb from datacube.index.exceptions import MissingRecordError from datacube.index import Index from datacube.model import Dataset, MetadataType @@ -71,10 +70,10 @@ } -def test_archive_datasets(index, initialised_postgres_db, local_config, default_metadata_type): +def test_archive_datasets(index, local_config, default_metadata_type): dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type) - with initialised_postgres_db.begin() as transaction: - was_inserted = transaction.insert_dataset( + with index.transaction() as transaction: + was_inserted = transaction._connection.insert_dataset( _telemetry_dataset, _telemetry_uuid, dataset_type.id @@ -106,11 +105,11 @@ assert not indexed_dataset.is_archived -def test_purge_datasets(index, initialised_postgres_db, local_config, default_metadata_type, clirunner): +def test_purge_datasets(index, local_config, default_metadata_type, clirunner): # Create dataset dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type) - with initialised_postgres_db.begin() as transaction: - was_inserted = transaction.insert_dataset( + with index.transaction() as transaction: + was_inserted = transaction._connection.insert_dataset( _telemetry_dataset, _telemetry_uuid, dataset_type.id @@ -137,15 +136,15 @@ assert 
index.datasets.get(_telemetry_uuid) is None -def test_purge_datasets_cli(index, initialised_postgres_db, local_config, default_metadata_type, clirunner): +def test_purge_datasets_cli(index, local_config, default_metadata_type, clirunner): dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type) # Attempt to purge non-existent dataset should fail clirunner(['dataset', 'purge', str(_telemetry_uuid)], expect_success=False) # Create dataset - with initialised_postgres_db.begin() as transaction: - was_inserted = transaction.insert_dataset( + with index.transaction() as transaction: + was_inserted = transaction._connection.insert_dataset( _telemetry_dataset, _telemetry_uuid, dataset_type.id @@ -170,12 +169,12 @@ assert index.datasets.get(_telemetry_uuid) is None -def test_purge_all_datasets_cli(index, initialised_postgres_db, local_config, default_metadata_type, clirunner): +def test_purge_all_datasets_cli(index, local_config, default_metadata_type, clirunner): dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type) # Create dataset - with initialised_postgres_db.begin() as transaction: - was_inserted = transaction.insert_dataset( + with index.transaction() as transaction: + was_inserted = transaction._connection.insert_dataset( _telemetry_dataset, _telemetry_uuid, dataset_type.id @@ -202,12 +201,12 @@ @pytest.fixture -def telemetry_dataset(index: Index, initialised_postgres_db: PostgresDb, default_metadata_type) -> Dataset: +def telemetry_dataset(index: Index, default_metadata_type) -> Dataset: dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type) assert not index.datasets.has(_telemetry_uuid) - with initialised_postgres_db.begin() as transaction: - was_inserted = transaction.insert_dataset( + with index.transaction() as transaction: + was_inserted = transaction._connection.insert_dataset( _telemetry_dataset, _telemetry_uuid, dataset_type.id @@ -217,14 +216,14 @@ return index.datasets.get(_telemetry_uuid) -def test_index_duplicate_dataset(index: Index, initialised_postgres_db: PostgresDb, +def test_index_duplicate_dataset(index: Index, local_config, default_metadata_type) -> None: dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type) assert not index.datasets.has(_telemetry_uuid) - with initialised_postgres_db.begin() as transaction: - was_inserted = transaction.insert_dataset( + with index.transaction() as transaction: + was_inserted = transaction._connection.insert_dataset( _telemetry_dataset, _telemetry_uuid, dataset_type.id @@ -234,7 +233,7 @@ assert index.datasets.has(_telemetry_uuid) # Insert again. - with initialised_postgres_db.connect() as connection: + with index._db._connect() as connection: was_inserted = connection.insert_dataset( _telemetry_dataset, _telemetry_uuid, @@ -273,28 +272,89 @@ 'f226a278-e422-11e6-b501-185e0f80a5c1']) == [] -def test_transactions(index: Index, - initialised_postgres_db: PostgresDb, - local_config, - default_metadata_type) -> None: - assert not index.datasets.has(_telemetry_uuid) - - dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type) - with initialised_postgres_db.begin() as transaction: - was_inserted = transaction.insert_dataset( - _telemetry_dataset, - _telemetry_uuid, - dataset_type.id - ) - assert was_inserted - assert transaction.contains_dataset(_telemetry_uuid) - # Normal DB uses a separate connection: No dataset visible yet. - assert not index.datasets.has(_telemetry_uuid) - - transaction.rollback() - - # Should have been rolled back. 
- assert not index.datasets.has(_telemetry_uuid) +def test_transactions_api_ctx_mgr(index, + extended_eo3_metadata_type_doc, + ls8_eo3_product, + eo3_ls8_dataset_doc, + eo3_ls8_dataset2_doc): + from datacube.index.hl import Doc2Dataset + resolver = Doc2Dataset(index, products=[ls8_eo3_product.name], verify_lineage=False) + ds1, err = resolver(*eo3_ls8_dataset_doc) + ds2, err = resolver(*eo3_ls8_dataset2_doc) + with pytest.raises(Exception) as e: + with index.transaction() as trans: + assert index.datasets.get(ds1.id) is None + index.datasets.add(ds1) + assert index.datasets.get(ds1.id) is not None + raise Exception("Rollback!") + assert "Rollback!" in str(e.value) + assert index.datasets.get(ds1.id) is None + with index.transaction() as trans: + assert index.datasets.get(ds1.id) is None + index.datasets.add(ds1) + assert index.datasets.get(ds1.id) is not None + assert index.datasets.get(ds1.id) is not None + with index.transaction() as trans: + index.datasets.add(ds2) + assert index.datasets.get(ds2.id) is not None + raise trans.rollback_exception("Rollback") + assert index.datasets.get(ds1.id) is not None + assert index.datasets.get(ds2.id) is None + + +def test_transactions_api_manual(index, + extended_eo3_metadata_type_doc, + ls8_eo3_product, + eo3_ls8_dataset_doc, + eo3_ls8_dataset2_doc): + from datacube.index.hl import Doc2Dataset + resolver = Doc2Dataset(index, products=[ls8_eo3_product.name], verify_lineage=False) + ds1, err = resolver(*eo3_ls8_dataset_doc) + ds2, err = resolver(*eo3_ls8_dataset2_doc) + trans = index.transaction() + index.datasets.add(ds1) + assert index.datasets.get(ds1.id) is not None + trans.begin() + index.datasets.add(ds2) + assert index.datasets.get(ds1.id) is not None + assert index.datasets.get(ds2.id) is not None + trans.rollback() + assert index.datasets.get(ds1.id) is not None + assert index.datasets.get(ds2.id) is None + trans.begin() + index.datasets.add(ds2) + trans.commit() + assert index.datasets.get(ds1.id) is not None + assert index.datasets.get(ds2.id) is not None + + +def test_transactions_api_hybrid(index, + extended_eo3_metadata_type_doc, + ls8_eo3_product, + eo3_ls8_dataset_doc, + eo3_ls8_dataset2_doc): + from datacube.index.hl import Doc2Dataset + resolver = Doc2Dataset(index, products=[ls8_eo3_product.name], verify_lineage=False) + ds1, err = resolver(*eo3_ls8_dataset_doc) + ds2, err = resolver(*eo3_ls8_dataset2_doc) + with index.transaction() as trans: + assert index.datasets.get(ds1.id) is None + index.datasets.add(ds1) + assert index.datasets.get(ds1.id) is not None + trans.rollback() + assert index.datasets.get(ds1.id) is None + trans.begin() + assert index.datasets.get(ds1.id) is None + index.datasets.add(ds1) + assert index.datasets.get(ds1.id) is not None + trans.commit() + assert index.datasets.get(ds1.id) is not None + trans.begin() + index.datasets.add(ds2) + assert index.datasets.get(ds2.id) is not None + trans.rollback() + assert index.datasets.get(ds1.id) is not None + assert index.datasets.get(ds2.id) is None def test_get_missing_things(index: Index) -> None: @@ -305,8 +365,9 @@ missing_thing = index.datasets.get(uuid_, include_sources=False) assert missing_thing is None, "get() should return none when it doesn't exist" - missing_thing = index.datasets.get(uuid_, include_sources=True) - assert missing_thing is None, "get() should return none when it doesn't exist" + if index.supports_lineage: + missing_thing = index.datasets.get(uuid_, include_sources=True) + assert missing_thing is None, "get() should return none when it 
doesn't exist" id_ = sys.maxsize missing_thing = index.metadata_types.get(id_) @@ -316,6 +377,7 @@ assert missing_thing is None, "get() should return none when it doesn't exist" +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_index_dataset_with_sources(index, default_metadata_type): type_ = index.products.add_document(_pseudo_telemetry_dataset_type) @@ -342,11 +404,8 @@ index.datasets.add(child, with_lineage=True) index.datasets.add(child, with_lineage=False) - # backwards compatibility code path checks, don't use this in normal code - for p in ('skip', 'ensure', 'verify'): - index.datasets.add(child, sources_policy=p) - +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_index_dataset_with_location(index: Index, default_metadata_type: MetadataType): first_file = Path('/tmp/first/something.yaml').absolute() second_file = Path('/tmp/second/something.yaml').absolute() diff -Nru datacube-1.8.7/integration_tests/index/test_memory_index.py datacube-1.8.9/integration_tests/index/test_memory_index.py --- datacube-1.8.7/integration_tests/index/test_memory_index.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/test_memory_index.py 2022-11-17 00:47:28.000000000 +0000 @@ -5,7 +5,6 @@ import datetime import pytest -from uuid import UUID from datacube.testutils import gen_dataset_test_dag from datacube.utils import InvalidDocException, read_documents, SimpleDocNav @@ -549,7 +548,23 @@ for id_ in ds_ids: assert id_ in loc_ids ds_ = SimpleDocNav(gen_dataset_test_dag(1, force_tree=True)) - assert UUID(ds_.id) in ds_ids + assert ds_.id in ds_ids ds_from_idx = idx.datasets.get(ds_.id, include_sources=True) - assert str(ds_from_idx.sources['ab'].id) == ds_.sources['ab'].id - assert str(ds_from_idx.sources['ac'].sources["cd"].id) == ds_.sources['ac'].sources['cd'].id + assert ds_from_idx.sources['ab'].id == ds_.sources['ab'].id + assert ds_from_idx.sources['ac'].sources["cd"].id == ds_.sources['ac'].sources['cd'].id + + +def test_mem_transactions(mem_index_fresh): + trans = mem_index_fresh.index.transaction() + assert not trans.active + trans.begin() + assert trans.active + trans.commit() + assert not trans.active + trans.begin() + assert mem_index_fresh.index.thread_transaction() == trans + with pytest.raises(ValueError): + trans.begin() + trans.rollback() + assert not trans.active + assert mem_index_fresh.index.thread_transaction() is None diff -Nru datacube-1.8.7/integration_tests/index/test_null_index.py datacube-1.8.9/integration_tests/index/test_null_index.py --- datacube-1.8.7/integration_tests/index/test_null_index.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/test_null_index.py 2022-11-17 00:47:28.000000000 +0000 @@ -18,7 +18,7 @@ assert "default" in idxs._drivers assert "null" in idxs._drivers with Datacube(config=null_config, validate_connection=True) as dc: - assert(dc.index.url) == "null" + assert dc.index.url == "null" def test_null_user_resource(null_config): @@ -32,7 +32,7 @@ dc.index.users.grant_role("role1", "user1", "user2") -def test_null_user_resource(null_config): +def test_null_metadata_types_resource(null_config): with Datacube(config=null_config, validate_connection=True) as dc: assert dc.index.metadata_types.get_all() == [] with pytest.raises(NotImplementedError) as e: @@ -120,3 +120,20 @@ assert dc.index.datasets.search_summaries(foo="bar", baz=12) == [] assert dc.index.datasets.search_eager(foo="bar", baz=12) == [] assert 
dc.index.datasets.search_returning_datasets_light(("foo", "baz"), foo="bar", baz=12) == [] + + +def test_null_transactions(null_config): + with Datacube(config=null_config, validate_connection=True) as dc: + trans = dc.index.transaction() + assert not trans.active + trans.begin() + assert trans.active + trans.commit() + assert not trans.active + trans.begin() + assert dc.index.thread_transaction() == trans + with pytest.raises(ValueError): + trans.begin() + trans.rollback() + assert not trans.active + assert dc.index.thread_transaction() is None diff -Nru datacube-1.8.7/integration_tests/index/test_postgis_index.py datacube-1.8.9/integration_tests/index/test_postgis_index.py --- datacube-1.8.7/integration_tests/index/test_postgis_index.py 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/test_postgis_index.py 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,192 @@ +# This file is part of the Open Data Cube, see https://opendatacube.org for more information +# +# Copyright (c) 2015-2022 ODC Contributors +# SPDX-License-Identifier: Apache-2.0 +import pytest + +from datacube.model import Range +from datacube.index import Index +from datacube.utils.geometry import CRS + + +@pytest.mark.parametrize('datacube_env_name', ('experimental',)) +def test_create_spatial_index(index: Index): + # Default spatial index for 4326 + assert list(index.spatial_indexes()) == [CRS("EPSG:4326")] + # WKT CRS which cannot be mapped to an EPSG number. + assert not index.create_spatial_index(CRS( + 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]]' + ',PRIMEM["Weird",22.3],UNIT["Degree",0.017453292519943295]]' + )) + assert list(index.spatial_indexes()) == [CRS("EPSG:4326")] + assert index.create_spatial_index(CRS("EPSG:3577")) + assert index.create_spatial_index(CRS("WGS-84")) + assert set(index.spatial_indexes(refresh=True)) == {CRS("EPSG:3577"), CRS("EPSG:4326")} + + +@pytest.mark.parametrize('datacube_env_name', ('experimental',)) +def test_spatial_index_maintain(index: Index, ls8_eo3_product, eo3_ls8_dataset_doc): + index.create_spatial_index(CRS("EPSG:3577")) + assert set(index.spatial_indexes(refresh=True)) == {CRS("EPSG:3577"), CRS("EPSG:4326")} + from datacube.index.hl import Doc2Dataset + resolver = Doc2Dataset(index, products=[ls8_eo3_product.name], verify_lineage=False) + ds, err = resolver(*eo3_ls8_dataset_doc) + assert err is None and ds is not None + ds = index.datasets.add(ds) + assert ds + index.datasets.archive([ds.id]) + index.datasets.purge([ds.id]) + # Can't really read yet, but seems to write at least + + +@pytest.mark.parametrize('datacube_env_name', ('experimental',)) +def test_spatial_index_populate(index: Index, + ls8_eo3_product, + wo_eo3_product, + ls8_eo3_dataset, ls8_eo3_dataset2, + ls8_eo3_dataset3, ls8_eo3_dataset4, + wo_eo3_dataset): + index.create_spatial_index(CRS("EPSG:3577")) + assert set(index.spatial_indexes(refresh=True)) == {CRS("EPSG:3577"), CRS("EPSG:4326")} + assert index.update_spatial_index( + crses=[CRS("EPSG:4326")], + dataset_ids=[ls8_eo3_dataset.id, ls8_eo3_dataset2.id] + ) == 2 + assert index.update_spatial_index(product_names=[ls8_eo3_product.name]) == 8 + assert index.update_spatial_index() == 10 + assert index.update_spatial_index( + crses=[CRS("EPSG:4326")], + product_names=[wo_eo3_product.name], + dataset_ids=[ls8_eo3_dataset.id] + ) == 2 + assert index.update_spatial_index(product_names=[ls8_eo3_product.name], dataset_ids=[ls8_eo3_dataset.id]) == 8 + + +@pytest.mark.parametrize('datacube_env_name', 
('experimental',)) +def test_spatial_index_crs_validity(index: Index, + ls8_eo3_product, ls8_eo3_dataset, + africa_s2_eo3_product, africa_eo3_dataset): + epsg4326 = CRS("EPSG:4326") + epsg3577 = CRS("EPSG:3577") + index.create_spatial_index(epsg3577) + assert set(index.spatial_indexes(refresh=True)) == {epsg4326, epsg3577} + assert index.update_spatial_index(crses=[epsg3577]) == 2 + + +def test_spatial_index_crs_santise(): + epsg4326 = CRS("EPSG:4326") + epsg3577 = CRS("EPSG:3577") + from datacube.drivers.postgis._api import PostgisDbAPI + from datacube.utils.geometry import polygon + # EPSG:4326 polygons to be converted in EPSG:3577 + # Equal to entire valid region + entire = polygon(( + (112.85, -43.7), + (112.85, -9.86), + (153.69, -9.86), + (153.69, -43.7), + (112.85, -43.7)), crs=epsg4326) + # inside valid region + valid = polygon(( + (130.15, -25.7), + (130.15, -19.86), + (135.22, -19.86), + (135.22, -25.7), + (130.15, -25.7)), crs=epsg4326) + # completely outside valid region + invalid = polygon(( + (-10.15, 25.7), + (-10.15, 33.86), + (5.22, 33.86), + (5.22, 25.7), + (-10.15, 25.7)), crs=epsg4326) + # intersects valid region + partial = polygon(( + (103.15, -25.7), + (103.15, -19.86), + (135.22, -19.86), + (135.22, -25.7), + (103.15, -25.7)), crs=epsg4326) + + assert PostgisDbAPI._sanitise_extent(entire, epsg3577) == entire.to_crs("EPSG:3577") + assert PostgisDbAPI._sanitise_extent(valid, epsg3577) == valid.to_crs("EPSG:3577") + assert PostgisDbAPI._sanitise_extent(invalid, epsg3577) is None + assert PostgisDbAPI._sanitise_extent(partial, epsg3577).area < partial.to_crs("EPSG:3577").area + + +@pytest.mark.parametrize('datacube_env_name', ('experimental',)) +def test_spatial_extent(index, + ls8_eo3_dataset, ls8_eo3_dataset2, + ls8_eo3_dataset3, ls8_eo3_dataset4, + africa_s2_eo3_product, africa_eo3_dataset): + epsg4326 = CRS("EPSG:4326") + epsg3577 = CRS("EPSG:3577") + index.create_spatial_index(epsg3577) + index.update_spatial_index(crses=[epsg3577]) + ext1 = index.datasets.spatial_extent([ls8_eo3_dataset.id], epsg4326) + ext2 = index.datasets.spatial_extent([ls8_eo3_dataset2.id], epsg4326) + ext12 = index.datasets.spatial_extent([ls8_eo3_dataset.id, ls8_eo3_dataset2.id], epsg4326) + assert ext1 is not None and ext2 is not None and ext12 is not None + assert ext1 == ext2 + assert ext12.difference(ext1).area < 0.001 + assert ls8_eo3_dataset.extent.to_crs(epsg4326).intersects(ext1) + assert ls8_eo3_dataset.extent.to_crs(epsg4326).intersects(ext12) + assert ls8_eo3_dataset2.extent.to_crs(epsg4326).intersects(ext2) + assert ls8_eo3_dataset2.extent.to_crs(epsg4326).intersects(ext12) + extau12 = index.datasets.spatial_extent([ls8_eo3_dataset.id, ls8_eo3_dataset2.id], epsg3577) + extau12africa = index.datasets.spatial_extent( + [ls8_eo3_dataset.id, ls8_eo3_dataset2.id, africa_eo3_dataset.id], + epsg3577 + ) + assert extau12 == extau12africa + ext3 = index.datasets.spatial_extent([ls8_eo3_dataset3.id], epsg4326) + ext1234 = index.datasets.spatial_extent( + [ + ls8_eo3_dataset.id, ls8_eo3_dataset2.id, + ls8_eo3_dataset3.id, ls8_eo3_dataset4.id + ], epsg4326) + assert ext1.difference(ext1234).area < 0.001 + assert ext3.difference(ext1234).area < 0.001 + ext1_3577 = index.datasets.spatial_extent([ls8_eo3_dataset.id], epsg3577) + assert ext1_3577.intersects(ls8_eo3_dataset.extent._to_crs(epsg3577)) + + +@pytest.mark.parametrize('datacube_env_name', ('experimental',)) +def test_spatial_search(index, + ls8_eo3_dataset, ls8_eo3_dataset2, + ls8_eo3_dataset3, ls8_eo3_dataset4): + epsg4326 = 
CRS("EPSG:4326") + epsg3577 = CRS("EPSG:3577") + index.create_spatial_index(epsg3577) + index.update_spatial_index(crses=[epsg3577]) + # Test old style lat/lon search + dss = index.datasets.search_eager( + product=ls8_eo3_dataset.type.name, + lat=Range(begin=-37.5, end=37.0), + lon=Range(begin=148.5, end=149.0) + ) + dssids = [ds.id for ds in dss] + assert len(dssids) == 2 + assert ls8_eo3_dataset.id in dssids + assert ls8_eo3_dataset2.id in dssids + # Test polygons + exact1_4326 = ls8_eo3_dataset.extent.to_crs(epsg4326) + exact1_3577 = ls8_eo3_dataset.extent.to_crs(epsg3577) + exact3_4326 = ls8_eo3_dataset3.extent.to_crs(epsg4326) + exact3_3577 = ls8_eo3_dataset3.extent.to_crs(epsg3577) + dssids = set(ds.id for ds in index.datasets.search(product=ls8_eo3_dataset.type.name, geometry=exact1_4326)) + assert len(dssids) == 2 + assert ls8_eo3_dataset.id in dssids + assert ls8_eo3_dataset2.id in dssids + dssids = [ds.id for ds in index.datasets.search(product=ls8_eo3_dataset.type.name, geometry=exact1_3577)] + assert len(dssids) == 2 + assert ls8_eo3_dataset.id in dssids + assert ls8_eo3_dataset2.id in dssids + dssids = [ds.id for ds in index.datasets.search(product=ls8_eo3_dataset.type.name, geometry=exact3_4326)] + assert len(dssids) == 2 + assert ls8_eo3_dataset3.id in dssids + assert ls8_eo3_dataset3.id in dssids + dssids = [ds.id for ds in index.datasets.search(product=ls8_eo3_dataset.type.name, geometry=exact3_3577)] + assert len(dssids) == 2 + assert ls8_eo3_dataset3.id in dssids + assert ls8_eo3_dataset3.id in dssids diff -Nru datacube-1.8.7/integration_tests/index/test_search_eo3.py datacube-1.8.9/integration_tests/index/test_search_eo3.py --- datacube-1.8.7/integration_tests/index/test_search_eo3.py 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/test_search_eo3.py 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,863 @@ +# This file is part of the Open Data Cube, see https://opendatacube.org for more information +# +# Copyright (c) 2015-2022 ODC Contributors +# SPDX-License-Identifier: Apache-2.0 +""" +Module +""" +import datetime +from typing import Any + +import pytest +import yaml +from dateutil import tz + +import datacube.scripts.search_tool +from datacube.config import LocalConfig +from datacube.drivers.postgres._connections import DEFAULT_DB_USER +from datacube.index import Index +from datacube.model import Dataset +from datacube.model import Product +from datacube.model import Range + +from datacube import Datacube +from .search_utils import assume_utc, _cli_csv_search, _csv_search_raw, _load_product_query + + +def test_search_dataset_equals_eo3(index: Index, ls8_eo3_dataset: Dataset): + datasets = index.datasets.search_eager( + platform='landsat-8' + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + datasets = index.datasets.search_eager( + platform='landsat-8', + instrument='OLI_TIRS' + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + # Wrong product family + with pytest.raises(ValueError): + datasets = index.datasets.search_eager( + platform='landsat-8', + product_family='splunge', + ) + + +def test_search_dataset_by_metadata_eo3(index: Index, ls8_eo3_dataset: Dataset) -> None: + datasets = index.datasets.search_by_metadata( + {"properties": {"eo:platform": "landsat-8", "eo:instrument": "OLI_TIRS"}} + ) + datasets = list(datasets) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + datasets = index.datasets.search_by_metadata( + {"properties": {"eo:platform": 
"landsat-5", "eo:instrument": "TM"}} + ) + datasets = list(datasets) + assert len(datasets) == 0 + + +def test_search_day_eo3(index: Index, ls8_eo3_dataset: Dataset) -> None: + # Matches day + datasets = index.datasets.search_eager( + time=datetime.date(2016, 5, 12) + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + # Different day: no match + datasets = index.datasets.search_eager( + time=datetime.date(2016, 5, 13) + ) + assert len(datasets) == 0 + + +def test_search_dataset_ranges_eo3(index: Index, ls8_eo3_dataset: Dataset) -> None: + # In the lat bounds. + datasets = index.datasets.search_eager( + lat=Range(-37.5, -36.5), + time=Range( + datetime.datetime(2016, 5, 12, 23, 0, 0), + datetime.datetime(2016, 5, 12, 23, 59, 59) + ) + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + # Out of the lat bounds. + datasets = index.datasets.search_eager( + lat=Range(28, 32), + time=Range( + datetime.datetime(2016, 5, 12, 23, 0, 0), + datetime.datetime(2016, 5, 12, 23, 59, 59) + ) + ) + assert len(datasets) == 0 + + # Out of the time bounds + datasets = index.datasets.search_eager( + lat=Range(-37.5, -36.5), + time=Range( + datetime.datetime(2014, 7, 26, 21, 48, 0), + datetime.datetime(2014, 7, 26, 21, 50, 0) + ) + ) + assert len(datasets) == 0 + + # A dataset that overlaps but is not fully contained by the search bounds. + # Should we distinguish between 'contains' and 'overlaps'? + datasets = index.datasets.search_eager( + lat=Range(-40, -37.1) + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + # Single point search + datasets = index.datasets.search_eager( + lat=-37.0, + time=Range( + datetime.datetime(2016, 5, 12, 23, 0, 0), + datetime.datetime(2016, 5, 12, 23, 59, 59) + ) + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + datasets = index.datasets.search_eager( + lat=30.0, + time=Range( + datetime.datetime(2016, 5, 12, 23, 0, 0), + datetime.datetime(2016, 5, 12, 23, 59, 59) + ) + ) + assert len(datasets) == 0 + + # Single timestamp search + datasets = index.datasets.search_eager( + lat=Range(-37.5, -36.5), + time=datetime.datetime(2016, 5, 12, 23, 50, 40), + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + datasets = index.datasets.search_eager( + lat=Range(-37.5, -36.5), + time=datetime.datetime(2016, 5, 12, 23, 0, 0) + ) + assert len(datasets) == 0 + + +def test_zero_width_range_search(index: Index, ls8_eo3_dataset4: Dataset) -> None: + # Test time search against zero-width time metadata + datasets = index.datasets.search_eager(time=Range( + begin=datetime.datetime(2013, 7, 21, 0, 57, 26, 432563, tzinfo=datetime.timezone.utc), + end=datetime.datetime(2013, 7, 21, 0, 57, 26, 432563, tzinfo=datetime.timezone.utc) + )) + assert len(datasets) == 1 + + datasets = index.datasets.search_eager(time=Range( + begin=datetime.datetime(2013, 7, 21, 0, 57, 26, 432563, tzinfo=datetime.timezone.utc), + end=datetime.datetime(2013, 7, 21, 0, 57, 27, 432563, tzinfo=datetime.timezone.utc) + )) + assert len(datasets) == 1 + + datasets = index.datasets.search_eager(time=Range( + begin=datetime.datetime(2013, 7, 21, 0, 57, 25, 432563, tzinfo=datetime.timezone.utc), + end=datetime.datetime(2013, 7, 21, 0, 57, 26, 432563, tzinfo=datetime.timezone.utc) + )) + assert len(datasets) == 1 + + +def test_search_globally_eo3(index: Index, ls8_eo3_dataset: Dataset) -> None: + # No expressions means get all. 
+ results = list(index.datasets.search()) + assert len(results) == 1 + + # Dataset sources aren't loaded by default + assert results[0].sources is None + + +def test_search_by_product_eo3(index: Index, + base_eo3_product_doc: Product, + ls8_eo3_dataset: Dataset, + wo_eo3_dataset: Dataset) -> None: + # Query all the test data, the counts should match expected + results = _load_product_query(index.datasets.search_by_product()) + assert len(results) == 2 + dataset_count = sum(len(ds) for ds in results.values()) + assert dataset_count == 2 + + # Query one product + products = _load_product_query(index.datasets.search_by_product( + platform='landsat-8', + product_family='wo' + )) + assert len(products) == 1 + [dataset] = products[base_eo3_product_doc["name"]] + assert dataset.id == wo_eo3_dataset.id + + +def test_search_limit_eo3(index, ls8_eo3_dataset, ls8_eo3_dataset2, wo_eo3_dataset): + prod = ls8_eo3_dataset.type.name + datasets = list(index.datasets.search(product=prod)) + assert len(datasets) == 2 + datasets = list(index.datasets.search(limit=1, product=prod)) + ids = [ds.id for ds in datasets] + assert len(ids) == 1 + assert len(datasets) == 1 + datasets = list(index.datasets.search(limit=0, product=prod)) + assert len(datasets) == 0 + datasets = list(index.datasets.search(limit=5, product=prod)) + assert len(datasets) == 2 + + datasets = list(index.datasets.search_returning(('id',), product=prod)) + assert len(datasets) == 2 + datasets = list(index.datasets.search_returning(('id',), limit=1, product=prod)) + assert len(datasets) == 1 + datasets = list(index.datasets.search_returning(('id',), limit=0, product=prod)) + assert len(datasets) == 0 + datasets = list(index.datasets.search_returning(('id',), limit=5, product=prod)) + assert len(datasets) == 2 + + # Limit is per product not overall. (But why?!?) + datasets = list(index.datasets.search()) + assert len(datasets) == 3 + datasets = list(index.datasets.search(limit=1)) + assert len(datasets) == 2 + datasets = list(index.datasets.search(limit=0)) + assert len(datasets) == 0 + datasets = list(index.datasets.search(limit=5)) + assert len(datasets) == 3 + + datasets = list(index.datasets.search_returning(('id',))) + assert len(datasets) == 3 + datasets = list(index.datasets.search_returning(('id',), limit=1)) + assert len(datasets) == 2 + datasets = list(index.datasets.search_returning(('id',), limit=0)) + assert len(datasets) == 0 + datasets = list(index.datasets.search_returning(('id',), limit=5)) + assert len(datasets) == 3 + + +def test_search_or_expressions_eo3(index: Index, + ls8_eo3_dataset: Dataset, + ls8_eo3_dataset2: Dataset, + wo_eo3_dataset: Dataset) -> None: + # Three EO3 datasets: + # - two landsat8 ard + # - one wo + + all_datasets = index.datasets.search_eager() + assert len(all_datasets) == 3 + all_ids = set(dataset.id for dataset in all_datasets) + + # OR all instruments: should return all datasets + datasets = index.datasets.search_eager( + instrument=['WOOLI_TIRS', 'OLI_TIRS', 'OLI_TIRS2'] + ) + assert len(datasets) == 3 + ids = set(dataset.id for dataset in datasets) + assert ids == all_ids + + # OR expression with only one clause. 
+ datasets = index.datasets.search_eager( + instrument=['OLI_TIRS'] + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + # OR both products: return all + datasets = index.datasets.search_eager( + product=[ls8_eo3_dataset.type.name, wo_eo3_dataset.type.name] + ) + assert len(datasets) == 3 + ids = set(dataset.id for dataset in datasets) + assert ids == all_ids + + # eo OR eo3: return all + datasets = index.datasets.search_eager( + metadata_type=[ + # LS5 + children + ls8_eo3_dataset.metadata_type.name, + # Nothing + # LS8 dataset + wo_eo3_dataset.metadata_type.name + ] + ) + assert len(datasets) == 3 + ids = set(dataset.id for dataset in datasets) + assert ids == all_ids + + # Redundant ORs should have no effect. + datasets = index.datasets.search_eager( + product=[wo_eo3_dataset.type.name, wo_eo3_dataset.type.name, wo_eo3_dataset.type.name] + ) + assert len(datasets) == 1 + assert datasets[0].id == wo_eo3_dataset.id + + +def test_search_returning_eo3(index: Index, + local_config: LocalConfig, + ls8_eo3_dataset: Dataset, + ls8_eo3_dataset2: Dataset, + wo_eo3_dataset: Dataset) -> None: + assert index.datasets.count() == 3, "Expected three test datasets" + + # Expect one product with our one dataset. + results = list(index.datasets.search_returning( + ('id', 'region_code', 'dataset_maturity'), + platform='landsat-8', + instrument='OLI_TIRS', + )) + assert len(results) == 1 + id_, region_code, maturity = results[0] + assert id_ == ls8_eo3_dataset.id + assert region_code == '090086' + assert maturity == 'final' + + results = list(index.datasets.search_returning( + ('id', 'metadata_doc',), + platform='landsat-8', + instrument='OLI_TIRS', + )) + assert len(results) == 1 + id_, document = results[0] + assert id_ == ls8_eo3_dataset.id + assert document == ls8_eo3_dataset.metadata_doc + + my_username = local_config.get('db_username', DEFAULT_DB_USER) + + # Mixture of document and native fields + results = list(index.datasets.search_returning( + ('id', 'creation_time', 'format', 'label'), + platform='landsat-8', + instrument='OLI_TIRS', + indexed_by=my_username, + )) + assert len(results) == 1 + + id_, creation_time, format_, label = results[0] + + assert id_ == ls8_eo3_dataset.id + assert format_ == 'GeoTIFF' + + # It's always UTC in the document + expected_time = creation_time.astimezone(tz.tzutc()).replace(tzinfo=None) + assert expected_time.isoformat() == ls8_eo3_dataset.metadata.creation_dt + assert label == ls8_eo3_dataset.metadata.label + + +def test_search_returning_rows_eo3(index, + eo3_ls8_dataset_doc, + eo3_ls8_dataset2_doc, + ls8_eo3_dataset, ls8_eo3_dataset2): + dataset = ls8_eo3_dataset + uri = eo3_ls8_dataset_doc[1] + uri3 = eo3_ls8_dataset2_doc[1] + results = list(index.datasets.search_returning( + ('id', 'uri'), + platform='landsat-8', + instrument='OLI_TIRS', + )) + assert len(results) == 1 + assert results == [(dataset.id, uri)] + + index.datasets.archive_location(dataset.id, uri) + index.datasets.remove_location(dataset.id, uri) + + # If returning a field like uri, there will be one result per location. 
+ # No locations + results = list(index.datasets.search_returning( + ('id', 'uri'), + platform='landsat-8', + instrument='OLI_TIRS', + )) + assert len(results) == 0 + + # Add a second location and we should get two results + index.datasets.add_location(dataset.id, uri) + uri2 = 'file:///tmp/test2' + index.datasets.add_location(dataset.id, uri2) + results = set(index.datasets.search_returning( + ('id', 'uri'), + platform='landsat-8', + instrument='OLI_TIRS', + )) + assert len(results) == 2 + assert results == { + (dataset.id, uri), + (dataset.id, uri2) + } + + # A second dataset already has a location: + results = set(index.datasets.search_returning( + ('id', 'uri'), + platform='landsat-8', + dataset_maturity='final', + )) + assert len(results) == 3 + assert results == { + (dataset.id, uri), + (dataset.id, uri2), + (ls8_eo3_dataset2.id, uri3), + } + + +def test_searches_only_type_eo3(index: Index, + wo_eo3_dataset: Dataset, + ls8_eo3_dataset: Dataset) -> None: + assert ls8_eo3_dataset.metadata_type.name != wo_eo3_dataset.metadata_type.name + + # One result in the product + datasets = index.datasets.search_eager( + product=wo_eo3_dataset.type.name, + platform='landsat-8' + ) + assert len(datasets) == 1 + assert datasets[0].id == wo_eo3_dataset.id + + # One result in the metadata type + datasets = index.datasets.search_eager( + metadata_type="eo3", + platform='landsat-8' + ) + assert len(datasets) == 1 + assert datasets[0].id == wo_eo3_dataset.id + + # No results when searching for a different dataset type. + with pytest.raises(ValueError): + datasets = index.datasets.search_eager( + product="spam_and_eggs", + platform='landsat-8' + ) + + # Two result when no types specified. + datasets = index.datasets.search_eager( + platform='landsat-8' + ) + assert len(datasets) == 2 + assert set(ds.id for ds in datasets) == {ls8_eo3_dataset.id, wo_eo3_dataset.id} + + # No results for different metadata type. + with pytest.raises(ValueError): + datasets = index.datasets.search_eager( + metadata_type='spam_type', + platform='landsat-8', + ) + + +def test_search_special_fields_eo3(index: Index, + ls8_eo3_dataset: Dataset, + wo_eo3_dataset: Dataset) -> None: + # 'product' is a special case + datasets = index.datasets.search_eager( + product=ls8_eo3_dataset.type.name + ) + assert len(datasets) == 1 + assert datasets[0].id == ls8_eo3_dataset.id + + # Unknown field: no results + with pytest.raises(ValueError): + datasets = index.datasets.search_eager( + platform='landsat-8', + flavour='vanilla', + ) + + +def test_search_by_uri_eo3(index, ls8_eo3_dataset, ls8_eo3_dataset2, eo3_ls8_dataset_doc): + datasets = index.datasets.search_eager(product=ls8_eo3_dataset.type.name, + uri=eo3_ls8_dataset_doc[1]) + assert len(datasets) == 1 + + datasets = index.datasets.search_eager(product=ls8_eo3_dataset.type.name, + uri='file:///x/yz') + assert len(datasets) == 0 + + +def test_search_conflicting_types(index, ls8_eo3_dataset): + # Should return no results. + with pytest.raises(ValueError): + index.datasets.search_eager( + product=ls8_eo3_dataset.type.name, + # The ls8 type is not of type storage_unit. + metadata_type='storage_unit' + ) + + +def test_fetch_all_of_md_type(index: Index, ls8_eo3_dataset: Dataset) -> None: + # Get every dataset of the md type. + assert ls8_eo3_dataset.metadata_type is not None # to shut up mypy + results = index.datasets.search_eager( + metadata_type=ls8_eo3_dataset.metadata_type.name + ) + assert len(results) == 1 + assert results[0].id == ls8_eo3_dataset.id + # Get every dataset of the type. 
+ results = index.datasets.search_eager( + product=ls8_eo3_dataset.type.name + ) + assert len(results) == 1 + assert results[0].id == ls8_eo3_dataset.id + + # No results for another. + with pytest.raises(ValueError): + results = index.datasets.search_eager( + metadata_type='spam_and_eggs' + ) + + +def test_count_searches(index: Index, + ls8_eo3_dataset: Dataset) -> None: + # One result in the telemetry type + datasets = index.datasets.count( + product=ls8_eo3_dataset.type.name, + platform='landsat-8', + instrument='OLI_TIRS', + ) + assert datasets == 1 + + # One result in the metadata type + datasets = index.datasets.count( + metadata_type=ls8_eo3_dataset.metadata_type.name, + platform='landsat-8', + instrument='OLI_TIRS', + ) + assert datasets == 1 + + # No results when searching for a different dataset type. + datasets = index.datasets.count( + product="spam_and_eggs", + platform='landsat-8', + instrument='OLI_TIRS' + ) + assert datasets == 0 + + # One result when no types specified. + datasets = index.datasets.count( + platform='landsat-8', + instrument='OLI_TIRS', + ) + assert datasets == 1 + + # No results for different metadata type. + datasets = index.datasets.count( + metadata_type='spam_and_eggs', + platform='landsat-8', + instrument='OLI_TIRS' + ) + assert datasets == 0 + + +def test_count_by_product_searches_eo3(index: Index, + ls8_eo3_dataset: Dataset, + ls8_eo3_dataset2: Dataset, + wo_eo3_dataset: Dataset) -> None: + # Two result in the ls8 type + products = tuple(index.datasets.count_by_product( + product=ls8_eo3_dataset.type.name, + platform='landsat-8' + )) + assert products == ((ls8_eo3_dataset.type, 2),) + + # Two results in the metadata type + products = tuple(index.datasets.count_by_product( + metadata_type=ls8_eo3_dataset.metadata_type.name, + platform='landsat-8', + )) + assert products == ((ls8_eo3_dataset.type, 2),) + + # No results when searching for a different dataset type. + products = tuple(index.datasets.count_by_product( + product="spam_and_eggs", + platform='landsat-8' + )) + assert products == () + + # Three results over 2 products when no types specified. + products = set(index.datasets.count_by_product( + platform='landsat-8', + )) + assert products == {(ls8_eo3_dataset.type, 2), (wo_eo3_dataset.type, 1)} + + # No results for different metadata type. 
+ products = tuple(index.datasets.count_by_product( + metadata_type='spam_and_eggs', + )) + assert products == () + + +def test_count_time_groups(index: Index, + ls8_eo3_dataset: Dataset) -> None: + timeline = list(index.datasets.count_product_through_time( + '1 day', + product=ls8_eo3_dataset.type.name, + time=Range( + datetime.datetime(2016, 5, 11, tzinfo=tz.tzutc()), + datetime.datetime(2016, 5, 13, tzinfo=tz.tzutc()) + ) + )) + + assert len(timeline) == 2 + assert timeline == [ + ( + Range(datetime.datetime(2016, 5, 11, tzinfo=tz.tzutc()), + datetime.datetime(2016, 5, 12, tzinfo=tz.tzutc())), + 0 + ), + ( + Range(datetime.datetime(2016, 5, 12, tzinfo=tz.tzutc()), + datetime.datetime(2016, 5, 13, tzinfo=tz.tzutc())), + 1 + ) + ] + + +def test_count_time_groups_cli(clirunner: Any, + ls8_eo3_dataset: Dataset) -> None: + result = clirunner( + [ + 'product-counts', + '1 day', + 'time in [2016-05-11, 2016-05-13]' + ], cli_method=datacube.scripts.search_tool.cli, + verbose_flag='' + ) + expected_out = ( + f'{ls8_eo3_dataset.type.name}\n' + ' 2016-05-11: 0\n' + ' 2016-05-12: 1\n' + ) + assert result.output.endswith(expected_out) + + +def test_search_cli_basic(clirunner: Any, + ls8_eo3_dataset: Dataset) -> None: + """ + Search datasets using the cli. + """ + result = clirunner( + [ + # No search arguments: return all datasets. + 'datasets' + ], cli_method=datacube.scripts.search_tool.cli + ) + assert str(ls8_eo3_dataset.id) in result.output + assert str(ls8_eo3_dataset.metadata_type.name) in result.output + assert result.exit_code == 0 + + +def test_cli_info_eo3(index: Index, + clirunner: Any, + ls8_eo3_dataset: Dataset, + ls8_eo3_dataset2: Dataset, + eo3_ls8_dataset_doc) -> None: + """ + Search datasets using the cli. + """ + index.datasets.add_location(ls8_eo3_dataset.id, 'file:///tmp/location1') + + opts = [ + 'dataset', 'info', str(ls8_eo3_dataset.id) + ] + result = clirunner(opts, verbose_flag='') + + output = result.output + # Remove WARNING messages for experimental driver + output_lines = [line for line in output.splitlines() if "WARNING:" not in line] + output = "\n".join(output_lines) + + # Should be a valid yaml + yaml_docs = list(yaml.safe_load_all(output)) + assert len(yaml_docs) == 1 + + # We output properties in order for readability: + output_lines = set(line for line in output_lines) + expected_lines = [ + "id: " + str(ls8_eo3_dataset.id), + 'product: ga_ls8c_ard_3', + 'status: active', + 'locations:', + '- file:///tmp/location1', + f'- {eo3_ls8_dataset_doc[1]}', + 'fields:', + ' creation_time: 2019-10-07 20:19:19.218290', + ' format: GeoTIFF', + ' instrument: OLI_TIRS', + ' label: ga_ls8c_ard_3-0-0_090086_2016-05-12_final', + ' landsat_product_id: LC08_L1TP_090086_20160512_20180203_01_T1', + ' landsat_scene_id: LC80900862016133LGN02', + ' lat: {begin: -38.53221689818913, end: -36.41618895501644}', + ' lon: {begin: 147.65992717003462, end: 150.3003802932316}', + ' platform: landsat-8', + ' product_family: ard', + ' region_code: 090086', + " time: {begin: '2016-05-12T23:50:23.054165+00:00', end: '2016-05-12T23:50:52.031499+00:00'}", + ] + for line in expected_lines: + assert line in output_lines + + # Check indexed time separately, as we don't care what timezone it's displayed in. 
+ indexed_time = yaml_docs[0]['indexed'] + assert isinstance(indexed_time, datetime.datetime) + assert assume_utc(indexed_time) == assume_utc(ls8_eo3_dataset.indexed_time) + + # Request two, they should have separate yaml documents + opts.append(str(ls8_eo3_dataset2.id)) + + result = clirunner(opts) + yaml_docs = list(yaml.safe_load_all(result.output)) + assert len(yaml_docs) == 2, "Two datasets should produce two sets of info" + assert yaml_docs[0]['id'] == str(ls8_eo3_dataset.id) + assert yaml_docs[1]['id'] == str(ls8_eo3_dataset2.id) + + +def test_find_duplicates_eo3(index, + ls8_eo3_dataset, ls8_eo3_dataset2, + ls8_eo3_dataset3, ls8_eo3_dataset4, + wo_eo3_dataset): + # Our four ls8 datasets and one wo. + all_datasets = index.datasets.search_eager() + assert len(all_datasets) == 5 + + # First two ls8 datasets have the same path/row, last two have a different row. + expected_ls8_path_row_duplicates = [ + ( + ('090086', 'final'), + {ls8_eo3_dataset.id, ls8_eo3_dataset2.id} + ), + ( + ('101077', 'final'), + {ls8_eo3_dataset3.id, ls8_eo3_dataset4.id} + ), + + ] + + # Specifying groups as fields: + f = ls8_eo3_dataset.metadata_type.dataset_fields.get + field_res = sorted(index.datasets.search_product_duplicates( + ls8_eo3_dataset.type, + f('region_code'), f('dataset_maturity') + )) + assert field_res == expected_ls8_path_row_duplicates + # Field names as strings + product_res = sorted(index.datasets.search_product_duplicates( + ls8_eo3_dataset.type, + 'region_code', 'dataset_maturity' + )) + assert product_res == expected_ls8_path_row_duplicates + + # No WO duplicates: there's only one + sat_res = sorted(index.datasets.search_product_duplicates( + wo_eo3_dataset.type, + 'region_code', 'dataset_maturity' + )) + assert sat_res == [] + + +def test_csv_search_via_cli_eo3(clirunner: Any, + ls8_eo3_dataset: Dataset, + ls8_eo3_dataset2: Dataset) -> None: + """ + Search datasets via the cli with csv output + """ + def matches_both(*args): + rows = _cli_csv_search(('datasets',) + args, clirunner) + assert len(rows) == 2 + assert {rows[0]['id'], rows[1]['id']} == {str(ls8_eo3_dataset.id), str(ls8_eo3_dataset2.id)} + + def matches_1(*args): + rows = _cli_csv_search(('datasets',) + args, clirunner) + assert len(rows) == 1 + assert rows[0]['id'] == str(ls8_eo3_dataset.id) + + def matches_none(*args): + rows = _cli_csv_search(('datasets',) + args, clirunner) + assert len(rows) == 0 + + def no_such_product(*args): + with pytest.raises(ValueError): + _cli_csv_search(('datasets',) + args, clirunner) + + matches_both('lat in [-40, -10]') + matches_both('product=' + ls8_eo3_dataset.type.name) + + # Don't return on a mismatch + matches_none('lat in [150, 160]') + + # Match only a single dataset using multiple fields + matches_1('platform=landsat-8', 'time in [2016-05-11, 2016-05-13]') + + # One matching field, one non-matching + no_such_product('time in [2016-05-11, 2014-05-13]', 'platform=landsat-5') + + # Test date shorthand + matches_both('time in [2016-05, 2016-05]') + matches_none('time in [2014-06, 2014-06]') + + matches_both('time in 2016-05') + matches_none('time in 2014-08') + matches_both('time in 2016') + matches_none('time in 2015') + + matches_both('time in [2016, 2016]') + matches_both('time in [2015, 2017]') + matches_none('time in [2015, 2015]') + matches_none('time in [2013, 2013]') + + matches_both('time in [2016-4, 2016-8]') + matches_none('time in [2016-1, 2016-3]') + matches_both('time in [2005, 2017]') + + +_EXT_AND_BASE_EO3_OUTPUT_HEADER = [ + 'id', + 'crs_raw', + 'dataset_maturity', + 
'eo_gsd', 'eo_sun_azimuth', 'eo_sun_elevation',
+    'cloud_cover', 'fmask_clear', 'fmask_cloud_shadow', 'fmask_snow', 'fmask_water',
+    'format',
+    'gqa', 'gqa_abs_iterative_mean_x', 'gqa_abs_iterative_mean_xy', 'gqa_abs_iterative_mean_y',
+    'gqa_abs_x', 'gqa_abs_xy', 'gqa_abs_y', 'gqa_cep90',
+    'gqa_iterative_mean_x', 'gqa_iterative_mean_xy', 'gqa_iterative_mean_y',
+    'gqa_iterative_stddev_x', 'gqa_iterative_stddev_xy', 'gqa_iterative_stddev_y',
+    'gqa_mean_x', 'gqa_mean_xy',
+    'gqa_mean_y', 'gqa_stddev_x', 'gqa_stddev_xy', 'gqa_stddev_y',
+    'creation_time', 'indexed_by', 'indexed_time',
+    'instrument', 'label',
+    'landsat_product_id', 'landsat_scene_id',
+    'lat', 'lon',
+    'metadata_doc', 'metadata_type', 'metadata_type_id',
+    'platform', 'product', 'product_family',
+    'region_code', 'time', 'uri'
+]
+
+
+def test_csv_structure_eo3(clirunner, ls8_eo3_dataset, ls8_eo3_dataset2):
+    output = _csv_search_raw(['datasets', ' lat in [-40, -10]'], clirunner)
+    lines = [line.strip() for line in output.split('\n') if line]
+    # A header and two dataset rows
+    assert len(lines) == 3
+    header_line = lines[0]
+    for header in _EXT_AND_BASE_EO3_OUTPUT_HEADER:
+        assert header in header_line
+
+
+def test_query_dataset_multi_product_eo3(index: Index, ls8_eo3_dataset, wo_eo3_dataset):
+    # We have one ls5 level1 and its child nbar
+    dc = Datacube(index)
+
+    # Can we query a single product name?
+    datasets = dc.find_datasets(product="ga_ls8c_ard_3")
+    assert len(datasets) == 1
+
+    # Can we query multiple products?
+    datasets = dc.find_datasets(product=['ga_ls8c_ard_3', 'ga_ls_wo_3'])
+    assert len(datasets) == 2
+
+    # Can we query multiple products in a tuple
+    datasets = dc.find_datasets(product=('ga_ls8c_ard_3', 'ga_ls_wo_3'))
+    assert len(datasets) == 2
diff -Nru datacube-1.8.7/integration_tests/index/test_search_legacy.py datacube-1.8.9/integration_tests/index/test_search_legacy.py
--- datacube-1.8.7/integration_tests/index/test_search_legacy.py 1970-01-01 00:00:00.000000000 +0000
+++ datacube-1.8.9/integration_tests/index/test_search_legacy.py 2022-11-17 00:47:28.000000000 +0000
@@ -0,0 +1,1056 @@
+# This file is part of the Open Data Cube, see https://opendatacube.org for more information
+#
+# Copyright (c) 2015-2022 ODC Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""
+Module
+"""
+import copy
+import datetime
+import uuid
+from decimal import Decimal
+from uuid import UUID
+from typing import List, Any
+
+import pytest
+import yaml
+from dateutil import tz
+from psycopg2._range import NumericRange
+
+from datacube.config import LocalConfig
+from datacube.drivers.postgres._connections import DEFAULT_DB_USER
+from datacube.index import Index
+from datacube.model import Dataset
+from datacube.model import Product
+from datacube.model import MetadataType
+from datacube.model import Range
+
+from datacube.testutils import load_dataset_definition
+
+from datacube import Datacube
+from .search_utils import _load_product_query, assume_utc, _csv_search_raw, _cli_csv_search
+
+
+@pytest.fixture
+def pseudo_ls8_type(index, ga_metadata_type):
+    index.products.add_document({
+        'name': 'ls8_telemetry',
+        'description': 'telemetry test',
+        'metadata': {
+            'product_type': 'pseudo_ls8_data',
+            'platform': {
+                'code': 'LANDSAT_8'
+            },
+            'instrument': {
+                'name': 'OLI_TIRS'
+            },
+            'format': {
+                'name': 'PSEUDOMD'
+            }
+        },
+        'metadata_type': ga_metadata_type.name
+    })
+    return index.products.get_by_name('ls8_telemetry')
+
+
+@pytest.fixture
+def pseudo_ls8_dataset(index, pseudo_ls8_type):
+    id_ = str(uuid.uuid4())
+    with
index._active_connection() as connection: + was_inserted = connection.insert_dataset( + { + 'id': id_, + 'product_type': 'pseudo_ls8_data', + 'checksum_path': 'package.sha1', + 'ga_label': 'LS8_OLITIRS_STD-MD_P00_LC81160740742015089ASA00_' + '116_074_20150330T022553Z20150330T022657', + + 'ga_level': 'P00', + 'size_bytes': 637660782, + 'platform': { + 'code': 'LANDSAT_8' + }, + # We're unlikely to have extent info for a raw dataset, we'll use it for search tests. + 'extent': { + 'from_dt': datetime.datetime(2014, 7, 26, 23, 48, 0, 343853), + 'to_dt': datetime.datetime(2014, 7, 26, 23, 52, 0, 343853), + 'coord': { + 'll': {'lat': -31.33333, 'lon': 149.78434}, + 'lr': {'lat': -31.37116, 'lon': 152.20094}, + 'ul': {'lat': -29.23394, 'lon': 149.85216}, + 'ur': {'lat': -29.26873, 'lon': 152.21782} + } + }, + 'image': { + 'satellite_ref_point_start': {'x': 116, 'y': 74}, + 'satellite_ref_point_end': {'x': 116, 'y': 84}, + }, + 'creation_dt': datetime.datetime(2015, 4, 22, 6, 32, 4), + 'instrument': {'name': 'OLI_TIRS'}, + 'format': { + 'name': 'PSEUDOMD' + }, + 'lineage': { + 'source_datasets': {} + } + }, + id_, + pseudo_ls8_type.id + ) + assert was_inserted + d = index.datasets.get(id_) + # The dataset should have been matched to the telemetry type. + assert d.type.id == pseudo_ls8_type.id + + return d + + +@pytest.fixture +def pseudo_ls8_dataset2(index, pseudo_ls8_type): + # Like the previous dataset, but a day later in time. + id_ = str(uuid.uuid4()) + with index._active_connection() as connection: + was_inserted = connection.insert_dataset( + { + 'id': id_, + 'product_type': 'pseudo_ls8_data', + 'checksum_path': 'package.sha1', + 'ga_label': 'LS8_OLITIRS_STD-MD_P00_LC81160740742015089ASA00_' + '116_074_20150330T022553Z20150330T022657', + + 'ga_level': 'P00', + 'size_bytes': 637660782, + 'platform': { + 'code': 'LANDSAT_8' + }, + 'image': { + 'satellite_ref_point_start': {'x': 116, 'y': 74}, + 'satellite_ref_point_end': {'x': 116, 'y': 84}, + }, + # We're unlikely to have extent info for a raw dataset, we'll use it for search tests. + 'extent': { + 'from_dt': datetime.datetime(2014, 7, 27, 23, 48, 0, 343853), + 'to_dt': datetime.datetime(2014, 7, 27, 23, 52, 0, 343853), + 'coord': { + 'll': {'lat': -31.33333, 'lon': 149.78434}, + 'lr': {'lat': -31.37116, 'lon': 152.20094}, + 'ul': {'lat': -29.23394, 'lon': 149.85216}, + 'ur': {'lat': -29.26873, 'lon': 152.21782} + } + }, + 'creation_dt': datetime.datetime(2015, 4, 22, 6, 32, 4), + 'instrument': {'name': 'OLI_TIRS'}, + 'format': { + 'name': 'PSEUDOMD' + }, + 'lineage': { + 'source_datasets': {} + } + }, + id_, + pseudo_ls8_type.id + ) + assert was_inserted + d = index.datasets.get(id_) + # The dataset should have been matched to the telemetry type. + assert d.type.id == pseudo_ls8_type.id + + return d + + +# Datasets 3 and 4 mirror 1 and 2 but have a different path/row. +@pytest.fixture +def pseudo_ls8_dataset3(index: Index, + pseudo_ls8_type: Product, + pseudo_ls8_dataset: Dataset) -> Dataset: + # Same as 1, but a different path/row + id_ = str(uuid.uuid4()) + dataset_doc = copy.deepcopy(pseudo_ls8_dataset.metadata_doc) + dataset_doc['id'] = id_ + dataset_doc['image'] = { + 'satellite_ref_point_start': {'x': 116, 'y': 85}, + 'satellite_ref_point_end': {'x': 116, 'y': 87}, + } + + with index._active_connection() as connection: + was_inserted = connection.insert_dataset( + dataset_doc, + id_, + pseudo_ls8_type.id + ) + assert was_inserted + d = index.datasets.get(id_) + # The dataset should have been matched to the telemetry type. 
+ assert d.type.id == pseudo_ls8_type.id + return d + + +@pytest.fixture +def pseudo_ls8_dataset4(index: Index, + pseudo_ls8_type: Product, + pseudo_ls8_dataset2: Dataset) -> Dataset: + # Same as 2, but a different path/row + id_ = str(uuid.uuid4()) + dataset_doc = copy.deepcopy(pseudo_ls8_dataset2.metadata_doc) + dataset_doc['id'] = id_ + dataset_doc['image'] = { + 'satellite_ref_point_start': {'x': 116, 'y': 85}, + 'satellite_ref_point_end': {'x': 116, 'y': 87}, + } + + with index._active_connection() as connection: + was_inserted = connection.insert_dataset( + dataset_doc, + id_, + pseudo_ls8_type.id + ) + assert was_inserted + d = index.datasets.get(id_) + # The dataset should have been matched to the telemetry type. + assert d.type.id == pseudo_ls8_type.id + return d + + +@pytest.fixture +def ls5_dataset_w_children(index, clirunner, example_ls5_dataset_path, indexed_ls5_scene_products): + clirunner(['dataset', 'add', str(example_ls5_dataset_path)]) + doc = load_dataset_definition(example_ls5_dataset_path) + return index.datasets.get(doc.id, include_sources=True) + + +@pytest.fixture +def ls5_dataset_nbar_type(ls5_dataset_w_children: Dataset, + indexed_ls5_scene_products: List[Product]) -> Product: + for dataset_type in indexed_ls5_scene_products: + if dataset_type.name == ls5_dataset_w_children.type.name: + return dataset_type + else: + raise RuntimeError("LS5 type was not among types") + + +def test_search_dataset_equals(index: Index, pseudo_ls8_dataset: Dataset): + datasets = index.datasets.search_eager( + platform='LANDSAT_8' + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + datasets = index.datasets.search_eager( + platform='LANDSAT_8', + instrument='OLI_TIRS' + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # Wrong sensor name + with pytest.raises(ValueError): + datasets = index.datasets.search_eager( + platform='LANDSAT-8', + instrument='TM', + ) + + +def test_search_dataset_by_metadata(index: Index, pseudo_ls8_dataset: Dataset) -> None: + datasets = index.datasets.search_by_metadata( + {"platform": {"code": "LANDSAT_8"}, "instrument": {"name": "OLI_TIRS"}} + ) + datasets = list(datasets) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + datasets = index.datasets.search_by_metadata( + {"platform": {"code": "LANDSAT_5"}, "instrument": {"name": "TM"}} + ) + datasets = list(datasets) + assert len(datasets) == 0 + + +def test_search_day(index: Index, pseudo_ls8_dataset: Dataset) -> None: + # Matches day + datasets = index.datasets.search_eager( + time=datetime.date(2014, 7, 26) + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # Different day: no match + datasets = index.datasets.search_eager( + time=datetime.date(2014, 7, 27) + ) + assert len(datasets) == 0 + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_search_dataset_ranges(index: Index, pseudo_ls8_dataset: Dataset) -> None: + # In the lat bounds. + datasets = index.datasets.search_eager( + lat=Range(-30.5, -29.5), + time=Range( + datetime.datetime(2014, 7, 26, 23, 0, 0), + datetime.datetime(2014, 7, 26, 23, 59, 0) + ) + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # Out of the lat bounds. 
+ datasets = index.datasets.search_eager( + lat=Range(28, 32), + time=Range( + datetime.datetime(2014, 7, 26, 23, 48, 0), + datetime.datetime(2014, 7, 26, 23, 50, 0) + ) + ) + assert len(datasets) == 0 + + # Out of the time bounds + datasets = index.datasets.search_eager( + lat=Range(-30.5, -29.5), + time=Range( + datetime.datetime(2014, 7, 26, 21, 48, 0), + datetime.datetime(2014, 7, 26, 21, 50, 0) + ) + ) + assert len(datasets) == 0 + + # A dataset that overlaps but is not fully contained by the search bounds. + # TODO: Do we want overlap as the default behaviour? + # Should we distinguish between 'contains' and 'overlaps'? + datasets = index.datasets.search_eager( + lat=Range(-40, -30) + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # Single point search + datasets = index.datasets.search_eager( + lat=-30.0, + time=Range( + datetime.datetime(2014, 7, 26, 23, 0, 0), + datetime.datetime(2014, 7, 26, 23, 59, 0) + ) + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + datasets = index.datasets.search_eager( + lat=30.0, + time=Range( + datetime.datetime(2014, 7, 26, 23, 0, 0), + datetime.datetime(2014, 7, 26, 23, 59, 0) + ) + ) + assert len(datasets) == 0 + + # Single timestamp search + datasets = index.datasets.search_eager( + lat=Range(-30.5, -29.5), + time=datetime.datetime(2014, 7, 26, 23, 50, 0) + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + datasets = index.datasets.search_eager( + lat=Range(-30.5, -29.5), + time=datetime.datetime(2014, 7, 26, 23, 30, 0) + ) + assert len(datasets) == 0 + + +def test_search_globally(index: Index, pseudo_ls8_dataset: Dataset) -> None: + # No expressions means get all. + results = list(index.datasets.search()) + assert len(results) == 1 + + # Dataset sources aren't loaded by default + assert results[0].sources is None + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_search_by_product(index: Index, + pseudo_ls8_type: Product, + pseudo_ls8_dataset: Dataset, + indexed_ls5_scene_products, + ls5_dataset_w_children: Dataset) -> None: + # Query all the test data, the counts should match expected + results = _load_product_query(index.datasets.search_by_product()) + assert len(results) == 7 + dataset_count = sum(len(ds) for ds in results.values()) + assert dataset_count == 4 + + # Query one product + products = _load_product_query(index.datasets.search_by_product( + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert len(products) == 1 + [dataset] = products[pseudo_ls8_type.name] + assert dataset.id == pseudo_ls8_dataset.id + + +def test_search_limit(index, pseudo_ls8_dataset, pseudo_ls8_dataset2): + datasets = list(index.datasets.search()) + assert len(datasets) == 2 + datasets = list(index.datasets.search(limit=1)) + assert len(datasets) == 1 + datasets = list(index.datasets.search(limit=0)) + assert len(datasets) == 0 + datasets = list(index.datasets.search(limit=5)) + assert len(datasets) == 2 + + datasets = list(index.datasets.search_returning(('id',))) + assert len(datasets) == 2 + datasets = list(index.datasets.search_returning(('id',), limit=1)) + assert len(datasets) == 1 + datasets = list(index.datasets.search_returning(('id',), limit=0)) + assert len(datasets) == 0 + datasets = list(index.datasets.search_returning(('id',), limit=5)) + assert len(datasets) == 2 + + +# Current formulation of this test relies on non-EO3 test data 
+@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_search_or_expressions(index: Index, + pseudo_ls8_type: Product, + pseudo_ls8_dataset: Dataset, + ls5_dataset_nbar_type: Product, + ls5_dataset_w_children: Dataset, + default_metadata_type: MetadataType, + telemetry_metadata_type: MetadataType) -> None: + # Four datasets: + # Our standard LS8 + # - type=ls8_telemetry + # LS5 with children: + # - type=ls5_nbar_scene + # - type=ls5_level1_scene + # - type=ls5_satellite_telemetry_data + + all_datasets = index.datasets.search_eager() + assert len(all_datasets) == 4 + all_ids = set(dataset.id for dataset in all_datasets) + + # OR all platforms: should return all datasets + datasets = index.datasets.search_eager( + platform=['LANDSAT_5', 'LANDSAT_7', 'LANDSAT_8'] + ) + assert len(datasets) == 4 + ids = set(dataset.id for dataset in datasets) + assert ids == all_ids + + # OR expression with only one clause. + datasets = index.datasets.search_eager( + platform=['LANDSAT_8'] + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # OR two products: return two + datasets = index.datasets.search_eager( + product=[pseudo_ls8_type.name, ls5_dataset_nbar_type.name] + ) + assert len(datasets) == 2 + ids = set(dataset.id for dataset in datasets) + assert ids == {pseudo_ls8_dataset.id, ls5_dataset_w_children.id} + + # eo OR telemetry: return all + datasets = index.datasets.search_eager( + metadata_type=[ + # LS5 + children + default_metadata_type.name, + # Nothing + telemetry_metadata_type.name, + # LS8 dataset + pseudo_ls8_type.metadata_type.name + ] + ) + assert len(datasets) == 4 + ids = set(dataset.id for dataset in datasets) + assert ids == all_ids + + # Redundant ORs should have no effect. + datasets = index.datasets.search_eager( + product=[pseudo_ls8_type.name, pseudo_ls8_type.name, pseudo_ls8_type.name] + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_search_returning(index: Index, + local_config: LocalConfig, + pseudo_ls8_type: Product, + pseudo_ls8_dataset: Dataset, + ls5_dataset_w_children) -> None: + + assert index.datasets.count() == 4, "Expected four test datasets" + + # Expect one product with our one dataset. + results = list(index.datasets.search_returning( + ('id', 'sat_path', 'sat_row'), + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert len(results) == 1 + id_, path_range, sat_range = results[0] + assert id_ == pseudo_ls8_dataset.id + # TODO: output nicer types? 
+ assert path_range == NumericRange(Decimal('116'), Decimal('116'), '[]') + assert sat_range == NumericRange(Decimal('74'), Decimal('84'), '[]') + + results = list(index.datasets.search_returning( + ('id', 'metadata_doc',), + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert len(results) == 1 + id_, document = results[0] + assert id_ == pseudo_ls8_dataset.id + assert document == pseudo_ls8_dataset.metadata_doc + + my_username = local_config.get('db_username', DEFAULT_DB_USER) + + # Mixture of document and native fields + results = list(index.datasets.search_returning( + ('id', 'creation_time', 'format', 'label'), + platform='LANDSAT_8', + indexed_by=my_username, + )) + assert len(results) == 1 + + id_, creation_time, format_, label = results[0] + + assert id_ == pseudo_ls8_dataset.id + assert format_ == 'PSEUDOMD' + + # It's always UTC in the document + expected_time = creation_time.astimezone(tz.tzutc()).replace(tzinfo=None) + assert expected_time.isoformat() == pseudo_ls8_dataset.metadata_doc['creation_dt'] + assert label == pseudo_ls8_dataset.metadata_doc['ga_label'] + + +def test_search_returning_rows(index, pseudo_ls8_type, + pseudo_ls8_dataset, pseudo_ls8_dataset2, + indexed_ls5_scene_products): + dataset = pseudo_ls8_dataset + + # If returning a field like uri, there will be one result per location. + + # No locations + results = list(index.datasets.search_returning( + ('id', 'uri'), + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert len(results) == 0 + + # Add a location to the dataset and we should get one result + test_uri = 'file:///tmp/test1' + index.datasets.add_location(dataset.id, test_uri) + results = list(index.datasets.search_returning( + ('id', 'uri'), + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert len(results) == 1 + assert results == [(dataset.id, test_uri)] + + # Add a second location and we should get two results + test_uri2 = 'file:///tmp/test2' + index.datasets.add_location(dataset.id, test_uri2) + results = set(index.datasets.search_returning( + ('id', 'uri'), + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert len(results) == 2 + assert results == { + (dataset.id, test_uri), + (dataset.id, test_uri2) + } + + # A second dataset now has a location too: + test_uri3 = 'mdss://c10/tmp/something' + index.datasets.add_location(pseudo_ls8_dataset2.id, test_uri3) + # Datasets and locations should still correctly match up... + results = set(index.datasets.search_returning( + ('id', 'uri'), + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert len(results) == 3 + assert results == { + (dataset.id, test_uri), + (dataset.id, test_uri2), + (pseudo_ls8_dataset2.id, test_uri3), + } + + +def test_searches_only_type(index: Index, + pseudo_ls8_type: Product, + pseudo_ls8_dataset: Dataset, + ls5_telem_type) -> None: + # The dataset should have been matched to the telemetry type. 
+ assert pseudo_ls8_dataset.type.id == pseudo_ls8_type.id + assert index.datasets.search_eager() + + # One result in the telemetry type + datasets = index.datasets.search_eager( + product=pseudo_ls8_type.name, + platform='LANDSAT_8', + instrument='OLI_TIRS', + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # One result in the metadata type + datasets = index.datasets.search_eager( + metadata_type=pseudo_ls8_type.metadata_type.name, + platform='LANDSAT_8', + instrument='OLI_TIRS', + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # No results when searching for a different dataset type. + with pytest.raises(ValueError): + datasets = index.datasets.search_eager( + product=ls5_telem_type.name, + platform='LANDSAT_8', + instrument='OLI_TIRS' + ) + + # One result when no types specified. + datasets = index.datasets.search_eager( + platform='LANDSAT_8', + instrument='OLI_TIRS' + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # No results for different metadata type. + with pytest.raises(ValueError): + datasets = index.datasets.search_eager( + metadata_type='telemetry', + platform='LANDSAT_8', + instrument='OLI_TIRS' + ) + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_search_special_fields(index: Index, + pseudo_ls8_type: Product, + pseudo_ls8_dataset: Dataset, + ls5_dataset_w_children) -> None: + # 'product' is a special case + datasets = index.datasets.search_eager( + product=pseudo_ls8_type.name + ) + assert len(datasets) == 1 + assert datasets[0].id == pseudo_ls8_dataset.id + + # Unknown field: no results + with pytest.raises(ValueError): + datasets = index.datasets.search_eager( + platform='LANDSAT_8', + flavour='chocolate', + ) + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_search_by_uri(index, ls5_dataset_w_children): + datasets = index.datasets.search_eager(product=ls5_dataset_w_children.type.name, + uri=ls5_dataset_w_children.local_uri) + assert len(datasets) == 1 + + datasets = index.datasets.search_eager(product=ls5_dataset_w_children.type.name, + uri='file:///x/yz') + assert len(datasets) == 0 + + +# Current formulation of this test relies on non-EO3 test data +# (But postgis implementation isn't handling lineage yet either) +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_get_dataset_with_children(index: Index, ls5_dataset_w_children: Dataset) -> None: + id_ = ls5_dataset_w_children.id + assert isinstance(id_, UUID) + + # Sources not loaded by default + d = index.datasets.get(id_) + assert d.sources is None + + # Ask for all sources + d = index.datasets.get(id_, include_sources=True) + assert list(d.sources.keys()) == ['level1'] + level1 = d.sources['level1'] + assert list(level1.sources.keys()) == ['satellite_telemetry_data'] + assert list(level1.sources['satellite_telemetry_data'].sources) == [] + + # It should also work with a string id + d = index.datasets.get(str(id_), include_sources=True) + assert list(d.sources.keys()) == ['level1'] + level1 = d.sources['level1'] + assert list(level1.sources.keys()) == ['satellite_telemetry_data'] + assert list(level1.sources['satellite_telemetry_data'].sources) == [] + + +def test_count_by_product_searches(index: Index, + pseudo_ls8_type: Product, + pseudo_ls8_dataset: Dataset, + ls5_telem_type: Product) -> None: + # The dataset 
should have been matched to the telemetry type. + assert pseudo_ls8_dataset.type.id == pseudo_ls8_type.id + assert index.datasets.search_eager() + + # One result in the telemetry type + products = tuple(index.datasets.count_by_product( + product=pseudo_ls8_type.name, + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert products == ((pseudo_ls8_type, 1),) + + # One result in the metadata type + products = tuple(index.datasets.count_by_product( + metadata_type=pseudo_ls8_type.metadata_type.name, + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert products == ((pseudo_ls8_type, 1),) + + # No results when searching for a different dataset type. + products = tuple(index.datasets.count_by_product( + product=ls5_telem_type.name, + platform='LANDSAT_8', + instrument='OLI_TIRS' + )) + assert products == () + + # One result when no types specified. + products = tuple(index.datasets.count_by_product( + platform='LANDSAT_8', + instrument='OLI_TIRS', + )) + assert products == ((pseudo_ls8_type, 1),) + + # Only types with datasets should be returned (these params match ls5_gtiff too) + products = tuple(index.datasets.count_by_product()) + assert products == ((pseudo_ls8_type, 1),) + + # No results for different metadata type. + products = tuple(index.datasets.count_by_product( + metadata_type='telemetry', + )) + assert products == () + + +# Current formulation of this test relies on non-EO3 test data +# (But postgis driver doesn't support lineage yet anyway.) +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +@pytest.mark.usefixtures('ga_metadata_type', + 'indexed_ls5_scene_products') +def test_source_filter(clirunner, index, example_ls5_dataset_path): + clirunner( + [ + 'dataset', + 'add', + str(example_ls5_dataset_path) + ] + ) + + all_nbar = index.datasets.search_eager(product='ls5_nbar_scene') + assert len(all_nbar) == 1 + all_level1 = index.datasets.search_eager(product='ls5_level1_scene') + assert len(all_level1) == 1 + assert all_level1[0].metadata.gsi == 'ASA' + + dss = index.datasets.search_eager( + product='ls5_nbar_scene', + source_filter={'product': 'ls5_level1_scene', 'gsi': 'ASA'} + ) + assert dss == all_nbar + dss = index.datasets.search_eager( + product='ls5_nbar_scene', + source_filter={'product': 'ls5_level1_scene', 'gsi': 'GREG'} + ) + assert dss == [] + + with pytest.raises(RuntimeError): + dss = index.datasets.search_eager( + product='ls5_nbar_scene', + source_filter={'gsi': 'ASA'} + ) + + +def test_cli_info(index: Index, + clirunner: Any, + pseudo_ls8_dataset: Dataset, + pseudo_ls8_dataset2: Dataset) -> None: + """ + Search datasets using the cli. 
+ """ + index.datasets.add_location(pseudo_ls8_dataset.id, 'file:///tmp/location1') + index.datasets.add_location(pseudo_ls8_dataset.id, 'file:///tmp/location2') + + opts = [ + 'dataset', 'info', str(pseudo_ls8_dataset.id) + ] + result = clirunner(opts, verbose_flag='') + + output = result.output + # Remove WARNING messages for experimental driver + output_lines = [line for line in output.splitlines() if "WARNING:" not in line] + output = "\n".join(output_lines) + + # Should be a valid yaml + yaml_docs = list(yaml.safe_load_all(output)) + assert len(yaml_docs) == 1 + + # We output properties in order for readability: + output_lines = [line for line in output_lines if not line.startswith('indexed:')] + expected_lines = [ + "id: " + str(pseudo_ls8_dataset.id), + 'product: ls8_telemetry', + 'status: active', + # Newest location first + 'locations:', + '- file:///tmp/location2', + '- file:///tmp/location1', + 'fields:', + ' creation_time: 2015-04-22 06:32:04', + ' format: PSEUDOMD', + ' gsi: null', + ' instrument: OLI_TIRS', + ' label: LS8_OLITIRS_STD-MD_P00_LC81160740742015089ASA00_116_074_20150330T022553Z20150330T022657', + ' lat: {begin: -31.37116, end: -29.23394}', + ' lon: {begin: 149.78434, end: 152.21782}', + ' orbit: null', + ' platform: LANDSAT_8', + ' product_type: pseudo_ls8_data', + ' sat_path: {begin: 116, end: 116}', + ' sat_row: {begin: 74, end: 84}', + " time: {begin: '2014-07-26T23:48:00.343853', end: '2014-07-26T23:52:00.343853'}", + ] + assert expected_lines == output_lines + + # Check indexed time separately, as we don't care what timezone it's displayed in. + indexed_time = yaml_docs[0]['indexed'] + assert isinstance(indexed_time, datetime.datetime) + assert assume_utc(indexed_time) == assume_utc(pseudo_ls8_dataset.indexed_time) + + # Request two, they should have separate yaml documents + opts.append(str(pseudo_ls8_dataset2.id)) + + result = clirunner(opts) + yaml_docs = list(yaml.safe_load_all(result.output)) + assert len(yaml_docs) == 2, "Two datasets should produce two sets of info" + assert yaml_docs[0]['id'] == str(pseudo_ls8_dataset.id) + assert yaml_docs[1]['id'] == str(pseudo_ls8_dataset2.id) + + +def test_cli_missing_info(clirunner, index): + id_ = str(uuid.uuid4()) + result = clirunner( + [ + 'dataset', 'info', id_ + ], + catch_exceptions=False, + expect_success=False, + verbose_flag=False + ) + assert result.exit_code == 1, "Should return exit status when dataset is missing" + # This should have been output to stderr, but the CliRunner doesnit distinguish + assert result.output.endswith("{id} missing\n".format(id=id_)) + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_find_duplicates(index, pseudo_ls8_type, + pseudo_ls8_dataset, pseudo_ls8_dataset2, pseudo_ls8_dataset3, pseudo_ls8_dataset4, + ls5_dataset_w_children): + # type: (Index, Product, Dataset, Dataset, Dataset, Dataset, Dataset) -> None + + # Our four ls8 datasets and three ls5. + all_datasets = index.datasets.search_eager() + assert len(all_datasets) == 7 + + # First two ls8 datasets have the same path/row, last two have a different row. 
+ expected_ls8_path_row_duplicates = [ + ( + ( + NumericRange(Decimal('116'), Decimal('116'), '[]'), + NumericRange(Decimal('74'), Decimal('84'), '[]') + ), + {pseudo_ls8_dataset.id, pseudo_ls8_dataset2.id} + ), + ( + ( + NumericRange(Decimal('116'), Decimal('116'), '[]'), + NumericRange(Decimal('85'), Decimal('87'), '[]') + ), + {pseudo_ls8_dataset3.id, pseudo_ls8_dataset4.id} + ), + + ] + + # Specifying groups as fields: + f = pseudo_ls8_type.metadata_type.dataset_fields.get + field_res = sorted(index.datasets.search_product_duplicates( + pseudo_ls8_type, + f('sat_path'), f('sat_row') + )) + assert field_res == expected_ls8_path_row_duplicates + # Field names as strings + product_res = sorted(index.datasets.search_product_duplicates( + pseudo_ls8_type, + 'sat_path', 'sat_row' + )) + assert product_res == expected_ls8_path_row_duplicates + + # Get duplicates that start on the same day + f = pseudo_ls8_type.metadata_type.dataset_fields.get + field_res = sorted(index.datasets.search_product_duplicates( + pseudo_ls8_type, + f('time').lower.day # type: ignore + )) + + # Datasets 1 & 3 are on the 26th. + # Datasets 2 & 4 are on the 27th. + assert field_res == [ + ( + ( + datetime.datetime(2014, 7, 26, 0, 0), + ), + {pseudo_ls8_dataset.id, pseudo_ls8_dataset3.id} + ), + ( + ( + datetime.datetime(2014, 7, 27, 0, 0), + ), + {pseudo_ls8_dataset2.id, pseudo_ls8_dataset4.id} + ), + + ] + + # No LS5 duplicates: there's only one of each + sat_res = sorted(index.datasets.search_product_duplicates( + ls5_dataset_w_children.type, + 'sat_path', 'sat_row' + )) + assert sat_res == [] + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_csv_search_via_cli(clirunner: Any, + pseudo_ls8_type: Product, + pseudo_ls8_dataset: Dataset, + pseudo_ls8_dataset2: Dataset) -> None: + """ + Search datasets via the cli with csv output + """ + + # Test dataset is: + # platform: LANDSAT_8 + # from: 2014-7-26 23:48:00 + # to: 2014-7-26 23:52:00 + # coords: + # ll: (-31.33333, 149.78434) + # lr: (-31.37116, 152.20094) + # ul: (-29.23394, 149.85216) + # ur: (-29.26873, 152.21782) + + # Dataset 2 is the same but on day 2014-7-27 + + def matches_both(*args): + rows = _cli_csv_search(('datasets',) + args, clirunner) + assert len(rows) == 2 + assert {rows[0]['id'], rows[1]['id']} == {str(pseudo_ls8_dataset.id), str(pseudo_ls8_dataset2.id)} + + def matches_1(*args): + rows = _cli_csv_search(('datasets',) + args, clirunner) + assert len(rows) == 1 + assert rows[0]['id'] == str(pseudo_ls8_dataset.id) + + def matches_none(*args): + rows = _cli_csv_search(('datasets',) + args, clirunner) + assert len(rows) == 0 + + def no_such_product(*args): + with pytest.raises(ValueError): + _cli_csv_search(('datasets',) + args, clirunner) + + matches_both('lat in [-40, -10]') + matches_both('product=' + pseudo_ls8_type.name) + + # Don't return on a mismatch + matches_none('lat in [150, 160]') + + # Match only a single dataset using multiple fields + matches_1('platform=LANDSAT_8', 'time in [2014-07-24, 2014-07-26]') + + # One matching field, one non-matching + no_such_product('time in [2014-07-24, 2014-07-26]', 'platform=LANDSAT_5') + + # Test date shorthand + matches_both('time in [2014-07, 2014-07]') + matches_none('time in [2014-06, 2014-06]') + + matches_both('time in 2014-07') + matches_none('time in 2014-08') + matches_both('time in 2014') + matches_none('time in 2015') + + matches_both('time in [2014, 2014]') + matches_both('time in [2013, 2014]') + 
matches_none('time in [2015, 2015]') + matches_none('time in [2013, 2013]') + + matches_both('time in [2014-7, 2014-8]') + matches_none('time in [2014-6, 2014-6]') + matches_both('time in [2005, 2015]') + + +# Headers are currently in alphabetical order. +_EXPECTED_OUTPUT_HEADER_LEGACY = 'creation_time,dataset_type_id,format,gsi,id,indexed_by,indexed_time,' \ + 'instrument,label,lat,lon,metadata_doc,metadata_type,metadata_type_id,' \ + 'orbit,platform,product,product_type,sat_path,sat_row,time,uri' + +_EXPECTED_OUTPUT_HEADER = 'creation_time,format,gsi,id,indexed_by,indexed_time,instrument,label,' \ + 'lat,lon,metadata_doc,metadata_type,metadata_type_id,orbit,platform,' \ + 'product,product_id,product_type,sat_path,sat_row,time,uri' + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_csv_structure(clirunner, pseudo_ls8_type, ls5_telem_type, + pseudo_ls8_dataset, pseudo_ls8_dataset2): + output = _csv_search_raw(['datasets', ' lat in [-40, -10]'], clirunner) + lines = [line.strip() for line in output.split('\n') if line] + # A header and two dataset rows + assert len(lines) == 3 + header_line = lines[0] + assert header_line in (_EXPECTED_OUTPUT_HEADER, _EXPECTED_OUTPUT_HEADER_LEGACY) + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_query_dataset_multi_product(index: Index, ls5_dataset_w_children: Dataset): + # We have one ls5 level1 and its child nbar + dc = Datacube(index) + + # Can we query a single product name? + datasets = dc.find_datasets(product='ls5_nbar_scene') + assert len(datasets) == 1 + + # Can we query multiple products? + datasets = dc.find_datasets(product=['ls5_nbar_scene', 'ls5_level1_scene']) + assert len(datasets) == 2 + + # Can we query multiple products in a tuple + datasets = dc.find_datasets(product=('ls5_nbar_scene', 'ls5_level1_scene')) + assert len(datasets) == 2 diff -Nru datacube-1.8.7/integration_tests/index/test_search.py datacube-1.8.9/integration_tests/index/test_search.py --- datacube-1.8.7/integration_tests/index/test_search.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/test_search.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,1217 +0,0 @@ -# This file is part of the Open Data Cube, see https://opendatacube.org for more information -# -# Copyright (c) 2015-2020 ODC Contributors -# SPDX-License-Identifier: Apache-2.0 -""" -Module -""" -import copy -import csv -import datetime -import io -import uuid -from decimal import Decimal -from uuid import UUID -from typing import List, Iterable, Dict, Any, Tuple - -import pytest -import yaml -from dateutil import tz -from psycopg2._range import NumericRange - -import datacube.scripts.cli_app -import datacube.scripts.search_tool -from datacube.config import LocalConfig -from datacube.drivers.postgres import PostgresDb -from datacube.drivers.postgres._connections import DEFAULT_DB_USER -from datacube.index import Index -from datacube.model import Dataset -from datacube.model import DatasetType -from datacube.model import MetadataType -from datacube.model import Range - -from datacube.testutils import load_dataset_definition - -from datacube import Datacube - - -@pytest.fixture -def pseudo_ls8_type(index, ga_metadata_type): - index.products.add_document({ - 'name': 'ls8_telemetry', - 'description': 'telemetry test', - 'metadata': { - 'product_type': 'pseudo_ls8_data', - 'platform': { - 'code': 'LANDSAT_8' - 
}, - 'instrument': { - 'name': 'OLI_TIRS' - }, - 'format': { - 'name': 'PSEUDOMD' - } - }, - 'metadata_type': ga_metadata_type.name - }) - return index.products.get_by_name('ls8_telemetry') - - -@pytest.fixture -def pseudo_ls8_dataset(index, initialised_postgres_db, pseudo_ls8_type): - id_ = str(uuid.uuid4()) - with initialised_postgres_db.connect() as connection: - was_inserted = connection.insert_dataset( - { - 'id': id_, - 'product_type': 'pseudo_ls8_data', - 'checksum_path': 'package.sha1', - 'ga_label': 'LS8_OLITIRS_STD-MD_P00_LC81160740742015089ASA00_' - '116_074_20150330T022553Z20150330T022657', - - 'ga_level': 'P00', - 'size_bytes': 637660782, - 'platform': { - 'code': 'LANDSAT_8' - }, - # We're unlikely to have extent info for a raw dataset, we'll use it for search tests. - 'extent': { - 'from_dt': datetime.datetime(2014, 7, 26, 23, 48, 0, 343853), - 'to_dt': datetime.datetime(2014, 7, 26, 23, 52, 0, 343853), - 'coord': { - 'll': {'lat': -31.33333, 'lon': 149.78434}, - 'lr': {'lat': -31.37116, 'lon': 152.20094}, - 'ul': {'lat': -29.23394, 'lon': 149.85216}, - 'ur': {'lat': -29.26873, 'lon': 152.21782} - } - }, - 'image': { - 'satellite_ref_point_start': {'x': 116, 'y': 74}, - 'satellite_ref_point_end': {'x': 116, 'y': 84}, - }, - 'creation_dt': datetime.datetime(2015, 4, 22, 6, 32, 4), - 'instrument': {'name': 'OLI_TIRS'}, - 'format': { - 'name': 'PSEUDOMD' - }, - 'lineage': { - 'source_datasets': {} - } - }, - id_, - pseudo_ls8_type.id - ) - assert was_inserted - d = index.datasets.get(id_) - # The dataset should have been matched to the telemetry type. - assert d.type.id == pseudo_ls8_type.id - - return d - - -@pytest.fixture -def pseudo_ls8_dataset2(index, initialised_postgres_db, pseudo_ls8_type): - # Like the previous dataset, but a day later in time. - id_ = str(uuid.uuid4()) - with initialised_postgres_db.connect() as connection: - was_inserted = connection.insert_dataset( - { - 'id': id_, - 'product_type': 'pseudo_ls8_data', - 'checksum_path': 'package.sha1', - 'ga_label': 'LS8_OLITIRS_STD-MD_P00_LC81160740742015089ASA00_' - '116_074_20150330T022553Z20150330T022657', - - 'ga_level': 'P00', - 'size_bytes': 637660782, - 'platform': { - 'code': 'LANDSAT_8' - }, - 'image': { - 'satellite_ref_point_start': {'x': 116, 'y': 74}, - 'satellite_ref_point_end': {'x': 116, 'y': 84}, - }, - # We're unlikely to have extent info for a raw dataset, we'll use it for search tests. - 'extent': { - 'from_dt': datetime.datetime(2014, 7, 27, 23, 48, 0, 343853), - 'to_dt': datetime.datetime(2014, 7, 27, 23, 52, 0, 343853), - 'coord': { - 'll': {'lat': -31.33333, 'lon': 149.78434}, - 'lr': {'lat': -31.37116, 'lon': 152.20094}, - 'ul': {'lat': -29.23394, 'lon': 149.85216}, - 'ur': {'lat': -29.26873, 'lon': 152.21782} - } - }, - 'creation_dt': datetime.datetime(2015, 4, 22, 6, 32, 4), - 'instrument': {'name': 'OLI_TIRS'}, - 'format': { - 'name': 'PSEUDOMD' - }, - 'lineage': { - 'source_datasets': {} - } - }, - id_, - pseudo_ls8_type.id - ) - assert was_inserted - d = index.datasets.get(id_) - # The dataset should have been matched to the telemetry type. - assert d.type.id == pseudo_ls8_type.id - - return d - - -# Datasets 3 and 4 mirror 1 and 2 but have a different path/row. 
-@pytest.fixture -def pseudo_ls8_dataset3(index: Index, - initialised_postgres_db: PostgresDb, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset) -> Dataset: - # Same as 1, but a different path/row - id_ = str(uuid.uuid4()) - dataset_doc = copy.deepcopy(pseudo_ls8_dataset.metadata_doc) - dataset_doc['id'] = id_ - dataset_doc['image'] = { - 'satellite_ref_point_start': {'x': 116, 'y': 85}, - 'satellite_ref_point_end': {'x': 116, 'y': 87}, - } - - with initialised_postgres_db.connect() as connection: - was_inserted = connection.insert_dataset( - dataset_doc, - id_, - pseudo_ls8_type.id - ) - assert was_inserted - d = index.datasets.get(id_) - # The dataset should have been matched to the telemetry type. - assert d.type.id == pseudo_ls8_type.id - return d - - -@pytest.fixture -def pseudo_ls8_dataset4(index: Index, - initialised_postgres_db: PostgresDb, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset2: Dataset) -> Dataset: - # Same as 2, but a different path/row - id_ = str(uuid.uuid4()) - dataset_doc = copy.deepcopy(pseudo_ls8_dataset2.metadata_doc) - dataset_doc['id'] = id_ - dataset_doc['image'] = { - 'satellite_ref_point_start': {'x': 116, 'y': 85}, - 'satellite_ref_point_end': {'x': 116, 'y': 87}, - } - - with initialised_postgres_db.connect() as connection: - was_inserted = connection.insert_dataset( - dataset_doc, - id_, - pseudo_ls8_type.id - ) - assert was_inserted - d = index.datasets.get(id_) - # The dataset should have been matched to the telemetry type. - assert d.type.id == pseudo_ls8_type.id - return d - - -@pytest.fixture -def ls5_dataset_w_children(index, clirunner, example_ls5_dataset_path, indexed_ls5_scene_products): - clirunner(['dataset', 'add', str(example_ls5_dataset_path)]) - doc = load_dataset_definition(example_ls5_dataset_path) - return index.datasets.get(doc.id, include_sources=True) - - -@pytest.fixture -def ls5_dataset_nbar_type(ls5_dataset_w_children: Dataset, - indexed_ls5_scene_products: List[DatasetType]) -> DatasetType: - for dataset_type in indexed_ls5_scene_products: - if dataset_type.name == ls5_dataset_w_children.type.name: - return dataset_type - else: - raise RuntimeError("LS5 type was not among types") - - -def test_search_dataset_equals(index: Index, pseudo_ls8_dataset: Dataset): - datasets = index.datasets.search_eager( - platform='LANDSAT_8' - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - datasets = index.datasets.search_eager( - platform='LANDSAT_8', - instrument='OLI_TIRS' - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # Wrong sensor name - with pytest.raises(ValueError): - datasets = index.datasets.search_eager( - platform='LANDSAT-8', - instrument='TM', - ) - - -def test_search_dataset_by_metadata(index: Index, pseudo_ls8_dataset: Dataset) -> None: - datasets = index.datasets.search_by_metadata( - {"platform": {"code": "LANDSAT_8"}, "instrument": {"name": "OLI_TIRS"}} - ) - datasets = list(datasets) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - datasets = index.datasets.search_by_metadata( - {"platform": {"code": "LANDSAT_5"}, "instrument": {"name": "TM"}} - ) - datasets = list(datasets) - assert len(datasets) == 0 - - -def test_search_day(index: Index, pseudo_ls8_dataset: Dataset) -> None: - # Matches day - datasets = index.datasets.search_eager( - time=datetime.date(2014, 7, 26) - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # Different day: no match - datasets = 
index.datasets.search_eager( - time=datetime.date(2014, 7, 27) - ) - assert len(datasets) == 0 - - -def test_search_dataset_ranges(index: Index, pseudo_ls8_dataset: Dataset) -> None: - # In the lat bounds. - datasets = index.datasets.search_eager( - lat=Range(-30.5, -29.5), - time=Range( - datetime.datetime(2014, 7, 26, 23, 0, 0), - datetime.datetime(2014, 7, 26, 23, 59, 0) - ) - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # Out of the lat bounds. - datasets = index.datasets.search_eager( - lat=Range(28, 32), - time=Range( - datetime.datetime(2014, 7, 26, 23, 48, 0), - datetime.datetime(2014, 7, 26, 23, 50, 0) - ) - ) - assert len(datasets) == 0 - - # Out of the time bounds - datasets = index.datasets.search_eager( - lat=Range(-30.5, -29.5), - time=Range( - datetime.datetime(2014, 7, 26, 21, 48, 0), - datetime.datetime(2014, 7, 26, 21, 50, 0) - ) - ) - assert len(datasets) == 0 - - # A dataset that overlaps but is not fully contained by the search bounds. - # TODO: Do we want overlap as the default behaviour? - # Should we distinguish between 'contains' and 'overlaps'? - datasets = index.datasets.search_eager( - lat=Range(-40, -30) - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # Single point search - datasets = index.datasets.search_eager( - lat=-30.0, - time=Range( - datetime.datetime(2014, 7, 26, 23, 0, 0), - datetime.datetime(2014, 7, 26, 23, 59, 0) - ) - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - datasets = index.datasets.search_eager( - lat=30.0, - time=Range( - datetime.datetime(2014, 7, 26, 23, 0, 0), - datetime.datetime(2014, 7, 26, 23, 59, 0) - ) - ) - assert len(datasets) == 0 - - # Single timestamp search - datasets = index.datasets.search_eager( - lat=Range(-30.5, -29.5), - time=datetime.datetime(2014, 7, 26, 23, 50, 0) - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - datasets = index.datasets.search_eager( - lat=Range(-30.5, -29.5), - time=datetime.datetime(2014, 7, 26, 23, 30, 0) - ) - assert len(datasets) == 0 - - -def test_search_globally(index: Index, pseudo_ls8_dataset: Dataset) -> None: - # Insert dataset. It should be matched to the telemetry collection. - # No expressions means get all. - results = list(index.datasets.search()) - assert len(results) == 1 - - # Dataset sources aren't loaded by default - assert results[0].sources is None - - -def _load_product_query( - lazy_results: Iterable[Tuple[DatasetType, Iterable[Dataset]]] -) -> Dict[str, List[Dataset]]: - """ - search_by_product() returns two levels of laziness. 
load them all into memory - for easy comparison/counts - """ - products = {} # type: Dict[str, List[Dataset]] - for product, datasets in lazy_results: - assert product.name not in products, "search_by_product() returned a product twice" - products[product.name] = list(datasets) - return products - - -def test_search_by_product(index: Index, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset, - indexed_ls5_scene_products, - ls5_dataset_w_children: Dataset) -> None: - # Query all the test data, the counts should match expected - results = _load_product_query(index.datasets.search_by_product()) - assert len(results) == 7 - dataset_count = sum(len(ds) for ds in results.values()) - assert dataset_count == 4 - - # Query one product - products = _load_product_query(index.datasets.search_by_product( - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert len(products) == 1 - [dataset] = products[pseudo_ls8_type.name] - assert dataset.id == pseudo_ls8_dataset.id - - -def test_search_limit(index, pseudo_ls8_dataset, pseudo_ls8_dataset2): - datasets = list(index.datasets.search()) - assert len(datasets) == 2 - datasets = list(index.datasets.search(limit=1)) - assert len(datasets) == 1 - datasets = list(index.datasets.search(limit=0)) - assert len(datasets) == 0 - datasets = list(index.datasets.search(limit=5)) - assert len(datasets) == 2 - - datasets = list(index.datasets.search_returning(('id',))) - assert len(datasets) == 2 - datasets = list(index.datasets.search_returning(('id',), limit=1)) - assert len(datasets) == 1 - datasets = list(index.datasets.search_returning(('id',), limit=0)) - assert len(datasets) == 0 - datasets = list(index.datasets.search_returning(('id',), limit=5)) - assert len(datasets) == 2 - - -def test_search_or_expressions(index: Index, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset, - ls5_dataset_nbar_type: DatasetType, - ls5_dataset_w_children: Dataset, - default_metadata_type: MetadataType, - telemetry_metadata_type: MetadataType) -> None: - # Four datasets: - # Our standard LS8 - # - type=ls8_telemetry - # LS5 with children: - # - type=ls5_nbar_scene - # - type=ls5_level1_scene - # - type=ls5_satellite_telemetry_data - - all_datasets = index.datasets.search_eager() - assert len(all_datasets) == 4 - all_ids = set(dataset.id for dataset in all_datasets) - - # OR all platforms: should return all datasets - datasets = index.datasets.search_eager( - platform=['LANDSAT_5', 'LANDSAT_7', 'LANDSAT_8'] - ) - assert len(datasets) == 4 - ids = set(dataset.id for dataset in datasets) - assert ids == all_ids - - # OR expression with only one clause. - datasets = index.datasets.search_eager( - platform=['LANDSAT_8'] - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # OR two products: return two - datasets = index.datasets.search_eager( - product=[pseudo_ls8_type.name, ls5_dataset_nbar_type.name] - ) - assert len(datasets) == 2 - ids = set(dataset.id for dataset in datasets) - assert ids == {pseudo_ls8_dataset.id, ls5_dataset_w_children.id} - - # eo OR telemetry: return all - datasets = index.datasets.search_eager( - metadata_type=[ - # LS5 + children - default_metadata_type.name, - # Nothing - telemetry_metadata_type.name, - # LS8 dataset - pseudo_ls8_type.metadata_type.name - ] - ) - assert len(datasets) == 4 - ids = set(dataset.id for dataset in datasets) - assert ids == all_ids - - # Redundant ORs should have no effect. 
- datasets = index.datasets.search_eager( - product=[pseudo_ls8_type.name, pseudo_ls8_type.name, pseudo_ls8_type.name] - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - -def test_search_returning(index: Index, - local_config: LocalConfig, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset, - ls5_dataset_w_children) -> None: - - assert index.datasets.count() == 4, "Expected four test datasets" - - # Expect one product with our one dataset. - results = list(index.datasets.search_returning( - ('id', 'sat_path', 'sat_row'), - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert len(results) == 1 - id_, path_range, sat_range = results[0] - assert id_ == pseudo_ls8_dataset.id - # TODO: output nicer types? - assert path_range == NumericRange(Decimal('116'), Decimal('116'), '[]') - assert sat_range == NumericRange(Decimal('74'), Decimal('84'), '[]') - - results = list(index.datasets.search_returning( - ('id', 'metadata_doc',), - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert len(results) == 1 - id_, document = results[0] - assert id_ == pseudo_ls8_dataset.id - assert document == pseudo_ls8_dataset.metadata_doc - - my_username = local_config.get('db_username', DEFAULT_DB_USER) - - # Mixture of document and native fields - results = list(index.datasets.search_returning( - ('id', 'creation_time', 'format', 'label'), - platform='LANDSAT_8', - indexed_by=my_username, - )) - assert len(results) == 1 - - id_, creation_time, format_, label = results[0] - - assert id_ == pseudo_ls8_dataset.id - assert format_ == 'PSEUDOMD' - - # It's always UTC in the document - expected_time = creation_time.astimezone(tz.tzutc()).replace(tzinfo=None) - assert expected_time.isoformat() == pseudo_ls8_dataset.metadata_doc['creation_dt'] - assert label == pseudo_ls8_dataset.metadata_doc['ga_label'] - - -def test_search_returning_rows(index, pseudo_ls8_type, - pseudo_ls8_dataset, pseudo_ls8_dataset2, - indexed_ls5_scene_products): - dataset = pseudo_ls8_dataset - - # If returning a field like uri, there will be one result per location. - - # No locations - results = list(index.datasets.search_returning( - ('id', 'uri'), - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert len(results) == 0 - - # Add a location to the dataset and we should get one result - test_uri = 'file:///tmp/test1' - index.datasets.add_location(dataset.id, test_uri) - results = list(index.datasets.search_returning( - ('id', 'uri'), - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert len(results) == 1 - assert results == [(dataset.id, test_uri)] - - # Add a second location and we should get two results - test_uri2 = 'file:///tmp/test2' - index.datasets.add_location(dataset.id, test_uri2) - results = set(index.datasets.search_returning( - ('id', 'uri'), - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert len(results) == 2 - assert results == { - (dataset.id, test_uri), - (dataset.id, test_uri2) - } - - # A second dataset now has a location too: - test_uri3 = 'mdss://c10/tmp/something' - index.datasets.add_location(pseudo_ls8_dataset2.id, test_uri3) - # Datasets and locations should still correctly match up... 
- results = set(index.datasets.search_returning( - ('id', 'uri'), - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert len(results) == 3 - assert results == { - (dataset.id, test_uri), - (dataset.id, test_uri2), - (pseudo_ls8_dataset2.id, test_uri3), - } - - -def test_searches_only_type(index: Index, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset, - ls5_telem_type) -> None: - # The dataset should have been matched to the telemetry type. - assert pseudo_ls8_dataset.type.id == pseudo_ls8_type.id - assert index.datasets.search_eager() - - # One result in the telemetry type - datasets = index.datasets.search_eager( - product=pseudo_ls8_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS', - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # One result in the metadata type - datasets = index.datasets.search_eager( - metadata_type=pseudo_ls8_type.metadata_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS', - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # No results when searching for a different dataset type. - with pytest.raises(ValueError): - datasets = index.datasets.search_eager( - product=ls5_telem_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS' - ) - - # One result when no types specified. - datasets = index.datasets.search_eager( - platform='LANDSAT_8', - instrument='OLI_TIRS' - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # No results for different metadata type. - with pytest.raises(ValueError): - datasets = index.datasets.search_eager( - metadata_type='telemetry', - platform='LANDSAT_8', - instrument='OLI_TIRS' - ) - - -def test_search_special_fields(index: Index, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset, - ls5_dataset_w_children) -> None: - # 'product' is a special case - datasets = index.datasets.search_eager( - product=pseudo_ls8_type.name - ) - assert len(datasets) == 1 - assert datasets[0].id == pseudo_ls8_dataset.id - - # Unknown field: no results - with pytest.raises(ValueError): - datasets = index.datasets.search_eager( - platform='LANDSAT_8', - flavour='chocolate', - ) - - -def test_search_by_uri(index, ls5_dataset_w_children): - datasets = index.datasets.search_eager(product=ls5_dataset_w_children.type.name, - uri=ls5_dataset_w_children.local_uri) - assert len(datasets) == 1 - - datasets = index.datasets.search_eager(product=ls5_dataset_w_children.type.name, - uri='file:///x/yz') - assert len(datasets) == 0 - - -def test_search_conflicting_types(index, pseudo_ls8_dataset, pseudo_ls8_type): - # Should return no results. - with pytest.raises(ValueError): - index.datasets.search_eager( - product=pseudo_ls8_type.name, - # The telemetry type is not of type storage_unit. - metadata_type='storage_unit' - ) - - -def test_fetch_all_of_md_type(index: Index, pseudo_ls8_dataset: Dataset) -> None: - # Get every dataset of the md type. - assert pseudo_ls8_dataset.metadata_type is not None # to shut up mypy - results = index.datasets.search_eager( - metadata_type=pseudo_ls8_dataset.metadata_type.name - ) - assert len(results) == 1 - assert results[0].id == pseudo_ls8_dataset.id - # Get every dataset of the type. - results = index.datasets.search_eager( - product=pseudo_ls8_dataset.type.name - ) - assert len(results) == 1 - assert results[0].id == pseudo_ls8_dataset.id - - # No results for another. 
- with pytest.raises(ValueError): - results = index.datasets.search_eager( - metadata_type='telemetry' - ) - - -def test_count_searches(index: Index, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset, - ls5_telem_type) -> None: - # The dataset should have been matched to the telemetry type. - assert pseudo_ls8_dataset.type.id == pseudo_ls8_type.id - assert index.datasets.search_eager() - - # One result in the telemetry type - datasets = index.datasets.count( - product=pseudo_ls8_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS', - ) - assert datasets == 1 - - # One result in the metadata type - datasets = index.datasets.count( - metadata_type=pseudo_ls8_type.metadata_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS', - ) - assert datasets == 1 - - # No results when searching for a different dataset type. - datasets = index.datasets.count( - product=ls5_telem_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS' - ) - assert datasets == 0 - - # One result when no types specified. - datasets = index.datasets.count( - platform='LANDSAT_8', - instrument='OLI_TIRS', - ) - assert datasets == 1 - - # No results for different metadata type. - datasets = index.datasets.count( - metadata_type='telemetry', - platform='LANDSAT_8', - instrument='OLI_TIRS' - ) - assert datasets == 0 - - -def test_get_dataset_with_children(index: Index, ls5_dataset_w_children: Dataset) -> None: - id_ = ls5_dataset_w_children.id - assert isinstance(id_, UUID) - - # Sources not loaded by default - d = index.datasets.get(id_) - assert d.sources is None - - # Ask for all sources - d = index.datasets.get(id_, include_sources=True) - assert list(d.sources.keys()) == ['level1'] - level1 = d.sources['level1'] - assert list(level1.sources.keys()) == ['satellite_telemetry_data'] - assert list(level1.sources['satellite_telemetry_data'].sources) == [] - - # It should also work with a string id - d = index.datasets.get(str(id_), include_sources=True) - assert list(d.sources.keys()) == ['level1'] - level1 = d.sources['level1'] - assert list(level1.sources.keys()) == ['satellite_telemetry_data'] - assert list(level1.sources['satellite_telemetry_data'].sources) == [] - - -def test_count_by_product_searches(index: Index, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset, - ls5_telem_type: DatasetType) -> None: - # The dataset should have been matched to the telemetry type. - assert pseudo_ls8_dataset.type.id == pseudo_ls8_type.id - assert index.datasets.search_eager() - - # One result in the telemetry type - products = tuple(index.datasets.count_by_product( - product=pseudo_ls8_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert products == ((pseudo_ls8_type, 1),) - - # One result in the metadata type - products = tuple(index.datasets.count_by_product( - metadata_type=pseudo_ls8_type.metadata_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert products == ((pseudo_ls8_type, 1),) - - # No results when searching for a different dataset type. - products = tuple(index.datasets.count_by_product( - product=ls5_telem_type.name, - platform='LANDSAT_8', - instrument='OLI_TIRS' - )) - assert products == () - - # One result when no types specified. 
- products = tuple(index.datasets.count_by_product( - platform='LANDSAT_8', - instrument='OLI_TIRS', - )) - assert products == ((pseudo_ls8_type, 1),) - - # Only types with datasets should be returned (these params match ls5_gtiff too) - products = tuple(index.datasets.count_by_product()) - assert products == ((pseudo_ls8_type, 1),) - - # No results for different metadata type. - products = tuple(index.datasets.count_by_product( - metadata_type='telemetry', - )) - assert products == () - - -def test_count_time_groups(index: Index, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset) -> None: - # 'from_dt': datetime.datetime(2014, 7, 26, 23, 48, 0, 343853), - # 'to_dt': datetime.datetime(2014, 7, 26, 23, 52, 0, 343853), - timeline = list(index.datasets.count_product_through_time( - '1 day', - product=pseudo_ls8_type.name, - time=Range( - datetime.datetime(2014, 7, 25, tzinfo=tz.tzutc()), - datetime.datetime(2014, 7, 27, tzinfo=tz.tzutc()) - ) - )) - - assert len(timeline) == 2 - assert timeline == [ - ( - Range(datetime.datetime(2014, 7, 25, tzinfo=tz.tzutc()), - datetime.datetime(2014, 7, 26, tzinfo=tz.tzutc())), - 0 - ), - ( - Range(datetime.datetime(2014, 7, 26, tzinfo=tz.tzutc()), - datetime.datetime(2014, 7, 27, tzinfo=tz.tzutc())), - 1 - ) - ] - - -@pytest.mark.usefixtures('ga_metadata_type', - 'indexed_ls5_scene_products') -def test_source_filter(clirunner, index, example_ls5_dataset_path): - clirunner( - [ - 'dataset', - 'add', - str(example_ls5_dataset_path) - ] - ) - - all_nbar = index.datasets.search_eager(product='ls5_nbar_scene') - assert len(all_nbar) == 1 - all_level1 = index.datasets.search_eager(product='ls5_level1_scene') - assert len(all_level1) == 1 - assert all_level1[0].metadata.gsi == 'ASA' - - dss = index.datasets.search_eager( - product='ls5_nbar_scene', - source_filter={'product': 'ls5_level1_scene', 'gsi': 'ASA'} - ) - assert dss == all_nbar - dss = index.datasets.search_eager( - product='ls5_nbar_scene', - source_filter={'product': 'ls5_level1_scene', 'gsi': 'GREG'} - ) - assert dss == [] - - with pytest.raises(RuntimeError): - dss = index.datasets.search_eager( - product='ls5_nbar_scene', - source_filter={'gsi': 'ASA'} - ) - - -def test_count_time_groups_cli(clirunner: Any, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset) -> None: - result = clirunner( - [ - 'product-counts', - '1 day', - 'time in [2014-07-25, 2014-07-27]' - ], cli_method=datacube.scripts.search_tool.cli, - verbose_flag='' - ) - - expected_out = ( - '{}\n' - ' 2014-07-25: 0\n' - ' 2014-07-26: 1\n' - ).format(pseudo_ls8_type.name) - - assert result.output.endswith(expected_out) - - -def test_search_cli_basic(clirunner: Any, - telemetry_metadata_type: MetadataType, - pseudo_ls8_dataset: Dataset) -> None: - """ - Search datasets using the cli. - """ - result = clirunner( - [ - # No search arguments: return all datasets. - 'datasets' - ], cli_method=datacube.scripts.search_tool.cli - ) - - assert str(pseudo_ls8_dataset.id) in result.output - assert str(telemetry_metadata_type.name) in result.output - - assert result.exit_code == 0 - - -def test_cli_info(index: Index, - clirunner: Any, - pseudo_ls8_dataset: Dataset, - pseudo_ls8_dataset2: Dataset) -> None: - """ - Search datasets using the cli. 
- """ - index.datasets.add_location(pseudo_ls8_dataset.id, 'file:///tmp/location1') - index.datasets.add_location(pseudo_ls8_dataset.id, 'file:///tmp/location2') - - opts = [ - 'dataset', 'info', str(pseudo_ls8_dataset.id) - ] - result = clirunner(opts, verbose_flag='') - - output = result.output - # Remove WARNING messages for experimental driver - output_lines = [line for line in output.splitlines() if "WARNING:" not in line] - output = "\n".join(output_lines) - - # Should be a valid yaml - yaml_docs = list(yaml.safe_load_all(output)) - assert len(yaml_docs) == 1 - - # We output properties in order for readability: - output_lines = [line for line in output_lines if not line.startswith('indexed:')] - expected_lines = [ - "id: " + str(pseudo_ls8_dataset.id), - 'product: ls8_telemetry', - 'status: active', - # Newest location first - 'locations:', - '- file:///tmp/location2', - '- file:///tmp/location1', - 'fields:', - ' creation_time: 2015-04-22 06:32:04', - ' format: PSEUDOMD', - ' gsi: null', - ' instrument: OLI_TIRS', - ' label: LS8_OLITIRS_STD-MD_P00_LC81160740742015089ASA00_116_074_20150330T022553Z20150330T022657', - ' lat: {begin: -31.37116, end: -29.23394}', - ' lon: {begin: 149.78434, end: 152.21782}', - ' orbit: null', - ' platform: LANDSAT_8', - ' product_type: pseudo_ls8_data', - ' sat_path: {begin: 116, end: 116}', - ' sat_row: {begin: 74, end: 84}', - " time: {begin: '2014-07-26T23:48:00.343853', end: '2014-07-26T23:52:00.343853'}", - ] - assert expected_lines == output_lines - - # Check indexed time separately, as we don't care what timezone it's displayed in. - indexed_time = yaml_docs[0]['indexed'] - assert isinstance(indexed_time, datetime.datetime) - assert assume_utc(indexed_time) == assume_utc(pseudo_ls8_dataset.indexed_time) - - # Request two, they should have separate yaml documents - opts.append(str(pseudo_ls8_dataset2.id)) - - result = clirunner(opts) - yaml_docs = list(yaml.safe_load_all(result.output)) - assert len(yaml_docs) == 2, "Two datasets should produce two sets of info" - assert yaml_docs[0]['id'] == str(pseudo_ls8_dataset.id) - assert yaml_docs[1]['id'] == str(pseudo_ls8_dataset2.id) - - -def assume_utc(d): - if d.tzinfo is None: - return d.replace(tzinfo=tz.tzutc()) - else: - return d.astimezone(tz.tzutc()) - - -def test_cli_missing_info(clirunner, initialised_postgres_db): - id_ = str(uuid.uuid4()) - result = clirunner( - [ - 'dataset', 'info', id_ - ], - catch_exceptions=False, - expect_success=False, - verbose_flag=False - ) - assert result.exit_code == 1, "Should return exit status when dataset is missing" - # This should have been output to stderr, but the CliRunner doesnit distinguish - assert result.output.endswith("{id} missing\n".format(id=id_)) - - -def test_find_duplicates(index, pseudo_ls8_type, - pseudo_ls8_dataset, pseudo_ls8_dataset2, pseudo_ls8_dataset3, pseudo_ls8_dataset4, - ls5_dataset_w_children): - # type: (Index, DatasetType, Dataset, Dataset, Dataset, Dataset, Dataset) -> None - - # Our four ls8 datasets and three ls5. - all_datasets = index.datasets.search_eager() - assert len(all_datasets) == 7 - - # First two ls8 datasets have the same path/row, last two have a different row. 
- expected_ls8_path_row_duplicates = [ - ( - ( - NumericRange(Decimal('116'), Decimal('116'), '[]'), - NumericRange(Decimal('74'), Decimal('84'), '[]') - ), - {pseudo_ls8_dataset.id, pseudo_ls8_dataset2.id} - ), - ( - ( - NumericRange(Decimal('116'), Decimal('116'), '[]'), - NumericRange(Decimal('85'), Decimal('87'), '[]') - ), - {pseudo_ls8_dataset3.id, pseudo_ls8_dataset4.id} - ), - - ] - - # Specifying groups as fields: - f = pseudo_ls8_type.metadata_type.dataset_fields.get - field_res = sorted(index.datasets.search_product_duplicates( - pseudo_ls8_type, - f('sat_path'), f('sat_row') - )) - assert field_res == expected_ls8_path_row_duplicates - # Field names as strings - product_res = sorted(index.datasets.search_product_duplicates( - pseudo_ls8_type, - 'sat_path', 'sat_row' - )) - assert product_res == expected_ls8_path_row_duplicates - - # Get duplicates that start on the same day - f = pseudo_ls8_type.metadata_type.dataset_fields.get - field_res = sorted(index.datasets.search_product_duplicates( - pseudo_ls8_type, - f('time').lower.day # type: ignore - )) - - # Datasets 1 & 3 are on the 26th. - # Datasets 2 & 4 are on the 27th. - assert field_res == [ - ( - ( - datetime.datetime(2014, 7, 26, 0, 0), - ), - {pseudo_ls8_dataset.id, pseudo_ls8_dataset3.id} - ), - ( - ( - datetime.datetime(2014, 7, 27, 0, 0), - ), - {pseudo_ls8_dataset2.id, pseudo_ls8_dataset4.id} - ), - - ] - - # No LS5 duplicates: there's only one of each - sat_res = sorted(index.datasets.search_product_duplicates( - ls5_dataset_w_children.type, - 'sat_path', 'sat_row' - )) - assert sat_res == [] - - -def test_csv_search_via_cli(clirunner: Any, - pseudo_ls8_type: DatasetType, - pseudo_ls8_dataset: Dataset, - pseudo_ls8_dataset2: Dataset) -> None: - """ - Search datasets via the cli with csv output - """ - - # Test dataset is: - # platform: LANDSAT_8 - # from: 2014-7-26 23:48:00 - # to: 2014-7-26 23:52:00 - # coords: - # ll: (-31.33333, 149.78434) - # lr: (-31.37116, 152.20094) - # ul: (-29.23394, 149.85216) - # ur: (-29.26873, 152.21782) - - # Dataset 2 is the same but on day 2014-7-27 - - def matches_both(*args): - rows = _cli_csv_search(('datasets',) + args, clirunner) - assert len(rows) == 2 - assert {rows[0]['id'], rows[1]['id']} == {str(pseudo_ls8_dataset.id), str(pseudo_ls8_dataset2.id)} - - def matches_1(*args): - rows = _cli_csv_search(('datasets',) + args, clirunner) - assert len(rows) == 1 - assert rows[0]['id'] == str(pseudo_ls8_dataset.id) - - def matches_none(*args): - rows = _cli_csv_search(('datasets',) + args, clirunner) - assert len(rows) == 0 - - def no_such_product(*args): - with pytest.raises(ValueError): - _cli_csv_search(('datasets',) + args, clirunner) - - matches_both('lat in [-40, -10]') - matches_both('product=' + pseudo_ls8_type.name) - - # Don't return on a mismatch - matches_none('lat in [150, 160]') - - # Match only a single dataset using multiple fields - matches_1('platform=LANDSAT_8', 'time in [2014-07-24, 2014-07-26]') - - # One matching field, one non-matching - no_such_product('time in [2014-07-24, 2014-07-26]', 'platform=LANDSAT_5') - - # Test date shorthand - matches_both('time in [2014-07, 2014-07]') - matches_none('time in [2014-06, 2014-06]') - - matches_both('time in 2014-07') - matches_none('time in 2014-08') - matches_both('time in 2014') - matches_none('time in 2015') - - matches_both('time in [2014, 2014]') - matches_both('time in [2013, 2014]') - matches_none('time in [2015, 2015]') - matches_none('time in [2013, 2013]') - - matches_both('time in [2014-7, 2014-8]') - 
matches_none('time in [2014-6, 2014-6]') - matches_both('time in [2005, 2015]') - - -# Headers are currently in alphabetical order. -_EXPECTED_OUTPUT_HEADER = 'creation_time,dataset_type_id,format,gsi,id,indexed_by,indexed_time,' \ - 'instrument,label,lat,lon,metadata_doc,metadata_type,metadata_type_id,' \ - 'orbit,platform,product,product_type,sat_path,sat_row,time,uri' - - -def test_csv_structure(clirunner, pseudo_ls8_type, ls5_telem_type, - pseudo_ls8_dataset, pseudo_ls8_dataset2): - output = _csv_search_raw(['datasets', ' lat in [-40, -10]'], clirunner) - lines = [line.strip() for line in output.split('\n') if line] - # A header and two dataset rows - assert len(lines) == 3 - - assert lines[0] == _EXPECTED_OUTPUT_HEADER - - -def test_query_dataset_multi_product(index: Index, ls5_dataset_w_children: Dataset): - # We have one ls5 level1 and its child nbar - dc = Datacube(index) - - # Can we query a single product name? - datasets = dc.find_datasets(product='ls5_nbar_scene') - assert len(datasets) == 1 - - # Can we query multiple products? - datasets = dc.find_datasets(product=['ls5_nbar_scene', 'ls5_level1_scene']) - assert len(datasets) == 2 - - # Can we query multiple products in a tuple - datasets = dc.find_datasets(product=('ls5_nbar_scene', 'ls5_level1_scene')) - assert len(datasets) == 2 - - -def _cli_csv_search(args, clirunner): - # Do a CSV search from the cli, returning results as a list of dictionaries - output = _csv_search_raw(args, clirunner) - return list(csv.DictReader(io.StringIO(output))) - - -def _csv_search_raw(args, clirunner): - # Do a CSV search from the cli, returning output as a string - result = clirunner(['-f', 'csv'] + list(args), cli_method=datacube.scripts.search_tool.cli, verbose_flag=False) - output = result.output - output_lines = output.split("\n") - return "\n".join(line for line in output_lines if "WARNING:" not in line) diff -Nru datacube-1.8.7/integration_tests/index/test_update_columns.py datacube-1.8.9/integration_tests/index/test_update_columns.py --- datacube-1.8.7/integration_tests/index/test_update_columns.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/index/test_update_columns.py 2022-11-17 00:47:28.000000000 +0000 @@ -58,7 +58,7 @@ result = clirunner(["system", "init"]) assert "Created." in result.output - with uninitialised_postgres_db.connect() as connection: + with uninitialised_postgres_db._connect() as connection: assert check_column(connection, _schema.METADATA_TYPE.name, "updated") assert not check_column(connection, _schema.METADATA_TYPE.name, "fake_column") assert check_column(connection, _schema.PRODUCT.name, "updated") @@ -81,7 +81,7 @@ result = clirunner(["system", "init"]) assert "Created." 
in result.output - with uninitialised_postgres_db.connect() as connection: + with uninitialised_postgres_db._connect() as connection: # Drop all the columns for an init rerun drop_column(connection, _schema.METADATA_TYPE.name, "updated") drop_column(connection, _schema.PRODUCT.name, "updated") @@ -95,7 +95,7 @@ result = clirunner(["system", "init"]) - with uninitialised_postgres_db.connect() as connection: + with uninitialised_postgres_db._connect() as connection: assert check_column(connection, _schema.METADATA_TYPE.name, "updated") assert check_column(connection, _schema.PRODUCT.name, "updated") assert check_column(connection, _schema.DATASET.name, "updated") diff -Nru datacube-1.8.7/integration_tests/test_celery_runner.py datacube-1.8.9/integration_tests/test_celery_runner.py --- datacube-1.8.7/integration_tests/test_celery_runner.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_celery_runner.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,157 +0,0 @@ -# This file is part of the Open Data Cube, see https://opendatacube.org for more information -# -# Copyright (c) 2015-2020 ODC Contributors -# SPDX-License-Identifier: Apache-2.0 -""" -Tests for datacube._celery_runner -""" - -from time import sleep -import subprocess -import pytest -import sys - -cr = pytest.importorskip("datacube._celery_runner") - -PORT = 29374 -PASS = 'dfhksdjh23iuervao' -WRONG_PASS = 'sdfghdfjsghjdfiueuiwei' -REDIS_WAIT = 0.5 - - -def check_redis_binary(): - try: - return subprocess.check_call(['redis-server', '--version']) == 0 - except Exception: - return False - - -have_redis = check_redis_binary() -skip_if_no_redis = pytest.mark.skipif(not have_redis, reason="Needs redis-server to run") - - -@skip_if_no_redis -def test_launch_redis_no_password(): - is_running = cr.check_redis(port=PORT) - assert is_running is False, "Redis should not be running at the start of the test" - - redis_stop = cr.launch_redis(PORT, password=None, loglevel='verbose') - assert redis_stop is not None - - sleep(REDIS_WAIT) - is_running = cr.check_redis(port=PORT) - assert is_running is True - - redis_stop() - sleep(REDIS_WAIT) - is_running = cr.check_redis(port=PORT) - assert is_running is False - - -@skip_if_no_redis -def test_launch_redis_with_config_password(): - is_running = cr.check_redis(port=PORT) - assert is_running is False, "Redis should not be running at the start of the test" - - redis_stop = cr.launch_redis(PORT, password='', loglevel='verbose') - assert redis_stop is not None - - sleep(REDIS_WAIT) - is_running = cr.check_redis(port=PORT, password='') - assert is_running is True - - redis_stop() - sleep(REDIS_WAIT) - is_running = cr.check_redis(port=PORT, password='') - assert is_running is False - - -@skip_if_no_redis -def test_launch_redis_with_custom_password(): - is_running = cr.check_redis(port=PORT) - assert is_running is False, "Redis should not be running at the start of the test" - - redis_stop = cr.launch_redis(PORT, password=PASS, loglevel='verbose') - assert redis_stop is not None - - sleep(REDIS_WAIT) - is_running = cr.check_redis(port=PORT, password=PASS) - assert is_running is True - - is_running = cr.check_redis(port=PORT, password=WRONG_PASS) - assert is_running is False - - redis_stop() - sleep(REDIS_WAIT) - is_running = cr.check_redis(port=PORT, password=PASS) - assert is_running is False - - -def _echo(x, please_fail=False): - if please_fail: - raise IOError('Fake I/O error, cause you asked') - return x - - -@pytest.mark.timeout(30) -@pytest.mark.skipif(sys.platform == 'win32', - 
reason="does not run on Windows") -@skip_if_no_redis -def test_celery_with_worker(): - DATA = [1, 2, 3, 4] - - def launch_worker(): - args = ['bash', '-c', - 'nohup {} -m datacube.execution.worker --executor celery localhost:{} --nprocs 1 &'.format( - sys.executable, PORT)] - try: - subprocess.check_call(args) - except subprocess.CalledProcessError: - return False - - return True - - assert cr.check_redis(port=PORT, password='') is False, "Redis should not be running at the start of the test" - - runner = cr.CeleryExecutor(host='localhost', port=PORT, password='') - sleep(REDIS_WAIT) - - assert cr.check_redis(port=PORT, password='') - - # no workers yet - future = runner.submit(_echo, 0) - assert future.ready() is False - runner.release(future) - - futures = runner.map(_echo, DATA) - assert len(futures) == len(DATA) - - completed, failed, pending = runner.get_ready(futures) - - assert len(completed) == 0 - assert len(failed) == 0 - assert len(pending) == len(DATA) - # not worker test done - - worker_started_ok = launch_worker() - assert worker_started_ok - - futures = runner.map(_echo, DATA) - results = runner.results(futures) - - assert len(results) == len(DATA) - assert set(results) == set(DATA) - - # Test failure pass-through - future = runner.submit(_echo, "", please_fail=True) - - for ff in runner.as_completed([future]): - assert ff.ready() is True - with pytest.raises(IOError): - runner.result(ff) - - del runner - - # Redis shouldn't be running now. - is_running = cr.check_redis(port=PORT) - assert is_running is False diff -Nru datacube-1.8.7/integration_tests/test_cli_output.py datacube-1.8.9/integration_tests/test_cli_output.py --- datacube-1.8.7/integration_tests/test_cli_output.py 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_cli_output.py 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,138 @@ +import pytest + + +def test_cli_product_subcommand(index_empty, clirunner, dataset_add_configs): + runner = clirunner(['product', 'update'], verbose_flag=False, expect_success=False) + assert "Usage: [OPTIONS] [FILES]" in runner.output + assert "Update existing products." in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['product', 'update', dataset_add_configs.empty_file], verbose_flag=False, expect_success=False) + assert "All files are empty, exit" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['product', 'add'], verbose_flag=False, expect_success=False) + assert "Usage: [OPTIONS] [FILES]" in runner.output + assert "Add or update products in" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['product', 'add', dataset_add_configs.empty_file], verbose_flag=False, expect_success=False) + assert "All files are empty, exit" in runner.output + assert runner.exit_code == 1 + + +def test_cli_metadata_subcommand(index_empty, clirunner, dataset_add_configs): + runner = clirunner(['metadata', 'update'], verbose_flag=False, expect_success=False) + assert "Usage: [OPTIONS] [FILES]" in runner.output + assert "Update existing metadata types." 
in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['metadata', 'update', dataset_add_configs.empty_file], verbose_flag=False, expect_success=False) + assert "All files are empty, exit" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['metadata', 'add'], verbose_flag=False, expect_success=False) + assert "Usage: [OPTIONS] [FILES]" in runner.output + assert "Add or update metadata types in" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['metadata', 'add', dataset_add_configs.empty_file], verbose_flag=False, expect_success=False) + assert "All files are empty, exit" in runner.output + assert runner.exit_code == 1 + + +def test_cli_dataset_subcommand(index_empty, clirunner, dataset_add_configs): + clirunner(['metadata', 'add', dataset_add_configs.metadata]) + clirunner(['product', 'add', dataset_add_configs.products]) + + runner = clirunner(['dataset', 'add'], verbose_flag=False, expect_success=False) + assert "Indexing datasets [####################################] 100%" not in runner.output + assert "Usage: [OPTIONS] [DATASET_PATHS]" in runner.output + assert "Add datasets" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['dataset', 'update'], verbose_flag=False, expect_success=False) + assert "0 successful, 0 failed" not in runner.output + assert "Usage: [OPTIONS] [DATASET_PATHS]" in runner.output + assert "Update datasets" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['dataset', 'info'], verbose_flag=False, expect_success=False) + assert "Usage: [OPTIONS] [IDS]" in runner.output + assert "Display dataset information" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['dataset', 'uri-search'], verbose_flag=False, expect_success=False) + assert "Usage: [OPTIONS] [PATHS]" in runner.output + assert "Search by dataset locations" in runner.output + assert runner.exit_code == 1 + + if index_empty.supports_legacy: + clirunner(['dataset', 'add', dataset_add_configs.datasets]) + else: + # Does not support legacy datasets + with pytest.raises(ValueError): + # Expect to fail with legacy datasets + clirunner(['dataset', 'add', dataset_add_configs.datasets]) + # Use EO3 datasets to allow subsequent tests to run. + clirunner(['dataset', 'add', dataset_add_configs.datasets_eo3]) + + runner = clirunner(['dataset', 'archive'], verbose_flag=False, expect_success=False) + assert "Completed dataset archival." not in runner.output + assert "Usage: [OPTIONS] [IDS]" in runner.output + assert "Archive datasets" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['dataset', 'archive', "--all"], verbose_flag=False) + assert "Completed dataset archival." in runner.output + assert "Usage: [OPTIONS] [IDS]" not in runner.output + assert "Archive datasets" not in runner.output + assert runner.exit_code == 0 + + runner = clirunner(['dataset', 'restore'], verbose_flag=False, expect_success=False) + assert "Usage: [OPTIONS] [IDS]" in runner.output + assert "Restore datasets" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['dataset', 'restore', "--all"], verbose_flag=False) + assert "restoring" in runner.output + assert "Usage: [OPTIONS] [IDS]" not in runner.output + assert "Restore datasets" not in runner.output + assert runner.exit_code == 0 + + runner = clirunner(['dataset', 'purge'], verbose_flag=False, expect_success=False) + assert "Completed dataset purge." 
not in runner.output + assert "Usage: [OPTIONS] [IDS]" in runner.output + assert "Purge archived datasets" in runner.output + assert runner.exit_code == 1 + + runner = clirunner(['dataset', 'purge', "--all"], verbose_flag=False) + assert "Completed dataset purge." in runner.output + assert "Usage: [OPTIONS] [IDS]" not in runner.output + assert runner.exit_code == 0 + + +def test_readd_and_update_metadata_product_dataset_command(index_empty, clirunner, dataset_add_configs): + clirunner(['metadata', 'add', dataset_add_configs.metadata]) + rerun_add = clirunner(['metadata', 'add', dataset_add_configs.metadata]) + assert "WARNING Metadata Type" in rerun_add.output + assert "is already in the database" in rerun_add.output + + update = clirunner(['metadata', 'update', dataset_add_configs.metadata]) + assert "WARNING No changes detected for metadata type" in update.output + + add = clirunner(['product', 'add', dataset_add_configs.products]) + rerun_add = clirunner(['product', 'add', dataset_add_configs.products]) + assert "WARNING Product" in rerun_add.output + assert "is already in the database" in rerun_add.output + + update = clirunner(['product', 'update', dataset_add_configs.products]) + assert "WARNING No changes detected for product" in update.output + + clirunner(['dataset', 'add', dataset_add_configs.datasets_eo3]) + rerun_add = clirunner(['dataset', 'add', dataset_add_configs.datasets_eo3]) + assert "WARNING Dataset" in rerun_add.output + assert "is already in the database" in rerun_add.output + + update = clirunner(['dataset', 'update', dataset_add_configs.datasets_eo3]) + assert "1 successful, 0 failed" in update.output diff -Nru datacube-1.8.7/integration_tests/test_config_tool.py datacube-1.8.9/integration_tests/test_config_tool.py --- datacube-1.8.7/integration_tests/test_config_tool.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_config_tool.py 2022-11-17 00:47:28.000000000 +0000 @@ -20,20 +20,18 @@ INVALID_MAPPING_DOCS = map(str, Path(__file__).parent.parent.joinpath('docs').glob('*')) -def _dataset_type_count(db): - with db.connect() as connection: +def _dataset_type_count(index): + with index._active_connection() as connection: return len(list(connection.get_all_products())) -def test_add_example_dataset_types(clirunner, initialised_postgres_db, default_metadata_type): +def test_add_example_dataset_types(clirunner, index, default_metadata_type): """ Add example mapping docs, to ensure they're valid and up-to-date. We add them all to a single database to check for things like duplicate ids. 
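The add and update commands above also have index-level counterparts. A rough sketch of adding product documents programmatically, assuming ``read_documents`` is importable from ``datacube.utils`` and using a hypothetical ``products.yaml``::

    from datacube import Datacube
    from datacube.utils import read_documents  # assumed import path

    dc = Datacube()
    for _path, doc in read_documents('products.yaml'):   # hypothetical document file
        product = dc.index.products.from_doc(doc)
        dc.index.products.add(product)

Re-running ``product add`` on documents that are already indexed is harmless; as the test above shows, the CLI only reports a warning and continues.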
- - :type initialised_postgres_db: datacube.drivers.postgres._connections.PostgresDb """ - existing_mappings = _dataset_type_count(initialised_postgres_db) + existing_mappings = _dataset_type_count(index) print('{} mappings'.format(existing_mappings)) for mapping_path in EXAMPLE_DATASET_TYPE_DOCS: @@ -42,7 +40,7 @@ result = clirunner(['-v', 'product', 'add', mapping_path]) assert result.exit_code == 0 - mappings_count = _dataset_type_count(initialised_postgres_db) + mappings_count = _dataset_type_count(index) assert mappings_count > existing_mappings, "Mapping document was not added: " + str(mapping_path) existing_mappings = mappings_count @@ -79,11 +77,8 @@ assert result.exit_code == 0 -def test_error_returned_on_invalid(clirunner, initialised_postgres_db): - """ - :type initialised_postgres_db: datacube.drivers.postgres._connections.PostgresDb - """ - assert _dataset_type_count(initialised_postgres_db) == 0 +def test_error_returned_on_invalid(clirunner, index): + assert _dataset_type_count(index) == 0 for mapping_path in INVALID_MAPPING_DOCS: result = clirunner( @@ -95,10 +90,10 @@ expect_success=False ) assert result.exit_code != 0, "Success return code for invalid document." - assert _dataset_type_count(initialised_postgres_db) == 0, "Invalid document was added to DB" + assert _dataset_type_count(index) == 0, "Invalid document was added to DB" -def test_config_check(clirunner, initialised_postgres_db, local_config): +def test_config_check(clirunner, index, local_config): """ :type local_config: datacube.config.LocalConfig """ @@ -119,7 +114,7 @@ assert user_regex.match(result.output) -def test_list_users_does_not_fail(clirunner, local_config, initialised_postgres_db): +def test_list_users_does_not_fail(clirunner, local_config, index): """ :type local_config: datacube.config.LocalConfig """ @@ -149,6 +144,7 @@ assert "eo3 " in result.output +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_db_init_rebuild(clirunner, local_config, ls5_telem_type): if local_config._env == "datacube": from datacube.drivers.postgres import _dynamic @@ -177,31 +173,31 @@ ) in result.output -def test_db_init(clirunner, initialised_postgres_db): - if initialised_postgres_db.driver_name == "postgis": +def test_db_init(clirunner, index): + if index._db.driver_name == "postgis": from datacube.drivers.postgis._core import drop_db, has_schema else: from datacube.drivers.postgres._core import drop_db, has_schema - with initialised_postgres_db.connect() as connection: + with index._db._connect() as connection: drop_db(connection._connection) - assert not has_schema(initialised_postgres_db._engine, connection._connection) + assert not has_schema(index._db._engine, connection._connection) # Run on an empty database. - if initialised_postgres_db.driver_name == "postgis": + if index._db.driver_name == "postgis": result = clirunner(['-E', 'experimental', 'system', 'init']) else: result = clirunner(['system', 'init']) assert 'Created.' 
in result.output - with initialised_postgres_db.connect() as connection: - assert has_schema(initialised_postgres_db._engine, connection._connection) + with index._db._connect() as connection: + assert has_schema(index._db._engine, connection._connection) -def test_add_no_such_product(clirunner, initialised_postgres_db): - result = clirunner(['dataset', 'add', '--dtype', 'no_such_product'], expect_success=False) +def test_add_no_such_product(clirunner, index): + result = clirunner(['dataset', 'add', '--dtype', 'no_such_product', '/tmp'], expect_success=False) assert result.exit_code != 0 assert "DEPRECATED option detected" in result.output assert "ERROR Supplied product name" in result.output @@ -213,13 +209,13 @@ # Test that names are escaped ('test_user_"invalid+_chars_{n}', None), ('test_user_invalid_desc_{n}', 'Invalid "\' chars in description')]) -def example_user(clirunner, initialised_postgres_db, request): +def example_user(clirunner, index, request): username, description = request.param username = username.format(n=random.randint(111111, 999999)) # test_roles = (user_name for role_name, user_name, desc in roles if user_name.startswith('test_')) - with initialised_postgres_db.connect() as connection: + with index._db._connect() as connection: users = (user_name for role_name, user_name, desc in connection.list_users()) if username in users: connection.drop_users([username]) @@ -229,7 +225,7 @@ yield username, description - with initialised_postgres_db.connect() as connection: + with index._db._connect() as connection: users = (user_name for role_name, user_name, desc in connection.list_users()) if username in users: connection.drop_users([username]) diff -Nru datacube-1.8.7/integration_tests/test_dataset_add.py datacube-1.8.9/integration_tests/test_dataset_add.py --- datacube-1.8.7/integration_tests/test_dataset_add.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_dataset_add.py 2022-11-17 00:47:28.000000000 +0000 @@ -4,6 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 import math +import pytest import toolz import yaml @@ -24,7 +25,7 @@ ds_ = index.datasets.get(ds.id, include_sources=True) assert ds_ is not None - assert str(ds_.id) == ds.id + assert ds_.id == ds.id assert ds_.sources == {} assert index.datasets.get(ds.sources['ab'].id) is None @@ -195,12 +196,30 @@ assert 'ERROR Failed reading documents from ' in r.output +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_dataset_add_no_id(dataset_add_configs, index_empty, clirunner): + p = dataset_add_configs + index = index_empty + ds_no_id = load_dataset_definition(p.datasets_no_id) + + clirunner(['metadata', 'add', p.metadata]) + clirunner(['product', 'add', p.products]) + + # Check .hl.Doc2Dataset + doc2ds = Doc2Dataset(index) + _ds, _err = doc2ds(ds_no_id, 'file:///something') + assert _err == 'No id defined in dataset doc' + + +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_dataset_add(dataset_add_configs, index_empty, clirunner): p = dataset_add_configs index = index_empty r = clirunner(['dataset', 'add', p.datasets], expect_success=False) assert r.exit_code != 0 - assert 'Found no products' in r.output + assert 'Found no matching products' in r.output clirunner(['metadata', 'add', p.metadata]) clirunner(['product', 'add', p.products]) @@ -215,32 +234,31 @@ doc2ds = Doc2Dataset(index) _ds, _err = doc2ds(ds.doc, 
'file:///something') assert _err is None - assert str(_ds.id) == ds.id + assert _ds.id == ds.id assert _ds.metadata_doc == ds.doc # Check dataset search - r = clirunner(['dataset', 'search'], expect_success=True) - assert ds.id in r.output - assert ds_bad1.id not in r.output - assert ds.sources['ab'].id in r.output - assert ds.sources['ac'].sources['cd'].id in r.output + assert str(ds.id) in r.output + assert str(ds_bad1.id) not in r.output + assert str(ds.sources['ab'].id) in r.output + assert str(ds.sources['ac'].sources['cd'].id) in r.output - r = clirunner(['dataset', 'info', '-f', 'csv', ds.id]) - assert ds.id in r.output + r = clirunner(['dataset', 'info', '-f', 'csv', str(ds.id)]) + assert str(ds.id) in r.output - r = clirunner(['dataset', 'info', '-f', 'yaml', '--show-sources', ds.id]) - assert ds.sources['ae'].id in r.output + r = clirunner(['dataset', 'info', '-f', 'yaml', '--show-sources', str(ds.id)]) + assert str(ds.sources['ae'].id) in r.output - r = clirunner(['dataset', 'info', '-f', 'yaml', '--show-derived', ds.sources['ae'].id]) - assert ds.id in r.output + r = clirunner(['dataset', 'info', '-f', 'yaml', '--show-derived', str(ds.sources['ae'].id)]) + assert str(ds.id) in r.output ds_ = SimpleDocNav(gen_dataset_test_dag(1, force_tree=True)) assert ds_.id == ds.id x = index.datasets.get(ds.id, include_sources=True) - assert str(x.sources['ab'].id) == ds.sources['ab'].id - assert str(x.sources['ac'].sources['cd'].id) == ds.sources['ac'].sources['cd'].id + assert x.sources['ab'].id == ds.sources['ab'].id + assert x.sources['ac'].sources['cd'].id == ds.sources['ac'].sources['cd'].id check_skip_lineage_test(clirunner, index) check_no_product_match(clirunner, index) @@ -258,10 +276,6 @@ assert "ERROR Supplied product name" in r.output assert r.exit_code != 0 - # Check that deprecated option is accepted - r = clirunner(['dataset', 'add', '--auto-match', p.datasets]) - assert 'WARNING --auto-match option is deprecated' in r.output - # test dataset add eo3 r = clirunner(['dataset', 'add', p.datasets_eo3]) assert r.exit_code == 0 @@ -278,7 +292,9 @@ assert 'location' not in _ds.metadata_doc -def test_dataset_add_ambgious_products(dataset_add_configs, index_empty, clirunner): +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) +def test_dataset_add_ambiguous_products(dataset_add_configs, index_empty, clirunner): p = dataset_add_configs index = index_empty @@ -338,6 +354,8 @@ assert index.datasets.has(ds.id) is True +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_dataset_add_with_nans(dataset_add_configs, index_empty, clirunner): p = dataset_add_configs index = index_empty @@ -377,6 +395,8 @@ assert c_doc['val_is_neginf'] == '-Infinity' +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_dataset_add_inconsistent_measurements(dataset_add_configs, index_empty, clirunner): p = dataset_add_configs index = index_empty @@ -433,11 +453,11 @@ print(r.output) r = clirunner(['dataset', 'search', '-f', 'csv']) - assert ds1.id not in r.output - assert ds2.id not in r.output - assert ds3.id not in r.output - assert ds4.id in r.output - assert ds5.id in r.output + assert str(ds1.id) not in r.output + assert str(ds2.id) not in r.output + assert str(ds3.id) not in r.output + assert str(ds4.id) in r.output + assert str(ds5.id) in r.output def 
dataset_archive_prep(dataset_add_configs, index_empty, clirunner): @@ -455,14 +475,16 @@ return p, index, ds +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_dataset_archive_dry_run(dataset_add_configs, index_empty, clirunner): p, index, ds = dataset_archive_prep(dataset_add_configs, index_empty, clirunner) non_existent_uuid = '00000000-1036-5607-a62f-fde5e3fec985' # Single valid UUID is detected and not archived - single_valid_uuid = clirunner(['dataset', 'archive', '--dry-run', ds.id]) - assert ds.id in single_valid_uuid.output + single_valid_uuid = clirunner(['dataset', 'archive', '--dry-run', str(ds.id)]) + assert str(ds.id) in single_valid_uuid.output assert index.datasets.has(ds.id) is True # Single invalid UUID is detected @@ -476,7 +498,7 @@ valid_and_invalid_uuid = clirunner(['dataset', 'archive', '--dry-run', - ds.id, + str(ds.id), non_existent_uuid], expect_success=False) assert non_existent_uuid in valid_and_invalid_uuid.output @@ -487,7 +509,7 @@ 'archive', '--dry-run', '--archive-derived', - ds.id, + str(ds.id), non_existent_uuid ], expect_success=False) @@ -497,9 +519,11 @@ # Multiple Valid UUIDs # Not archived in the database and are shown in output - multiple_valid_uuid = clirunner(['dataset', 'archive', '--dry-run', ds.sources['ae'].id, ds.sources['ab'].id]) - assert ds.sources['ae'].id in multiple_valid_uuid.output - assert ds.sources['ab'].id in multiple_valid_uuid.output + multiple_valid_uuid = clirunner([ + 'dataset', 'archive', '--dry-run', + str(ds.sources['ae'].id), str(ds.sources['ab'].id)]) + assert str(ds.sources['ae'].id) in multiple_valid_uuid.output + assert str(ds.sources['ab'].id) in multiple_valid_uuid.output assert index.datasets.has(ds.sources['ae'].id) is True assert index.datasets.has(ds.sources['ab'].id) is True @@ -507,66 +531,72 @@ 'archive', '--dry-run', '--archive-derived', - ds.sources['ae'].id, - ds.sources['ab'].id + str(ds.sources['ae'].id), + str(ds.sources['ab'].id) ]) - assert ds.id in archive_derived.output - assert ds.sources['ae'].id in archive_derived.output + assert str(ds.id) in archive_derived.output + assert str(ds.sources['ae'].id) in archive_derived.output assert index.datasets.has(ds.id) is True +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_dataset_archive_restore_invalid(dataset_add_configs, index_empty, clirunner): p, index, ds = dataset_archive_prep(dataset_add_configs, index_empty, clirunner) non_existent_uuid = '00000000-1036-5607-a62f-fde5e3fec985' # With non-existent uuid, operations should halt. - r = clirunner(['dataset', 'archive', ds.id, non_existent_uuid], expect_success=False) - r = clirunner(['dataset', 'info', ds.id]) + r = clirunner(['dataset', 'archive', str(ds.id), non_existent_uuid], expect_success=False) + r = clirunner(['dataset', 'info', str(ds.id)]) assert 'status: archived' not in r.output assert index.datasets.has(ds.id) is True # With non-existent uuid, operations should halt. 
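The archive and restore operations exercised here through the CLI are assumed to have index-level counterparts of the same names; the snippet below is a sketch under that assumption, with a placeholder id::

    from datacube import Datacube

    dc = Datacube()
    ds_id = '10000000-2000-3000-4000-500000000000'   # placeholder dataset id

    if dc.index.datasets.has(ds_id):
        dc.index.datasets.archive([ds_id])   # soft-delete: dataset becomes 'archived'
        dc.index.datasets.restore([ds_id])   # bring it back to 'active'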
d_id = ds.sources['ac'].sources['cd'].id - r = clirunner(['dataset', 'archive', '--archive-derived', d_id, non_existent_uuid], expect_success=False) - r = clirunner(['dataset', 'info', ds.id, ds.sources['ab'].id, ds.sources['ac'].id]) + r = clirunner(['dataset', 'archive', '--archive-derived', str(d_id), non_existent_uuid], expect_success=False) + r = clirunner(['dataset', 'info', str(ds.id), str(ds.sources['ab'].id), str(ds.sources['ac'].id)]) assert 'status: active' in r.output assert 'status: archived' not in r.output assert index.datasets.has(ds.id) is True +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_dataset_archive_restore(dataset_add_configs, index_empty, clirunner): p, index, ds = dataset_archive_prep(dataset_add_configs, index_empty, clirunner) # Run for real - r = clirunner(['dataset', 'archive', ds.id]) - r = clirunner(['dataset', 'info', ds.id]) + r = clirunner(['dataset', 'archive', str(ds.id)]) + r = clirunner(['dataset', 'info', str(ds.id)]) assert 'status: archived' in r.output # restore dry run - r = clirunner(['dataset', 'restore', '--dry-run', ds.id]) - r = clirunner(['dataset', 'info', ds.id]) + r = clirunner(['dataset', 'restore', '--dry-run', str(ds.id)]) + r = clirunner(['dataset', 'info', str(ds.id)]) assert 'status: archived' in r.output # restore for real - r = clirunner(['dataset', 'restore', ds.id]) - r = clirunner(['dataset', 'info', ds.id]) + r = clirunner(['dataset', 'restore', str(ds.id)]) + r = clirunner(['dataset', 'info', str(ds.id)]) assert 'status: active' in r.output # archive derived d_id = ds.sources['ac'].sources['cd'].id - r = clirunner(['dataset', 'archive', '--archive-derived', d_id]) - r = clirunner(['dataset', 'info', ds.id, ds.sources['ab'].id, ds.sources['ac'].id]) + r = clirunner(['dataset', 'archive', '--archive-derived', str(d_id)]) + r = clirunner(['dataset', 'info', str(ds.id), str(ds.sources['ab'].id), str(ds.sources['ac'].id)]) assert 'status: active' not in r.output assert 'status: archived' in r.output # restore derived - r = clirunner(['dataset', 'restore', '--restore-derived', d_id]) - r = clirunner(['dataset', 'info', ds.id, ds.sources['ab'].id, ds.sources['ac'].id]) + r = clirunner(['dataset', 'restore', '--restore-derived', str(d_id)]) + r = clirunner(['dataset', 'info', str(ds.id), str(ds.sources['ab'].id), str(ds.sources['ac'].id)]) assert 'status: active' in r.output assert 'status: archived' not in r.output +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) def test_dataset_add_http(dataset_add_configs, index: Index, default_metadata_type: MetadataType, httpserver, clirunner): # pytest-localserver also looks good, it's been around for ages, but httpserver is the new cool diff -Nru datacube-1.8.7/integration_tests/test_double_ingestion.py datacube-1.8.9/integration_tests/test_double_ingestion.py --- datacube-1.8.7/integration_tests/test_double_ingestion.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_double_ingestion.py 2022-11-17 00:47:28.000000000 +0000 @@ -15,6 +15,8 @@ from integration_tests.test_end_to_end import INGESTER_CONFIGS +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) @pytest.mark.usefixtures('default_metadata_type', 'indexed_ls5_scene_products') def test_double_ingestion(clirunner, index, tmpdir, ingest_configs, 
example_ls5_dataset_paths): diff -Nru datacube-1.8.7/integration_tests/test_end_to_end.py datacube-1.8.9/integration_tests/test_end_to_end.py --- datacube-1.8.7/integration_tests/test_end_to_end.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_end_to_end.py 2022-11-17 00:47:28.000000000 +0000 @@ -11,7 +11,7 @@ from datacube.api.query import query_group_by from datacube.api.core import Datacube -from integration_tests.utils import assert_click_command, prepare_test_ingestion_configuration +from integration_tests.utils import prepare_test_ingestion_configuration PROJECT_ROOT = Path(__file__).parents[1] CONFIG_SAMPLES = PROJECT_ROOT / 'docs/config_samples/' diff -Nru datacube-1.8.7/integration_tests/test_full_ingestion.py datacube-1.8.9/integration_tests/test_full_ingestion.py --- datacube-1.8.7/integration_tests/test_full_ingestion.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_full_ingestion.py 2022-11-17 00:47:28.000000000 +0000 @@ -21,6 +21,8 @@ COMPLIANCE_CHECKER_NORMAL_LIMIT = 2 +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) @pytest.mark.timeout(20) @pytest.mark.usefixtures('default_metadata_type', 'indexed_ls5_scene_products') @@ -68,6 +70,8 @@ check_open_with_xarray(ds_path) +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) @pytest.mark.timeout(20) @pytest.mark.usefixtures('default_metadata_type', 'indexed_ls5_scene_products') diff -Nru datacube-1.8.7/integration_tests/test_index_datasets_search.py datacube-1.8.9/integration_tests/test_index_datasets_search.py --- datacube-1.8.7/integration_tests/test_index_datasets_search.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_index_datasets_search.py 2022-11-17 00:47:28.000000000 +0000 @@ -8,6 +8,8 @@ from integration_tests.test_full_ingestion import ensure_datasets_are_indexed +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) @pytest.mark.usefixtures('default_metadata_type', 'indexed_ls5_scene_products') def test_index_datasets_search_light(index, tmpdir, clirunner, @@ -98,6 +100,8 @@ assert len(results_with_uri[0].uri) == 2 +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) @pytest.mark.usefixtures('default_metadata_type', 'indexed_ls5_scene_products') def test_index_get_product_time_bounds(index, clirunner, example_ls5_dataset_paths): diff -Nru datacube-1.8.7/integration_tests/test_index_out_of_bound.py datacube-1.8.9/integration_tests/test_index_out_of_bound.py --- datacube-1.8.7/integration_tests/test_index_out_of_bound.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_index_out_of_bound.py 2022-11-17 00:47:28.000000000 +0000 @@ -15,6 +15,8 @@ import netCDF4 +# Current formulation of this test relies on non-EO3 test data +@pytest.mark.parametrize('datacube_env_name', ('datacube', )) @pytest.mark.timeout(20) @pytest.mark.usefixtures('default_metadata_type', 'indexed_ls5_scene_products') diff -Nru datacube-1.8.7/integration_tests/test_model.py datacube-1.8.9/integration_tests/test_model.py --- datacube-1.8.7/integration_tests/test_model.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/integration_tests/test_model.py 2022-11-17 00:47:28.000000000 +0000 @@ -2,7 +2,6 @@ # # Copyright (c) 2015-2020 
ODC Contributors # SPDX-License-Identifier: Apache-2.0 -import pytest from datacube.model import Dataset, DatasetType from typing import List @@ -38,49 +37,6 @@ assert str(d.crs) == 'EPSG:3577' assert d.extent is not None - def mk_ds(zone, datum="GDA94"): - return Dataset(product, { - "grid_spatial": { - "projection": { - "zone": zone, - "datum": datum, - "ellipsoid": "GRS80", - "orientation": "NORTH_UP", - "geo_ref_points": { - "ll": {"x": 537437.5, "y": 5900512.5}, - "lr": {"x": 781687.5, "y": 5900512.5}, - "ul": {"x": 537437.5, "y": 6117112.5}, - "ur": {"x": 781687.5, "y": 6117112.5} - }, - "map_projection": "UTM", - "resampling_option": "CUBIC_CONVOLUTION" - } - } - }) - - # Valid datum/zone as seen on our LS5 scene, should infer crs. - ds = mk_ds(-51, "GDA94") - with pytest.warns(DeprecationWarning): - assert str(ds.crs) == 'EPSG:28351' - assert ds.extent is not None - - ds = mk_ds("51S", "WGS84") - with pytest.warns(DeprecationWarning): - assert str(ds.crs) == 'EPSG:32751' - assert ds.extent is not None - - ds = mk_ds("51N", "WGS84") - with pytest.warns(DeprecationWarning): - assert str(ds.crs) == 'EPSG:32651' - assert ds.extent is not None - - # Invalid datum/zone, can't infer - ds = mk_ds(-60, "GDA94") - # Prints warning: Can't figure out projection: possibly invalid zone (-60) for datum ('GDA94')." - # We still return None, rather than error, as they didn't specify a CRS explicitly - with pytest.warns(DeprecationWarning): - assert ds.crs is None - # No projection specified in the dataset ds = Dataset(product, {}) assert ds.crs is None diff -Nru datacube-1.8.7/PKG-INFO datacube-1.8.9/PKG-INFO --- datacube-1.8.7/PKG-INFO 2022-06-07 00:40:30.637904600 +0000 +++ datacube-1.8.9/PKG-INFO 2022-11-17 00:48:13.818910100 +0000 @@ -1,148 +1,12 @@ Metadata-Version: 2.1 Name: datacube -Version: 1.8.7 +Version: 1.8.9 Summary: An analysis environment for satellite and other earth observation data Home-page: https://github.com/opendatacube/datacube-core Author: Open Data Cube Maintainer: Open Data Cube Maintainer-email: License: Apache License 2.0 -Description: Open Data Cube Core - =================== - - |Build Status| |Coverage Status| |Documentation Status| - - Overview - ======== - - The Open Data Cube Core provides an integrated gridded data - analysis environment for decades of analysis ready earth observation - satellite and related data from multiple satellite and other acquisition - systems. - - Documentation - ============= - - See the `user guide `__ for - installation and usage of the datacube, and for documentation of the API. - - `Join our Slack `__ if you need help - setting up or using the Open Data Cube. - - Please help us to keep the Open Data Cube community open and inclusive by - reading and following our `Code of Conduct `__. - - Requirements - ============ - - System - ~~~~~~ - - - PostgreSQL 10+ - - Python 3.8+ - - Developer setup - =============== - - 1. Clone: - - - ``git clone https://github.com/opendatacube/datacube-core.git`` - - 2. Create a Python environment for using the ODC. We recommend `conda `__ as the - easiest way to handle Python dependencies. - - :: - - conda create -n odc -c conda-forge python=3.8 datacube pre_commit - conda activate odc - - 3. Install a develop version of datacube-core. - - :: - - cd datacube-core - pip install --upgrade -e . - - 4. Install the `pre-commit `__ hooks to help follow ODC coding - conventions when committing with git. - - :: - - pre-commit install - - 5. 
Run unit tests + PyLint - ``./check-code.sh`` - - (this script approximates what is run by Travis. You can - alternatively run ``pytest`` yourself). Some test dependencies may need to be installed, attempt to install these using: - - ``pip install --upgrade -e '.[test]'`` - - If install for these fails please lodge them as issues. - - 6. **(or)** Run all tests, including integration tests. - - ``./check-code.sh integration_tests`` - - - Assumes a password-less Postgres database running on localhost called - - ``agdcintegration`` - - - Otherwise copy ``integration_tests/agdcintegration.conf`` to - ``~/.datacube_integration.conf`` and edit to customise. - - - Alternatively one can use the ``opendatacube/datacube-tests`` docker image to run - tests. This docker includes database server pre-configured for running - integration tests. Add ``--with-docker`` command line option as a first argument - to ``./check-code.sh`` script. - - :: - - ./check-code.sh --with-docker integration_tests - - - Developer setup on Ubuntu - ~~~~~~~~~~~~~~~~~~~~~~~~~ - - Building a Python virtual environment on Ubuntu suitable for development work. - - Install dependencies: - - :: - - sudo apt-get update - sudo apt-get install -y \ - autoconf automake build-essential make cmake \ - graphviz \ - python3-venv \ - python3-dev \ - libpq-dev \ - libyaml-dev \ - libnetcdf-dev \ - libudunits2-dev - - - Build the python virtual environment: - - :: - - pyenv="${HOME}/.envs/odc" # Change to suit your needs - mkdir -p "${pyenv}" - python3 -m venv "${pyenv}" - source "${pyenv}/bin/activate" - pip install -U pip wheel cython numpy - pip install -e '.[dev]' - pip install flake8 mypy pylint autoflake black - - - .. |Build Status| image:: https://github.com/opendatacube/datacube-core/workflows/build/badge.svg - :target: https://github.com/opendatacube/datacube-core/actions - .. |Coverage Status| image:: https://codecov.io/gh/opendatacube/datacube-core/branch/develop/graph/badge.svg - :target: https://codecov.io/gh/opendatacube/datacube-core - .. |Documentation Status| image:: https://readthedocs.org/projects/datacube-core/badge/?version=latest - :target: http://datacube-core.readthedocs.org/en/latest/ - Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Intended Audience :: Developers @@ -158,12 +22,12 @@ Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 Classifier: Topic :: Scientific/Engineering :: GIS Classifier: Topic :: Scientific/Engineering :: Information Analysis Requires-Python: >=3.8.0 Description-Content-Type: text/x-rst Provides-Extra: all -Provides-Extra: celery Provides-Extra: cf Provides-Extra: dev Provides-Extra: distributed @@ -171,3 +35,151 @@ Provides-Extra: performance Provides-Extra: s3 Provides-Extra: test +License-File: LICENSE + +Open Data Cube Core +=================== + +.. image:: https://github.com/opendatacube/datacube-core/workflows/build/badge.svg + :alt: Build Status + :target: https://github.com/opendatacube/datacube-core/actions + +.. image:: https://codecov.io/gh/opendatacube/datacube-core/branch/develop/graph/badge.svg + :alt: Coverage Status + :target: https://codecov.io/gh/opendatacube/datacube-core + +.. 
image:: https://readthedocs.org/projects/datacube-core/badge/?version=latest + :alt: Documentation Status + :target: http://datacube-core.readthedocs.org/en/latest/ + +Overview +======== + +The Open Data Cube Core provides an integrated gridded data +analysis environment for decades of analysis ready earth observation +satellite and related data from multiple satellite and other acquisition +systems. + +Documentation +============= + +See the `user guide `__ for +installation and usage of the datacube, and for documentation of the API. + +`Join our Slack `__ if you need help +setting up or using the Open Data Cube. + +Please help us to keep the Open Data Cube community open and inclusive by +reading and following our `Code of Conduct `__. + +Requirements +============ + +System +~~~~~~ + +- PostgreSQL 10+ +- Python 3.8+ + +Developer setup +=============== + +1. Clone: + + - ``git clone https://github.com/opendatacube/datacube-core.git`` + +2. Create a Python environment for using the ODC. We recommend `conda `__ as the + easiest way to handle Python dependencies. + +:: + + conda create -n odc -c conda-forge python=3.8 datacube pre_commit + conda activate odc + +3. Install a develop version of datacube-core. + +:: + + cd datacube-core + pip install --upgrade -e . + +4. Install the `pre-commit `__ hooks to help follow ODC coding + conventions when committing with git. + +:: + + pre-commit install + +5. Run unit tests + PyLint + ``./check-code.sh`` + + (this script approximates what is run by Travis. You can + alternatively run ``pytest`` yourself). Some test dependencies may need to be installed, attempt to install these using: + + ``pip install --upgrade -e '.[test]'`` + + If install for these fails please lodge them as issues. + +6. **(or)** Run all tests, including integration tests. + + ``./check-code.sh integration_tests`` + + - Assumes a password-less Postgres database running on localhost called + + ``agdcintegration`` + + - Otherwise copy ``integration_tests/agdcintegration.conf`` to + ``~/.datacube_integration.conf`` and edit to customise. + + +Alternatively one can use the ``opendatacube/datacube-tests`` docker image to run +tests. This docker includes database server pre-configured for running +integration tests. Add ``--with-docker`` command line option as a first argument +to ``./check-code.sh`` script. + +:: + + ./check-code.sh --with-docker integration_tests + + +To run individual test in docker container + +:: + + docker run -ti -v /home/ubuntu/datacube-core:/code opendatacube/datacube-tests:latest pytest integration_tests/test_filename.py::test_function_name + + +Developer setup on Ubuntu +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Building a Python virtual environment on Ubuntu suitable for development work. 
+ +Install dependencies: + +:: + + sudo apt-get update + sudo apt-get install -y \ + autoconf automake build-essential make cmake \ + graphviz \ + python3-venv \ + python3-dev \ + libpq-dev \ + libyaml-dev \ + libnetcdf-dev \ + libudunits2-dev + + +Build the python virtual environment: + +:: + + pyenv="${HOME}/.envs/odc" # Change to suit your needs + mkdir -p "${pyenv}" + python3 -m venv "${pyenv}" + source "${pyenv}/bin/activate" + pip install -U pip wheel cython numpy + pip install -e '.[dev]' + pip install flake8 mypy pylint autoflake black + + diff -Nru datacube-1.8.7/.pre-commit-config.yaml datacube-1.8.9/.pre-commit-config.yaml --- datacube-1.8.7/.pre-commit-config.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/.pre-commit-config.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -1,6 +1,6 @@ repos: - repo: https://github.com/adrienverge/yamllint.git - rev: v1.19.0 + rev: v1.28.0 hooks: - id: yamllint - repo: https://github.com/pre-commit/pre-commit-hooks @@ -15,10 +15,11 @@ - id: debug-statements - id: name-tests-test args: ['--django'] + exclude: ^tests/drivers/fail_drivers - id: requirements-txt-fixer - id: check-added-large-files - id: check-merge-conflict - repo: https://github.com/pre-commit/mirrors-pylint - rev: v2.4.4 # Use the sha / tag you want to point at + rev: v3.0.0a5 # Use the sha / tag you want to point at hooks: - id: pylint diff -Nru datacube-1.8.7/pytest.ini datacube-1.8.9/pytest.ini --- datacube-1.8.7/pytest.ini 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/pytest.ini 2022-11-17 00:47:28.000000000 +0000 @@ -3,4 +3,3 @@ testpaths = datacube tests integration_tests norecursedirs = .* build dist .git tmp* filterwarnings = ignore::FutureWarning - diff -Nru datacube-1.8.7/README.rst datacube-1.8.9/README.rst --- datacube-1.8.7/README.rst 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/README.rst 2022-11-17 00:47:28.000000000 +0000 @@ -1,7 +1,17 @@ Open Data Cube Core =================== -|Build Status| |Coverage Status| |Documentation Status| +.. image:: https://github.com/opendatacube/datacube-core/workflows/build/badge.svg + :alt: Build Status + :target: https://github.com/opendatacube/datacube-core/actions + +.. image:: https://codecov.io/gh/opendatacube/datacube-core/branch/develop/graph/badge.svg + :alt: Coverage Status + :target: https://codecov.io/gh/opendatacube/datacube-core + +.. image:: https://readthedocs.org/projects/datacube-core/badge/?version=latest + :alt: Documentation Status + :target: http://datacube-core.readthedocs.org/en/latest/ Overview ======== @@ -66,9 +76,9 @@ (this script approximates what is run by Travis. You can alternatively run ``pytest`` yourself). Some test dependencies may need to be installed, attempt to install these using: - + ``pip install --upgrade -e '.[test]'`` - + If install for these fails please lodge them as issues. 6. **(or)** Run all tests, including integration tests. 
@@ -93,6 +103,13 @@ ./check-code.sh --with-docker integration_tests +To run individual test in docker container + +:: + + docker run -ti -v /home/ubuntu/datacube-core:/code opendatacube/datacube-tests:latest pytest integration_tests/test_filename.py::test_function_name + + Developer setup on Ubuntu ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -102,34 +119,26 @@ :: - sudo apt-get update - sudo apt-get install -y \ - autoconf automake build-essential make cmake \ - graphviz \ - python3-venv \ - python3-dev \ - libpq-dev \ - libyaml-dev \ - libnetcdf-dev \ - libudunits2-dev + sudo apt-get update + sudo apt-get install -y \ + autoconf automake build-essential make cmake \ + graphviz \ + python3-venv \ + python3-dev \ + libpq-dev \ + libyaml-dev \ + libnetcdf-dev \ + libudunits2-dev Build the python virtual environment: :: - pyenv="${HOME}/.envs/odc" # Change to suit your needs - mkdir -p "${pyenv}" - python3 -m venv "${pyenv}" - source "${pyenv}/bin/activate" - pip install -U pip wheel cython numpy - pip install -e '.[dev]' - pip install flake8 mypy pylint autoflake black - - -.. |Build Status| image:: https://github.com/opendatacube/datacube-core/workflows/build/badge.svg - :target: https://github.com/opendatacube/datacube-core/actions -.. |Coverage Status| image:: https://codecov.io/gh/opendatacube/datacube-core/branch/develop/graph/badge.svg - :target: https://codecov.io/gh/opendatacube/datacube-core -.. |Documentation Status| image:: https://readthedocs.org/projects/datacube-core/badge/?version=latest - :target: http://datacube-core.readthedocs.org/en/latest/ + pyenv="${HOME}/.envs/odc" # Change to suit your needs + mkdir -p "${pyenv}" + python3 -m venv "${pyenv}" + source "${pyenv}/bin/activate" + pip install -U pip wheel cython numpy + pip install -e '.[dev]' + pip install flake8 mypy pylint autoflake black diff -Nru datacube-1.8.7/setup.cfg datacube-1.8.9/setup.cfg --- datacube-1.8.7/setup.cfg 2022-06-07 00:40:30.641904800 +0000 +++ datacube-1.8.9/setup.cfg 2022-11-17 00:48:13.818910100 +0000 @@ -11,6 +11,9 @@ E226 W503 W504 + E124 + F841 + W605 ignore-names = W,H,A,S,R,T,WS,X,Y,Z,XX,YY,XY,B,M,N,L,NX,NY [egg_info] diff -Nru datacube-1.8.7/setup.py datacube-1.8.9/setup.py --- datacube-1.8.7/setup.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/setup.py 2022-11-17 00:47:28.000000000 +0000 @@ -26,7 +26,6 @@ 'performance': ['ciso8601', 'bottleneck'], 'distributed': ['distributed', 'dask[distributed]'], 'doc': doc_require, - 'celery': ['celery>=4,<5', 'redis', 'kombu'], 's3': ['boto3', 'botocore'], 'test': tests_require, 'cf': ['compliance-checker>=4.0.0'], @@ -72,6 +71,7 @@ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Topic :: Scientific/Engineering :: GIS", "Topic :: Scientific/Engineering :: Information Analysis", ], @@ -82,6 +82,7 @@ ), package_data={ '': ['*.yaml', '*/*.yaml'], + 'datacube': ['py.typed'], }, scripts=[], install_requires=[ @@ -97,14 +98,15 @@ 'netcdf4', 'numpy', 'psycopg2', - 'lark-parser>=0.6.7', + 'lark', 'pandas', 'python-dateutil', 'pyyaml', - 'rasterio>=1.0.2', # Multi-band re-project fixed in that version + 'rasterio>=1.3.2', # Warping broken in 1.3.0 and 1.3.1 'sqlalchemy', + 'GeoAlchemy2', 'toolz', - 'xarray>=0.9', # >0.9 fixes most problems with `crs` attributes being lost + 'xarray>=0.9,!=2022.6.0', # >0.9 fixes most problems with `crs` attributes being lost ], extras_require=extras_require, tests_require=tests_require, diff -Nru 
datacube-1.8.7/spellcheck.yaml datacube-1.8.9/spellcheck.yaml --- datacube-1.8.7/spellcheck.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/spellcheck.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,21 @@ +matrix: +- name: Markdown + sources: + - 'docs/**/*.rst' + - 'docs/**/**/*.rst' + - 'docs/*.rst' + - '*.rst' + default_encoding: utf-8 + aspell: + lang: en + dictionary: + wordlists: + - wordlist.txt + encoding: utf-8 + pipeline: + - pyspelling.filters.text: + - pyspelling.filters.html: + comments: false + ignores: + - code + - pre diff -Nru datacube-1.8.7/tests/conftest.py datacube-1.8.9/tests/conftest.py --- datacube-1.8.7/tests/conftest.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/conftest.py 2022-11-17 00:47:28.000000000 +0000 @@ -20,7 +20,6 @@ from datacube.model import Measurement, MetadataType, DatasetType, Dataset from datacube.index.eo3 import prep_eo3 - AWS_ENV_VARS = ( "AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN" "AWS_DEFAULT_REGION AWS_DEFAULT_OUTPUT AWS_PROFILE " @@ -58,6 +57,39 @@ @pytest.fixture +def non_geo_dataset_file(data_folder): + return os.path.join(data_folder, "ds_non-geo.yaml") + + +@pytest.fixture +def non_geo_dataset_doc(non_geo_dataset_file): + (_, doc), *_ = read_documents(non_geo_dataset_file) + return doc + + +@pytest.fixture +def eo_dataset_file(data_folder): + return os.path.join(data_folder, "ds_eo.yaml") + + +@pytest.fixture +def eo_dataset_doc(eo_dataset_file): + (_, doc), *_ = read_documents(eo_dataset_file) + return doc + + +@pytest.fixture +def eo3_dataset_file(data_folder): + return os.path.join(data_folder, "ds_eo3.yaml") + + +@pytest.fixture +def eo3_dataset_doc(eo3_dataset_file): + (_, doc), *_ = read_documents(eo3_dataset_file) + return doc + + +@pytest.fixture def eo3_metadata_file(data_folder): return os.path.join(data_folder, "eo3.yaml") diff -Nru datacube-1.8.7/tests/data/ds_eo3.yml datacube-1.8.9/tests/data/ds_eo3.yml --- datacube-1.8.7/tests/data/ds_eo3.yml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/tests/data/ds_eo3.yml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,21 @@ +$schema: https://schemas.opendatacube.org/dataset +id: 7d41a4d0-2ab3-4da1-a010-ef48662ae8ef +product: + name: eo3_test + +location: "http://example.com/a.yml" + +crs: "epsg:3857" +properties: + datetime: 2020-04-20 00:26:43Z + odc:processing_datetime: 2020-05-16 10:56:18Z + +grids: + default: + shape: [100, 200] + transform: [10, 0, 100000, 0, -10, 200000, 0, 0, 1] +lineage: + a: [f80c30a5-1036-5607-a62f-fde5e3fec985] + bc: + - fb077e47-f62e-5869-9bd1-03584c2d7380 + - 13d3d75a-1d90-5ec0-8b86-e8be78275660 diff -Nru datacube-1.8.7/tests/data/ds_eo.yaml datacube-1.8.9/tests/data/ds_eo.yaml --- datacube-1.8.7/tests/data/ds_eo.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/tests/data/ds_eo.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,238 @@ +id: bbf3e21c-82b0-11e5-9ba1-a0000100fe80 +ga_label: LS5_TM_NBAR_P54_GANBAR01-002_090_084_19900302 +ga_level: P54 +product_type: nbar +creation_dt: 2015-03-22 01:49:21 +checksum_path: package.sha1 +platform: + code: LANDSAT_5 +instrument: + name: TM +format: + name: GeoTiff +acquisition: + aos: 1990-03-02 23:06:44 + los: 1990-03-02 23:14:02 + groundstation: + code: ASA + label: Alice Springs + eods_domain_code: '002' +extent: + coord: + ul: + lat: -33.64690729375812 + lon: 148.48815577279413 + ur: + lat: -33.588046887860685 + lon: 151.0941224695904 + ll: + lat: -35.61237326356207 + lon: 148.52368201445722 + lr: + lat: -35.549061820028044 + lon: 
151.19156117169499 + from_dt: 1990-03-02 23:11:04 + center_dt: 1990-03-02 23:11:16 + to_dt: 1990-03-02 23:11:28 +grid_spatial: + projection: + geo_ref_points: + ul: + x: 638000.0 + y: 6276000.0 + ur: + x: 880025.0 + y: 6276000.0 + ll: + x: 638000.0 + y: 6057975.0 + lr: + x: 880025.0 + y: 6057975.0 + zone: -55 # keeping zone for test_index_datasets_search_light + spatial_reference: "EPSG:28355" +image: + satellite_ref_point_start: + x: 90 + y: 84 + satellite_ref_point_end: + x: 90 + y: 84 + bands: + '1': + path: product/scene01/LS5_TM_NBAR_P54_GANBAR01-002_090_084_19900302_B10.tif + number: '10' + shape: + x: 9681 + y: 8721 + cell_size: + x: 25.0 + y: 25.0 + '2': + path: product/scene01/LS5_TM_NBAR_P54_GANBAR01-002_090_084_19900302_B20.tif + number: '20' + shape: + x: 9681 + y: 8721 + cell_size: + x: 25.0 + y: 25.0 + '3': + path: product/scene01/LS5_TM_NBAR_P54_GANBAR01-002_090_084_19900302_B30.tif + number: '30' + shape: + x: 9681 + y: 8721 + cell_size: + x: 25.0 + y: 25.0 +lineage: + machine: {} + source_datasets: + level1: + id: ee983642-1cd3-11e6-aaba-a0000100fe80 + ga_label: LS5_TM_OTH_P51_GALPGS01-002_090_084_20110107 + product_type: level1 + product_level: L1T + creation_dt: 2016-05-18 08:24:02 + size_bytes: 266897444 + checksum_path: package.sha1 + platform: + code: LANDSAT_5 + instrument: + name: TM + operation_mode: BUMPER + format: + name: GeoTIFF + usgs: + scene_id: LT50900842011007ASA00 + acquisition: + groundstation: + code: ASA + label: Alice Springs + eods_domain_code: '002' + extent: + coord: + ul: + lat: -33.658988 + lon: 148.467739 + ur: + lat: -33.599109 + lon: 151.117123 + ll: + lat: -35.622441 + lon: 148.502756 + lr: + lat: -35.558039 + lon: 151.215043 + from_dt: 2011-01-07 23:40:00 + center_dt: 2011-01-07 23:40:13 + to_dt: 2011-01-07 23:40:26 + grid_spatial: + projection: + geo_ref_points: + ul: + x: 636087.5 + y: 6274687.5 + ur: + x: 882112.5 + y: 6274687.5 + ll: + x: 636087.5 + y: 6056887.5 + lr: + x: 882112.5 + y: 6056887.5 + spatial_reference: "EPSG:28355" + orientation: NORTH_UP + resampling_option: CUBIC_CONVOLUTION + zone: -55 # keeping zone for test_index_datasets_search_light + image: + satellite_ref_point_start: + x: 90 + y: 84 + bands: + '1': + path: product/LT50900842011007ASA00_B1.TIF + type: reflective + label: Visible Blue + number: '1' + cell_size: 25.0 + '2': + path: product/LT50900842011007ASA00_B2.TIF + type: reflective + label: Visible Green + number: '2' + cell_size: 25.0 + '3': + path: product/LT50900842011007ASA00_B3.TIF + type: reflective + label: Visible Red + number: '3' + cell_size: 25.0 + '4': + path: product/LT50900842011007ASA00_B4.TIF + type: reflective + label: Near Infrared + number: '4' + cell_size: 25.0 + '5': + path: product/LT50900842011007ASA00_B5.TIF + type: reflective + label: Middle Infrared 1 + number: '5' + cell_size: 25.0 + '6': + path: product/LT50900842011007ASA00_B6.TIF + type: thermal + label: Thermal Infrared + number: '6' + cell_size: 100.0 + '7': + path: product/LT50900842011007ASA00_B7.TIF + type: reflective + label: Middle Infrared 2 + number: '7' + cell_size: 25.0 + lineage: + algorithm: + name: LPGS + version: 12.7.0 + parameters: {} + machine: {} + source_datasets: + satellite_telemetry_data: + id: 100a8412-6017-11e5-b4fe-ac162d791418 + ga_label: LS5_TM_STD-RCC_P00_L5TB2011007233607ASA114_0_0_20110107T233607Z20110107T234256 + ga_level: P00 + product_type: satellite_telemetry_data + creation_dt: 2015-09-21 04:13:07 + size_bytes: 4334527032 + checksum_path: package.sha1 + platform: + code: LANDSAT_5 + instrument: + 
name: TM + operation_mode: BUMPER + format: + name: RCC + usgs: + interval_id: L5TB2011007233607ASA114 + acquisition: + aos: 2011-01-07 23:36:07 + los: 2011-01-07 23:42:56 + groundstation: + code: ASA + label: Alice Springs + eods_domain_code: '002' + platform_orbit: 142833 + image: + bands: {} + lineage: + machine: {} + source_datasets: {} +ancillary_files: +- type: xml + path: product/metadata.xml +- type: txt + path: product/scene01/report.txt diff -Nru datacube-1.8.7/tests/data/ds_non-geo.yaml datacube-1.8.9/tests/data/ds_non-geo.yaml --- datacube-1.8.7/tests/data/ds_non-geo.yaml 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/tests/data/ds_non-geo.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,19 @@ +creation_dt: '2017-11-05T00:00:00' +id: f80c30a5-1036-5607-a62f-fde5e3fec985 +label: A0001 +ga_label: LS5_TM_NBAR_P54_GANBAR01-002_090_084_19900302 +ga_level: P54 +checksum_path: package.sha1 +platform: + code: LANDSAT_5 +instrument: + name: TM +acquisition: + aos: 1990-03-02 23:06:44 + los: 1990-03-02 23:14:02 + groundstation: + code: ASA + label: Alice Springs + eods_domain_code: '002' +n: 1 +product_type: A diff -Nru datacube-1.8.7/tests/data/ingest_config.yaml datacube-1.8.9/tests/data/ingest_config.yaml --- datacube-1.8.7/tests/data/ingest_config.yaml 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/data/ingest_config.yaml 2022-11-17 00:47:28.000000000 +0000 @@ -21,4 +21,4 @@ '8': dtype: int16 nodata: -999 - interpolation: linear \ No newline at end of file + interpolation: linear diff -Nru datacube-1.8.7/tests/data/ls8-eods-nbar/data/LS8_OLI_TIRS_NBAR_P54_GANBAR01-015_101_078_20141012/scene01/report.txt datacube-1.8.9/tests/data/ls8-eods-nbar/data/LS8_OLI_TIRS_NBAR_P54_GANBAR01-015_101_078_20141012/scene01/report.txt --- datacube-1.8.7/tests/data/ls8-eods-nbar/data/LS8_OLI_TIRS_NBAR_P54_GANBAR01-015_101_078_20141012/scene01/report.txt 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/data/ls8-eods-nbar/data/LS8_OLI_TIRS_NBAR_P54_GANBAR01-015_101_078_20141012/scene01/report.txt 2022-11-17 00:47:28.000000000 +0000 @@ -50,4 +50,3 @@ Long: 133.9623300 Long: 136.2696200 North: 7016987.500 North: 7016987.500 East: 397012.500 ------------ East: 626012.500 - diff -Nru datacube-1.8.7/tests/drivers/test_rio_reader.py datacube-1.8.9/tests/drivers/test_rio_reader.py --- datacube-1.8.7/tests/drivers/test_rio_reader.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/drivers/test_rio_reader.py 2022-11-17 00:47:28.000000000 +0000 @@ -53,7 +53,7 @@ def test_rd_internals_crs(): - from rasterio.crs import CRS as RioCRS + from rasterio.crs import CRS as RioCRS # noqa: N811 assert _dc_crs(None) is None assert _dc_crs(RioCRS()) is None diff -Nru datacube-1.8.7/tests/index/test_api_index_dataset.py datacube-1.8.9/tests/index/test_api_index_dataset.py --- datacube-1.8.7/tests/index/test_api_index_dataset.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/index/test_api_index_dataset.py 2022-11-17 00:47:28.000000000 +0000 @@ -155,23 +155,23 @@ 'added', 'added_by', 'archived']) -class MockIndex(object): - def __init__(self, db): - self._db = db - - class MockDb(object): def __init__(self): self.dataset = {} self.dataset_source = set() @contextmanager - def begin(self): + def _connect(self): yield self - @contextmanager - def connect(self): - yield self + def begin(self): + pass + + def commit(self): + pass + + def rollback(self): + pass def get_dataset(self, id): return self.dataset.get(id, None) @@ -198,7 +198,7 @@ 
self.dataset_source.add((classifier, dataset_id, source_dataset_id)) -class MockTypesResource(object): +class MockTypesResource: def __init__(self, type_): self.type = type_ @@ -208,11 +208,28 @@ def get_by_name(self, *args, **kwargs): return self.type + @contextmanager + def _db_connection(self, transaction=False): + yield MockDb() + + +class MockIndex: + def __init__(self, db, product): + self._db = db + self.products = MockTypesResource(product) + + def thread_transaction(self): + return None + + @contextmanager + def _active_connection(self, transaction=False): + yield self._db + def test_index_dataset(): mock_db = MockDb() - mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE) - datasets = DatasetResource(mock_db, mock_types) + mock_index = MockIndex(mock_db, _EXAMPLE_DATASET_TYPE) + datasets = DatasetResource(mock_db, mock_index) dataset = datasets.add(_EXAMPLE_NBAR_DATASET) ids = {d.id for d in mock_db.dataset.values()} @@ -237,8 +254,8 @@ def test_index_already_ingested_source_dataset(): mock_db = MockDb() - mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE) - datasets = DatasetResource(mock_db, mock_types) + mock_index = MockIndex(mock_db, _EXAMPLE_DATASET_TYPE) + datasets = DatasetResource(mock_db, mock_index) dataset = datasets.add(_EXAMPLE_NBAR_DATASET.sources['ortho']) assert len(mock_db.dataset) == 2 @@ -251,8 +268,8 @@ def test_index_two_levels_already_ingested(): mock_db = MockDb() - mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE) - datasets = DatasetResource(mock_db, mock_types) + mock_index = MockIndex(mock_db, _EXAMPLE_DATASET_TYPE) + datasets = DatasetResource(mock_db, mock_index) dataset = datasets.add(_EXAMPLE_NBAR_DATASET.sources['ortho'].sources['satellite_telemetry_data']) assert len(mock_db.dataset) == 1 diff -Nru datacube-1.8.7/tests/index/test_hl_index.py datacube-1.8.9/tests/index/test_hl_index.py --- datacube-1.8.7/tests/index/test_hl_index.py 1970-01-01 00:00:00.000000000 +0000 +++ datacube-1.8.9/tests/index/test_hl_index.py 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,26 @@ +import pytest + +from unittest.mock import MagicMock + +from datacube.index.hl import Doc2Dataset + + +def test_support_validation(non_geo_dataset_doc, eo_dataset_doc): + idx = MagicMock() + + idx.supports_legacy = False + idx.supports_nongeo = False + with pytest.raises(ValueError, match="EO3 cannot be set to False"): + resolver = Doc2Dataset(idx, eo3=False) + + idx.supports_legacy = True + idx.supports_nongeo = False + resolver = Doc2Dataset(idx, products=["product_a"], eo3=False) + _, err = resolver(non_geo_dataset_doc, "//location/") + assert "Non-geospatial metadata formats" in err + + idx.supports_legacy = False + idx.supports_nongeo = True + resolver = Doc2Dataset(idx, products=["product_a"], eo3=False) + _, err = resolver(eo_dataset_doc, "//location/") + assert "Legacy metadata formats" in err diff -Nru datacube-1.8.7/tests/storage/test_base.py datacube-1.8.9/tests/storage/test_base.py --- datacube-1.8.7/tests/storage/test_base.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/storage/test_base.py 2022-11-17 00:47:28.000000000 +0000 @@ -77,4 +77,24 @@ ds = mk_sample_dataset(bands, uri='/not/a/uri') band = BandInfo(ds, 'a') - assert(band.uri_scheme is '') + assert band.uri_scheme is '' # noqa: F632 + + +def test_band_info_with_url_mangling(): + def url_mangler(raw): + return raw.replace("tmp", "tmp/mangled") + + bands = [dict(name=n, + dtype='uint8', + units='K', + nodata=33, + path=n+'.tiff') + for n in 'a b c'.split(' ')] + + ds = 
mk_sample_dataset(bands, + uri='file:///tmp/datataset.yml', + format='GeoTIFF') + + binfo = BandInfo(ds, 'b', patch_url=url_mangler) + assert binfo.name == 'b' + assert binfo.uri == 'file:///tmp/mangled/b.tiff' diff -Nru datacube-1.8.7/tests/storage/test_netcdfwriter.py datacube-1.8.9/tests/storage/test_netcdfwriter.py --- datacube-1.8.7/tests/storage/test_netcdfwriter.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/storage/test_netcdfwriter.py 2022-11-17 00:47:28.000000000 +0000 @@ -238,35 +238,39 @@ EXAMPLE_FLAGS_DEF = { - 'band_1_saturated': { - 'bits': 0, - 'values': { - 0: True, - 1: False - }, - 'description': 'Band 1 is saturated'}, - 'band_2_saturated': { - 'bits': 1, - 'values': { - 0: True, - 1: False - }, - 'description': 'Band 2 is saturated'}, - 'band_3_saturated': { - 'bits': 2, - 'values': { - 0: True, - 1: False - }, - 'description': 'Band 3 is saturated'}, - 'land_sea': { - 'bits': 9, - 'values': { - 0: 'sea', - 1: 'land' - }, - 'description': 'Land/Sea observation'}, - } + 'band_1_saturated': { + 'bits': 0, + 'values': { + 0: True, + 1: False + }, + 'description': 'Band 1 is saturated' + }, + 'band_2_saturated': { + 'bits': 1, + 'values': { + 0: True, + 1: False + }, + 'description': 'Band 2 is saturated' + }, + 'band_3_saturated': { + 'bits': 2, + 'values': { + 0: True, + 1: False + }, + 'description': 'Band 3 is saturated' + }, + 'land_sea': { + 'bits': 9, + 'values': { + 0: 'sea', + 1: 'land' + }, + 'description': 'Land/Sea observation' + }, +} def test_measurements_model_netcdfflags(): diff -Nru datacube-1.8.7/tests/storage/test_storage.py datacube-1.8.9/tests/storage/test_storage.py --- datacube-1.8.7/tests/storage/test_storage.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/storage/test_storage.py 2022-11-17 00:47:28.000000000 +0000 @@ -494,22 +494,6 @@ assert np.all(dest == -999) - def test_read_from_file_with_missing_crs(self, no_crs_gdal_path): - """ - We need to be able to read from data files even when GDAL can't automatically gather all the metdata. - - The :class:`RasterFileDataSource` is able to override the nodata, CRS and transform attributes if necessary. 
- """ - crs = epsg4326 - nodata = -999 - transform = Affine(0.01, 0.0, 111.975, - 0.0, 0.01, -9.975) - data_source = RasterFileDataSource(no_crs_gdal_path, bandnumber=1, nodata=nodata, crs=crs, transform=transform) - with pytest.warns(DeprecationWarning): - with data_source.open() as src: - dest1 = src.read() - assert dest1.shape == (10, 10) - @pytest.fixture def make_sample_netcdf(tmpdir): diff -Nru datacube-1.8.7/tests/test_dynamic_db_passwd.py datacube-1.8.9/tests/test_dynamic_db_passwd.py --- datacube-1.8.7/tests/test_dynamic_db_passwd.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/test_dynamic_db_passwd.py 2022-11-17 00:47:28.000000000 +0000 @@ -20,11 +20,9 @@ def test_dynamic_password(): - url = URL.create( - 'postgresql', - host="fake_host", database="fake_database", port=6543, - username="fake_username", password="fake_password" - ) + url = URL.create('postgresql', + host="fake_host", database="fake_database", port=6543, + username="fake_username", password="fake_password") engine = PostgresDb._create_engine(url) counter[0] = 0 last_base[0] = None diff -Nru datacube-1.8.7/tests/test_eo3.py datacube-1.8.9/tests/test_eo3.py --- datacube-1.8.7/tests/test_eo3.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/test_eo3.py 2022-11-17 00:47:28.000000000 +0000 @@ -9,11 +9,10 @@ from datacube.model import Dataset from datacube.index.eo3 import ( + EO3Grid, prep_eo3, - eo3_lonlat_bbox, add_eo3_parts, - is_doc_eo3, - grid2points, + is_doc_eo3, eo3_grid_spatial, is_doc_geo, ) SAMPLE_DOC = '''--- @@ -67,32 +66,105 @@ return mk_sample_product("eo3_product", metadata_type=eo3_metadata) -def test_grid2points(): +def test_grid_points(): identity = list(Affine.translation(0, 0)) - grid = dict(shape=(11, 22), - transform=identity) + grid = EO3Grid({ + "shape": (11, 22), + "transform": identity + }) - pts = grid2points(grid) + pts = grid.points() assert len(pts) == 4 assert pts == [(0, 0), (22, 0), (22, 11), (0, 11)] - pts_ = grid2points(grid, ring=True) + pts_ = grid.points(ring=True) assert len(pts_) == 5 assert pts == pts_[:4] assert pts_[0] == pts_[-1] - grid['transform'] = tuple(Affine.translation(100, 0)) - pts = grid2points(grid) + grid = EO3Grid({ + "shape": (11, 22), + "transform": tuple(Affine.translation(100, 0)) + }) + pts = grid.points() assert pts == [(100, 0), (122, 0), (122, 11), (100, 11)] for bad in [{}, dict(shape=(1, 1)), dict(transform=identity)]: with pytest.raises(ValueError): - grid2points(bad) + grid = EO3Grid(bad) -def test_is_eo3(sample_doc, sample_doc_180): +def test_bad_grids(): identity = list(Affine.translation(0, 0)) + bad_grids = [ + # No Shape + { + "transform": identity, + }, + # Non 2-d Shape (NB: geospatial dimensions only. Other dimensions are handled elsewhere.) 
+ { + "shape": (1024,), + "transform": identity, + }, + { + "shape": (1024, 564, 256), + "transform": identity, + }, + # No Transform + { + "shape": (1024, 256), + }, + # Formally invalid affine transform (must be 6 or 9 elements) + { + "shape": (1024, 256), + "transform": [343.3], + }, + { + "shape": (1024, 256), + "transform": [343, 23345, 234, 9, -65.3], + }, + { + "shape": (1024, 256), + "transform": [343, 23345, 234, 9, -65.3, 1, 0], + }, + { + "shape": (1024, 256), + "transform": [343, 23345, 234, 9, -65.3, 1, 0, 7435.24563, 0.0001234, 888.888, 3, 3, 2], + }, + # Formally invalid affine transform (all elements must be numbers) + { + "shape": (1024, 256), + "transform": [343, 23345, 234, 9, -65.3, "six"] + }, + # Formally invalid affine transform (in 9 element form, last 3 numbers must be 0,0,1) + { + "shape": (1024, 256), + "transform": [343, 23345, 234, 9, -65.3, 1, 3, 3, 2], + }, + ] + for bad_grid in bad_grids: + with pytest.raises(ValueError): + grid = EO3Grid(bad_grid) + + +def test_eo3_grid_spatial_nogrids(): + with pytest.raises(ValueError, match="grids.foo"): + oo = eo3_grid_spatial( + { + "crs": "EPSG:4326", + "grids": { + "default": { + "shape": (1024, 256), + "transform": [343, 23345, 234, 9, -65.3, 1], + } + } + }, + grid_name="foo" + ) + + +def test_is_eo3(sample_doc, sample_doc_180): assert is_doc_eo3(sample_doc) is True assert is_doc_eo3(sample_doc_180) is True @@ -105,6 +177,15 @@ is_doc_eo3({'$schema': 'https://schemas.opendatacube.org/eo4'}) +def test_is_geo(sample_doc, sample_doc_180): + assert is_doc_geo(sample_doc) is True + assert is_doc_geo(sample_doc_180) is True + + assert is_doc_geo({}) is False + assert is_doc_geo({'crs': 'EPSG:4326'}) is False + assert is_doc_geo({'crs': 'EPSG:4326', 'extent': "dummy_extent"}) is True + + def test_add_eo3(sample_doc, sample_doc_180, eo3_product): doc = add_eo3_parts(sample_doc) assert doc is not sample_doc @@ -145,9 +226,6 @@ with pytest.raises(ValueError): add_eo3_parts(doc) - with pytest.raises(ValueError): - eo3_lonlat_bbox({}) - def test_prep_eo3(sample_doc, sample_doc_180, eo3_metadata): rdr = eo3_metadata.dataset_reader(prep_eo3(sample_doc)) diff -Nru datacube-1.8.7/tests/test_geometry.py datacube-1.8.9/tests/test_geometry.py --- datacube-1.8.7/tests/test_geometry.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/test_geometry.py 2022-11-17 00:47:28.000000000 +0000 @@ -922,7 +922,6 @@ def test_crs(): - CRS = geometry.CRS custom_crs = geometry.CRS("""PROJCS["unnamed", GEOGCS["Unknown datum based upon the custom spheroid", DATUM["Not specified (based on custom spheroid)", SPHEROID["Custom spheroid",6371007.181,0]], @@ -947,7 +946,7 @@ assert crs.dimensions == ('latitude', 'longitude') assert crs.epsg == 4326 - crs2 = CRS(crs) + crs2 = geometry.CRS(crs) assert crs2 == crs assert crs.proj == crs2.proj @@ -1217,8 +1216,8 @@ def run_test(A, n, tol=1e-5): X = [(uniform(0, 1), uniform(0, 1)) for _ in range(n)] - Y = [A*x for x in X] - A_ = affine_from_pts(X, Y) + Y = [A*x for x in X] # noqa: N806 + A_ = affine_from_pts(X, Y) # noqa: N806 assert get_diff(A, A_) < tol diff -Nru datacube-1.8.7/tests/test_load_data.py datacube-1.8.9/tests/test_load_data.py --- datacube-1.8.7/tests/test_load_data.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/test_load_data.py 2022-11-17 00:47:28.000000000 +0000 @@ -75,6 +75,74 @@ assert progress_call_data == [(1, 2), (2, 2)] +def test_load_data_with_url_mangling(tmpdir): + actual_tmpdir = Path(str(tmpdir)) + recorded_tmpdir = Path(str(tmpdir / "not" / "actual" / 
"location")) + + def url_mangler(raw): + actual_uri_root = actual_tmpdir.absolute().as_uri() + recorded_uri_root = recorded_tmpdir.absolute().as_uri() + return raw.replace(recorded_uri_root, actual_uri_root) + + group_by = query_group_by('time') + spatial = dict(resolution=(15, -15), + offset=(11230, 1381110),) + + nodata = -999 + aa = mk_test_image(96, 64, 'int16', nodata=nodata) + + ds, gbox = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)], + tmpdir, + prefix='ds1-', + timestamp='2018-07-19', + base_folder_of_record=recorded_tmpdir, + **spatial) + assert ds.time is not None + + ds2, _ = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)], + tmpdir, + prefix='ds2-', + timestamp='2018-07-19', + base_folder_of_record=recorded_tmpdir, + **spatial) + assert ds.time is not None + assert ds.time == ds2.time + + sources = Datacube.group_datasets([ds], 'time') + sources2 = Datacube.group_datasets([ds, ds2], group_by) + + mm = ['aa'] + mm = [ds.type.measurements[k] for k in mm] + + ds_data = Datacube.load_data(sources, gbox, mm, patch_url=url_mangler) + assert ds_data.aa.nodata == nodata + np.testing.assert_array_equal(aa, ds_data.aa.values[0]) + + ds_data = Datacube.load_data(sources, gbox, mm, dask_chunks={'x': 8, 'y': 8}, patch_url=url_mangler) + assert ds_data.aa.nodata == nodata + np.testing.assert_array_equal(aa, ds_data.aa.values[0]) + + custom_fuser_call_count = 0 + + def custom_fuser(dest, delta): + nonlocal custom_fuser_call_count + custom_fuser_call_count += 1 + dest[:] += delta + + progress_call_data = [] + + def progress_cbk(n, nt): + progress_call_data.append((n, nt)) + + ds_data = Datacube.load_data(sources2, gbox, mm, fuse_func=custom_fuser, + progress_cbk=progress_cbk, patch_url=url_mangler) + assert ds_data.aa.nodata == nodata + assert custom_fuser_call_count > 0 + np.testing.assert_array_equal(nodata + aa + aa, ds_data.aa.values[0]) + + assert progress_call_data == [(1, 2), (2, 2)] + + def test_load_data_cbk(tmpdir): from datacube.api import TerminateCurrentLoad diff -Nru datacube-1.8.7/tests/test_testutils.py datacube-1.8.9/tests/test_testutils.py --- datacube-1.8.7/tests/test_testutils.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/test_testutils.py 2022-11-17 00:47:28.000000000 +0000 @@ -3,9 +3,8 @@ # Copyright (c) 2015-2020 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import pytest -from datacube.model import Dataset from datacube.testutils.threads import FakeThreadPoolExecutor -from datacube.testutils import mk_sample_xr_dataset, mk_sample_product, mk_sample_dataset +from datacube.testutils import mk_sample_xr_dataset, mk_sample_dataset from datacube.testutils.io import native_geobox diff -Nru datacube-1.8.7/tests/test_utils_dask.py datacube-1.8.9/tests/test_utils_dask.py --- datacube-1.8.7/tests/test_utils_dask.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/test_utils_dask.py 2022-11-17 00:47:28.000000000 +0000 @@ -31,21 +31,26 @@ def test_compute_tasks(): - client = start_local_dask(threads_per_worker=1, - dashboard_address=None) - - tasks = (dask.delayed(x) for x in range(100)) - xx = [x for x in compute_tasks(tasks, client)] - assert xx == [x for x in range(100)] - - client.close() - del client + try: + client = start_local_dask(threads_per_worker=1, + dashboard_address=None) + + tasks = (dask.delayed(x) for x in range(100)) + xx = [x for x in compute_tasks(tasks, client)] + assert xx == [x for x in range(100)] + finally: + client.close() + del client def 
test_start_local_dask_dashboard_link(monkeypatch): monkeypatch.setenv('JUPYTERHUB_SERVICE_PREFIX', 'user/test/') - client = start_local_dask() - assert client.dashboard_link.startswith('user/test/proxy/') + try: + client = start_local_dask() + assert client.dashboard_link.startswith('user/test/proxy/') + finally: + client.close() + del client def test_partition_map(): @@ -61,16 +66,17 @@ def test_pmap(): - client = start_local_dask(threads_per_worker=1, - dashboard_address=None) - - xx_it = pmap(str, range(101), client=client) - xx = [x for x in xx_it] - - assert xx == [str(x) for x in range(101)] - - client.close() - del client + try: + client = start_local_dask(threads_per_worker=1, + dashboard_address=None) + + xx_it = pmap(str, range(101), client=client) + xx = [x for x in xx_it] + + assert xx == [str(x) for x in range(101)] + finally: + client.close() + del client @pytest.mark.parametrize("blob", [ diff -Nru datacube-1.8.7/tests/test_utils_docs.py datacube-1.8.9/tests/test_utils_docs.py --- datacube-1.8.7/tests/test_utils_docs.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/test_utils_docs.py 2022-11-17 00:47:28.000000000 +0000 @@ -12,6 +12,7 @@ from collections import OrderedDict from types import SimpleNamespace from typing import Tuple, Iterable +from uuid import UUID, uuid4 import numpy as np import pytest @@ -55,7 +56,23 @@ ({'a': 1}, {'b': 1}, [(('a',), 1, MISSING), (('b',), MISSING, 1)]), ({'a': {'b': 1}}, {'a': {'b': 2}}, [(('a', 'b'), 1, 2)]), ({}, {'b': 1}, [(('b',), MISSING, 1)]), - ({'a': {'c': 1}}, {'a': {'b': 1}}, [(('a', 'b'), MISSING, 1), (('a', 'c'), 1, MISSING)]) + ({'a': {'c': 1}}, {'a': {'b': 1}}, [(('a', 'b'), MISSING, 1), (('a', 'c'), 1, MISSING)]), + # Test tuple vs list, for geometry coordinates handling and expect no changes + ( + [[635235.0, -2930535.0], [635235.0, -2930535.0]], + ((635235.0, -2930535.0), (635235.0, -2930535.0),), + [] + ), + ( + [[[635235.0, -2930535.0], [635235.0, -2930535.0]]], + (((635235.0, -2930535.0), (635235.0, -2930535.0)),), + [] + ), + ( + {'coordinates': [[[635235.0, -2930535.0], [635235.0, -2930535.0]]]}, + {'coordinates': (((635235.0, -2930535.0), (635235.0, -2930535.0)),)}, + [] + ) ] @@ -221,7 +238,7 @@ assert a.id != b.id assert a.doc['creation_dt'] == b.doc['creation_dt'] - assert isinstance(a.id, str) + assert isinstance(a.id, UUID) assert a.sources == {} a1, a2 = [dataset_maker(i)('A', product_type='eo') for i in (0, 1)] @@ -305,10 +322,13 @@ +--> E """ + nu_map = {n: uuid4() for n in ['A', 'B', 'C', 'D', 'E']} + un_map = {u: n for n, u in nu_map.items()} + def node(name, **kwargs): - return dict(id=name, lineage=dict(source_datasets=kwargs)) + return dict(id=nu_map[name], lineage=dict(source_datasets=kwargs)) - A, _, C, _, _ = make_graph_abcde(node) + A, _, C, _, _ = make_graph_abcde(node) # noqa: N806 rdr = SimpleDocNav(A) assert rdr.doc == A @@ -320,7 +340,7 @@ assert isinstance(rdr.sources_path, tuple) def visitor(node, name=None, depth=0, out=None): - s = '{}:{}:{:d}'.format(node.id, name if name else '..', depth) + s = '{}:{}:{:d}'.format(un_map[node.id], name if name else '..', depth) out.append(s) expect_preorder = ''' @@ -351,17 +371,17 @@ fv = flatten_datasets(rdr) - assert len(fv['A']) == 1 - assert len(fv['C']) == 2 - assert len(fv['E']) == 1 - assert set(fv.keys()) == set('ABCDE') + assert len(fv[nu_map['A']]) == 1 + assert len(fv[nu_map['C']]) == 2 + assert len(fv[nu_map['E']]) == 1 + assert set(fv.keys()) == set(un_map.keys()) fv, dg = flatten_datasets(rdr, with_depth_grouping=True) - assert 
len(fv['A']) == 1 - assert len(fv['C']) == 2 - assert len(fv['E']) == 1 - assert set(fv.keys()) == set('ABCDE') + assert len(fv[nu_map['A']]) == 1 + assert len(fv[nu_map['C']]) == 2 + assert len(fv[nu_map['E']]) == 1 + assert set(fv.keys()) == set(un_map.keys()) assert isinstance(dg, list) assert len(dg) == 4 assert [len(dss) for dss in dg] == [1, 3, 2, 1] @@ -369,10 +389,9 @@ def to_set(xx): return set(x.id for x in xx) - assert [set(s) for s in ('A', - 'BCE', - 'CD', - 'D')] == [to_set(xx) for xx in dg] + assert [set(nu_map[n] for n in s) + for s in ('A', 'BCE', 'CD', 'D') + ] == [to_set(xx) for xx in dg] with pytest.raises(ValueError): SimpleDocNav([]) @@ -509,11 +528,10 @@ 'c': datetime(2016, 3, 11), 'd': np.dtype('int16'), }).items()) == [ - ('a', (1.0, 2.0, 3.0)), - ('b', 'Infinity'), - ('c', '2016-03-11T00:00:00'), - ('d', 'int16'), - ] + ('a', (1.0, 2.0, 3.0)), + ('b', 'Infinity'), + ('c', '2016-03-11T00:00:00'), + ('d', 'int16'), ] # Converts keys to strings: assert sorted(jsonify_document({1: 'a', '2': Decimal('2')}).items()) == [ diff -Nru datacube-1.8.7/tests/test_utils_other.py datacube-1.8.9/tests/test_utils_other.py --- datacube-1.8.7/tests/test_utils_other.py 2022-06-07 00:39:59.000000000 +0000 +++ datacube-1.8.9/tests/test_utils_other.py 2022-11-17 00:47:28.000000000 +0000 @@ -14,14 +14,12 @@ import numpy as np import pytest -import rasterio import xarray as xr from dateutil.parser import parse from hypothesis import given from hypothesis.strategies import integers, text from pandas import to_datetime -from datacube.helpers import write_geotiff from datacube.utils import gen_password, write_user_secret_file, slurp from datacube.model.utils import xr_apply from datacube.utils.dates import date_sequence @@ -45,7 +43,7 @@ pick_uri, uri_resolve, is_vsipath, normalise_path, default_base_dir) from datacube.utils.io import check_write_path -from datacube.testutils import mk_sample_product, remove_crs +from datacube.testutils import mk_sample_product def test_stats_dates(): @@ -187,47 +185,6 @@ assert slurp(fname) is None -def test_write_geotiff(tmpdir, odc_style_xr_dataset): - """Ensure the geotiff helper writer works, and supports datasets smaller than 256x256.""" - filename = tmpdir + '/test.tif' - - assert len(odc_style_xr_dataset.latitude) < 256 - - with pytest.warns(DeprecationWarning): - write_geotiff(filename, odc_style_xr_dataset) - - assert filename.exists() - - with rasterio.open(str(filename)) as src: - written_data = src.read(1) - - assert (written_data == odc_style_xr_dataset['B10']).all() - - -def test_write_geotiff_str_crs(tmpdir, odc_style_xr_dataset): - """Ensure the geotiff helper writer works, and supports crs as a string.""" - filename = tmpdir + '/test.tif' - - original_crs = odc_style_xr_dataset.crs - - odc_style_xr_dataset.attrs['crs'] = str(original_crs) - - with pytest.warns(DeprecationWarning): - write_geotiff(filename, odc_style_xr_dataset) - - assert filename.exists() - - with rasterio.open(str(filename)) as src: - written_data = src.read(1) - - assert (written_data == odc_style_xr_dataset['B10']).all() - - odc_style_xr_dataset = remove_crs(odc_style_xr_dataset) - with pytest.raises(ValueError): - with pytest.warns(DeprecationWarning): - write_geotiff(filename, odc_style_xr_dataset) - - def test_testutils_mk_sample(): pp = mk_sample_product('tt', measurements=[('aa', 'int16', -999), ('bb', 'float32', np.nan)]) diff -Nru datacube-1.8.7/wordlist.txt datacube-1.8.9/wordlist.txt --- datacube-1.8.7/wordlist.txt 1970-01-01 00:00:00.000000000 +0000 +++ 
datacube-1.8.9/wordlist.txt 2022-11-17 00:47:28.000000000 +0000 @@ -0,0 +1,491 @@ +AbstractDataSource +AbstractDatasetResource +AbstractProductResource +AbstractReaderDriver +AbstractWriterDriver +acca +Affine +affine +africa +AGDC +agdc +agdcintegration +AGDCv +agdcv +AIO +aio +albers +Analytics +analytics +antimeridian +api +APIs +apis +ApplyMask +ARD +ard +arg +au +australia +autobuild +autoclass +autoconf +autoflake +automake +automethod +automodule +autoselectionlabel +autosummary +aws +backends +backported +BandDataSource +BandInfo +bbox +bc +bd +Bilby +bilinear +bom +bool +boto +BoundingBox +brazil +Bugfixes +carrotandcompany +cbk +cd +CEOS +CircleCI +CLI +cli +cloudpickle +cmake +cmd +codebase +codecov +cogeo +COGs +combinator +combinators +conda +conf +config +coord +cov +cp +createdb +createuser +creds +CRS +crs +CRSError +CRSMismatchError +CSIRO +csiro +CSV +cubeenv +currentmodule +cython +da +Dahu +dask +DataArray +DataCube +Datacube +datacube +dataflow +DataFrame +DataSet +Dataset +dataset +DatasetResource +datasets +DatasetType +datasetType +DatasetTypeResource +DataSource +datasource +datetime +dea +deafrica +dem +Dependabot +Deprecations +dev +df +digitalearth +digitalearthafrica +Dingley +Dockerfile +dropdb +ds +dsm +dst +dt +dtr +dtype +EarthData +ec +eg +EnterpriseDB +entrypoint +env +ENVI +envs +EO +eo +eodatasets +EOS +eosphere +EP +EPSG +epsg +ESPA +evironments +f'file +fd +feedstock +Festivus +fmask +fmt +fn +func +ga +gbox +GCS +GDAL +gdal +GEDI +gedi +geo +geobase +GeoBox +geobox +GeoboxTiles +GEOGCS +GeoJSON +Geomedian +geomedian +GeoPolygon +Geoscience +geoscience +GeoscienceAustralia +Geospatial +geospatial +GeoTIFF +GeoTiff +geotiff +GeoTIFFs +GeoTiffs +gh +GIS +Github +github +githubusercontent +GloVis +GQA +graphviz +gridded +GridSpec +GridWorkflow +gridWorkflow +GroupBy +HDF +hdf +hl +hoc +hostname +HPC +html +http +https +IAM +ident +identifer +img +INEGI +inegi +ing +Ingester +ingester +ingestor +init +installable +insted +io +ipynb +isel +isinstance +jfEZEOkxRXgNsAsHEC +jpg +JSON +jsonify +Jupyter +jupyter +JupyterLab +Juypter +juypter +KeyboardInterrupt +Kirill +Kubernetes +kwargs +Kyrgyz +kyrgyzstan +lanczos +landsat +LandsatLEDAPS +landsatonaws +lbg +LEDAPS +ledaps +libgdal +libhdf +libnetcdf +libpq +libs +libudunits +libyaml +linux +literalinclude +localhost +lon +lonlat +lr +LTS +lv +macosx +MakeMask +ManagingODC +mapbox +matplotlib +maxdepth +md +metadataType +MetadataTypes +mk +mkdir +MODIS +mongolia +MTL +multiband +multigeom +multiline +multipoint +multipolygon +mx +mypy +NaN +nan +nationalmap +nbar +nbart +nc +NCI +nci +ncml +ndarray +ndexpr +NDVI +ndvi +NetCDF +netCDF +netcdf +NetCDFs +NetcdfWriterDriver +nir +nodata +NodeJS +nosignatures +NotImplementedError +np +NPM +npm +nrt +ns +numpy +nx +ny +ODC +odc +ODCv +OGC +ogr +oldscripts +OLI +omad +OPeNDAP +opendatacube +ORM +osgeo +osr +OSX +ows +param +params +pc +petewa +pgadmin +pixelquality +pkgs +Pluggable +pmap +png +posgreSQL +PostGIS +Postgis +postgis +postgres +postgresql +PQ +pq +pre +precollection +prefetch +Preperation +preperation +PRIMEM +prog +Proj +provence +psql +pts +py +pydata +pyenv +PyLint +pylint +PyPEG +PyPI +pypi +pyproj +pyspellcheck +pytest +PythonAPI +QGIS +RasterDatasetDataSource +RasterIO +rasterio +rasters +RasterShape +RasterWindow +rc +ReadDriver +README +readthedoc +readthedocs +reampling +redis +Reflectance +reflectance +Reproject +reproject +reprojected +reprojecting +reprojection +resampled +Resampling +resampling +rightarrow +rio +Roadmap +roadmap +ROI +roi 
+rst
+rtaib
+rtd
+rtype
+runtime
+rws
+sameuser
+scaleable
+schemas
+scipy
+scm
+SEO
+serialiser
+setuptools
+SimpleDocNav
+singledispatch
+spatio
+SpatioTemporal
+SPDX
+SQLAlchemy
+SRTM
+srtm
+STAC
+stac
+stacker
+stacspec
+str
+subcommands
+sudo
+sv
+svg
+swir
+swissdatacube
+TCP
+Terria
+th
+TIF
+tif
+timeslice
+timeslot
+TIRS
+tmp
+toctree
+TODO
+ToFloat
+txt
+typechecking
+ubuntu
+UI
+ui
+uk
+ul
+unarchived
+unary
+Uncomment
+ur
+URI
+uri
+URIs
+url
+usablity
+USGS
+usgs
+USGS's
+usgslsprepare
+UsingODC
+utils
+UUID
+uuid
+UUIDs
+ValueError
+VDI
+vdi
+venv
+versioneer
+VirtualDatasetBag
+VirtualDatasetBox
+vn
+vp
+vsi
+VSR
+VSRDataSource
+VSRs
+WCS
+wget
+WGS
+WKT
+WMS
+WPS
+www
+XArray
+xarray
+xBddeS
+xr
+YAML
+yaml
+yourscript
diff -Nru datacube-1.8.7/.yamllint datacube-1.8.9/.yamllint
--- datacube-1.8.7/.yamllint 2022-06-07 00:39:59.000000000 +0000
+++ datacube-1.8.9/.yamllint 2022-11-17 00:47:28.000000000 +0000
@@ -28,7 +28,3 @@
   colons: disable
   comments: disable
   comments-indentation: disable
-
-
-
-
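
The main API addition exercised by the new tests above (``test_band_info_with_url_mangling`` and
``test_load_data_with_url_mangling``) is the ``patch_url`` keyword, a function applied to every
dataset URL before it is read. The sketch below shows how a user might wire it up; it is only an
illustration, not part of the patch: the product and measurement names are hypothetical, and the
``planetary_computer`` package is merely one example of a URL-signing function one could pass —
only the ``patch_url`` keyword itself comes from this change set::

    # Minimal usage sketch (assumptions: product/measurement names are made up,
    # and planetary_computer is an optional, separately installed example signer).
    import planetary_computer  # any function mapping a URL string to a URL string works
    import datacube

    dc = datacube.Datacube()

    data = dc.load(
        product="example_product",           # hypothetical product name
        measurements=["red", "nir"],         # hypothetical measurement names
        x=(148.0, 148.2), y=(-35.4, -35.2),
        output_crs="EPSG:3577",
        resolution=(-30, 30),
        patch_url=planetary_computer.sign,   # called on each URL before reading
    )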