From 30299439cae3f1102f60b75620abeced430fcd89 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Tue, 9 Apr 2024 17:43:28 -0700 Subject: [PATCH 01/32] temporary enable CI triggers on feature branch --- .github/workflows/ci-additional.yaml | 2 ++ .github/workflows/ci.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index c0f978fb0d8..bc2eb8d2cac 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -3,6 +3,7 @@ on: push: branches: - "main" + - "backend-indexing" pull_request: branches: - "main" @@ -12,6 +13,7 @@ on: - '/*' # covers files such as `pyproject.toml` - 'properties/**' - 'xarray/**' + - "backend-indexing" workflow_dispatch: # allows you to trigger manually concurrency: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b9b15d867a7..ca9ef397962 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -3,6 +3,7 @@ on: push: branches: - "main" + - "backend-indexing" pull_request: branches: - "main" @@ -12,6 +13,7 @@ on: - '/*' # covers files such as `pyproject.toml` - 'properties/**' - 'xarray/**' + - "backend-indexing" workflow_dispatch: # allows you to trigger manually concurrency: From ddd4cdb59a5793b9a15a28b6b0475eed95739916 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Tue, 16 Apr 2024 18:53:22 -0700 Subject: [PATCH 02/32] add `.oindex` and `.vindex` to `BackendArray` (#8885) * add .oindex and .vindex to BackendArray * Add support for .oindex and .vindex in H5NetCDFArrayWrapper * Add support for .oindex and .vindex in NetCDF4ArrayWrapper, PydapArrayWrapper, NioArrayWrapper, and ZarrArrayWrapper * add deprecation warning * Fix deprecation warning message formatting * add tests * Update xarray/core/indexing.py Co-authored-by: Deepak Cherian * Update ZarrArrayWrapper class in xarray/backends/zarr.py Co-authored-by: Deepak Cherian --------- Co-authored-by: Deepak Cherian --- xarray/backends/common.py | 18 +++++++++++++ xarray/backends/h5netcdf_.py | 12 ++++++++- xarray/backends/netCDF4_.py | 12 ++++++++- xarray/backends/pydap_.py | 12 ++++++++- xarray/backends/scipy_.py | 33 ++++++++++++++++------- xarray/backends/zarr.py | 49 ++++++++++++++++++++++------------- xarray/core/indexing.py | 36 ++++++++++++++++++++----- xarray/tests/test_backends.py | 46 ++++++++++++++++++++++++++++++++ 8 files changed, 182 insertions(+), 36 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index f318b4dd42f..f8f073f86a1 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -210,6 +210,24 @@ def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return self[key] # type: ignore [index] + def _oindex_get(self, key: indexing.OuterIndexer): + raise NotImplementedError( + f"{self.__class__.__name__}._oindex_get method should be overridden" + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + raise NotImplementedError( + f"{self.__class__.__name__}._vindex_get method should be overridden" + ) + + @property + def oindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._oindex_get) + + @property + def vindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._vindex_get) + class AbstractDataStore: __slots__ = () diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 71463193939..07973c3cbd9 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -48,7 +48,17 @@ def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) return ds.variables[self.variable_name] - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem ) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index ae86c4ce384..33d636b59cf 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -97,7 +97,17 @@ def get_array(self, needs_lock=True): variable.set_auto_chartostring(False) return variable - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER, self._getitem ) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 5a475a7c3be..2ce3a579b2d 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -43,7 +43,17 @@ def shape(self) -> tuple[int, ...]: def dtype(self): return self.array.dtype - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.BASIC, self._getitem ) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index f8c486e512c..cd2217c567f 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -67,15 +67,7 @@ def get_variable(self, needs_lock=True): ds = self.datastore._manager.acquire(needs_lock) return ds.variables[self.variable_name] - def _getitem(self, key): - with self.datastore.lock: - data = self.get_variable(needs_lock=False).data - return data[key] - - def __getitem__(self, key): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem - ) + def _finalize_result(self, data): # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. @@ -88,6 +80,29 @@ def __getitem__(self, key): return np.array(data, dtype=self.dtype, copy=copy) + def _getitem(self, key): + with self.datastore.lock: + data = self.get_variable(needs_lock=False).data + return data[key] + + def _vindex_get(self, key: indexing.VectorizedIndexer): + data = indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + + def _oindex_get(self, key: indexing.OuterIndexer): + data = indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + + def __getitem__(self, key): + data = indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + def __setitem__(self, key, value): with self.datastore.lock: data = self.get_variable(needs_lock=False) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e4a684e945d..4c2e8be0c16 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -85,25 +85,38 @@ def __init__(self, zarr_array): def get_array(self): return self._array - def _oindex(self, key): - return self._array.oindex[key] - - def _vindex(self, key): - return self._array.vindex[key] - - def _getitem(self, key): - return self._array[key] - - def __getitem__(self, key): - array = self._array - if isinstance(key, indexing.BasicIndexer): - method = self._getitem - elif isinstance(key, indexing.VectorizedIndexer): - method = self._vindex - elif isinstance(key, indexing.OuterIndexer): - method = self._oindex + def _oindex_get(self, key: indexing.OuterIndexer): + def raw_indexing_method(key): + return self._array.oindex[key] + + return indexing.explicit_indexing_adapter( + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + + def raw_indexing_method(key): + return self._array.vindex[key] + + return indexing.explicit_indexing_adapter( + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, + ) + + def __getitem__(self, key: indexing.BasicIndexer): + def raw_indexing_method(key): + return self._array[key] + return indexing.explicit_indexing_adapter( - key, array.shape, indexing.IndexingSupport.VECTORIZED, method + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, ) # if self.ndim == 0: diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 0926da6fd80..7d6191883e1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -3,6 +3,7 @@ import enum import functools import operator +import warnings from collections import Counter, defaultdict from collections.abc import Hashable, Iterable, Mapping from contextlib import suppress @@ -588,6 +589,14 @@ def __getitem__(self, key: Any): return result +BackendArray_fallback_warning_message = ( + "The array `{0}` does not support indexing using the .vindex and .oindex properties. " + "The __getitem__ method is being used instead. This fallback behavior will be " + "removed in a future version. Please ensure that the backend array `{1}` implements " + "support for the .vindex and .oindex properties to avoid potential issues." +) + + class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy.""" @@ -639,11 +648,18 @@ def shape(self) -> _Shape: return tuple(shape) def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): + try: array = apply_indexer(self.array, self.key) - else: + except NotImplementedError as _: # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + warnings.warn( + BackendArray_fallback_warning_message.format( + self.array.__class__.__name__, self.array.__class__.__name__ + ), + category=DeprecationWarning, + stacklevel=2, + ) array = self.array[self.key] # self.array[self.key] is now a numpy array when @@ -715,12 +731,20 @@ def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): + try: array = apply_indexer(self.array, self.key) - else: + except NotImplementedError as _: # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + warnings.warn( + BackendArray_fallback_warning_message.format( + self.array.__class__.__name__, self.array.__class__.__name__ + ), + category=PendingDeprecationWarning, + stacklevel=2, + ) array = self.array[self.key] + # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0126b130e7c..d7471ecbaf9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5787,3 +5787,49 @@ def test_zarr_region_chunk_partial_offset(tmp_path): # This write is unsafe, and should raise an error, but does not. # with pytest.raises(ValueError): # da.isel(x=slice(5, 25)).chunk(x=(10, 10)).to_zarr(store, region="auto") + + +def test_backend_array_deprecation_warning(capsys): + class CustomBackendArray(xr.backends.common.BackendArray): + def __init__(self): + array = self.get_array() + self.shape = array.shape + self.dtype = array.dtype + + def get_array(self): + return np.arange(10) + + def __getitem__(self, key): + return xr.core.indexing.explicit_indexing_adapter( + key, self.shape, xr.core.indexing.IndexingSupport.BASIC, self._getitem + ) + + def _getitem(self, key): + array = self.get_array() + return array[key] + + cba = CustomBackendArray() + indexer = xr.core.indexing.VectorizedIndexer(key=(np.array([0]),)) + + la = xr.core.indexing.LazilyIndexedArray(cba, indexer) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + la.vindex[indexer].get_duck_array() + + captured = capsys.readouterr() + assert len(w) == 1 + assert issubclass(w[-1].category, PendingDeprecationWarning) + assert ( + "The array `CustomBackendArray` does not support indexing using the .vindex and .oindex properties." + in str(w[-1].message) + ) + assert "The __getitem__ method is being used instead." in str(w[-1].message) + assert "This fallback behavior will be removed in a future version." in str( + w[-1].message + ) + assert ( + "Please ensure that the backend array `CustomBackendArray` implements support for the .vindex and .oindex properties to avoid potential issues." + in str(w[-1].message) + ) + assert captured.out == "" From 96ac4b7f2879268fe03e012114a96f3e680e44c6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Fri, 3 May 2024 08:27:22 -0700 Subject: [PATCH 03/32] Enable explicit use of key tuples (instead of *Indexer objects) in indexing adapters and explicitly indexed arrays (#8870) * pass key tuple to indexing adapters and explicitly indexed arrays * update indexing in StackedBytesArray * Update indexing in StackedBytesArray * Add _IndexerKey type to _typing.py * Update indexing in StackedBytesArray * use tuple indexing in test_backend_array_deprecation_warning * Add support for CompatIndexedTuple in explicit indexing adapter This commit updates the `explicit_indexing_adapter` function to accept both `ExplicitIndexer` and the new `CompatIndexedTuple`. The `CompatIndexedTuple` is designed to facilitate the transition towards using raw tuples by carrying additional metadata about the indexing type (basic, vectorized, or outer). * remove unused code * type hint fixes * fix docstrings * fix tests * fix docstrings * Apply suggestions from code review Co-authored-by: Deepak Cherian * update docstrings and pass tuples directly * Some test cleanup * update docstring * use `BasicIndexer` instead of `CompatIndexedTuple` * support explicit indexing with tuples * fix mypy errors * remove unused IndexerMaker * Update LazilyIndexedArray._updated_key to support explicit indexing with tuples --------- Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian --- xarray/coding/strings.py | 20 +- xarray/coding/variables.py | 6 +- xarray/core/indexing.py | 280 ++++++++++++++++------------ xarray/namedarray/_typing.py | 1 + xarray/tests/__init__.py | 10 - xarray/tests/test_backends.py | 2 +- xarray/tests/test_coding_strings.py | 15 +- xarray/tests/test_dataset.py | 32 ++-- xarray/tests/test_indexing.py | 45 +++-- 9 files changed, 212 insertions(+), 199 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index db95286f6aa..6df92c256b9 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -17,6 +17,7 @@ from xarray.core import indexing from xarray.core.utils import module_available from xarray.core.variable import Variable +from xarray.namedarray._typing import _IndexerKey from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array @@ -220,8 +221,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer] + >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[(slice(None),)] array(b'abc', dtype='|S3') """ @@ -240,7 +240,7 @@ def __init__(self, array): @property def dtype(self): - return np.dtype("S" + str(self.array.shape[-1])) + return np.dtype(f"S{str(self.array.shape[-1])}") @property def shape(self) -> tuple[int, ...]: @@ -249,15 +249,17 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key): + def _vindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key): + def _oindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key): + def __getitem__(self, key: _IndexerKey): + from xarray.core.indexing import BasicIndexer + # require slicing the last dimension completely - key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) - if key.tuple[-1] != slice(None): + indexer = indexing.expanded_indexer(key, self.array.ndim) + if indexer[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + return _numpy_char_to_bytes(self.array[BasicIndexer(indexer)]) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..98bbbbaeb2c 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -99,8 +99,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> NativeEndiannessArray(x).dtype dtype('int16') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> NativeEndiannessArray(x)[indexer].dtype + >>> NativeEndiannessArray(x)[(slice(None),)].dtype dtype('int16') """ @@ -137,8 +136,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> BoolTypeArray(x).dtype dtype('bool') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> BoolTypeArray(x)[indexer].dtype + >>> BoolTypeArray(x)[(slice(None),)].dtype dtype('bool') """ diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 7d6191883e1..2b8cd202e4e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import TYPE_CHECKING, Any, Callable, overload +from typing import TYPE_CHECKING, Any, Callable, Literal, overload import numpy as np import pandas as pd @@ -36,7 +36,7 @@ from xarray.core.indexes import Index from xarray.core.variable import Variable - from xarray.namedarray._typing import _Shape, duckarray + from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -324,13 +324,13 @@ class ExplicitIndexer: __slots__ = ("_key",) - def __init__(self, key: tuple[Any, ...]): + def __init__(self, key: _IndexerKey): if type(self) is ExplicitIndexer: raise TypeError("cannot instantiate base ExplicitIndexer objects") self._key = tuple(key) @property - def tuple(self) -> tuple[Any, ...]: + def tuple(self) -> _IndexerKey: return self._key def __repr__(self) -> str: @@ -516,30 +516,29 @@ class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () def get_duck_array(self): - key = BasicIndexer((slice(None),) * self.ndim) - return self[key] + return self[(slice(None),) * self.ndim] def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: # This is necessary because we apply the indexing key in self.get_duck_array() # Note this is the base class for all lazy indexing classes return np.asarray(self.get_duck_array(), dtype=dtype) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -575,9 +574,9 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: def get_duck_array(self): return self.array.get_duck_array() - def __getitem__(self, key: Any): - key = expanded_indexer(key, self.ndim) - indexer = self.indexer_cls(key) + def __getitem__(self, key: _IndexerKey | slice): + _key = expanded_indexer(key, self.ndim) + indexer = self.indexer_cls(_key) result = apply_indexer(self.array, indexer) @@ -623,8 +622,13 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): self.array = as_indexable(array) self.key = key - def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: - iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) + def _updated_key( + self, new_key: ExplicitIndexer | _IndexerKey + ) -> BasicIndexer | OuterIndexer: + _new_key_tuple = ( + new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key + ) + iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) full_key = [] for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, integer_types): @@ -673,31 +677,29 @@ def get_duck_array(self): def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(indexer)) - def _vindex_set(self, key: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, key: _IndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." ) - def _oindex_set(self, key: OuterIndexer, value: Any) -> None: - full_key = self._updated_key(key) - self.array.oindex[full_key] = value + def _oindex_set(self, key: _IndexerKey, value: Any) -> None: + full_key = self._updated_key(OuterIndexer(key)) + self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: BasicIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(key) - full_key = self._updated_key(key) - self.array[full_key] = value + def __setitem__(self, key: _IndexerKey, value: Any) -> None: + full_key = self._updated_key(BasicIndexer(key)) + self.array[full_key.tuple] = value def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" @@ -756,25 +758,25 @@ def get_duck_array(self): def _updated_key(self, new_key: ExplicitIndexer): return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _oindex_get(self, indexer: _IndexerKey): + return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: VectorizedIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _vindex_get(self, indexer: _IndexerKey): + return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) + + def __getitem__(self, indexer: _IndexerKey): - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) # If the indexed array becomes a scalar, return LazilyIndexedArray - if all(isinstance(ind, integer_types) for ind in indexer.tuple): - key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple)) + if all(isinstance(ind, integer_types) for ind in indexer): + key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) return LazilyIndexedArray(self.array, key) - return type(self)(self.array, self._updated_key(indexer)) + return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) def transpose(self, order): key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." @@ -807,29 +809,27 @@ def _ensure_copied(self): def get_duck_array(self): return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array[indexer] = value @@ -857,27 +857,25 @@ def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value @@ -1040,29 +1038,63 @@ def explicit_indexing_adapter( return result +class CompatIndexedTuple(tuple): + """ + A tuple subclass used to transition existing backend implementations towards the use of raw tuples + for indexing by carrying additional metadata about the type of indexing being + performed ('basic', 'vectorized', or 'outer'). This class serves as a bridge, allowing + backend arrays that currently expect this metadata to function correctly while + maintaining the outward behavior of a regular tuple. + + This class is particularly useful during the phase where the backend implementations are + not yet capable of directly accepting raw tuples without additional context about + the indexing type. It ensures that these backends can still correctly interpret and + process indexing operations by providing them with the necessary contextual information. + """ + + def __new__(cls, iterable, indexer_type: Literal["basic", "vectorized", "outer"]): + obj = super().__new__(cls, iterable) + obj.indexer_type = indexer_type # type: ignore[attr-defined] + return obj + + def __repr__(self): + return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" + + def apply_indexer(indexable, indexer: ExplicitIndexer): """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[indexer] + return indexable.vindex[CompatIndexedTuple(indexer.tuple, "vectorized")] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[indexer] + return indexable.oindex[CompatIndexedTuple(indexer.tuple, "outer")] else: - return indexable[indexer] + return indexable[CompatIndexedTuple(indexer.tuple, "basic")] def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: """Set values in an indexable object using an indexer.""" if isinstance(indexer, VectorizedIndexer): - indexable.vindex[indexer] = value + indexable.vindex[indexer.tuple] = value elif isinstance(indexer, OuterIndexer): - indexable.oindex[indexer] = value + indexable.oindex[indexer.tuple] = value else: - indexable[indexer] = value + indexable[indexer.tuple] = value def decompose_indexer( - indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport + indexer: ExplicitIndexer | CompatIndexedTuple, + shape: _Shape, + indexing_support: IndexingSupport, ) -> tuple[ExplicitIndexer, ExplicitIndexer]: + if isinstance(indexer, CompatIndexedTuple): + # recreate the indexer object from the tuple and the type of indexing. + # This is necessary to ensure that the backend array can correctly interpret the indexing operation. + if indexer.indexer_type == "vectorized": # type: ignore[attr-defined] + indexer = VectorizedIndexer(indexer) + elif indexer.indexer_type == "outer": # type: ignore[attr-defined] + indexer = OuterIndexer(indexer) + else: + indexer = BasicIndexer(indexer) if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, (BasicIndexer, OuterIndexer)): @@ -1131,10 +1163,10 @@ def _decompose_vectorized_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3]))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array).oindex[backend_indexer] + ... array = NumpyIndexingAdapter(array).oindex[backend_indexer.tuple] >>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # vectorized indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).vindex[np_indexer] + ... NumpyIndexingAdapter(array).vindex[np_indexer.tuple] array([ 2, 21, 8]) """ assert isinstance(indexer, VectorizedIndexer) @@ -1213,10 +1245,10 @@ def _decompose_outer_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = BasicIndexer((slice(0, 3), slice(2, 4))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array)[backend_indexer] + ... array = NumpyIndexingAdapter(array)[backend_indexer.tuple] >>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # outer indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).oindex[np_indexer] + ... NumpyIndexingAdapter(array).oindex[np_indexer.tuple] array([[ 2, 3, 2], [14, 15, 14], [ 8, 9, 8]]) @@ -1520,25 +1552,28 @@ def __init__(self, array): def transpose(self, order): return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer): - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_get(self, indexer: _IndexerKey): + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): array = NumpyVIndexAdapter(self.array) - return array[indexer.tuple] + return array[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer): array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) return array[key] - def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: + def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: try: array[key] = value except ValueError as exc: @@ -1551,21 +1586,24 @@ def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) - self._safe_setitem(array, indexer.tuple, value) + self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey | ExplicitIndexer, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) self._safe_setitem(array, key, value) @@ -1594,30 +1632,28 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): # manual orthogonal indexing (implemented like DaskIndexingAdapter) - key = indexer.tuple + value = self.array - for axis, subkey in reversed(list(enumerate(key))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _IndexerKey): + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - self.array[indexer.tuple] = value + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value def transpose(self, order): xp = self.array.__array_namespace__() @@ -1635,38 +1671,35 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer): - key = indexer.tuple + def _oindex_get(self, indexer: _IndexerKey): try: - return self.array[key] + return self.array[indexer] except NotImplementedError: # manual orthogonal indexing value = self.array - for axis, subkey in reversed(list(enumerate(key))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: VectorizedIndexer): - return self.array.vindex[indexer.tuple] + def _vindex_get(self, indexer: _IndexerKey): + return self.array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _IndexerKey): + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer.tuple) + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( "xarray can't set arrays with multiple " "array indices to dask yet." ) - self.array[indexer.tuple] = value + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: - self.array.vindex[indexer.tuple] = value + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + self.array.vindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value def transpose(self, order): return self.array.transpose(order) @@ -1728,13 +1761,14 @@ def _convert_scalar(self, item): # a NumPy array. return to_0d_array(item) - def _prepare_key(self, key: tuple[Any, ...]) -> tuple[Any, ...]: - if isinstance(key, tuple) and len(key) == 1: + def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: + _key = key.tuple if isinstance(key, ExplicitIndexer) else key + if isinstance(_key, tuple) and len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - (key,) = key + (_key,) = _key - return key + return _key def _handle_result( self, result: Any @@ -1751,7 +1785,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1759,7 +1793,7 @@ def _oindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1770,7 +1804,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1778,7 +1812,7 @@ def _vindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1789,7 +1823,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: ExplicitIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1797,7 +1831,7 @@ def __getitem__( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1862,7 +1896,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1876,7 +1910,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1889,7 +1923,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: ExplicitIndexer): + def __getitem__(self, indexer: _IndexerKey): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level @@ -1911,7 +1945,7 @@ def _get_array_subset(self) -> np.ndarray: if self.size > threshold: pos = threshold // 2 indices = np.concatenate([np.arange(0, pos), np.arange(-pos, 0)]) - subset = self[OuterIndexer((indices,))] + subset = self[(indices,)] else: subset = self diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index b715973814f..243c2382472 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,6 +95,7 @@ def dtype(self) -> _DType_co: ... _IndexKey = Union[int, slice, "ellipsis"] _IndexKeys = tuple[Union[_IndexKey], ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] +_IndexerKey = tuple[Any, ...] _AttrsLike = Union[Mapping[Any, Any], None] diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 23fd590f4dc..64a879369f8 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -226,16 +226,6 @@ def __getitem__(self, key): return key -class IndexerMaker: - def __init__(self, indexer_cls): - self._indexer_cls = indexer_cls - - def __getitem__(self, key): - if not isinstance(key, tuple): - key = (key,) - return self._indexer_cls(key) - - def source_ndarray(array): """Given an ndarray, return the base object which holds its memory, or the object itself. diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d7471ecbaf9..eb5e2ef6cf0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5815,7 +5815,7 @@ def _getitem(self, key): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - la.vindex[indexer].get_duck_array() + la.vindex[indexer.tuple].get_duck_array() captured = capsys.readouterr() assert len(w) == 1 diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 51f63ea72dd..0feac5b15eb 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -7,9 +7,7 @@ from xarray import Variable from xarray.coding import strings -from xarray.core import indexing from xarray.tests import ( - IndexerMaker, assert_array_equal, assert_identical, requires_dask, @@ -150,10 +148,9 @@ def test_StackedBytesArray() -> None: assert len(actual) == len(expected) assert_array_equal(expected, actual) - B = IndexerMaker(indexing.BasicIndexer) - assert_array_equal(expected[:1], actual[B[:1]]) + assert_array_equal(expected[:1], actual[(slice(1),)]) with pytest.raises(IndexError): - actual[B[:, :2]] + actual[slice(None), slice(2)] def test_StackedBytesArray_scalar() -> None: @@ -168,10 +165,8 @@ def test_StackedBytesArray_scalar() -> None: with pytest.raises(TypeError): len(actual) np.testing.assert_array_equal(expected, actual) - - B = IndexerMaker(indexing.BasicIndexer) with pytest.raises(IndexError): - actual[B[:2]] + actual[(slice(2),)] def test_StackedBytesArray_vectorized_indexing() -> None: @@ -179,9 +174,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: stacked = strings.StackedBytesArray(array) expected = np.array([[b"abc", b"def"], [b"def", b"abc"]]) - V = IndexerMaker(indexing.VectorizedIndexer) - indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked.vindex[indexer] + actual = stacked.vindex[(np.array([[0, 1], [1, 0]]),)] assert_array_equal(actual, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 584776197e3..ecca8c0c79e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -244,7 +244,7 @@ def get_array(self): return self.array def __getitem__(self, key): - return self.array[key.tuple] + return self.array[(key if isinstance(key, tuple) else key.tuple)] class AccessibleAsDuckArrayDataStore(backends.InMemoryDataStore): @@ -5096,28 +5096,26 @@ def test_lazy_load(self) -> None: ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - def test_lazy_load_duck_array(self) -> None: + @pytest.mark.parametrize("decode_cf", [True, False]) + def test_lazy_load_duck_array(self, decode_cf) -> None: store = AccessibleAsDuckArrayDataStore() create_test_data().dump_to_store(store) - for decode_cf in [True, False]: - ds = open_dataset(store, decode_cf=decode_cf) - with pytest.raises(UnexpectedDataAccess): - ds["var1"].values + ds = open_dataset(store, decode_cf=decode_cf) + with pytest.raises(UnexpectedDataAccess): + ds["var1"].values - # these should not raise UnexpectedDataAccess: - ds.var1.data - ds.isel(time=10) - ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - repr(ds) + # these should not raise UnexpectedDataAccess: + ds.var1.data + ds.isel(time=10) + ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) + repr(ds) - # preserve the duck array type and don't cast to array - assert isinstance(ds["var1"].load().data, DuckArrayWrapper) - assert isinstance( - ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper - ) + # preserve the duck array type and don't cast to array + assert isinstance(ds["var1"].load().data, DuckArrayWrapper) + assert isinstance(ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper) - ds.close() + ds.close() def test_dropna(self) -> None: x = np.random.randn(4, 4) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index f019d3c789c..b5da4a75439 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -12,7 +12,6 @@ from xarray.core.indexes import PandasIndex, PandasMultiIndex from xarray.core.types import T_Xarray from xarray.tests import ( - IndexerMaker, ReturnItem, assert_array_equal, assert_identical, @@ -20,8 +19,6 @@ requires_dask, ) -B = IndexerMaker(indexing.BasicIndexer) - class TestIndexCallable: def test_getitem(self): @@ -433,7 +430,7 @@ def test_lazily_indexed_array_vindex_setitem(self) -> None: NotImplementedError, match=r"Lazy item assignment with the vectorized indexer is not yet", ): - lazy.vindex[indexer] = 0 + lazy.vindex[indexer.tuple] = 0 @pytest.mark.parametrize( "indexer_class, key, value", @@ -449,10 +446,10 @@ def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None: if indexer_class is indexing.BasicIndexer: indexer = indexer_class(key) - lazy[indexer] = value + lazy[indexer.tuple] = value elif indexer_class is indexing.OuterIndexer: indexer = indexer_class(key) - lazy.oindex[indexer] = value + lazy.oindex[indexer.tuple] = value assert_array_equal(original[key], value) @@ -461,16 +458,16 @@ class TestCopyOnWriteArray: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.zeros(10)) def test_sub_array(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.CopyOnWriteArray) - child[B[:]] = 0 + child[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.arange(10)) assert_array_equal(child, np.zeros(5)) @@ -478,7 +475,7 @@ def test_sub_array(self) -> None: def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" class TestMemoryCachedArray: @@ -491,7 +488,7 @@ def test_wrapper(self) -> None: def test_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) @@ -500,13 +497,13 @@ def test_sub_array(self) -> None: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.zeros(10)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" def test_base_explicit_indexer() -> None: @@ -615,7 +612,7 @@ def test_arrayize_vectorized_indexer(self) -> None: vindex, self.data.shape ) np.testing.assert_array_equal( - self.data.vindex[vindex], self.data.vindex[vindex_array] + self.data.vindex[vindex.tuple], self.data.vindex[vindex_array.tuple] ) actual = indexing._arrayize_vectorized_indexer( @@ -731,35 +728,35 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: # Dispatch to appropriate indexing method if indexer_mode.startswith("vectorized"): - expected = indexing_adapter.vindex[indexer] + expected = indexing_adapter.vindex[indexer.tuple] elif indexer_mode.startswith("outer"): - expected = indexing_adapter.oindex[indexer] + expected = indexing_adapter.oindex[indexer.tuple] else: - expected = indexing_adapter[indexer] # Basic indexing + expected = indexing_adapter[indexer.tuple] # Basic indexing if isinstance(backend_ind, indexing.VectorizedIndexer): - array = indexing_adapter.vindex[backend_ind] + array = indexing_adapter.vindex[backend_ind.tuple] elif isinstance(backend_ind, indexing.OuterIndexer): - array = indexing_adapter.oindex[backend_ind] + array = indexing_adapter.oindex[backend_ind.tuple] else: - array = indexing_adapter[backend_ind] + array = indexing_adapter[backend_ind.tuple] if len(np_ind.tuple) > 0: array_indexing_adapter = indexing.NumpyIndexingAdapter(array) if isinstance(np_ind, indexing.VectorizedIndexer): - array = array_indexing_adapter.vindex[np_ind] + array = array_indexing_adapter.vindex[np_ind.tuple] elif isinstance(np_ind, indexing.OuterIndexer): - array = array_indexing_adapter.oindex[np_ind] + array = array_indexing_adapter.oindex[np_ind.tuple] else: - array = array_indexing_adapter[np_ind] + array = array_indexing_adapter[np_ind.tuple] np.testing.assert_array_equal(expected, array) if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) assert isinstance(combined_ind, indexing.VectorizedIndexer) - array = indexing_adapter.vindex[combined_ind] + array = indexing_adapter.vindex[combined_ind.tuple] np.testing.assert_array_equal(expected, array) From 18c5c70c7c08414695f1f3abda86264f15fb88a5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 6 May 2024 13:21:14 -0600 Subject: [PATCH 04/32] Trigger CI only if code files are modified. (#9006) * Trigger CI only if code files are modified. Fixes #8705 * Apply suggestions from code review Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --------- Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- .github/workflows/ci-additional.yaml | 6 ++++++ .github/workflows/ci.yaml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index bc2eb8d2cac..49a9272e4f0 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -14,6 +14,12 @@ on: - 'properties/**' - 'xarray/**' - "backend-indexing" + paths: + - 'ci/**' + - '.github/**' + - '/*' # covers files such as `pyproject.toml` + - 'properties/**' + - 'xarray/**' workflow_dispatch: # allows you to trigger manually concurrency: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ca9ef397962..a4b165db06c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,6 +14,12 @@ on: - 'properties/**' - 'xarray/**' - "backend-indexing" + paths: + - 'ci/**' + - '.github/**' + - '/*' # covers files such as `pyproject.toml` + - 'properties/**' + - 'xarray/**' workflow_dispatch: # allows you to trigger manually concurrency: From 795daf2db5ad0b0a2ebbad4cc694e15417a124db Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sat, 11 May 2024 18:24:10 -0700 Subject: [PATCH 05/32] fix bad merge --- .github/workflows/ci-additional.yaml | 7 +------ .github/workflows/ci.yaml | 6 ------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 49a9272e4f0..d9ab8a9fc3e 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -7,12 +7,6 @@ on: pull_request: branches: - "main" - paths: - - 'ci/**' - - '.github/**' - - '/*' # covers files such as `pyproject.toml` - - 'properties/**' - - 'xarray/**' - "backend-indexing" paths: - 'ci/**' @@ -20,6 +14,7 @@ on: - '/*' # covers files such as `pyproject.toml` - 'properties/**' - 'xarray/**' + workflow_dispatch: # allows you to trigger manually concurrency: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a4b165db06c..4263c313cbc 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -7,12 +7,6 @@ on: pull_request: branches: - "main" - paths: - - 'ci/**' - - '.github/**' - - '/*' # covers files such as `pyproject.toml` - - 'properties/**' - - 'xarray/**' - "backend-indexing" paths: - 'ci/**' From f2c4659c179aaca2819dd13d86c005e8914d9cf3 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Sat, 11 May 2024 21:34:26 -0400 Subject: [PATCH 06/32] Micro optimization -- use tuples throughout backend indexing (#9009) Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Co-authored-by: Anderson Banihirwe --- xarray/core/indexing.py | 185 +++++++++++++++++++++------------------- 1 file changed, 96 insertions(+), 89 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 2b8cd202e4e..18f2489a505 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -36,7 +36,7 @@ from xarray.core.indexes import Index from xarray.core.variable import Variable - from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray + from xarray.namedarray._typing import _Chunks, _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -218,22 +218,22 @@ def expanded_indexer(key, ndim): if not isinstance(key, tuple): # numpy treats non-tuple keys equivalent to tuples of length 1 key = (key,) - new_key = [] + new_key = () # handling Ellipsis right is a little tricky, see: # https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing found_ellipsis = False for k in key: if k is Ellipsis: if not found_ellipsis: - new_key.extend((ndim + 1 - len(key)) * [slice(None)]) + new_key += (slice(None),) * (ndim + 1 - len(key)) found_ellipsis = True else: - new_key.append(slice(None)) + new_key += (slice(None),) else: - new_key.append(k) + new_key += (k,) if len(new_key) > ndim: raise IndexError("too many indices") - new_key.extend((ndim - len(new_key)) * [slice(None)]) + new_key += (slice(None),) * (ndim - len(new_key)) return tuple(new_key) @@ -388,7 +388,7 @@ def __init__(self, key: tuple[int | np.integer | slice, ...]): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[int | np.integer | slice, ...] = () for k in key: if isinstance(k, integer_types): k = int(k) @@ -398,9 +398,9 @@ def __init__(self, key: tuple[int | np.integer | slice, ...]): raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class OuterIndexer(ExplicitIndexer): @@ -423,7 +423,9 @@ def __init__( if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... + ] = () for k in key: if isinstance(k, integer_types): k = int(k) @@ -444,9 +446,9 @@ def __init__( raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class VectorizedIndexer(ExplicitIndexer): @@ -465,7 +467,7 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ... if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () ndim = None for k in key: if isinstance(k, slice): @@ -494,9 +496,9 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ... raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class ExplicitlyIndexed: @@ -599,7 +601,7 @@ def __getitem__(self, key: _IndexerKey | slice): class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy.""" - __slots__ = ("array", "key") + __slots__ = ("array", "key", "_shape") def __init__(self, array: Any, key: ExplicitIndexer | None = None): """ @@ -622,6 +624,14 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): self.array = as_indexable(array) self.key = key + shape: _Shape = () + for size, k in zip(self.array.shape, self.key.tuple): + if isinstance(k, slice): + shape += (len(range(*k.indices(size))),) + elif isinstance(k, np.ndarray): + shape += (k.size,) + self._shape = shape + def _updated_key( self, new_key: ExplicitIndexer | _IndexerKey ) -> BasicIndexer | OuterIndexer: @@ -629,27 +639,20 @@ def _updated_key( new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key ) iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) - full_key = [] + full_key: tuple[int | np.integer, ...] = () for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, integer_types): - full_key.append(k) + full_key += (k,) else: - full_key.append(_index_indexer_1d(k, next(iter_new_key), size)) - full_key_tuple = tuple(full_key) + full_key += (_index_indexer_1d(k, next(iter_new_key), size),) - if all(isinstance(k, integer_types + (slice,)) for k in full_key_tuple): - return BasicIndexer(full_key_tuple) - return OuterIndexer(full_key_tuple) + if all(isinstance(k, integer_types + (slice,)) for k in full_key): + return BasicIndexer(full_key) + return OuterIndexer(full_key) @property def shape(self) -> _Shape: - shape = [] - for size, k in zip(self.array.shape, self.key.tuple): - if isinstance(k, slice): - shape.append(len(range(*k.indices(size)))) - elif isinstance(k, np.ndarray): - shape.append(k.size) - return tuple(shape) + return self._shape def get_duck_array(self): try: @@ -924,18 +927,18 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () for k, size in zip(key, shape): if isinstance(k, integer_types): - new_key.append(np.array(k).reshape((1,) * n_dim)) + new_key += (np.array(k).reshape((1,) * n_dim),) else: # np.ndarray or slice if isinstance(k, slice): k = np.arange(*k.indices(size)) assert k.dtype.kind in {"i", "u"} new_shape = [(1,) * i_dim + (k.size,) + (1,) * (n_dim - i_dim - 1)] - new_key.append(k.reshape(*new_shape)) + new_key += (k.reshape(*new_shape),) i_dim += 1 - return VectorizedIndexer(tuple(new_key)) + return VectorizedIndexer(new_key) def _outer_to_numpy_indexer(indexer: BasicIndexer | OuterIndexer, shape: _Shape): @@ -1174,8 +1177,10 @@ def _decompose_vectorized_indexer( if indexing_support is IndexingSupport.VECTORIZED: return indexer, BasicIndexer(()) - backend_indexer_elems = [] - np_indexer_elems = [] + backend_indexer_elems: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... + ] = () + np_indexer_elems: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () # convert negative indices indexer_elems = [ np.where(k < 0, k + s, k) if isinstance(k, np.ndarray) else k @@ -1188,17 +1193,17 @@ def _decompose_vectorized_indexer( # (but make its step positive) in the backend, # and then use all of it (slice(None)) for the in-memory portion. bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer_elems.append(bk_slice) - np_indexer_elems.append(np_slice) + backend_indexer_elems += (bk_slice,) + np_indexer_elems += (np_slice,) else: # If it is a (multidimensional) np.ndarray, just pickup the used # keys without duplication and store them as a 1d-np.ndarray. oind, vind = np.unique(k, return_inverse=True) - backend_indexer_elems.append(oind) - np_indexer_elems.append(vind.reshape(*k.shape)) + backend_indexer_elems += (oind,) + np_indexer_elems += (vind.reshape(*k.shape),) - backend_indexer = OuterIndexer(tuple(backend_indexer_elems)) - np_indexer = VectorizedIndexer(tuple(np_indexer_elems)) + backend_indexer = OuterIndexer(backend_indexer_elems) + np_indexer = VectorizedIndexer(np_indexer_elems) if indexing_support is IndexingSupport.OUTER: return backend_indexer, np_indexer @@ -1253,8 +1258,8 @@ def _decompose_outer_indexer( [14, 15, 14], [ 8, 9, 8]]) """ - backend_indexer: list[Any] = [] - np_indexer: list[Any] = [] + backend_indexer: tuple[Any, ...] = () + np_indexer: tuple[Any, ...] = () assert isinstance(indexer, (OuterIndexer, BasicIndexer)) @@ -1264,23 +1269,23 @@ def _decompose_outer_indexer( # If it is a slice, then we will slice it as-is # (but make its step positive) in the backend, bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) else: - backend_indexer.append(k) + backend_indexer += (k,) if not is_scalar(k): - np_indexer.append(slice(None)) - return type(indexer)(tuple(backend_indexer)), BasicIndexer(tuple(np_indexer)) + np_indexer += (slice(None),) + return type(indexer)(backend_indexer), BasicIndexer(np_indexer) # make indexer positive - pos_indexer: list[np.ndarray | int | np.number] = [] + pos_indexer: tuple[np.ndarray | int | np.number, ...] = () for k, s in zip(indexer.tuple, shape): if isinstance(k, np.ndarray): - pos_indexer.append(np.where(k < 0, k + s, k)) + pos_indexer += (np.where(k < 0, k + s, k),) elif isinstance(k, integer_types) and k < 0: - pos_indexer.append(k + s) + pos_indexer += (k + s,) else: - pos_indexer.append(k) + pos_indexer += (k,) indexer_elems = pos_indexer if indexing_support is IndexingSupport.OUTER_1VECTOR: @@ -1300,41 +1305,41 @@ def _decompose_outer_indexer( if isinstance(k, np.ndarray) and i != array_index: # np.ndarray key is converted to slice that covers the entire # entries of this key. - backend_indexer.append(slice(np.min(k), np.max(k) + 1)) - np_indexer.append(k - np.min(k)) + backend_indexer += (slice(np.min(k), np.max(k) + 1),) + np_indexer += (k - np.min(k),) elif isinstance(k, np.ndarray): # Remove duplicates and sort them in the increasing order pkey, ekey = np.unique(k, return_inverse=True) - backend_indexer.append(pkey) - np_indexer.append(ekey) + backend_indexer += (pkey,) + np_indexer += (ekey,) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) else: # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) - return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) if indexing_support == IndexingSupport.OUTER: for k, s in zip(indexer_elems, shape): if isinstance(k, slice): # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) elif isinstance(k, np.ndarray) and (np.diff(k) >= 0).all(): - backend_indexer.append(k) - np_indexer.append(slice(None)) + backend_indexer += (k,) + np_indexer += (slice(None),) else: # Remove duplicates and sort them in the increasing order oind, vind = np.unique(k, return_inverse=True) - backend_indexer.append(oind) - np_indexer.append(vind.reshape(*k.shape)) + backend_indexer += (oind,) + np_indexer += (vind.reshape(*k.shape),) - return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) # basic indexer assert indexing_support == IndexingSupport.BASIC @@ -1343,16 +1348,16 @@ def _decompose_outer_indexer( if isinstance(k, np.ndarray): # np.ndarray key is converted to slice that covers the entire # entries of this key. - backend_indexer.append(slice(np.min(k), np.max(k) + 1)) - np_indexer.append(k - np.min(k)) + backend_indexer += (slice(np.min(k), np.max(k) + 1),) + np_indexer += (k - np.min(k),) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) else: # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) - return (BasicIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return BasicIndexer(backend_indexer), OuterIndexer(np_indexer) def _arrayize_vectorized_indexer( @@ -1366,15 +1371,15 @@ def _arrayize_vectorized_indexer( arrays = [v for v in indexer.tuple if isinstance(v, np.ndarray)] n_dim = arrays[0].ndim if len(arrays) > 0 else 0 i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () for v, size in zip(indexer.tuple, shape): if isinstance(v, np.ndarray): - new_key.append(np.reshape(v, v.shape + (1,) * len(slices))) + new_key += (np.reshape(v, v.shape + (1,) * len(slices)),) else: # slice shape = (1,) * (n_dim + i_dim) + (-1,) + (1,) * (len(slices) - i_dim - 1) - new_key.append(np.arange(*v.indices(size)).reshape(shape)) + new_key += (np.arange(*v.indices(size)).reshape(shape),) i_dim += 1 - return VectorizedIndexer(tuple(new_key)) + return VectorizedIndexer(new_key) def _chunked_array_with_chunks_hint( @@ -1384,10 +1389,12 @@ def _chunked_array_with_chunks_hint( if len(chunks) < array.ndim: raise ValueError("not enough chunks in hint") - new_chunks = [] - for chunk, size in zip(chunks, array.shape): - new_chunks.append(chunk if size > 1 else (1,)) - return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] + + new_chunks: _Chunks = tuple( + chunk if size > 1 else 1 for chunk, size in zip(chunks, array.shape) + ) + + return chunkmanager.from_array(array, new_chunks) def _logical_any(args): @@ -1398,22 +1405,22 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): key = (k for k in key if not isinstance(k, slice)) chunks_hint = getattr(data, "chunks", None) - new_keys = [] + new_keys: tuple[Any, ...] = () for k in key: if isinstance(k, np.ndarray): if is_chunked_array(data): # type: ignore[arg-type] chunkmanager = get_chunked_array_type(data) - new_keys.append( - _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager) + new_keys += ( + _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager), ) elif isinstance(data, array_type("sparse")): import sparse - new_keys.append(sparse.COO.from_numpy(k)) + new_keys += (sparse.COO.from_numpy(k),) else: - new_keys.append(k) + new_keys += (k,) else: - new_keys.append(k) + new_keys += (k,) mask = _logical_any(k == -1 for k in new_keys) return mask From 8b591e0073efa86ef8f6b5b9deb7b2886a681c20 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Oct 2024 19:41:21 +0000 Subject: [PATCH 07/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/zarr.py | 1 - xarray/core/indexing.py | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index aeedb1d0369..60fa51456d1 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -158,7 +158,6 @@ def raw_indexing_method(key): ) def _vindex_get(self, key: indexing.VectorizedIndexer): - def raw_indexing_method(key): return self._array.vindex[key] diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 1a968debfd7..b9ed9c2a45f 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import TYPE_CHECKING, Any, Callable, Literal, overload +from typing import TYPE_CHECKING, Any, Literal, overload import numpy as np import pandas as pd @@ -38,7 +38,7 @@ from xarray.core.indexes import Index from xarray.core.types import Self from xarray.core.variable import Variable - from xarray.namedarray._typing import _Chunks, _IndexerKey, _Shape, duckarray + from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -771,7 +771,6 @@ def _vindex_get(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) def __getitem__(self, indexer: _IndexerKey): - # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer): key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) @@ -1566,7 +1565,6 @@ def _vindex_get(self, indexer: _IndexerKey): return array[indexer] def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer): - array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see From 8cc0d2916bb413673199046b4847464cf71fd389 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 30 Oct 2024 15:03:07 -0700 Subject: [PATCH 08/32] enhance type annotations and improve clarity --- xarray/core/indexing.py | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b9ed9c2a45f..74e9a9689ab 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -38,7 +38,7 @@ from xarray.core.indexes import Index from xarray.core.types import Self from xarray.core.variable import Variable - from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray + from xarray.namedarray._typing import _Chunks, _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -640,9 +640,14 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): shape += (k.size,) self._shape = shape - def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: - iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) - full_key = [] + def _updated_key( + self, new_key: ExplicitIndexer | _IndexerKey + ) -> BasicIndexer | OuterIndexer: + _new_key_tuple = ( + new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key + ) + iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) + full_key: tuple[int | np.integer, ...] = () for size, k in zip(self.array.shape, self.key.tuple, strict=True): if isinstance(k, integer_types): full_key += (k,) @@ -855,6 +860,9 @@ def __init__(self, array): def _ensure_cached(self): self.array = as_indexable(self.array.get_duck_array()) + def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: + return np.asarray(self.get_duck_array(), dtype=dtype) + def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() @@ -895,10 +903,10 @@ def as_indexable(array): return PandasIndexingAdapter(array) if is_duck_dask_array(array): return DaskIndexingAdapter(array) - if hasattr(array, "__array_namespace__"): - return ArrayApiIndexingAdapter(array) if hasattr(array, "__array_function__"): return NdArrayLikeIndexingAdapter(array) + if hasattr(array, "__array_namespace__"): + return ArrayApiIndexingAdapter(array) raise TypeError(f"Invalid array type: {type(array)}") @@ -926,7 +934,7 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () for k, size in zip(key, shape, strict=True): if isinstance(k, integer_types): new_key += (np.array(k).reshape((1,) * n_dim),) @@ -1277,7 +1285,7 @@ def _decompose_outer_indexer( return type(indexer)(backend_indexer), BasicIndexer(np_indexer) # make indexer positive - pos_indexer: list[np.ndarray | int | np.number] = [] + pos_indexer: tuple[np.ndarray | int | np.number, ...] = () for k, s in zip(indexer.tuple, shape, strict=False): if isinstance(k, np.ndarray): pos_indexer += (np.where(k < 0, k + s, k),) @@ -1370,7 +1378,7 @@ def _arrayize_vectorized_indexer( arrays = [v for v in indexer.tuple if isinstance(v, np.ndarray)] n_dim = arrays[0].ndim if len(arrays) > 0 else 0 i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () for v, size in zip(indexer.tuple, shape, strict=True): if isinstance(v, np.ndarray): new_key += (np.reshape(v, v.shape + (1,) * len(slices)),) @@ -1388,9 +1396,12 @@ def _chunked_array_with_chunks_hint( if len(chunks) < array.ndim: raise ValueError("not enough chunks in hint") - new_chunks = [] - for chunk, size in zip(chunks, array.shape, strict=False): - new_chunks.append(chunk if size > 1 else (1,)) + + new_chunks: _Chunks = tuple( + chunk if size > 1 else 1 + for chunk, size in zip(chunks, array.shape, strict=False) + ) + return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] @@ -1773,8 +1784,9 @@ def _convert_scalar(self, item): # a NumPy array. return to_0d_array(item) - def _prepare_key(self, key: Any | tuple[Any, ...]) -> tuple[Any, ...]: - if isinstance(key, tuple) and len(key) == 1: + def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: + _key = key.tuple if isinstance(key, ExplicitIndexer) else key + if isinstance(_key, tuple) and len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) (_key,) = _key From 58846055a3d735688ebd2b3aae823590d461955e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 30 Oct 2024 16:55:54 -0700 Subject: [PATCH 09/32] Fix indexing logic to correctly handle array with __array_function__ attribute --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 74e9a9689ab..e59bea42daf 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -903,10 +903,10 @@ def as_indexable(array): return PandasIndexingAdapter(array) if is_duck_dask_array(array): return DaskIndexingAdapter(array) - if hasattr(array, "__array_function__"): - return NdArrayLikeIndexingAdapter(array) if hasattr(array, "__array_namespace__"): return ArrayApiIndexingAdapter(array) + if hasattr(array, "__array_function__"): + return NdArrayLikeIndexingAdapter(array) raise TypeError(f"Invalid array type: {type(array)}") From 50791e018d29890a9c23292060f155db6f9b83af Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 18:49:46 -0700 Subject: [PATCH 10/32] update indexing methods to use OuterIndexer type --- xarray/coding/strings.py | 2 +- xarray/core/indexing.py | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 6d8dc03d8de..140f31a17e2 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -252,7 +252,7 @@ def __repr__(self): def _vindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key: _IndexerKey): + def _oindex_get(self, key: indexing.OuterIndexer): return _numpy_char_to_bytes(self.array.oindex[key]) def __getitem__(self, key: _IndexerKey): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index e59bea42daf..86e747aedc4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -577,7 +577,7 @@ def __array__( self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: if Version(np.__version__) >= Version("2.0.0"): - return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) + return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) # type: ignore[call-overload] else: return np.asarray(self.get_duck_array(), dtype=dtype) @@ -688,7 +688,7 @@ def get_duck_array(self): def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(self.array, self._updated_key(indexer)) def _vindex_get(self, indexer: _IndexerKey): @@ -769,7 +769,7 @@ def get_duck_array(self): def _updated_key(self, new_key: ExplicitIndexer): return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) def _vindex_get(self, indexer: _IndexerKey): @@ -819,7 +819,7 @@ def _ensure_copied(self): def get_duck_array(self): return self.array.get_duck_array() - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) def _vindex_get(self, indexer: _IndexerKey): @@ -860,14 +860,16 @@ def __init__(self, array): def _ensure_cached(self): self.array = as_indexable(self.array.get_duck_array()) - def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: - return np.asarray(self.get_duck_array(), dtype=dtype) + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: + return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) def _vindex_get(self, indexer: _IndexerKey): @@ -1402,7 +1404,7 @@ def _chunked_array_with_chunks_hint( for chunk, size in zip(chunks, array.shape, strict=False) ) - return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] + return chunkmanager.from_array(array, new_chunks) def _logical_any(args): @@ -1567,7 +1569,7 @@ def __init__(self, array): def transpose(self, order): return self.array.transpose(order) - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] @@ -1646,7 +1648,7 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array @@ -1685,7 +1687,7 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): try: return self.array[indexer] except NotImplementedError: @@ -1786,7 +1788,7 @@ def _convert_scalar(self, item): def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: _key = key.tuple if isinstance(key, ExplicitIndexer) else key - if isinstance(_key, tuple) and len(_key) == 1: + if len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) (_key,) = _key @@ -1808,7 +1810,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: _IndexerKey + self, indexer: OuterIndexer ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1924,7 +1926,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: _IndexerKey + self, indexer: OuterIndexer ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter From 014e7cf9d6871848cc1345cb2e7fa5814426874f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 18:56:56 -0700 Subject: [PATCH 11/32] remove unnecessary copy argument from __array__ method in MemoryCachedArray --- xarray/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 86e747aedc4..1447f00df05 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -863,7 +863,7 @@ def _ensure_cached(self): def __array__( self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: - return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) + return np.asarray(self.get_duck_array(), dtype=dtype) def get_duck_array(self): self._ensure_cached() From 5e22be6e5baf77f0c77fc7203a046ef4ac3b1616 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:05:00 -0700 Subject: [PATCH 12/32] another attempt at fixing types --- xarray/core/indexing.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 1447f00df05..8fe172f6d25 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -577,7 +577,7 @@ def __array__( self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: if Version(np.__version__) >= Version("2.0.0"): - return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) # type: ignore[call-overload] + return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) else: return np.asarray(self.get_duck_array(), dtype=dtype) @@ -1652,7 +1652,8 @@ def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array - for axis, subkey in reversed(list(enumerate(indexer))): + subkey: Any + for axis, subkey in reversed(list(enumerate(indexer))): # type: ignore value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value @@ -1693,7 +1694,8 @@ def _oindex_get(self, indexer: OuterIndexer): except NotImplementedError: # manual orthogonal indexing value = self.array - for axis, subkey in reversed(list(enumerate(indexer))): + subkey: Any + for axis, subkey in reversed(list(enumerate(indexer))): # type: ignore value = value[(slice(None),) * axis + (subkey,)] return value From 7056aba3d8c3ea1a8c05c9554c745b2aeea1106a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:06:51 -0700 Subject: [PATCH 13/32] remove backend-indexing branch from CI workflows --- .github/workflows/ci-additional.yaml | 2 -- .github/workflows/ci.yaml | 2 -- 2 files changed, 4 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 251a11f7e99..aeac92250b6 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -3,11 +3,9 @@ on: push: branches: - "main" - - "backend-indexing" pull_request: branches: - "main" - - "backend-indexing" workflow_dispatch: # allows you to trigger manually concurrency: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 276629791f0..e0f9489e325 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -3,11 +3,9 @@ on: push: branches: - "main" - - "backend-indexing" pull_request: branches: - "main" - - "backend-indexing" workflow_dispatch: # allows you to trigger manually concurrency: From 43046e88d24b637275c7da7041d7ca0922c30b3a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:09:14 -0700 Subject: [PATCH 14/32] remove unnecessary type ignore comments --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 8fe172f6d25..5b0ea92fd84 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1653,7 +1653,7 @@ def _oindex_get(self, indexer: OuterIndexer): value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer))): # type: ignore + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value @@ -1695,7 +1695,7 @@ def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer))): # type: ignore + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value From 2a7e2f2e7bbecfe9d62531c19459082f03ed6759 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:16:57 -0700 Subject: [PATCH 15/32] fix: update indexing to use tuple from indexer for improved compatibility --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 5b0ea92fd84..8a33ae35709 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1653,7 +1653,7 @@ def _oindex_get(self, indexer: OuterIndexer): value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer))): + for axis, subkey in reversed(list(enumerate(indexer.tuple))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value @@ -1695,7 +1695,7 @@ def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer))): + for axis, subkey in reversed(list(enumerate(indexer.tuple))): value = value[(slice(None),) * axis + (subkey,)] return value From ead425196ac3177207081102d0c059d6d5cc21f2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:35:29 -0700 Subject: [PATCH 16/32] more type hints --- xarray/core/indexing.py | 102 ++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 8a33ae35709..b010cb6ce66 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -209,7 +209,7 @@ def map_index_queries( return merged -def expanded_indexer(key, ndim): +def expanded_indexer(key: Any, ndim: int) -> tuple[Any, ...]: """Given a key for indexing an ndarray, return an equivalent key which is a tuple with length equal to the number of dimensions. @@ -298,7 +298,7 @@ def slice_slice(old_slice: slice, applied_slice: slice, size: int) -> slice: return slice(start, stop, step) -def _index_indexer_1d(old_indexer, applied_indexer, size: int): +def _index_indexer_1d(old_indexer: Any, applied_indexer: Any, size: int) -> Any: if isinstance(applied_indexer, slice) and applied_indexer == slice(None): # shortcut for the usual case return old_indexer @@ -525,15 +525,15 @@ def get_duck_array(self): class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () - def get_duck_array(self): + def get_duck_array(self) -> Any: return self[(slice(None),) * self.ndim] - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) @@ -581,10 +581,10 @@ def __array__( else: return np.asarray(self.get_duck_array(), dtype=dtype) - def get_duck_array(self): + def get_duck_array(self) -> Any: return self.array.get_duck_array() - def __getitem__(self, key: _IndexerKey | slice): + def __getitem__(self, key: _IndexerKey | slice) -> Any: _key = expanded_indexer(key, self.ndim) indexer = self.indexer_cls(_key) @@ -662,7 +662,7 @@ def _updated_key( def shape(self) -> _Shape: return self._shape - def get_duck_array(self): + def get_duck_array(self) -> Any: try: array = apply_indexer(self.array, self.key) except NotImplementedError as _: @@ -685,17 +685,17 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def transpose(self, order): + def transpose(self, order) -> Any: return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) def _vindex_set(self, key: _IndexerKey, value: Any) -> None: @@ -743,7 +743,7 @@ def __init__(self, array: duckarray[Any, Any], key: ExplicitIndexer): def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape - def get_duck_array(self): + def get_duck_array(self) -> Any: try: array = apply_indexer(self.array, self.key) except NotImplementedError as _: @@ -766,23 +766,23 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def _updated_key(self, new_key: ExplicitIndexer): + def _updated_key(self, new_key: ExplicitIndexer) -> VectorizedIndexer: return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> LazilyVectorizedIndexedArray: return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> LazilyVectorizedIndexedArray: return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> Any: # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer): key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) return LazilyIndexedArray(self.array, key) return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) - def transpose(self, order): + def transpose(self, order) -> LazilyVectorizedIndexedArray: key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) @@ -811,24 +811,24 @@ def __init__(self, array: duckarray[Any, Any]): self.array = as_indexable(array) self._copied = False - def _ensure_copied(self): + def _ensure_copied(self) -> None: if not self._copied: self.array = as_indexable(np.array(self.array)) self._copied = True - def get_duck_array(self): + def get_duck_array(self) -> Any: return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: @@ -844,7 +844,7 @@ def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value - def __deepcopy__(self, memo): + def __deepcopy__(self, memo) -> CopyOnWriteArray: # CopyOnWriteArray is used to wrap backend array objects, which might # point to files on disk, so we can't rely on the default deepcopy # implementation. @@ -857,7 +857,7 @@ class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin): def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) - def _ensure_cached(self): + def _ensure_cached(self) -> None: self.array = as_indexable(self.array.get_duck_array()) def __array__( @@ -865,20 +865,20 @@ def __array__( ) -> np.ndarray: return np.asarray(self.get_duck_array(), dtype=dtype) - def get_duck_array(self): + def get_duck_array(self) -> Any: self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: @@ -891,7 +891,7 @@ def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value -def as_indexable(array): +def as_indexable(array: Any): """ This function always returns a ExplicitlyIndexed subclass, so that the vectorized indexing is always possible with the returned @@ -950,7 +950,9 @@ def _outer_to_vectorized_indexer( return VectorizedIndexer(new_key) -def _outer_to_numpy_indexer(indexer: BasicIndexer | OuterIndexer, shape: _Shape): +def _outer_to_numpy_indexer( + indexer: BasicIndexer | OuterIndexer, shape: _Shape +) -> tuple[Any, ...]: """Convert an OuterIndexer into an indexer for NumPy. Parameters @@ -1073,7 +1075,7 @@ def __repr__(self): return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" -def apply_indexer(indexable, indexer: ExplicitIndexer): +def apply_indexer(indexable, indexer: ExplicitIndexer) -> Any: """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): return indexable.vindex[CompatIndexedTuple(indexer.tuple, "vectorized")] @@ -1411,7 +1413,7 @@ def _logical_any(args): return functools.reduce(operator.or_, args) -def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): +def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None) -> Any: key = (k for k in key if not isinstance(k, slice)) chunks_hint = getattr(data, "chunks", None) @@ -1438,7 +1440,7 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): def create_mask( indexer: ExplicitIndexer, shape: _Shape, data: duckarray[Any, Any] | None = None -): +) -> duckarray[bool, Any]: """Create a mask for indexing with a fill-value. Parameters @@ -1566,18 +1568,18 @@ def __init__(self, array): ) self.array = array - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> Any: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: array = NumpyVIndexAdapter(self.array) return array[indexer] - def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer): + def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer) -> Any: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see @@ -1648,19 +1650,19 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> Any: # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer.tuple))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> Any: return self.array[indexer] def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: @@ -1688,21 +1690,21 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> Any: try: return self.array[indexer] except NotImplementedError: # manual orthogonal indexing value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer.tuple))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: return self.array.vindex[indexer] - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> Any: return self.array[indexer] def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: @@ -1719,7 +1721,7 @@ def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) @@ -1768,7 +1770,7 @@ def get_duck_array(self) -> np.ndarray: def shape(self) -> _Shape: return (len(self.array),) - def _convert_scalar(self, item): + def _convert_scalar(self, item) -> Any: if item is pd.NaT: # work around the impossibility of casting NaT with asarray # note: it probably would be better in general to return From 76b2d5abadb5228488d702743f6baa6215da3f49 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 4 Nov 2024 16:44:37 -0800 Subject: [PATCH 17/32] update type hints for `expanded_indexer()` function --- xarray/core/indexing.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b010cb6ce66..92ab48f4b95 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -38,7 +38,14 @@ from xarray.core.indexes import Index from xarray.core.types import Self from xarray.core.variable import Variable - from xarray.namedarray._typing import _Chunks, _IndexerKey, _Shape, duckarray + from xarray.namedarray._typing import ( + _Chunks, + _IndexerKey, + _IndexKey, + _IndexKeys, + _Shape, + duckarray, + ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -209,7 +216,7 @@ def map_index_queries( return merged -def expanded_indexer(key: Any, ndim: int) -> tuple[Any, ...]: +def expanded_indexer(key: _IndexerKey | _IndexKeys, ndim: int) -> _IndexKeys: """Given a key for indexing an ndarray, return an equivalent key which is a tuple with length equal to the number of dimensions. @@ -220,22 +227,22 @@ def expanded_indexer(key: Any, ndim: int) -> tuple[Any, ...]: if not isinstance(key, tuple): # numpy treats non-tuple keys equivalent to tuples of length 1 key = (key,) - new_key = () + new_key: list[_IndexKey] = [] # handling Ellipsis right is a little tricky, see: # https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing found_ellipsis = False for k in key: if k is Ellipsis: if not found_ellipsis: - new_key += (slice(None),) * (ndim + 1 - len(key)) + new_key.extend([slice(None)] * (ndim + 1 - len(key))) found_ellipsis = True else: - new_key += (slice(None),) + new_key.append(slice(None)) else: - new_key += (k,) + new_key.append(k) if len(new_key) > ndim: raise IndexError("too many indices") - new_key += (slice(None),) * (ndim - len(new_key)) + new_key.extend([slice(None)] * (ndim - len(new_key))) return tuple(new_key) From ee81af3502dcf692d2972d4dcc8e0008fb22e302 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Nov 2024 20:56:13 +0000 Subject: [PATCH 18/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 140f31a17e2..1b5eaeb36ae 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -240,7 +240,7 @@ def __init__(self, array): @property def dtype(self): - return np.dtype(f"S{str(self.array.shape[-1])}") + return np.dtype(f"S{self.array.shape[-1]!s}") @property def shape(self) -> tuple[int, ...]: From 0c86622b7549c79034b76c4378fdeaa16fc3aa49 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 10 Nov 2024 20:42:00 -0700 Subject: [PATCH 19/32] Use tuples for indexing --- xarray/backends/common.py | 9 ++- xarray/backends/h5netcdf_.py | 25 +++--- xarray/backends/netCDF4_.py | 25 +++--- xarray/backends/pydap_.py | 27 ++++--- xarray/backends/scipy_.py | 27 ++++--- xarray/backends/zarr.py | 34 ++++----- xarray/coding/strings.py | 4 +- xarray/core/indexing.py | 143 ++++++++++++++++++++++++++++++++--- xarray/namedarray/_typing.py | 3 + 9 files changed, 222 insertions(+), 75 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 6113298c8f5..c2db815fc9e 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -22,6 +22,7 @@ from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence + from xarray.namedarray._typing import _IndexerKey # Create a logger object, but don't add any handlers. Leave that to user code. logger = logging.getLogger(__name__) @@ -219,18 +220,18 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): - __slots__ = () + __slots__ = ("indexing_support",) def get_duck_array(self, dtype: np.typing.DTypeLike = None): - key = indexing.BasicIndexer((slice(None),) * self.ndim) + key = (slice(None),) * self.ndim return self[key] # type: ignore [index] - def _oindex_get(self, key: indexing.OuterIndexer): + def _oindex_get(self, key: _IndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, key: indexing.VectorizedIndexer): + def _vindex_get(self, key: _IndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index b1756330d69..89360a6ebe6 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -44,26 +44,33 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) class H5NetCDFArrayWrapper(BaseNetCDF4Array): + indexing_support = indexing.IndexingSupport.OUTER_1VECTOR + def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) return ds.variables[self.variable_name] - def _oindex_get(self, key: indexing.OuterIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def _vindex_get(self, key: indexing.VectorizedIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def __getitem__(self, key: indexing.BasicIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def __getitem__(self, key: _BasicIndexerKey) -> Any: + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index c097738b11d..8a96317453c 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -49,6 +49,11 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. @@ -89,7 +94,7 @@ def get_array(self, needs_lock=True): class NetCDF4ArrayWrapper(BaseNetCDF4Array): - __slots__ = () + indexing_support = indexing.IndexingSupport.OUTER def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) @@ -100,19 +105,19 @@ def get_array(self, needs_lock=True): variable.set_auto_chartostring(False) return variable - def _oindex_get(self, key: indexing.OuterIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + def _oindex_get(self, key: _OuterIndexerKey): + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def _vindex_get(self, key: indexing.VectorizedIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + def _vindex_get(self, key: _VectorizedIndexerKey): + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def __getitem__(self, key: indexing.BasicIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + def __getitem__(self, key: _BasicIndexerKey): + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 2ce3a579b2d..44b22e07036 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -29,10 +29,17 @@ from io import BufferedIOBase from xarray.core.dataset import Dataset + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) class PydapArrayWrapper(BackendArray): - def __init__(self, array): + indexing_support = indexing.IndexingSupport.BASIC + + def __init__(self, array) -> None: self.array = array @property @@ -43,19 +50,19 @@ def shape(self) -> tuple[int, ...]: def dtype(self): return self.array.dtype - def _oindex_get(self, key: indexing.OuterIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def _vindex_get(self, key: indexing.VectorizedIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def __getitem__(self, key: indexing.BasicIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + def __getitem__(self, key: _BasicIndexerKey) -> Any: + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index c9991b15a13..e25ff829d1f 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -38,6 +38,11 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") @@ -56,6 +61,8 @@ def _decode_attrs(d): class ScipyArrayWrapper(BackendArray): + indexing_support = indexing.IndexingSupport.OUTER_1VECTOR + def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -85,25 +92,25 @@ def _getitem(self, key): data = self.get_variable(needs_lock=False).data return data[key] - def _vindex_get(self, key: indexing.VectorizedIndexer): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + data = indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) return self._finalize_result(data) - def _oindex_get(self, key: indexing.OuterIndexer): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + data = indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) return self._finalize_result(data) - def __getitem__(self, key): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def __getitem__(self, key: _BasicIndexerKey) -> Any: + data = indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) return self._finalize_result(data) - def __setitem__(self, key, value): + def __setitem__(self, key, value) -> None: with self.datastore.lock: data = self.get_variable(needs_lock=False) try: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index aef8ffd0b63..dc91ee57629 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -43,6 +43,11 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) def _get_mappers(*, storage_options, store, chunk_store): @@ -182,7 +187,7 @@ def encode_zarr_attr_value(value): class ZarrArrayWrapper(BackendArray): - __slots__ = ("_array", "dtype", "shape") + indexing_support = indexing.IndexingSupport.VECTORIZED def __init__(self, zarr_array): # some callers attempt to evaluate an array if an `array` property exists on the object. @@ -205,37 +210,28 @@ def __init__(self, zarr_array): def get_array(self): return self._array - def _oindex_get(self, key: indexing.OuterIndexer): + def _oindex_get(self, key: _OuterIndexerKey) -> Any: def raw_indexing_method(key): return self._array.oindex[key] - return indexing.explicit_indexing_adapter( - key, - self._array.shape, - indexing.IndexingSupport.VECTORIZED, - raw_indexing_method, + return indexing.outer_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method ) - def _vindex_get(self, key: indexing.VectorizedIndexer): + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: def raw_indexing_method(key): return self._array.vindex[key] - return indexing.explicit_indexing_adapter( - key, - self._array.shape, - indexing.IndexingSupport.VECTORIZED, - raw_indexing_method, + return indexing.vectorized_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method ) - def __getitem__(self, key: indexing.BasicIndexer): + def __getitem__(self, key: _BasicIndexerKey) -> Any: def raw_indexing_method(key): return self._array[key] - return indexing.explicit_indexing_adapter( - key, - self._array.shape, - indexing.IndexingSupport.VECTORIZED, - raw_indexing_method, + return indexing.basic_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method ) # if self.ndim == 0: diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 1b5eaeb36ae..fd2e396a17b 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -256,10 +256,8 @@ def _oindex_get(self, key: indexing.OuterIndexer): return _numpy_char_to_bytes(self.array.oindex[key]) def __getitem__(self, key: _IndexerKey): - from xarray.core.indexing import BasicIndexer - # require slicing the last dimension completely indexer = indexing.expanded_indexer(key, self.array.ndim) if indexer[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[BasicIndexer(indexer)]) + return _numpy_char_to_bytes(self.array[indexer]) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index f98774581b1..716ad9670de 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1023,6 +1023,124 @@ class IndexingSupport(enum.Enum): VECTORIZED = 3 +def _finish_indexing( + raw_indexing_method: Callable[..., Any], + *, + raw_key, + numpy_indices, +) -> Any: + result = raw_indexing_method(raw_key.tuple) + if numpy_indices.tuple: + # index the loaded np.ndarray + result = apply_indexer(NumpyIndexingAdapter(result), numpy_indices) + return result + + +def basic_indexing_adapter( + key: _IndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit indexing by delegating to a raw indexing method. + + Outer and/or vectorized indexers are supported by indexing a second time + with a NumPy array. + + Parameters + ---------- + key : ExplicitIndexer + Explicit indexing object. + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_outer_indexer( + BasicIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +def outer_indexing_adapter( + key: _IndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit indexing by delegating to a raw indexing method. + + Outer and/or vectorized indexers are supported by indexing a second time + with a NumPy array. + + Parameters + ---------- + key : ExplicitIndexer + Explicit indexing object. + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_outer_indexer( + OuterIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +def vectorized_indexing_adapter( + key: _IndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit indexing by delegating to a raw indexing method. + + Outer and/or vectorized indexers are supported by indexing a second time + with a NumPy array. + + Parameters + ---------- + key : ExplicitIndexer + Explicit indexing object. + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_vectorized_indexer( + VectorizedIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + def explicit_indexing_adapter( key: ExplicitIndexer, shape: _Shape, @@ -1050,13 +1168,13 @@ def explicit_indexing_adapter( ------- Indexing result, in the form of a duck numpy-array. """ - raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support) - result = raw_indexing_method(raw_key.tuple) - if numpy_indices.tuple: - # index the loaded np.ndarray - indexable = NumpyIndexingAdapter(result) - result = apply_indexer(indexable, numpy_indices) - return result + if isinstance(key, VectorizedIndexer): + return vectorized_indexing_adapter(key.tuple, shape, indexing_support) + elif isinstance(key, OuterIndexer): + return outer_indexing_adapter(key.tuple, shape, indexing_support) + elif isinstance(key, BasicIndexer): + return basic_indexing_adapter(key.tuple, shape, indexing_support) + raise TypeError(f"unexpected key type: {key}") class CompatIndexedTuple(tuple): @@ -1085,11 +1203,16 @@ def __repr__(self): def apply_indexer(indexable, indexer: ExplicitIndexer) -> Any: """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[CompatIndexedTuple(indexer.tuple, "vectorized")] + return indexable.vindex[indexer.tuple] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[CompatIndexedTuple(indexer.tuple, "outer")] + return indexable.oindex[indexer.tuple] + elif isinstance(indexer, BasicIndexer): + return indexable[indexer.tuple] else: - return indexable[CompatIndexedTuple(indexer.tuple, "basic")] + raise TypeError( + f"Received indexer of type {type(indexer)!r}. " + "Expected BasicIndexer, OuterIndexer, or VectorizedIndexer" + ) def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 1c26924a67d..a062882ae43 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,6 +95,9 @@ def dtype(self) -> _DType_co: ... _IndexKeys = tuple[_IndexKey, ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] _IndexerKey = tuple[Any, ...] +_BasicIndexerKey = tuple[Any, ...] +_OuterIndexerKey = tuple[Any, ...] +_VectorizedIndexerKey = tuple[Any, ...] _AttrsLike = Union[Mapping[Any, Any], None] From 0f54b64e4f4226564e4975afe7b2f8ba0f03e9ea Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 17:51:16 -0700 Subject: [PATCH 20/32] Remove CompatIndexedTuple --- xarray/core/indexing.py | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 716ad9670de..674dfe5c0e7 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import TYPE_CHECKING, Any, Literal, overload +from typing import TYPE_CHECKING, Any, overload import numpy as np import pandas as pd @@ -1177,29 +1177,6 @@ def explicit_indexing_adapter( raise TypeError(f"unexpected key type: {key}") -class CompatIndexedTuple(tuple): - """ - A tuple subclass used to transition existing backend implementations towards the use of raw tuples - for indexing by carrying additional metadata about the type of indexing being - performed ('basic', 'vectorized', or 'outer'). This class serves as a bridge, allowing - backend arrays that currently expect this metadata to function correctly while - maintaining the outward behavior of a regular tuple. - - This class is particularly useful during the phase where the backend implementations are - not yet capable of directly accepting raw tuples without additional context about - the indexing type. It ensures that these backends can still correctly interpret and - process indexing operations by providing them with the necessary contextual information. - """ - - def __new__(cls, iterable, indexer_type: Literal["basic", "vectorized", "outer"]): - obj = super().__new__(cls, iterable) - obj.indexer_type = indexer_type # type: ignore[attr-defined] - return obj - - def __repr__(self): - return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" - - def apply_indexer(indexable, indexer: ExplicitIndexer) -> Any: """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): @@ -1226,19 +1203,8 @@ def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: def decompose_indexer( - indexer: ExplicitIndexer | CompatIndexedTuple, - shape: _Shape, - indexing_support: IndexingSupport, + indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: - if isinstance(indexer, CompatIndexedTuple): - # recreate the indexer object from the tuple and the type of indexing. - # This is necessary to ensure that the backend array can correctly interpret the indexing operation. - if indexer.indexer_type == "vectorized": # type: ignore[attr-defined] - indexer = VectorizedIndexer(indexer) - elif indexer.indexer_type == "outer": # type: ignore[attr-defined] - indexer = OuterIndexer(indexer) - else: - indexer = BasicIndexer(indexer) if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, BasicIndexer | OuterIndexer): From b60accd36de49ebf20ed4d8fd42af79e7028d66a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 20:36:07 -0700 Subject: [PATCH 21/32] Some typing work --- xarray/coding/strings.py | 15 +++- xarray/core/indexing.py | 147 ++++++++++++++++++----------------- xarray/namedarray/_typing.py | 8 +- 3 files changed, 91 insertions(+), 79 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index cd190e769af..37b9e548e0f 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -3,6 +3,7 @@ from __future__ import annotations from functools import partial +from typing import TYPE_CHECKING import numpy as np @@ -17,12 +18,18 @@ from xarray.core import indexing from xarray.core.utils import module_available from xarray.core.variable import Variable -from xarray.namedarray._typing import _IndexerKey from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") +if TYPE_CHECKING: + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) + def create_vlen_dtype(element_type): if element_type not in (str, bytes): @@ -249,13 +256,13 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key: _IndexerKey): + def _vindex_get(self, key: _VectorizedIndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key: indexing.OuterIndexer): + def _oindex_get(self, key: _OuterIndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key: _IndexerKey): + def __getitem__(self, key: _BasicIndexerKey): # require slicing the last dimension completely indexer = indexing.expanded_indexer(key, self.array.ndim) if indexer[-1] != slice(None): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 674dfe5c0e7..c02e43c3eb4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -39,11 +39,14 @@ from xarray.core.types import Self from xarray.core.variable import Variable from xarray.namedarray._typing import ( + _BasicIndexerKey, _Chunks, _IndexerKey, _IndexKey, _IndexKeys, + _OuterIndexerKey, _Shape, + _VectorizedIndexerKey, duckarray, ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -394,7 +397,7 @@ class BasicIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key: tuple[int | np.integer | slice, ...]): + def __init__(self, key: _BasicIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -426,9 +429,7 @@ class OuterIndexer(ExplicitIndexer): def __init__( self, - key: tuple[ - int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... - ], + key: _OuterIndexerKey, ): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -473,7 +474,7 @@ class VectorizedIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...]): + def __init__(self, key: _VectorizedIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -482,7 +483,7 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ... for k in key: if isinstance(k, slice): k = as_integer_slice(k) - elif is_duck_dask_array(k): + elif is_duck_dask_array(k): # type: ignore[arg-type] raise ValueError( "Vectorized indexing with Dask arrays is not supported. " "Please pass a numpy array by calling ``.compute``. " @@ -535,22 +536,22 @@ class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): def get_duck_array(self) -> Any: return self[(slice(None),) * self.ndim] - def _oindex_get(self, indexer: OuterIndexer) -> Any: + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -591,7 +592,7 @@ def __array__( def get_duck_array(self) -> Any: return self.array.get_duck_array() - def __getitem__(self, key: _IndexerKey | slice) -> Any: + def __getitem__(self, key) -> Any: _key = expanded_indexer(key, self.ndim) indexer = self.indexer_cls(_key) @@ -695,27 +696,27 @@ def get_duck_array(self) -> Any: def transpose(self, order) -> Any: return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer) -> LazilyIndexedArray: + def _oindex_get(self, indexer: _OuterIndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: _IndexerKey) -> LazilyIndexedArray: + def __getitem__(self, indexer: _BasicIndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_set(self, key: _IndexerKey, value: Any) -> None: + def _vindex_set(self, key: _VectorizedIndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." ) - def _oindex_set(self, key: _IndexerKey, value: Any) -> None: + def _oindex_set(self, key: _OuterIndexerKey, value: Any) -> None: full_key = self._updated_key(OuterIndexer(key)) self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: _IndexerKey, value: Any) -> None: + def __setitem__(self, key: _BasicIndexerKey, value: Any) -> None: full_key = self._updated_key(BasicIndexer(key)) self.array[full_key.tuple] = value @@ -776,13 +777,15 @@ def get_duck_array(self) -> Any: def _updated_key(self, new_key: ExplicitIndexer) -> VectorizedIndexer: return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer) -> LazilyVectorizedIndexedArray: + def _oindex_get(self, indexer: _OuterIndexerKey) -> LazilyVectorizedIndexedArray: return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: _IndexerKey) -> LazilyVectorizedIndexedArray: + def _vindex_get( + self, indexer: _VectorizedIndexerKey + ) -> LazilyVectorizedIndexedArray: return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) - def __getitem__(self, indexer: _IndexerKey) -> Any: + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer): key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) @@ -826,29 +829,28 @@ def _ensure_copied(self) -> None: def get_duck_array(self) -> Any: return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer) -> CopyOnWriteArray: + def _oindex_get(self, indexer: _OuterIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: _IndexerKey) -> CopyOnWriteArray: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: _IndexerKey) -> CopyOnWriteArray: + def __getitem__(self, indexer: _BasicIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order) -> Any: return self.array.transpose(order) - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self._ensure_copied() - self.array[indexer] = value def __deepcopy__(self, memo) -> CopyOnWriteArray: @@ -876,25 +878,25 @@ def get_duck_array(self) -> Any: self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer) -> MemoryCachedArray: + def _oindex_get(self, indexer: _OuterIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: _IndexerKey) -> MemoryCachedArray: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: _IndexerKey) -> MemoryCachedArray: + def __getitem__(self, indexer: _BasicIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order) -> Any: return self.array.transpose(order) - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self.array[indexer] = value @@ -943,7 +945,7 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 - new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () for k, size in zip(key, shape, strict=True): if isinstance(k, integer_types): new_key += (np.array(k).reshape((1,) * n_dim),) @@ -1031,8 +1033,7 @@ def _finish_indexing( ) -> Any: result = raw_indexing_method(raw_key.tuple) if numpy_indices.tuple: - # index the loaded np.ndarray - result = apply_indexer(NumpyIndexingAdapter(result), numpy_indices) + result = apply_indexer(as_indexable(result), numpy_indices) return result @@ -1042,15 +1043,15 @@ def basic_indexing_adapter( indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], ) -> Any: - """Support explicit indexing by delegating to a raw indexing method. + """Support explicit basic indexing by delegating to a raw indexing method. Outer and/or vectorized indexers are supported by indexing a second time with a NumPy array. Parameters ---------- - key : ExplicitIndexer - Explicit indexing object. + key : IndexerKey + Tuple indexer shape : Tuple[int, ...] Shape of the indexed array. indexing_support : IndexingSupport enum @@ -1077,15 +1078,12 @@ def outer_indexing_adapter( indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], ) -> Any: - """Support explicit indexing by delegating to a raw indexing method. - - Outer and/or vectorized indexers are supported by indexing a second time - with a NumPy array. + """Support explicit outer indexing by delegating to a raw indexing method. Parameters ---------- - key : ExplicitIndexer - Explicit indexing object. + key : IndexerKey + tuple indexer shape : Tuple[int, ...] Shape of the indexed array. indexing_support : IndexingSupport enum @@ -1112,14 +1110,11 @@ def vectorized_indexing_adapter( indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], ) -> Any: - """Support explicit indexing by delegating to a raw indexing method. - - Outer and/or vectorized indexers are supported by indexing a second time - with a NumPy array. + """Support explicit vectorized indexing by delegating to a raw indexing method. Parameters ---------- - key : ExplicitIndexer + key : IndexerKey Explicit indexing object. shape : Tuple[int, ...] Shape of the indexed array. @@ -1168,16 +1163,25 @@ def explicit_indexing_adapter( ------- Indexing result, in the form of a duck numpy-array. """ + # TODO: raise PendingDeprecationWarning here. if isinstance(key, VectorizedIndexer): - return vectorized_indexing_adapter(key.tuple, shape, indexing_support) + return vectorized_indexing_adapter( + key.tuple, shape, indexing_support, raw_indexing_method + ) elif isinstance(key, OuterIndexer): - return outer_indexing_adapter(key.tuple, shape, indexing_support) + return outer_indexing_adapter( + key.tuple, shape, indexing_support, raw_indexing_method + ) elif isinstance(key, BasicIndexer): - return basic_indexing_adapter(key.tuple, shape, indexing_support) + return basic_indexing_adapter( + key.tuple, shape, indexing_support, raw_indexing_method + ) raise TypeError(f"unexpected key type: {key}") -def apply_indexer(indexable, indexer: ExplicitIndexer) -> Any: +def apply_indexer( + indexable: ExplicitlyIndexedNDArrayMixin, indexer: ExplicitIndexer +) -> Any: """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): return indexable.vindex[indexer.tuple] @@ -1285,9 +1289,9 @@ def _decompose_vectorized_indexer( return indexer, BasicIndexer(()) backend_indexer_elems: tuple[ - int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... + int | np.integer | slice | np.ndarray[Any, np.dtype[np.unsignedinteger]], ... ] = () - np_indexer_elems: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () + np_indexer_elems: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () # convert negative indices indexer_elems = [ np.where(k < 0, k + s, k) if isinstance(k, np.ndarray) else k @@ -1478,7 +1482,7 @@ def _arrayize_vectorized_indexer( arrays = [v for v in indexer.tuple if isinstance(v, np.ndarray)] n_dim = arrays[0].ndim if len(arrays) > 0 else 0 i_dim = 0 - new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () for v, size in zip(indexer.tuple, shape, strict=True): if isinstance(v, np.ndarray): new_key += (np.reshape(v, v.shape + (1,) * len(slices)),) @@ -1494,12 +1498,12 @@ def _chunked_array_with_chunks_hint( ): """Create a chunked array using the chunks hint for dimensions of size > 1.""" - if len(chunks) < array.ndim: + if len(chunks) != array.ndim: raise ValueError("not enough chunks in hint") new_chunks: _Chunks = tuple( chunk if size > 1 else 1 - for chunk, size in zip(chunks, array.shape, strict=False) + for chunk, size in zip(chunks, array.shape, strict=True) ) return chunkmanager.from_array(array, new_chunks) @@ -1667,7 +1671,7 @@ def __init__(self, array): def transpose(self, order) -> Any: return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer) -> Any: + def _oindex_get(self, indexer: _IndexerKey) -> Any: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] @@ -1746,9 +1750,8 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer) -> Any: + def _oindex_get(self, indexer: _IndexerKey) -> Any: # manual orthogonal indexing (implemented like DaskIndexingAdapter) - value = self.array subkey: Any for axis, subkey in reversed(list(enumerate(indexer))): @@ -1786,7 +1789,7 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer) -> Any: + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: try: return self.array[indexer] except NotImplementedError: @@ -1797,13 +1800,13 @@ def _oindex_get(self, indexer: OuterIndexer) -> Any: value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: return self.array.vindex[indexer] - def __getitem__(self, indexer: _IndexerKey) -> Any: + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: return self.array[indexer] - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( @@ -1811,10 +1814,10 @@ def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: ) self.array[indexer] = value - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self.array[indexer] = value def transpose(self, order) -> Any: @@ -1910,7 +1913,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -2026,7 +2029,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _OuterIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -2040,7 +2043,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: _IndexerKey + self, indexer: _VectorizedIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -2053,7 +2056,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _BasicIndexerKey): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index a062882ae43..b0cf7e2fd40 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,9 +95,11 @@ def dtype(self) -> _DType_co: ... _IndexKeys = tuple[_IndexKey, ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] _IndexerKey = tuple[Any, ...] -_BasicIndexerKey = tuple[Any, ...] -_OuterIndexerKey = tuple[Any, ...] -_VectorizedIndexerKey = tuple[Any, ...] +_BasicIndexerKey = tuple[int | np.integer | slice, ...] +_OuterIndexerKey = tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.integer]], ... +] +_VectorizedIndexerKey = tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] _AttrsLike = Union[Mapping[Any, Any], None] From 222c5c2bb4940ca5aeaddb5684251cc2d46d48b1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 20:50:00 -0700 Subject: [PATCH 22/32] more typing --- xarray/backends/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 4f1febf228e..d9ed89901a9 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -21,7 +21,7 @@ if TYPE_CHECKING: from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence - from xarray.namedarray._typing import _IndexerKey + from xarray.namedarray._typing import _OuterIndexerKey, _VectorizedIndexerKey # Create a logger object, but don't add any handlers. Leave that to user code. logger = logging.getLogger(__name__) @@ -256,12 +256,12 @@ def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = (slice(None),) * self.ndim return self[key] # type: ignore [index] - def _oindex_get(self, key: _IndexerKey) -> Any: + def _oindex_get(self, key: _OuterIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, key: _IndexerKey) -> Any: + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) From 810b8224ebfeb6912f5e614b14e794ef6c0c9545 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:05:29 -0700 Subject: [PATCH 23/32] Fix test --- xarray/core/indexing.py | 6 +++--- xarray/tests/test_indexing.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c02e43c3eb4..c5eaf036027 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -946,7 +946,7 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () - for k, size in zip(key, shape, strict=True): + for k, size in zip(key, shape, strict=False): if isinstance(k, integer_types): new_key += (np.array(k).reshape((1,) * n_dim),) else: # np.ndarray or slice @@ -1498,12 +1498,12 @@ def _chunked_array_with_chunks_hint( ): """Create a chunked array using the chunks hint for dimensions of size > 1.""" - if len(chunks) != array.ndim: + if len(chunks) < array.ndim: raise ValueError("not enough chunks in hint") new_chunks: _Chunks = tuple( chunk if size > 1 else 1 - for chunk, size in zip(chunks, array.shape, strict=True) + for chunk, size in zip(chunks, array.shape, strict=False) ) return chunkmanager.from_array(array, new_chunks) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index b48fa76f589..fcaa637c332 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -842,13 +842,14 @@ def test_create_mask_basic_indexer() -> None: np.testing.assert_array_equal(False, actual) +@requires_dask def test_create_mask_dask() -> None: - da = pytest.importorskip("dask.array") + import dask.array as da indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2]))) expected = np.array(2 * [[False, True, False]]) actual = indexing.create_mask( - indexer, (5, 5, 5), da.empty((2, 3), chunks=((1, 1), (2, 1))) + indexer, (5, 5, 5), da.empty((2, 3, 3), chunks=((1, 1), (2, 1), (3,))) ) assert actual.chunks == ((1, 1), (2, 1)) np.testing.assert_array_equal(expected, actual) From a4149654996549e955d48524243a60cd0d9c6db3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:12:50 -0700 Subject: [PATCH 24/32] strict=True --- xarray/core/indexing.py | 19 +++++++++++-------- xarray/tests/test_indexing.py | 8 ++++---- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c5eaf036027..738cc1252d2 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -946,7 +946,7 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () - for k, size in zip(key, shape, strict=False): + for k, size in zip(key, shape, strict=True): if isinstance(k, integer_types): new_key += (np.array(k).reshape((1,) * n_dim),) else: # np.ndarray or slice @@ -1375,7 +1375,7 @@ def _decompose_outer_indexer( assert isinstance(indexer, OuterIndexer | BasicIndexer) if indexing_support == IndexingSupport.VECTORIZED: - for k, s in zip(indexer.tuple, shape, strict=False): + for k, s in zip(indexer.tuple, shape, strict=True): if isinstance(k, slice): # If it is a slice, then we will slice it as-is # (but make its step positive) in the backend, @@ -1390,7 +1390,7 @@ def _decompose_outer_indexer( # make indexer positive pos_indexer: tuple[np.ndarray | int | np.number, ...] = () - for k, s in zip(indexer.tuple, shape, strict=False): + for k, s in zip(indexer.tuple, shape, strict=True): if isinstance(k, np.ndarray): pos_indexer += (np.where(k < 0, k + s, k),) elif isinstance(k, integer_types) and k < 0: @@ -1412,7 +1412,7 @@ def _decompose_outer_indexer( ] array_index = np.argmax(np.array(gains)) if len(gains) > 0 else None - for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=False)): + for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=True)): if isinstance(k, np.ndarray) and i != array_index: # np.ndarray key is converted to slice that covers the entire # entries of this key. @@ -1433,7 +1433,7 @@ def _decompose_outer_indexer( return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) if indexing_support == IndexingSupport.OUTER: - for k, s in zip(indexer_elems, shape, strict=False): + for k, s in zip(indexer_elems, shape, strict=True): if isinstance(k, slice): # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) @@ -1455,7 +1455,7 @@ def _decompose_outer_indexer( # basic indexer assert indexing_support == IndexingSupport.BASIC - for k, s in zip(indexer_elems, shape, strict=False): + for k, s in zip(indexer_elems, shape, strict=True): if isinstance(k, np.ndarray): # np.ndarray key is converted to slice that covers the entire # entries of this key. @@ -1503,7 +1503,7 @@ def _chunked_array_with_chunks_hint( new_chunks: _Chunks = tuple( chunk if size > 1 else 1 - for chunk, size in zip(chunks, array.shape, strict=False) + for chunk, size in zip(chunks, array.shape, strict=True) ) return chunkmanager.from_array(array, new_chunks) @@ -1522,6 +1522,9 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None) -> A if isinstance(k, np.ndarray): if is_chunked_array(data): # type: ignore[arg-type] chunkmanager = get_chunked_array_type(data) + # TODO: the chunks_hint is the chunks for the whole array, + # and has nothing to do with the axes indexed by `k` + # This is why we need to use `strict-False` :/ new_keys += ( _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager), ) @@ -1570,7 +1573,7 @@ def create_mask( base_mask = _masked_result_drop_slice(key, data) slice_shape = tuple( np.arange(*k.indices(size)).size - for k, size in zip(key, shape, strict=False) + for k, size in zip(key, shape, strict=True) if isinstance(k, slice) ) expanded_mask = base_mask[(Ellipsis,) + (np.newaxis,) * len(slice_shape)] diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index fcaa637c332..ada0db889ee 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -633,7 +633,7 @@ def test_arrayize_vectorized_indexer(self) -> None: np.testing.assert_array_equal(b, np.arange(5)[:, np.newaxis]) -def get_indexers(shape, mode): +def get_indexers(shape: tuple[int, ...], mode) -> indexing.ExplicitIndexer: if mode == "vectorized": indexed_shape = (3, 4) indexer = tuple(np.random.randint(0, s, size=indexed_shape) for s in shape) @@ -662,7 +662,7 @@ def get_indexers(shape, mode): return indexing.BasicIndexer(tuple(indexer)) elif mode == "basic1": # basic indexer - return indexing.BasicIndexer((3,)) + return indexing.BasicIndexer((2,) * len(shape)) elif mode == "basic2": # basic indexer indexer = [0, 2, 4] @@ -821,14 +821,14 @@ def test_create_mask_outer_indexer() -> None: def test_create_mask_vectorized_indexer() -> None: indexer = indexing.VectorizedIndexer((np.array([0, -1, 2]), np.array([0, 1, -1]))) expected = np.array([False, True, True]) - actual = indexing.create_mask(indexer, (5,)) + actual = indexing.create_mask(indexer, (5, 5)) np.testing.assert_array_equal(expected, actual) indexer = indexing.VectorizedIndexer( (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1])) ) expected = np.array([[False, True, True]] * 2).T - actual = indexing.create_mask(indexer, (5, 2)) + actual = indexing.create_mask(indexer, (5, 2, 5)) np.testing.assert_array_equal(expected, actual) From 0b99aea25eba1c736bf06c1ab9a9b2afc4d74537 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:21:24 -0700 Subject: [PATCH 25/32] more typing --- xarray/core/indexing.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 738cc1252d2..fdc35948a88 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1038,7 +1038,7 @@ def _finish_indexing( def basic_indexing_adapter( - key: _IndexerKey, + key: _BasicIndexerKey, shape: _Shape, indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], @@ -1073,7 +1073,7 @@ def basic_indexing_adapter( def outer_indexing_adapter( - key: _IndexerKey, + key: _OuterIndexerKey, shape: _Shape, indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], @@ -1105,7 +1105,7 @@ def outer_indexing_adapter( def vectorized_indexing_adapter( - key: _IndexerKey, + key: _VectorizedIndexerKey, shape: _Shape, indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], @@ -1674,15 +1674,15 @@ def __init__(self, array): def transpose(self, order) -> Any: return self.array.transpose(order) - def _oindex_get(self, indexer: _IndexerKey) -> Any: + def _oindex_get(self, indexer: _OuterIndexerKey) -> np.ndarray: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> np.ndarray: array = NumpyVIndexAdapter(self.array) return array[indexer] - def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer) -> Any: + def __getitem__(self, indexer: _BasicIndexerKey) -> np.ndarray: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see @@ -1707,15 +1707,15 @@ def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: _IndexerKey | ExplicitIndexer, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see @@ -1753,7 +1753,7 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: _IndexerKey) -> Any: + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array subkey: Any @@ -1761,19 +1761,19 @@ def _oindex_get(self, indexer: _IndexerKey) -> Any: value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: _IndexerKey) -> Any: + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: return self.array[indexer] - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self.array[indexer] = value - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self.array[indexer] = value def transpose(self, order): @@ -1916,7 +1916,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: _IndexerKey + self, indexer: _OuterIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1935,7 +1935,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: _IndexerKey + self, indexer: _VectorizedIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1954,7 +1954,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: _IndexerKey + self, indexer: _BasicIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter From 2ceaeac934a693aa36be192c29fab69c8845ea34 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:21:58 -0700 Subject: [PATCH 26/32] fix --- xarray/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index fdc35948a88..380700eb72e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1503,7 +1503,7 @@ def _chunked_array_with_chunks_hint( new_chunks: _Chunks = tuple( chunk if size > 1 else 1 - for chunk, size in zip(chunks, array.shape, strict=True) + for chunk, size in zip(chunks, array.shape, strict=False) ) return chunkmanager.from_array(array, new_chunks) From 749da0bc0de71580230eef0152883929d9305c7e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:28:42 -0700 Subject: [PATCH 27/32] some more fixes --- xarray/core/indexing.py | 4 ++-- xarray/tests/test_indexing.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 380700eb72e..e6e2848b157 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -478,7 +478,7 @@ def __init__(self, key: _VectorizedIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () ndim = None for k in key: if isinstance(k, slice): @@ -1573,7 +1573,7 @@ def create_mask( base_mask = _masked_result_drop_slice(key, data) slice_shape = tuple( np.arange(*k.indices(size)).size - for k, size in zip(key, shape, strict=True) + for k, size in zip(key, shape, strict=False) if isinstance(k, slice) ) expanded_mask = base_mask[(Ellipsis,) + (np.newaxis,) * len(slice_shape)] diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index ada0db889ee..5e1572cfa20 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -634,6 +634,7 @@ def test_arrayize_vectorized_indexer(self) -> None: def get_indexers(shape: tuple[int, ...], mode) -> indexing.ExplicitIndexer: + indexer: tuple[Any, ...] if mode == "vectorized": indexed_shape = (3, 4) indexer = tuple(np.random.randint(0, s, size=indexed_shape) for s in shape) @@ -859,7 +860,7 @@ def test_create_mask_dask() -> None: ) expected = np.array([[False, True, True]] * 2).T actual = indexing.create_mask( - indexer_vec, (5, 2), da.empty((3, 2), chunks=((3,), (2,))) + indexer_vec, (3, 2), da.empty((3, 2, 3), chunks=((3,), (2,), (3,))) ) assert isinstance(actual, da.Array) np.testing.assert_array_equal(expected, actual) From f58262a426ae263f40cb37c80823073e21cbe0be Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Nov 2024 07:33:59 -0700 Subject: [PATCH 28/32] little more type narrowing --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index e6e2848b157..a27e1c0543d 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -796,7 +796,7 @@ def transpose(self, order) -> LazilyVectorizedIndexedArray: key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." @@ -1694,7 +1694,7 @@ def __getitem__(self, indexer: _BasicIndexerKey) -> np.ndarray: ) return array[key] - def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: + def _safe_setitem(self, array, key: _BasicIndexerKey, value: Any) -> None: try: array[key] = value except ValueError as exc: From dcd3ac9b396b2bdbb823024ca30d87ac187ef0c8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Nov 2024 09:14:20 -0700 Subject: [PATCH 29/32] Refactor backend indexing tests --- xarray/tests/test_backends.py | 274 +++++++++++++++++----------------- 1 file changed, 138 insertions(+), 136 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index fd866cae5ee..021cf5df1d4 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -351,7 +351,144 @@ def test_dtype_coercion_error(self) -> None: ds.to_netcdf(path, format=format) -class DatasetIOBase: +class BackendIndexingTestsMixin: + def test_orthogonal_indexing(self) -> None: + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + indexers = {"dim1": [1, 2, 0], "dim2": [3, 2, 0, 3], "dim3": np.arange(5)} + expected = in_memory.isel(indexers) + actual = on_disk.isel(**indexers) + # make sure the array is not yet loaded into memory + assert not actual["var1"].variable._in_memory + assert_identical(expected, actual) + # do it twice, to make sure we're switched from orthogonal -> numpy + # when we cached the values + actual = on_disk.isel(**indexers) + assert_identical(expected, actual) + + def test_vectorized_indexing(self) -> None: + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + indexers = { + "dim1": DataArray([0, 2, 0], dims="a"), + "dim2": DataArray([0, 2, 3], dims="a"), + } + expected = in_memory.isel(indexers) + actual = on_disk.isel(**indexers) + # make sure the array is not yet loaded into memory + assert not actual["var1"].variable._in_memory + assert_identical(expected, actual.load()) + # do it twice, to make sure we're switched from + # vectorized -> numpy when we cached the values + actual = on_disk.isel(**indexers) + assert_identical(expected, actual) + + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(ind) + expected = expected.isel(ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + + # two-staged vectorized-indexing + indexers2 = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": DataArray([[0, 4], [1, 3], [2, 2]], dims=["a", "b"]), + }, + {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, + ] + multiple_indexing(indexers2) + + # vectorized-slice mixed + indexers3 = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(None, 10), + } + ] + multiple_indexing(indexers3) + + # vectorized-integer mixed + indexers4 = [ + {"dim3": 0}, + {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, + {"a": slice(None, None, 2)}, + ] + multiple_indexing(indexers4) + + # vectorized-integer mixed + indexers5 = [ + {"dim3": 0}, + {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, + {"a": 1, "b": 0}, + ] + multiple_indexing(indexers5) + + def test_vectorized_indexing_negative_step(self) -> None: + # use dask explicitly when present + open_kwargs: dict[str, Any] | None + if has_dask: + open_kwargs = {"chunks": {}} + else: + open_kwargs = None + in_memory = create_test_data() + + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(ind) + expected = expected.isel(ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + + # with negative step slice. + indexers = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(-1, 1, -1), + } + ] + multiple_indexing(indexers) + + # with negative step slice. + indexers = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(-1, 1, -2), + } + ] + multiple_indexing(indexers) + + def test_outer_indexing_reversed(self) -> None: + # regression test for GH6560 + ds = xr.Dataset( + {"z": (("t", "p", "y", "x"), np.ones((1, 1, 31, 40)))}, + ) + + with self.roundtrip(ds) as on_disk: + subset = on_disk.isel(t=[0], p=0).z[:, ::10, ::10][:, ::-1, :] + assert subset.sizes == subset.load().sizes + + def test_isel_dataarray(self) -> None: + # Make sure isel works lazily. GH:issue:1688 + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + expected = in_memory.isel(dim2=in_memory["dim2"] < 3) + actual = on_disk.isel(dim2=on_disk["dim2"] < 3) + assert_identical(expected, actual) + + +class DatasetIOBase(BackendIndexingTestsMixin): engine: T_NetcdfEngine | None = None file_format: T_NetcdfTypes | None = None @@ -695,141 +832,6 @@ def test_roundtrip_boolean_dtype(self) -> None: assert_identical(original, actual2) assert actual2["x"].dtype == "bool" - def test_orthogonal_indexing(self) -> None: - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - indexers = {"dim1": [1, 2, 0], "dim2": [3, 2, 0, 3], "dim3": np.arange(5)} - expected = in_memory.isel(indexers) - actual = on_disk.isel(**indexers) - # make sure the array is not yet loaded into memory - assert not actual["var1"].variable._in_memory - assert_identical(expected, actual) - # do it twice, to make sure we're switched from orthogonal -> numpy - # when we cached the values - actual = on_disk.isel(**indexers) - assert_identical(expected, actual) - - def test_vectorized_indexing(self) -> None: - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - indexers = { - "dim1": DataArray([0, 2, 0], dims="a"), - "dim2": DataArray([0, 2, 3], dims="a"), - } - expected = in_memory.isel(indexers) - actual = on_disk.isel(**indexers) - # make sure the array is not yet loaded into memory - assert not actual["var1"].variable._in_memory - assert_identical(expected, actual.load()) - # do it twice, to make sure we're switched from - # vectorized -> numpy when we cached the values - actual = on_disk.isel(**indexers) - assert_identical(expected, actual) - - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. - with self.roundtrip(in_memory) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(ind) - expected = expected.isel(ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - - # two-staged vectorized-indexing - indexers2 = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": DataArray([[0, 4], [1, 3], [2, 2]], dims=["a", "b"]), - }, - {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, - ] - multiple_indexing(indexers2) - - # vectorized-slice mixed - indexers3 = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(None, 10), - } - ] - multiple_indexing(indexers3) - - # vectorized-integer mixed - indexers4 = [ - {"dim3": 0}, - {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, - {"a": slice(None, None, 2)}, - ] - multiple_indexing(indexers4) - - # vectorized-integer mixed - indexers5 = [ - {"dim3": 0}, - {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, - {"a": 1, "b": 0}, - ] - multiple_indexing(indexers5) - - def test_vectorized_indexing_negative_step(self) -> None: - # use dask explicitly when present - open_kwargs: dict[str, Any] | None - if has_dask: - open_kwargs = {"chunks": {}} - else: - open_kwargs = None - in_memory = create_test_data() - - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. - with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(ind) - expected = expected.isel(ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - - # with negative step slice. - indexers = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(-1, 1, -1), - } - ] - multiple_indexing(indexers) - - # with negative step slice. - indexers = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(-1, 1, -2), - } - ] - multiple_indexing(indexers) - - def test_outer_indexing_reversed(self) -> None: - # regression test for GH6560 - ds = xr.Dataset( - {"z": (("t", "p", "y", "x"), np.ones((1, 1, 31, 40)))}, - ) - - with self.roundtrip(ds) as on_disk: - subset = on_disk.isel(t=[0], p=0).z[:, ::10, ::10][:, ::-1, :] - assert subset.sizes == subset.load().sizes - - def test_isel_dataarray(self) -> None: - # Make sure isel works lazily. GH:issue:1688 - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - expected = in_memory.isel(dim2=in_memory["dim2"] < 3) - actual = on_disk.isel(dim2=on_disk["dim2"] < 3) - assert_identical(expected, actual) - def validate_array_type(self, ds): # Make sure that only NumpyIndexingAdapter stores a bare np.ndarray. def find_and_validate_array(obj): From 2105aa049d3ac70af2b05e237ed973f0e9e7d653 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Nov 2024 09:43:05 -0700 Subject: [PATCH 30/32] Add legacy backend indexing tests --- xarray/backends/common.py | 6 +++ xarray/backends/netCDF4_.py | 4 +- xarray/backends/pydap_.py | 4 +- xarray/backends/scipy_.py | 4 +- xarray/backends/zarr.py | 4 +- xarray/core/indexing.py | 77 +++++++++++++------------------ xarray/tests/test_backends.py | 85 +++++++++++++++++++++++++++++++++++ 7 files changed, 131 insertions(+), 53 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index d9ed89901a9..3cd2079f909 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -250,6 +250,12 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): + def get_duck_array(self, dtype: np.typing.DTypeLike = None): + key = indexing.BasicIndexer((slice(None),) * self.ndim) + return self[key] # type: ignore [index] + + +class NewBackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): __slots__ = ("indexing_support",) def get_duck_array(self, dtype: np.typing.DTypeLike = None): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 42e593b4816..8130c264021 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -12,8 +12,8 @@ from xarray import coding from xarray.backends.common import ( BACKEND_ENTRYPOINTS, - BackendArray, BackendEntrypoint, + NewBackendArray, WritableCFDataStore, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -61,7 +61,7 @@ NETCDF4_PYTHON_LOCK = combine_locks([NETCDFC_LOCK, HDF5_LOCK]) -class BaseNetCDF4Array(BackendArray): +class BaseNetCDF4Array(NewBackendArray): __slots__ = ("datastore", "dtype", "shape", "variable_name") def __init__(self, variable_name, datastore): diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 49b22d78463..2ec260a3d11 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -8,8 +8,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractDataStore, - BackendArray, BackendEntrypoint, + NewBackendArray, robust_getitem, ) from xarray.backends.store import StoreBackendEntrypoint @@ -36,7 +36,7 @@ ) -class PydapArrayWrapper(BackendArray): +class PydapArrayWrapper(NewBackendArray): indexing_support = indexing.IndexingSupport.BASIC def __init__(self, array) -> None: diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 30cd9927489..1793f619a85 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -10,8 +10,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, - BackendArray, BackendEntrypoint, + NewBackendArray, WritableCFDataStore, _normalize_path, ) @@ -59,7 +59,7 @@ def _decode_attrs(d): return {k: v if k == "_FillValue" else _decode_string(v) for (k, v) in d.items()} -class ScipyArrayWrapper(BackendArray): +class ScipyArrayWrapper(NewBackendArray): indexing_support = indexing.IndexingSupport.OUTER_1VECTOR def __init__(self, variable_name, datastore): diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index ca09e06137b..b1435a039d0 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -14,8 +14,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractWritableDataStore, - BackendArray, BackendEntrypoint, + NewBackendArray, _encode_variable_name, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -185,7 +185,7 @@ def encode_zarr_attr_value(value): return encoded -class ZarrArrayWrapper(BackendArray): +class ZarrArrayWrapper(NewBackendArray): indexing_support = indexing.IndexingSupport.VECTORIZED def __init__(self, zarr_array): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index a27e1c0543d..92af1fcd146 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -3,7 +3,6 @@ import enum import functools import operator -import warnings from collections import Counter, defaultdict from collections.abc import Callable, Hashable, Iterable, Mapping from contextlib import suppress @@ -23,6 +22,7 @@ from xarray.core.utils import ( NDArrayMixin, either_dict_or_kwargs, + emit_user_level_warning, get_valid_numpy_dtype, is_duck_array, is_duck_dask_array, @@ -609,7 +609,7 @@ def __getitem__(self, key) -> Any: BackendArray_fallback_warning_message = ( "The array `{0}` does not support indexing using the .vindex and .oindex properties. " "The __getitem__ method is being used instead. This fallback behavior will be " - "removed in a future version. Please ensure that the backend array `{1}` implements " + "removed in a future version. Please ensure that the backend array `{0}` implements " "support for the .vindex and .oindex properties to avoid potential issues." ) @@ -671,21 +671,8 @@ def shape(self) -> _Shape: return self._shape def get_duck_array(self) -> Any: - try: - array = apply_indexer(self.array, self.key) - except NotImplementedError as _: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ - warnings.warn( - BackendArray_fallback_warning_message.format( - self.array.__class__.__name__, self.array.__class__.__name__ - ), - category=DeprecationWarning, - stacklevel=2, - ) - array = self.array[self.key] - - # self.array[self.key] is now a numpy array when + array = apply_indexer(self.array, self.key) + # array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) # so we need the explicit check for ExplicitlyIndexed @@ -752,21 +739,9 @@ def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape def get_duck_array(self) -> Any: - try: - array = apply_indexer(self.array, self.key) - except NotImplementedError as _: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ - warnings.warn( - BackendArray_fallback_warning_message.format( - self.array.__class__.__name__, self.array.__class__.__name__ - ), - category=PendingDeprecationWarning, - stacklevel=2, - ) - array = self.array[self.key] + array = apply_indexer(self.array, self.key) - # self.array[self.key] is now a numpy array when + # array is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) # so we need the explicit check for ExplicitlyIndexed @@ -1136,6 +1111,7 @@ def vectorized_indexing_adapter( ) +# TODO: deprecate and delete this method once it is no longer used externally def explicit_indexing_adapter( key: ExplicitIndexer, shape: _Shape, @@ -1163,26 +1139,36 @@ def explicit_indexing_adapter( ------- Indexing result, in the form of a duck numpy-array. """ - # TODO: raise PendingDeprecationWarning here. - if isinstance(key, VectorizedIndexer): - return vectorized_indexing_adapter( - key.tuple, shape, indexing_support, raw_indexing_method - ) - elif isinstance(key, OuterIndexer): - return outer_indexing_adapter( - key.tuple, shape, indexing_support, raw_indexing_method - ) - elif isinstance(key, BasicIndexer): - return basic_indexing_adapter( - key.tuple, shape, indexing_support, raw_indexing_method - ) - raise TypeError(f"unexpected key type: {key}") + + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + emit_user_level_warning( + BackendArray_fallback_warning_message.format(""), + category=PendingDeprecationWarning, + ) + raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support) + result = raw_indexing_method(raw_key.tuple) + if numpy_indices.tuple: + indexable = NumpyIndexingAdapter(result) + result = apply_indexer(indexable, numpy_indices) + return result def apply_indexer( indexable: ExplicitlyIndexedNDArrayMixin, indexer: ExplicitIndexer ) -> Any: """Apply an indexer to an indexable object.""" + if not hasattr(indexable, "vindex") and not hasattr(indexable, "oindex"): + # This path is used by Lazily*IndexedArray.get_duck_array() + classname = type(indexable).__name__ + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + emit_user_level_warning( + BackendArray_fallback_warning_message.format(classname), + category=PendingDeprecationWarning, + ) + return indexable[indexer] + if isinstance(indexer, VectorizedIndexer): return indexable.vindex[indexer.tuple] elif isinstance(indexer, OuterIndexer): @@ -1206,6 +1192,7 @@ def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: indexable[indexer.tuple] = value +# TODO: delete this method once explicit_indexing_adapter is no longer used externally def decompose_indexer( indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 021cf5df1d4..0c9813c971d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -39,6 +39,7 @@ open_mfdataset, save_mfdataset, ) +from xarray.backends.common import BackendArray as LegacyBackendArray from xarray.backends.common import robust_getitem from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint from xarray.backends.netcdf3 import _nc3_dtype_coercions @@ -53,6 +54,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing +from xarray.core.indexing import IndexingSupport from xarray.core.options import set_options from xarray.core.utils import module_available from xarray.namedarray.pycompat import array_type @@ -352,6 +354,9 @@ def test_dtype_coercion_error(self) -> None: class BackendIndexingTestsMixin: + def roundtrip(self, ds: Dataset, open_kwargs=None) -> Dataset: + raise NotImplementedError + def test_orthogonal_indexing(self) -> None: in_memory = create_test_data() with self.roundtrip(in_memory) as on_disk: @@ -6491,3 +6496,83 @@ def test_zarr_safe_chunk_region(tmp_path): chunk = ds.isel(region) chunk = chunk.chunk() chunk.chunk().to_zarr(store, region=region) + + +class LegacyBackendArrayWrapper(LegacyBackendArray): + def __init__(self, array: np.ndarray, indexing_support: IndexingSupport): + self.shape = array.shape + self.dtype = array.dtype + self.array = array + self.indexing_support = indexing_support + + def __getitem__(self, key: indexing.ExplicitIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _getitem(self, key: tuple[Any, ...]) -> np.ndarray: + return self.array[key] + + +def indexing_tests(*, indexing_support: IndexingSupport): + def wrapper(cls): + class NewClass(cls): + cls.indexing_support = indexing_support + + def roundtrip(self, ds: Dataset, *, open_kwargs=None) -> Dataset: + ds = ds.copy(deep=True) + for name in list(ds.data_vars) + list( + set(ds.coords) - set(ds.xindexes) + ): + var = ds._variables[name] + ds._variables[name] = var.copy( + # These tests assume that indexing is lazy (checks ._in_memory), + # so wrapping by LazilyIndexedArray is required. + data=indexing.LazilyIndexedArray( + LegacyBackendArrayWrapper(var.data, self.indexing_support) + ) + ) + return ds + + def test_vectorized_indexing_negative_step(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_vectorized_indexing_negative_step() + + def test_isel_dataarray(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_isel_dataarray() + + def test_vectorized_indexing(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_vectorized_indexing() + + def test_orthogonal_indexing(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_orthogonal_indexing() + + def test_outer_indexing_reversed(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_outer_indexing_reversed() + + return NewClass + + return wrapper + + +@indexing_tests(indexing_support=IndexingSupport.BASIC) +class TestBasicIndexingLegacyBackend(BackendIndexingTestsMixin): + pass + + +@indexing_tests(indexing_support=IndexingSupport.OUTER_1VECTOR) +class TestOuter1VectorIndexingLegacyBackend(BackendIndexingTestsMixin): + pass + + +# @indexing_tests(indexing_support=IndexingSupport.OUTER) +# class TestOuterIndexingLegacyBackend(BackendIndexingTestsMixin): +# pass + +# @indexing_tests(indexing_support=IndexingSupport.VECTORIZED) +# class TestVectorizedIndexingLegacyBackend(BackendIndexingTestsMixin): +# pass From fb24e9cb2d2434ce455b6c8c3686ce8033902c33 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Nov 2024 11:51:17 -0700 Subject: [PATCH 31/32] Avoid raising deprecation warning now. --- xarray/core/indexing.py | 19 +++++++++---------- xarray/tests/test_backends.py | 30 +++++++++++++++--------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 92af1fcd146..b3c6400e4d9 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -22,7 +22,6 @@ from xarray.core.utils import ( NDArrayMixin, either_dict_or_kwargs, - emit_user_level_warning, get_valid_numpy_dtype, is_duck_array, is_duck_dask_array, @@ -1142,10 +1141,10 @@ def explicit_indexing_adapter( # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ - emit_user_level_warning( - BackendArray_fallback_warning_message.format(""), - category=PendingDeprecationWarning, - ) + # emit_user_level_warning( + # BackendArray_fallback_warning_message.format(""), + # category=PendingDeprecationWarning, + # ) raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support) result = raw_indexing_method(raw_key.tuple) if numpy_indices.tuple: @@ -1160,13 +1159,13 @@ def apply_indexer( """Apply an indexer to an indexable object.""" if not hasattr(indexable, "vindex") and not hasattr(indexable, "oindex"): # This path is used by Lazily*IndexedArray.get_duck_array() - classname = type(indexable).__name__ + # classname = type(indexable).__name__ # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ - emit_user_level_warning( - BackendArray_fallback_warning_message.format(classname), - category=PendingDeprecationWarning, - ) + # emit_user_level_warning( + # BackendArray_fallback_warning_message.format(classname), + # category=PendingDeprecationWarning, + # ) return indexable[indexer] if isinstance(indexer, VectorizedIndexer): diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0c9813c971d..298e46e6a67 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6534,25 +6534,25 @@ def roundtrip(self, ds: Dataset, *, open_kwargs=None) -> Dataset: ) return ds - def test_vectorized_indexing_negative_step(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_vectorized_indexing_negative_step() + # def test_vectorized_indexing_negative_step(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_vectorized_indexing_negative_step() - def test_isel_dataarray(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_isel_dataarray() + # def test_isel_dataarray(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_isel_dataarray() - def test_vectorized_indexing(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_vectorized_indexing() + # def test_vectorized_indexing(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_vectorized_indexing() - def test_orthogonal_indexing(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_orthogonal_indexing() + # def test_orthogonal_indexing(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_orthogonal_indexing() - def test_outer_indexing_reversed(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_outer_indexing_reversed() + # def test_outer_indexing_reversed(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_outer_indexing_reversed() return NewClass From 60640466bd98c3a77bd48ece1d9eca07ecf6fe4d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 29 Jan 2025 22:33:47 -0700 Subject: [PATCH 32/32] fix --- xarray/core/indexing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 76bf5e6a027..3fda88956d1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1812,18 +1812,18 @@ def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: return self.array.vindex[indexer] except IndexError as e: # TODO: upstream to dask - has_dask = any(is_duck_dask_array(i) for i in indexer.tuple) + has_dask = any(is_duck_dask_array(i) for i in indexer) # this only works for "small" 1d coordinate arrays with one chunk # it is intended for idxmin, idxmax, and allows indexing with # the nD array output of argmin, argmax if ( not has_dask - or len(indexer.tuple) > 1 + or len(indexer) > 1 or math.prod(self.array.numblocks) > 1 or self.array.ndim > 1 ): raise e - (idxr,) = indexer.tuple + (idxr,) = indexer if idxr.ndim == 0: return self.array[idxr.data] else: