diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml index 6aa692155..c20530fbe 100644 --- a/.github/workflows/test_and_build.yml +++ b/.github/workflows/test_and_build.yml @@ -210,18 +210,12 @@ jobs: else psgver="" fi - # TODO: drop 0.57.0rc1 and use 0.57 once numba 0.57 is properly released if [[ ${npver} == "=1.24" || ${{ startsWith(steps.pyver.outputs.selected, '3.11') }} == true ]] ; then - numbaver=$(python -c 'import random ; print(random.choice(["=0.57.0rc1", ""]))') + numbaver=$(python -c 'import random ; print(random.choice(["=0.57", ""]))') elif [[ ${npver} == "=1.21" ]] ; then - numbaver=$(python -c 'import random ; print(random.choice(["=0.55", "=0.56", "=0.57.0rc1", ""]))') + numbaver=$(python -c 'import random ; print(random.choice(["=0.55", "=0.56", "=0.57", ""]))') else - numbaver=$(python -c 'import random ; print(random.choice(["=0.56", "=0.57.0rc1", ""]))') - fi - if [[ ${{ matrix.os == 'windows-latest' }} == true && ( ${npver} == "=1.24" || ${numbaver} == "=0.57.0rc1" ) ]] ; then - # TODO: numba 0.57.0rc1 currently crashes sometimes on windows, so skip it for now - npver="" - numbaver="" + numbaver=$(python -c 'import random ; print(random.choice(["=0.56", "=0.57", ""]))') fi fmm=fast_matrix_market${fmmver} awkward=awkward${akver} @@ -254,7 +248,7 @@ jobs: fi echo "versions: np${npver} sp${spver} pd${pdver} ak${akver} nx${nxver} numba${numbaver} yaml${yamlver} sparse${sparsever} psgver${psgver}" - # TODO: remove `-c numba` when numba 0.57 is properly released + # TODO: remove `-c numba` when numba 0.57 is properly released on conda-forge $(command -v mamba || command -v conda) install -c numba packaging pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig tomli \ pyyaml${yamlver} ${sparse} pandas${pdver} scipy${spver} numpy${npver} ${awkward} \ networkx${nxver} ${numba} ${fmm} ${psg} \ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 426153fee..8f4fac317 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,8 +30,8 @@ repos: - id: validate-pyproject name: Validate pyproject.toml # I don't yet trust ruff to do what autoflake does - - repo: https://github.com/myint/autoflake - rev: v2.0.2 + - repo: https://github.com/PyCQA/autoflake + rev: v2.1.1 hooks: - id: autoflake args: [--in-place] @@ -43,7 +43,7 @@ repos: - id: isort # Let's keep `pyupgrade` even though `ruff --fix` probably does most of it - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.3.2 hooks: - id: pyupgrade args: [--py38-plus] @@ -58,7 +58,7 @@ repos: - id: black - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.261 + rev: v0.0.264 hooks: - id: ruff args: [--fix-only, --show-fixes] @@ -86,7 +86,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.261 + rev: v0.0.264 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/docs/conf.py b/docs/conf.py index dc73c8304..3e1a8c85b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,7 @@ # The full version, including alpha/beta/rc tags # release = "1.3.2" # See: https://github.com/pypa/setuptools_scm/#usage-from-sphinx -from importlib.metadata import version # noqa: E402 isort: skip +from importlib.metadata import version # noqa: E402 isort:skip release = version("python-graphblas") del version diff --git a/graphblas/binary/__init__.py b/graphblas/binary/__init__.py index 68bab4d55..1b8985f73 100644 --- a/graphblas/binary/__init__.py +++ b/graphblas/binary/__init__.py @@ -1,6 +1,6 @@ # All items are dynamically added by classes in operator.py # This module acts as a container of BinaryOp instances -from ..core import _supports_udfs # isort:skip +from ..core import _supports_udfs _delayed = {} _delayed_commutes_to = { diff --git a/graphblas/core/agg.py b/graphblas/core/agg.py index 3418daffc..b9f1977ab 100644 --- 
a/graphblas/core/agg.py +++ b/graphblas/core/agg.py @@ -8,7 +8,7 @@ """ import warnings -from .operator.agg import * +from .operator.agg import * # pylint: disable=wildcard-import,unused-wildcard-import warnings.warn( "graphblas.core.agg namespace is deprecated; please use graphblas.core.operator.agg instead.", diff --git a/graphblas/core/operator/binary.py b/graphblas/core/operator/binary.py index 8d41a097e..406405a80 100644 --- a/graphblas/core/operator/binary.py +++ b/graphblas/core/operator/binary.py @@ -630,6 +630,34 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals """Register a BinaryOp without registering it in the ``graphblas.binary`` namespace. Because it is not registered in the namespace, the name is optional. + + Parameters + ---------- + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes two input parameters of any dtype and returns any dtype. + name : str, optional + The name of the operator. This *does not* show up as ``gb.binary.{name}``. + parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. + For example, ``gb.binary.isclose`` is a parameterized function that + optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it + can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``. + When creating a parameterized user-defined operator, the ``func`` + parameter must be a callable that *returns* a function that will + then get compiled. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + If True, then the function will not be automatically compiled for + builtin types, and it will be compiled "just in time" when used. + Setting ``is_udt=True`` is also helpful when the left and right + dtypes need to be different. 
+ + Returns + ------- + BinaryOp or ParameterizedBinaryOp """ cls._check_supports_udf("register_anonymous") if parameterized: @@ -638,19 +666,60 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals @classmethod def register_new(cls, name, func, *, parameterized=False, is_udt=False, lazy=False): - """Register a BinaryOp. The name will be used to identify the BinaryOp in the - ``graphblas.binary`` namespace. - - >>> def max_zero(x, y): - r = 0 - if x > r: - r = x - if y > r: - r = y - return r - >>> gb.core.operator.BinaryOp.register_new("max_zero", max_zero) - >>> dir(gb.binary) - [..., 'max_zero', ...] + """Register a new BinaryOp and save it to ``graphblas.binary`` namespace. + + Parameters + ---------- + name : str + The name of the operator. This will show up as ``gb.binary.{name}``. + The name may contain periods, ".", which will result in nested objects + such as ``gb.binary.x.y.z`` for name ``"x.y.z"``. + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes two input parameters of any dtype and returns any dtype. + parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. + For example, ``gb.binary.isclose`` is a parameterized function that + optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it + can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``. + When creating a parameterized user-defined operator, the ``func`` + parameter must be a callable that *returns* a function that will + then get compiled. See the ``user_isclose`` example below. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + If True, then the function will not be automatically compiled for + builtin types, and it will be compiled "just in time" when used. 
+ Setting ``is_udt=True`` is also helpful when the left and right + dtypes need to be different. + lazy : bool, default False + If False (the default), then the function will be automatically + compiled for builtin types (unless ``is_udt`` is True). + Compiling functions can be slow, however, so you may want to + delay compilation and only compile when the operator is used, + which is done by setting ``lazy=True``. + + Examples + -------- + >>> def max_zero(x, y): + r = 0 + if x > r: + r = x + if y > r: + r = y + return r + >>> gb.core.operator.BinaryOp.register_new("max_zero", max_zero) + >>> dir(gb.binary) + [..., 'max_zero', ...] + + This is how ``gb.binary.isclose`` is defined: + + >>> def user_isclose(rel_tol=1e-7, abs_tol=0.0): + >>> def inner(x, y): + >>> return x == y or abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol) + >>> return inner + >>> gb.binary.register_new("user_isclose", user_isclose, parameterized=True) """ cls._check_supports_udf("register_new") module, funcname = cls._remove_nesting(name) diff --git a/graphblas/core/operator/indexunary.py b/graphblas/core/operator/indexunary.py index ad5d841d0..f6637ae6d 100644 --- a/graphblas/core/operator/indexunary.py +++ b/graphblas/core/operator/indexunary.py @@ -241,10 +241,42 @@ def _compile_udt(self, dtype, dtype2): @classmethod def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=False): - """Register an IndexUnaryOp without registering it in the - ``graphblas.indexunary`` namespace. + """Register an IndexUnaryOp without adding it to the ``graphblas.indexunary`` namespace. Because it is not registered in the namespace, the name is optional. + + Parameters + ---------- + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes four input parameters--any dtype, int64, int64, + any dtype and returns any dtype. 
The first argument (any dtype) is + the value of the input Matrix or Vector, the second argument (int64) + is the row index of the Matrix or the index of the Vector, the third + argument (int64) is the column index of the Matrix or 0 for a Vector, + and the fourth argument (any dtype) is the value of the input Scalar. + name : str, optional + The name of the operator. This *does not* show up as ``gb.indexunary.{name}``. + parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. + For example, ``gb.binary.isclose`` is a parameterized BinaryOp that + optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it + can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``. + When creating a parameterized user-defined operator, the ``func`` + parameter must be a callable that *returns* a function that will + then get compiled. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + If True, then the function will not be automatically compiled for + builtin types, and it will be compiled "just in time" when used. + Setting ``is_udt=True`` is also helpful when the left and right + dtypes need to be different. + + Returns + ------- + IndexUnaryOp or ParameterizedIndexUnaryOp """ cls._check_supports_udf("register_anonymous") if parameterized: @@ -253,15 +285,53 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals @classmethod def register_new(cls, name, func, *, parameterized=False, is_udt=False, lazy=False): - """Register an IndexUnaryOp. The name will be used to identify the IndexUnaryOp in the - ``graphblas.indexunary`` namespace. + """Register a new IndexUnaryOp and save it to ``graphblas.indexunary`` namespace. If the return type is Boolean, the function will also be registered as a SelectOp - with the same name. 
- - >>> gb.indexunary.register_new("row_mod", lambda x, i, j, thunk: i % max(thunk, 2)) - >>> dir(gb.indexunary) - [..., 'row_mod', ...] + (and saved to ``graphblas.select`` namespace) with the same name. + + Parameters + ---------- + name : str + The name of the operator. This will show up as ``gb.indexunary.{name}``. + The name may contain periods, ".", which will result in nested objects + such as ``gb.indexunary.x.y.z`` for name ``"x.y.z"``. + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes four input parameters--any dtype, int64, int64, + any dtype and returns any dtype. The first argument (any dtype) is + the value of the input Matrix or Vector, the second argument (int64) + is the row index of the Matrix or the index of the Vector, the third + argument (int64) is the column index of the Matrix or 0 for a Vector, + and the fourth argument (any dtype) is the value of the input Scalar. + parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. + For example, ``gb.binary.isclose`` is a parameterized BinaryOp that + optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it + can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``. + When creating a parameterized user-defined operator, the ``func`` + parameter must be a callable that *returns* a function that will + then get compiled. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + If True, then the function will not be automatically compiled for + builtin types, and it will be compiled "just in time" when used. + Setting ``is_udt=True`` is also helpful when the left and right + dtypes need to be different. 
+ lazy : bool, default False + If False (the default), then the function will be automatically + compiled for builtin types (unless ``is_udt`` is True). + Compiling functions can be slow, however, so you may want to + delay compilation and only compile when the operator is used, + which is done by setting ``lazy=True``. + + Examples + -------- + >>> gb.indexunary.register_new("row_mod", lambda x, i, j, thunk: i % max(thunk, 2)) + >>> dir(gb.indexunary) + [..., 'row_mod', ...] """ cls._check_supports_udf("register_new") module, funcname = cls._remove_nesting(name) diff --git a/graphblas/core/operator/monoid.py b/graphblas/core/operator/monoid.py index 387652b63..fc327b4a7 100644 --- a/graphblas/core/operator/monoid.py +++ b/graphblas/core/operator/monoid.py @@ -269,22 +269,25 @@ def _compile_udt(self, dtype, dtype2): def register_anonymous(cls, binaryop, identity, name=None, *, is_idempotent=False): """Register a Monoid without registering it in the ``graphblas.monoid`` namespace. + A monoid is a binary operator whose inputs and output are the same dtype. Because it is not registered in the namespace, the name is optional. Parameters ---------- - binaryop : BinaryOp - Builtin or registered binary operator - identity : - Identity value of the monoid + binaryop: BinaryOp or ParameterizedBinaryOp + The binary operator of the monoid, which should be able to use the same + dtype for both inputs and the output. + identity: scalar or Mapping + The identity of the monoid such that ``op(x, identity) == x`` for any x. + ``identity`` may also be a mapping from dtype to scalar. name : str, optional - Name associated with the monoid + The name of the operator. This *does not* show up as ``gb.monoid.{name}``. is_idempotent : bool, default False Does ``op(x, x) == x`` for any x? 
Returns ------- - Function handle + Monoid or ParameterizedMonoid """ if type(binaryop) is ParameterizedBinaryOp: return ParameterizedMonoid( @@ -294,12 +297,36 @@ def register_anonymous(cls, binaryop, identity, name=None, *, is_idempotent=Fals @classmethod def register_new(cls, name, binaryop, identity, *, is_idempotent=False, lazy=False): - """Register a Monoid. The name will be used to identify the Monoid in the - ``graphblas.monoid`` namespace. + """Register a new Monoid and save it to ``graphblas.monoid`` namespace. - >>> gb.core.operator.Monoid.register_new("max_zero", gb.binary.max_zero, 0) - >>> dir(gb.monoid) - [..., 'max_zero', ...] + A monoid is a binary operator whose inputs and output are the same dtype. + + Parameters + ---------- + name : str + The name of the operator. This will show up as ``gb.monoid.{name}``. + The name may contain periods, ".", which will result in nested objects + such as ``gb.monoid.x.y.z`` for name ``"x.y.z"``. + binaryop: BinaryOp or ParameterizedBinaryOp + The binary operator of the monoid, which should be able to use the same + dtype for both inputs and the output. + identity: scalar or Mapping + The identity of the monoid such that ``op(x, identity) == x`` for any x. + ``identity`` may also be a mapping from dtype to scalar. + is_idempotent : bool, default False + Does ``op(x, x) == x`` for any x? + lazy : bool, default False + If False (the default), then the function will be automatically + compiled for builtin types (unless ``is_udt`` was True for the binaryop). + Compiling functions can be slow, however, so you may want to + delay compilation and only compile when the operator is used, + which is done by setting ``lazy=True``. + + Examples + -------- + >>> gb.core.operator.Monoid.register_new("max_zero", gb.binary.max_zero, 0) + >>> dir(gb.monoid) + [..., 'max_zero', ...] 
""" module, funcname = cls._remove_nesting(name) if lazy: diff --git a/graphblas/core/operator/select.py b/graphblas/core/operator/select.py index 27567eb2f..4c9cd4639 100644 --- a/graphblas/core/operator/select.py +++ b/graphblas/core/operator/select.py @@ -125,6 +125,40 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals """Register a SelectOp without registering it in the ``graphblas.select`` namespace. Because it is not registered in the namespace, the name is optional. + The return type must be Boolean. + + Parameters + ---------- + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes four input parameters--any dtype, int64, int64, + any dtype and returns boolean. The first argument (any dtype) is + the value of the input Matrix or Vector, the second argument (int64) + is the row index of the Matrix or the index of the Vector, the third + argument (int64) is the column index of the Matrix or 0 for a Vector, + and the fourth argument (any dtype) is the value of the input Scalar. + name : str, optional + The name of the operator. This *does not* show up as ``gb.select.{name}``. + parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. + For example, ``gb.binary.isclose`` is a parameterized BinaryOp that + optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it + can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``. + When creating a parameterized user-defined operator, the ``func`` + parameter must be a callable that *returns* a function that will + then get compiled. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. 
+ If True, then the function will not be automatically compiled for + builtin types, and it will be compiled "just in time" when used. + Setting ``is_udt=True`` is also helpful when the left and right + dtypes need to be different. + + Returns + ------- + SelectOp or ParameterizedSelectOp """ cls._check_supports_udf("register_anonymous") if parameterized: @@ -134,14 +168,53 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals @classmethod def register_new(cls, name, func, *, parameterized=False, is_udt=False, lazy=False): - """Register a SelectOp. The name will be used to identify the SelectOp in the - ``graphblas.select`` namespace. + """Register a new SelectOp and save it to ``graphblas.select`` namespace. The function will also be registered as a IndexUnaryOp with the same name. - - >>> gb.select.register_new("upper_left_triangle", lambda x, i, j, thunk: i + j <= thunk) - >>> dir(gb.select) - [..., 'upper_left_triangle', ...] + The return type must be Boolean. + + Parameters + ---------- + name : str + The name of the operator. This will show up as ``gb.select.{name}``. + The name may contain periods, ".", which will result in nested objects + such as ``gb.select.x.y.z`` for name ``"x.y.z"``. + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes four input parameters--any dtype, int64, int64, + any dtype and returns boolean. The first argument (any dtype) is + the value of the input Matrix or Vector, the second argument (int64) + is the row index of the Matrix or the index of the Vector, the third + argument (int64) is the column index of the Matrix or 0 for a Vector, + and the fourth argument (any dtype) is the value of the input Scalar. + parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. 
+ For example, ``gb.binary.isclose`` is a parameterized BinaryOp that + optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it + can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``. + When creating a parameterized user-defined operator, the ``func`` + parameter must be a callable that *returns* a function that will + then get compiled. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + If True, then the function will not be automatically compiled for + builtin types, and it will be compiled "just in time" when used. + Setting ``is_udt=True`` is also helpful when the left and right + dtypes need to be different. + lazy : bool, default False + If False (the default), then the function will be automatically + compiled for builtin types (unless ``is_udt`` is True). + Compiling functions can be slow, however, so you may want to + delay compilation and only compile when the operator is used, + which is done by setting ``lazy=True``. + + Examples + -------- + >>> gb.select.register_new("upper_left_triangle", lambda x, i, j, thunk: i + j <= thunk) + >>> dir(gb.select) + [..., 'upper_left_triangle', ...] """ cls._check_supports_udf("register_new") iop = IndexUnaryOp.register_new( diff --git a/graphblas/core/operator/semiring.py b/graphblas/core/operator/semiring.py index ac716b9dd..035a1c43b 100644 --- a/graphblas/core/operator/semiring.py +++ b/graphblas/core/operator/semiring.py @@ -277,16 +277,16 @@ def register_anonymous(cls, monoid, binaryop, name=None): Parameters ---------- - monoid : Monoid - Builtin or registered monoid - binaryop : BinaryOp - Builtin or registered binary operator + monoid : Monoid or ParameterizedMonoid + The monoid of the semiring (like "plus" in the default "plus_times" semiring). + binaryop : BinaryOp or ParameterizedBinaryOp + The binaryop of the semiring (like "times" in the default "plus_times" semiring). 
name : str, optional - Name associated with the semiring + The name of the operator. This *does not* show up as ``gb.semiring.{name}``. Returns ------- - Function handle + Semiring or ParameterizedSemiring """ if type(monoid) is ParameterizedMonoid or type(binaryop) is ParameterizedBinaryOp: return ParameterizedSemiring(name, monoid, binaryop, anonymous=True) @@ -294,12 +294,30 @@ def register_anonymous(cls, monoid, binaryop, name=None): @classmethod def register_new(cls, name, monoid, binaryop, *, lazy=False): - """Register a Semiring. The name will be used to identify the Semiring in the - ``graphblas.semiring`` namespace. + """Register a new Semiring and save it to ``graphblas.semiring`` namespace. - >>> gb.core.operator.Semiring.register_new("max_max", gb.monoid.max, gb.binary.max) - >>> dir(gb.semiring) - [..., 'max_max', ...] + Parameters + ---------- + name : str + The name of the operator. This will show up as ``gb.semiring.{name}``. + The name may contain periods, ".", which will result in nested objects + such as ``gb.semiring.x.y.z`` for name ``"x.y.z"``. + monoid : Monoid or ParameterizedMonoid + The monoid of the semiring (like "plus" in the default "plus_times" semiring). + binaryop : BinaryOp or ParameterizedBinaryOp + The binaryop of the semiring (like "times" in the default "plus_times" semiring). + lazy : bool, default False + If False (the default), then the function will be automatically + compiled for builtin types (unless ``is_udt`` is True). + Compiling functions can be slow, however, so you may want to + delay compilation and only compile when the operator is used, + which is done by setting ``lazy=True``. + + Examples + -------- + >>> gb.core.operator.Semiring.register_new("max_max", gb.monoid.max, gb.binary.max) + >>> dir(gb.semiring) + [..., 'max_max', ...] 
""" module, funcname = cls._remove_nesting(name) if lazy: diff --git a/graphblas/core/operator/unary.py b/graphblas/core/operator/unary.py index 1432a9387..a02445836 100644 --- a/graphblas/core/operator/unary.py +++ b/graphblas/core/operator/unary.py @@ -276,6 +276,32 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals """Register a UnaryOp without registering it in the ``graphblas.unary`` namespace. Because it is not registered in the namespace, the name is optional. + + Parameters + ---------- + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes one input parameter of any dtype and returns any dtype. + name : str, optional + The name of the operator. This *does not* show up as ``gb.unary.{name}``. + parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. + For example, ``gb.binary.isclose`` is a parameterized function that + optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it + can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``. + When creating a parameterized user-defined operator, the ``func`` + parameter must be a callable that *returns* a function that will + then get compiled. See the ``user_isclose`` example in ``BinaryOp.register_new``. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + If True, then the function will not be automatically compiled for + builtin types, and it will be compiled "just in time" when used. 
+ + Returns + ------- + UnaryOp or ParameterizedUnaryOp """ cls._check_supports_udf("register_anonymous") if parameterized: @@ -284,12 +310,43 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals @classmethod def register_new(cls, name, func, *, parameterized=False, is_udt=False, lazy=False): - """Register a UnaryOp. The name will be used to identify the UnaryOp in the - ``graphblas.unary`` namespace. - - >>> gb.core.operator.UnaryOp.register_new("plus_one", lambda x: x + 1) - >>> dir(gb.unary) - [..., 'plus_one', ...] + """Register a new UnaryOp and save it to ``graphblas.unary`` namespace. + + Parameters + ---------- + name : str + The name of the operator. This will show up as ``gb.unary.{name}``. + The name may contain periods, ".", which will result in nested objects + such as ``gb.unary.x.y.z`` for name ``"x.y.z"``. + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes one input parameter of any dtype and returns any dtype. + parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. + For example, ``gb.binary.isclose`` is a parameterized function that + optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it + can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``. + When creating a parameterized user-defined operator, the ``func`` + parameter must be a callable that *returns* a function that will + then get compiled. See the ``user_isclose`` example in ``BinaryOp.register_new``. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + If True, then the function will not be automatically compiled for + builtin types, and it will be compiled "just in time" when used. 
+ lazy : bool, default False + If False (the default), then the function will be automatically + compiled for builtin types (unless ``is_udt`` is True). + Compiling functions can be slow, however, so you may want to + delay compilation and only compile when the operator is used, + which is done by setting ``lazy=True``. + + Examples + -------- + >>> gb.core.operator.UnaryOp.register_new("plus_one", lambda x: x + 1) + >>> dir(gb.unary) + [..., 'plus_one', ...] """ cls._check_supports_udf("register_new") module, funcname = cls._remove_nesting(name) diff --git a/pyproject.toml b/pyproject.toml index a3a5b8276..245dc35bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,7 @@ dependencies = [ # These won't be installed by default after 2024.3.0 # Use e.g. "python-graphblas[suitesparse]" or "python-graphblas[default]" instead "suitesparse-graphblas >=7.4.0.0, <7.5", - "numba >=0.55; python_version<'3.11'", # make optional where numba is not supported + "numba >=0.55; python_version<'3.12'", # make optional where numba is not supported ] [project.urls] @@ -97,9 +97,9 @@ repr = [ ] io = [ "python-graphblas[networkx,scipy]", - "python-graphblas[numba]; python_version<'3.11'", + "python-graphblas[numba]; python_version<'3.12'", "awkward >=1.9", - "sparse >=0.13; python_version<'3.11'", # make optional, b/c sparse needs numba + "sparse >=0.13; python_version<'3.12'", # make optional, b/c sparse needs numba "fast-matrix-market >=1.4.5", ] viz = [ @@ -119,11 +119,11 @@ test = [ ] default = [ "python-graphblas[suitesparse,pandas,scipy]", - "python-graphblas[numba]; python_version<'3.11'", # make optional where numba is not supported + "python-graphblas[numba]; python_version<'3.12'", # make optional where numba is not supported ] -complete = [ +all = [ "python-graphblas[default,io,viz,test]", - "python-graphblas[datashade]; python_version<'3.11'", # make optional, b/c datashade needs numba + "python-graphblas[datashade]; python_version<'3.12'", # make optional, b/c 
datashade needs numba ] [tool.setuptools] @@ -321,6 +321,8 @@ ignore = [ "RET504", # Unnecessary variable assignment before `return` statement "S110", # `try`-`except`-`pass` detected, consider logging the exception (Note: good advice, but we don't log) "S112", # `try`-`except`-`continue` detected, consider logging the exception (Note: good advice, but we don't log) + "S603", # `subprocess` call: check for execution of untrusted input (Note: not important for us) + "S607", # Starting a process with a partial executable path (Note: not important for us) "SIM102", # Use a single `if` statement instead of nested `if` statements (Note: often necessary) "SIM105", # Use contextlib.suppress(...) instead of try-except-pass (Note: try-except-pass is much faster) "SIM108", # Use ternary operator ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) @@ -349,6 +351,7 @@ ignore = [ "graphblas/core/operator/base.py" = ["S102"] # exec is used for UDF "graphblas/core/ss/matrix.py" = ["NPY002"] # numba doesn't support rng generator yet "graphblas/core/ss/vector.py" = ["NPY002"] # numba doesn't support rng generator yet +"graphblas/core/utils.py" = ["PLE0302"] # `__set__` is used as a property "graphblas/ss/_core.py" = ["N999"] # We want _core.py to be underscopre # Allow useless expressions, assert, pickle, RNG, print, no docstring, and yoda in tests "graphblas/tests/*py" = ["B018", "S101", "S301", "S311", "T201", "D103", "D100", "SIM300"] @@ -358,6 +361,7 @@ ignore = [ "scripts/create_pickle.py" = ["F403", "F405"] # Allow `from foo import *` "docs/*.py" = ["INP001"] # Not a package + [tool.ruff.flake8-builtins] builtins-ignorelist = ["copyright", "format", "min", "max"] diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index 026f3a656..3809eb805 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -3,11 +3,11 @@ # Use, adjust, copy/paste, etc. as necessary to answer your questions. 
# This may be helpful when updating dependency versions in CI. # Tip: add `--json` for more information. -conda search 'numpy[channel=conda-forge]>=1.24.2' -conda search 'pandas[channel=conda-forge]>=2.0.0' +conda search 'numpy[channel=conda-forge]>=1.24.3' +conda search 'pandas[channel=conda-forge]>=2.0.1' conda search 'scipy[channel=conda-forge]>=1.10.1' conda search 'networkx[channel=conda-forge]>=3.1' -conda search 'awkward[channel=conda-forge]>=2.1.2' +conda search 'awkward[channel=conda-forge]>=2.1.4' conda search 'sparse[channel=conda-forge]>=0.14.0' conda search 'fast_matrix_market[channel=conda-forge]>=1.5.1' conda search 'numba[channel=conda-forge]>=0.56.4'