Skip to content

Commit 92ecbaf

Browse files
committed
MAINT: Reinstante the CONTIG flag (as buggy as it was)
1 parent d09be24 commit 92ecbaf

File tree

4 files changed

+110
-10
lines changed

4 files changed

+110
-10
lines changed

numpy/_core/src/multiarray/nditer_api.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,6 +1515,8 @@ NpyIter_DebugPrint(NpyIter *iter)
15151515
printf("WRITEMASKED ");
15161516
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUF_SINGLESTRIDE)
15171517
printf("BUF_SINGLESTRIDE ");
1518+
if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CONTIG)
1519+
printf("CONTIG ");
15181520
printf("\n");
15191521
}
15201522
printf("|\n");
@@ -1532,9 +1534,9 @@ NpyIter_DebugPrint(NpyIter *iter)
15321534
if (itflags&NPY_ITFLAG_REDUCE) {
15331535
printf("| REDUCE Pos: %d\n",
15341536
(int)NBF_REDUCE_POS(bufferdata));
1535-
printf("| BUFFER Reduce outersize: %d\n",
1537+
printf("| REDUCE OuterSize: %d\n",
15361538
(int)NBF_REDUCE_OUTERSIZE(bufferdata));
1537-
printf("| BUFFER OuterDim: %d\n",
1539+
printf("| REDUCE OuterDim: %d\n",
15381540
(int)NBF_OUTERDIM(bufferdata));
15391541
}
15401542
printf("| Strides: ");
@@ -1894,12 +1896,17 @@ npyiter_fill_buffercopy_params(
18941896
}
18951897

18961898
if (opitflags & NPY_OP_ITFLAG_BUF_SINGLESTRIDE) {
1897-
/* Flatten the copy into a single stride. */
18981899
*ndim_transfer = 1;
18991900
*op_shape = op_transfersize;
19001901
assert(**op_coords == 0); /* initialized by caller currently */
19011902
*op_strides = &NAD_STRIDES(axisdata)[iop];
1902-
if ((*op_strides)[0] == 0) {
1903+
if ((*op_strides)[0] == 0 && (
1904+
!(opitflags & NPY_OP_ITFLAG_CONTIG) ||
1905+
(opitflags & NPY_OP_ITFLAG_WRITE))) {
1906+
/*
1907+
* If the user didn't force contig, optimize single element.
1908+
* (Unless CONTIG was requested and this is not a write/reduce!)
1909+
*/
19031910
*op_transfersize = 1;
19041911
*buf_stride = 0;
19051912
}

numpy/_core/src/multiarray/nditer_constr.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,10 @@ npyiter_check_per_op_flags(npy_uint32 op_flags, npyiter_opitflags *op_itflags)
10101010
*op_itflags |= NPY_OP_ITFLAG_VIRTUAL;
10111011
}
10121012

1013+
if (op_flags & NPY_ITER_CONTIG) {
1014+
*op_itflags |= NPY_OP_ITFLAG_CONTIG;
1015+
}
1016+
10131017
return 1;
10141018
}
10151019

@@ -2175,6 +2179,11 @@ npyiter_find_buffering_setup(NpyIter *iter)
21752179
npy_bool is_reduce_op;
21762180
npy_bool op_is_buffered = (op_itflags[iop]&NPY_OP_ITFLAG_CAST) != 0;
21772181

2182+
/* If contig was requested and this is not writeable avoid zero strides */
2183+
npy_bool avoid_zero_strides = (
2184+
(op_itflags[iop] & NPY_OP_ITFLAG_CONTIG) != 0
2185+
&& !(op_itflags[iop] & NPY_OP_ITFLAG_WRITE));
2186+
21782187
/*
21792188
* Figure out if this is iterated as a reduce op. Even one marked
21802189
* for reduction may not be iterated as one.
@@ -2189,16 +2198,19 @@ npyiter_find_buffering_setup(NpyIter *iter)
21892198
else if (op_single_stride_dims[iop] == best_dim && !op_is_buffered) {
21902199
/*
21912200
* Optimization: This operand is not buffered and we might as well
2192-
* iterate it as an unbuffered reduce operand (if not yet buffered).
2201+
* iterate it as an unbuffered reduce operand.
21932202
*/
21942203
is_reduce_op = 1;
21952204
}
21962205
else if (NAD_STRIDES(reduce_axisdata)[iop] == 0
2197-
&& op_single_stride_dims[iop] <= best_dim) {
2206+
&& op_single_stride_dims[iop] <= best_dim
2207+
&& !avoid_zero_strides) {
21982208
/*
21992209
* Optimization: If the outer (reduce) stride is 0 on the operand
22002210
* then we can iterate this in a reduce way: buffer the core only
22012211
* and repeat it in the "outer" dimension.
2212+
* If user requested contig, we may have to avoid 0 strides, this
2213+
* is incompatible with the reduce path.
22022214
*/
22032215
is_reduce_op = 1;
22042216
}
@@ -2229,7 +2241,8 @@ npyiter_find_buffering_setup(NpyIter *iter)
22292241
*/
22302242
if (!is_reduce_op
22312243
&& NIT_OPITFLAGS(iter)[iop] & NPY_OP_ITFLAG_BUF_SINGLESTRIDE
2232-
&& NAD_STRIDES(axisdata)[iop] == 0) {
2244+
&& NAD_STRIDES(axisdata)[iop] == 0
2245+
&& !avoid_zero_strides) {
22332246
/* This op is always 0 strides, so even the buffer is that. */
22342247
inner_stride = 0;
22352248
reduce_outer_stride = 0;
@@ -3310,11 +3323,16 @@ npyiter_allocate_arrays(NpyIter *iter,
33103323
}
33113324

33123325
/* Here we can finally check for contiguous iteration */
3313-
if (op_flags[iop] & NPY_ITER_CONTIG) {
3326+
if (op_itflags[iop] & NPY_OP_ITFLAG_CONTIG) {
33143327
NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
33153328
npy_intp stride = NAD_STRIDES(axisdata)[iop];
33163329

33173330
if (stride != op_dtype[iop]->elsize) {
3331+
/*
3332+
* Need to copy to buffer (cast) to ensure contiguous
3333+
* NOTE: This is the wrong place in case of axes reorder
3334+
* (there is an xfailing test for this).
3335+
*/
33183336
NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
33193337
"because of NPY_ITER_CONTIG\n");
33203338
op_itflags[iop] |= NPY_OP_ITFLAG_CAST;

numpy/_core/src/multiarray/nditer_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@
139139
#define NPY_OP_ITFLAG_FORCECOPY 0x0200
140140
/* The operand has temporary data, write it back at dealloc */
141141
#define NPY_OP_ITFLAG_HAS_WRITEBACK 0x0400
142+
/* Whether the user request a contiguous operand */
143+
#define NPY_OP_ITFLAG_CONTIG 0x0800
142144

143145
/*
144146
* The data layout of the iterator is fully specified by

numpy/_core/tests/test_nditer.py

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,6 +2320,79 @@ def test_iter_buffering_growinner():
23202320
assert_equal(i[0].size, a.size)
23212321

23222322

2323+
@pytest.mark.parametrize("read_or_readwrite", ["readonly", "readwrite"])
2324+
def test_iter_contig_flag_reduce_error(read_or_readwrite):
2325+
# Test that a non-contiguous operand is rejected without buffering.
2326+
# NOTE: This is true even for a reduction, where we return a 0-stride
2327+
# below!
2328+
with pytest.raises(TypeError, match="Iterator operand required buffering"):
2329+
it = np.nditer(
2330+
(np.zeros(()),), flags=["external_loop", "reduce_ok"],
2331+
op_flags=[(read_or_readwrite, "contig"),], itershape=(10,))
2332+
2333+
2334+
@pytest.mark.parametrize("arr", [
2335+
lambda: np.zeros(()),
2336+
lambda: np.zeros((20, 1))[::20],
2337+
lambda: np.zeros((1, 20))[:, ::20]
2338+
])
2339+
def test_iter_contig_flag_single_operand_strides(arr):
2340+
"""
2341+
Tests the strides with the contig flag for both broadcast and non-broadcast
2342+
operands in 3 cases where the logic is needed:
2343+
1. When everything has a zero stride, the broadcast op needs to repeated
2344+
2. When the reduce axis is the last axis (first to iterate).
2345+
3. When the reduce axis is the first axis (last to iterate).
2346+
2347+
NOTE: The semantics of the cast flag are not clearly defined when
2348+
it comes to reduction. It is unclear that there are any users.
2349+
"""
2350+
first_op = np.ones((10, 10))
2351+
broadcast_op = arr()
2352+
red_op = arr()
2353+
# Add a first operand to ensure no axis-reordering and the result shape.
2354+
iterator = np.nditer(
2355+
(first_op, broadcast_op, red_op),
2356+
flags=["external_loop", "reduce_ok", "buffered", "delay_bufalloc"],
2357+
op_flags=[("readonly", "contig")] * 2 + [("readwrite", "contig")])
2358+
2359+
with iterator:
2360+
iterator.reset()
2361+
try:
2362+
for f, b, r in iterator:
2363+
# The first operand is contigouos, we should have a view
2364+
assert np.shares_memory(f, first_op)
2365+
# Although broadcast, the second op always has a contiguous stride
2366+
assert b.strides[0] == 8
2367+
assert not np.shares_memory(b, broadcast_op)
2368+
# The reduction has a contiguous stride or a 0 stride
2369+
if red_op.ndim == 0 or red_op.shape[-1] == 1:
2370+
assert r.strides[0] == 0
2371+
else:
2372+
# The stride is 8, although it was not originally:
2373+
assert r.strides[0] == 8
2374+
# If the reduce stride is 0, buffering makes no difference, but we
2375+
# do it anyway right now:
2376+
assert not np.shares_memory(r, red_op)
2377+
finally:
2378+
iterator.debug_print()
2379+
2380+
2381+
@pytest.mark.xfail(reason="The contig flag was always buggy.")
2382+
def test_iter_contig_flag_incorrect():
2383+
# This case does the wrong thing...
2384+
iterator = np.nditer(
2385+
(np.ones((10, 10)).T, np.ones((1, 10))),
2386+
flags=["external_loop", "reduce_ok", "buffered", "delay_bufalloc"],
2387+
op_flags=[("readonly", "contig")] * 2)
2388+
2389+
with iterator:
2390+
iterator.reset()
2391+
for a, b in iterator:
2392+
assert a.strides == 8
2393+
assert b.strides == 8 # should be 8 but is 0 due to axis reorder
2394+
2395+
23232396
@pytest.mark.slow
23242397
def test_iter_buffered_reduce_reuse():
23252398
# large enough array for all views, including negative strides.
@@ -3324,8 +3397,8 @@ def test_debug_print(capfd):
33243397
| BufIterEnd: 5
33253398
| BUFFER CoreSize: 5
33263399
| REDUCE Pos: 0
3327-
| BUFFER Reduce outersize: 10
3328-
| BUFFER OuterDim: 1
3400+
| REDUCE OuterSize: 10
3401+
| REDUCE OuterDim: 1
33293402
| Strides: 8 4
33303403
| Ptrs:
33313404
| REDUCE Outer Strides: 40 0

0 commit comments

Comments
 (0)