diff --git a/numpy/lib/_npyio_impl.py b/numpy/lib/_npyio_impl.py
index f0d1bb2b0c68..4dc3a4b9b7e2 100644
--- a/numpy/lib/_npyio_impl.py
+++ b/numpy/lib/_npyio_impl.py
@@ -1084,7 +1084,7 @@ def _read(fname, *, delimiter=',', comment='#', quote='"',
                 # be adapted (in principle the concatenate could cast).
                 chunks.append(next_arr.astype(read_dtype_via_object_chunks))

-                skiprows = 0  # Only have to skip for first chunk
+                skiplines = 0  # Only have to skip for first chunk
                 if max_rows >= 0:
                     max_rows -= chunk_size
                 if len(next_arr) < chunk_size:
diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py
index 116cd1608da3..60717be3bd9a 100644
--- a/numpy/lib/tests/test_loadtxt.py
+++ b/numpy/lib/tests/test_loadtxt.py
@@ -1073,3 +1073,28 @@ def test_maxrows_exceeding_chunksize(nmax):
     res = np.loadtxt(fname, dtype=str, delimiter=" ", max_rows=nmax)
     os.remove(fname)
     assert len(res) == nmax
+
+@pytest.mark.parametrize("nskip", (0, 10000, 12345, 50000, 67891, 100000))
+def test_skiprow_exceeding_maxrows_exceeding_chunksize(tmpdir, nskip):
+    # Read a file in chunks while skipping a variable number of lines:
+    # fewer than, equal to, and more than max_rows.
+    file_length = 110000
+    data = "\n".join(f"{i} a 0.5 1" for i in range(1, file_length + 1))
+    expected_length = min(60000, file_length - nskip)
+    expected = np.arange(nskip + 1, nskip + 1 + expected_length).astype(str)
+
+    # file-like path
+    txt = StringIO(data)
+    res = np.loadtxt(txt, dtype=str, delimiter=" ", skiprows=nskip, max_rows=60000)
+    assert len(res) == expected_length
+    # Check that the correct lines were read.
+    assert_array_equal(expected, res[:, 0])
+
+    # filename path
+    tmp_file = tmpdir / "test_data.txt"
+    tmp_file.write(data)
+    fname = str(tmp_file)
+    res = np.loadtxt(fname, dtype=str, delimiter=" ", skiprows=nskip, max_rows=60000)
+    assert len(res) == expected_length
+    # Check that the correct lines were read.
+    assert_array_equal(expected, res[:, 0])
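
For context, a minimal sketch of the bug this patch fixes, offered as an illustration rather than part of the change: np.loadtxt reads str/object dtypes in chunks, and before the fix the loop reset the stale name `skiprows` instead of `skiplines`, so the skip was re-applied at the start of every chunk after the first. The 50000-line chunk size is an assumption inferred from the test values above.

    # Illustrative reproduction only, not part of the patch.
    from io import StringIO
    import numpy as np

    # 100000 lines span multiple chunks on the str-dtype (chunked) path.
    data = "\n".join(str(i) for i in range(100000))
    res = np.loadtxt(StringIO(data), dtype=str, skiprows=10)
    # Before the fix, 10 lines were skipped per chunk (20 in total) and
    # this assert failed; after the fix only the first 10 are skipped.
    assert len(res) == 100000 - 10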