Skip to content

BUG: numpy.loadtxt reads only 50000 lines when skiprows >= max_rows #28379

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion numpy/lib/_npyio_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1084,7 +1084,7 @@ def _read(fname, *, delimiter=',', comment='#', quote='"',
# be adapted (in principle the concatenate could cast).
chunks.append(next_arr.astype(read_dtype_via_object_chunks))

skiprows = 0 # Only have to skip for first chunk
skiplines = 0 # Only have to skip for first chunk
if max_rows >= 0:
max_rows -= chunk_size
if len(next_arr) < chunk_size:
Expand Down
25 changes: 25 additions & 0 deletions numpy/lib/tests/test_loadtxt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,3 +1073,28 @@ def test_maxrows_exceeding_chunksize(nmax):
res = np.loadtxt(fname, dtype=str, delimiter=" ", max_rows=nmax)
os.remove(fname)
assert len(res) == nmax

@pytest.mark.parametrize("nskip", (0, 10000, 12345, 50000, 67891, 100000))
def test_skiprow_exceeding_maxrows_exceeding_chunksize(tmpdir, nskip):
    """Check ``skiprows`` combined with a large ``max_rows`` in ``loadtxt``.

    A 110000-line text input is loaded with ``max_rows=60000`` while
    skipping ``nskip`` leading lines, where ``nskip`` is parametrized to be
    smaller than, equal to, and larger than ``max_rows``.  The first column
    of every line holds its 1-based line number, so the returned rows can be
    compared against the exact range of line numbers that should survive.

    The same check runs twice: once on an in-memory ``StringIO`` object and
    once on a file name pointing at a temporary file.
    """
    total_lines = 110000
    row_limit = 60000

    # Each line looks like "<lineno> a 0.5 1"; lines are numbered from 1.
    content = "\n".join(
        f"{lineno} a 0.5 1" for lineno in range(1, total_lines + 1)
    )

    # Fewer than ``row_limit`` rows are available once ``nskip`` gets
    # close enough to the end of the input.
    n_expected = min(row_limit, total_lines - nskip)
    first_lineno = nskip + 1
    expected_col = np.arange(
        first_lineno, first_lineno + n_expected
    ).astype(str)

    def load_and_verify(source):
        # Load from ``source`` and confirm both the row count and that the
        # rows are exactly the expected lines (via the line-number column).
        loaded = np.loadtxt(
            source,
            dtype='str',
            delimiter=" ",
            skiprows=nskip,
            max_rows=row_limit,
        )
        assert len(loaded) == n_expected
        assert_array_equal(expected_col, loaded[:, 0])

    # Input given as an open, in-memory text stream.
    load_and_verify(StringIO(content))

    # Input given as a path string to a file on disk.
    data_path = tmpdir / "test_data.txt"
    data_path.write(content)
    load_and_verify(str(data_path))
Loading