From 363135470f2ea68950117cdf2469ac961430baa0 Mon Sep 17 00:00:00 2001
From: Giovanni Del Monte <giovanni.delmonte90@gmail.com>
Date: Tue, 18 Feb 2025 15:48:48 +0100
Subject: [PATCH] BUG: numpy.loadtxt reads only 50000 lines when skiprows >=
 max_rows (#28319)

* fixed bug in function _read in numpy/lib/_npyio_impl.py, misnamed variable skiplines as skiprows; added test in numpy/lib/tests/test_loadtxt.py

* fixed syntax in test_loadtxt.py

* changed use of mkstemp with use of tmpdir provided by pytest

* fixed bug in use of tmpdir in loadtxt test

* Update numpy/lib/tests/test_loadtxt.py

Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>

* Update file numpy/lib/tests/test_loadtxt.py

* Update file numpy/lib/tests/test_loadtxt.py

* Update numpy/lib/tests/test_loadtxt.py

---------

Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>
---
 numpy/lib/_npyio_impl.py        |  2 +-
 numpy/lib/tests/test_loadtxt.py | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/numpy/lib/_npyio_impl.py b/numpy/lib/_npyio_impl.py
index f0d1bb2b0c68..4dc3a4b9b7e2 100644
--- a/numpy/lib/_npyio_impl.py
+++ b/numpy/lib/_npyio_impl.py
@@ -1084,7 +1084,7 @@ def _read(fname, *, delimiter=',', comment='#', quote='"',
                 # be adapted (in principle the concatenate could cast).
                 chunks.append(next_arr.astype(read_dtype_via_object_chunks))
 
-                skiprows = 0  # Only have to skip for first chunk
+                skiplines = 0  # Only have to skip for first chunk
                 if max_rows >= 0:
                     max_rows -= chunk_size
                 if len(next_arr) < chunk_size:
diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py
index 116cd1608da3..60717be3bd9a 100644
--- a/numpy/lib/tests/test_loadtxt.py
+++ b/numpy/lib/tests/test_loadtxt.py
@@ -1073,3 +1073,28 @@ def test_maxrows_exceeding_chunksize(nmax):
     res = np.loadtxt(fname, dtype=str, delimiter=" ", max_rows=nmax)
     os.remove(fname)
     assert len(res) == nmax
+
+@pytest.mark.parametrize("nskip", (0, 10000, 12345, 50000, 67891, 100000))
+def test_skiprow_exceeding_maxrows_exceeding_chunksize(tmpdir, nskip):
+    # Chunked read with a varying number of skipped lines, both below and above
+    # max_rows (60000). NOTE(review): no nskip value equals max_rows exactly.
+    file_length = 110000
+    data = "\n".join(f"{i} a 0.5 1" for i in range(1, file_length + 1))  # line i starts with i
+    expected_length = min(60000, file_length - nskip)  # max_rows, or what is left after skipping
+    expected = np.arange(nskip + 1, nskip + 1 + expected_length).astype(str)
+
+    # file-like path: the data is passed as an in-memory StringIO object
+    txt = StringIO(data)
+    res = np.loadtxt(txt, dtype='str', delimiter=" ", skiprows=nskip, max_rows=60000)
+    assert len(res) == expected_length
+    # column 0 holds each line's own number, so it must equal `expected`
+    assert_array_equal(expected, res[:, 0])
+
+    # file-obj path: the same data is written under tmpdir and passed by file name
+    tmp_file = tmpdir / "test_data.txt"
+    tmp_file.write(data)
+    fname = str(tmp_file)
+    res = np.loadtxt(fname, dtype='str', delimiter=" ", skiprows=nskip, max_rows=60000)
+    assert len(res) == expected_length
+    # column 0 holds each line's own number, so it must equal `expected`
+    assert_array_equal(expected, res[:, 0])