diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 81a846bf6d96..cb8265dd3d7a 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -270,6 +270,15 @@ as_pystring(PyObject *scalar, int coerce) "string coercion is disabled."); return NULL; } + else if (scalar_type == &PyBytes_Type) { + // assume UTF-8 encoding + char *buffer; + Py_ssize_t length; + if (PyBytes_AsStringAndSize(scalar, &buffer, &length) < 0) { + return NULL; + } + return PyUnicode_FromStringAndSize(buffer, length); + } else { // attempt to coerce to str scalar = PyObject_Str(scalar); diff --git a/numpy/_core/tests/test_stringdtype.py b/numpy/_core/tests/test_stringdtype.py index ad4276f40a3e..29b52b27afe8 100644 --- a/numpy/_core/tests/test_stringdtype.py +++ b/numpy/_core/tests/test_stringdtype.py @@ -190,10 +190,14 @@ def test_array_creation_utf8(dtype, data): ], ) def test_scalars_string_conversion(data, dtype): + try: + str_vals = [str(d.decode('utf-8')) for d in data] + except AttributeError: + str_vals = [str(d) for d in data] if dtype.coerce: assert_array_equal( np.array(data, dtype=dtype), - np.array([str(d) for d in data], dtype=dtype), + np.array(str_vals, dtype=dtype), ) else: with pytest.raises(ValueError): @@ -284,6 +288,14 @@ def test_bytes_casts(self, dtype, strings): barr = np.array(utf8_bytes, dtype=bytes_dtype) assert_array_equal(barr, sarr.astype(bytes_dtype)) assert_array_equal(barr.astype(dtype), sarr) + if dtype.coerce: + barr = np.array(utf8_bytes, dtype=dtype) + assert_array_equal(barr, sarr) + barr = np.array(utf8_bytes, dtype="O") + assert_array_equal(barr.astype(dtype), sarr) + else: + with pytest.raises(ValueError): + np.array(utf8_bytes, dtype=dtype) except UnicodeEncodeError: with pytest.raises(UnicodeEncodeError): sarr.astype("S20")