From e24d40e6cff6c3ed947f3b425b3101abc641f842 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 24 Mar 2025 16:05:33 +0100 Subject: [PATCH 01/27] gh-129813: Add PyBytesWriter C API (flavor with size) Add functions: * PyBytesWriter_Create() * PyBytesWriter_Discard() * PyBytesWriter_Finish() * PyBytesWriter_FinishWithSize() * PyBytesWriter_FinishWithEndPointer() * PyBytesWriter_Data() * PyBytesWriter_Allocated() * PyBytesWriter_SetSize() * PyBytesWriter_Resize() --- Include/cpython/bytesobject.h | 30 +++ Include/internal/pycore_freelist_state.h | 2 + Modules/_pickle.c | 50 ++-- Modules/_struct.c | 13 +- Modules/binascii.c | 54 ++-- Objects/bytesobject.c | 327 ++++++++++++++++++----- Objects/object.c | 1 + 7 files changed, 359 insertions(+), 118 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 71c133f173f157..ed2d835913bb51 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -40,3 +40,33 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable) { return PyBytes_Join(sep, iterable); } + + +// --- PyBytesWriter API ----------------------------------------------------- + +typedef struct PyBytesWriter PyBytesWriter; + +PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create( + Py_ssize_t alloc); +PyAPI_FUNC(void) PyBytesWriter_Discard( + PyBytesWriter *writer); +PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( + PyBytesWriter *writer); +PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithSize( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithEndPointer( + PyBytesWriter *writer, + void *data); + +PyAPI_FUNC(void*) PyBytesWriter_Data( + PyBytesWriter *writer); +PyAPI_FUNC(Py_ssize_t) PyBytesWriter_Allocated( + PyBytesWriter *writer); + +PyAPI_FUNC(int) PyBytesWriter_SetSize( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(int) PyBytesWriter_Resize( + PyBytesWriter *writer, + Py_ssize_t alloc); diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 7c252f5b570c13..50c8e04c761e03 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -24,6 +24,7 @@ extern "C" { # define Py_futureiters_MAXFREELIST 255 # define Py_object_stack_chunks_MAXFREELIST 4 # define Py_unicode_writers_MAXFREELIST 1 +# define Py_bytes_writers_MAXFREELIST 1 # define Py_pymethodobjects_MAXFREELIST 20 // A generic freelist of either PyObjects or other data structures. @@ -53,6 +54,7 @@ struct _Py_freelists { struct _Py_freelist futureiters; struct _Py_freelist object_stack_chunks; struct _Py_freelist unicode_writers; + struct _Py_freelist bytes_writers; struct _Py_freelist pymethodobjects; }; diff --git a/Modules/_pickle.c b/Modules/_pickle.c index d260f1a68f8c70..60cd822c6f8ed6 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2615,31 +2615,31 @@ save_picklebuffer(PickleState *st, PicklerObject *self, PyObject *obj) static PyObject * raw_unicode_escape(PyObject *obj) { - char *p; - Py_ssize_t i, size; - const void *data; - int kind; - _PyBytesWriter writer; + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + const void *data = PyUnicode_DATA(obj); + int kind = PyUnicode_KIND(obj); - _PyBytesWriter_Init(&writer); - - size = PyUnicode_GET_LENGTH(obj); - data = PyUnicode_DATA(obj); - kind = PyUnicode_KIND(obj); - - p = _PyBytesWriter_Alloc(&writer, size); - if (p == NULL) - goto error; - writer.overallocate = 1; + Py_ssize_t alloc = size; + PyBytesWriter *writer = PyBytesWriter_Create(alloc); + if (writer == NULL) { + return NULL; + } + char *p = PyBytesWriter_Data(writer); - for (i=0; i < size; i++) { + for (Py_ssize_t i=0; i < size; i++) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); /* Map 32-bit characters to '\Uxxxxxxxx' */ if (ch >= 0x10000) { /* -1: subtract 1 preallocated byte */ - p = _PyBytesWriter_Prepare(&writer, p, 10-1); - if (p == NULL) + alloc += 10-1; + Py_ssize_t pos = p - (char*)PyBytesWriter_Data(writer); + if (PyBytesWriter_SetSize(writer, pos) < 0) { goto error; + } + if (PyBytesWriter_Resize(writer, alloc) < 0) { + goto error; + } + p = (char*)PyBytesWriter_Data(writer) + pos; *p++ = '\\'; *p++ = 'U'; @@ -2658,9 +2658,15 @@ raw_unicode_escape(PyObject *obj) ch == 0x1a) { /* -1: subtract 1 preallocated byte */ - p = _PyBytesWriter_Prepare(&writer, p, 6-1); - if (p == NULL) + alloc += 6-1; + Py_ssize_t pos = p - (char*)PyBytesWriter_Data(writer); + if (PyBytesWriter_SetSize(writer, pos) < 0) { goto error; + } + if (PyBytesWriter_Resize(writer, alloc) < 0) { + goto error; + } + p = (char*)PyBytesWriter_Data(writer) + pos; *p++ = '\\'; *p++ = 'u'; @@ -2674,10 +2680,10 @@ raw_unicode_escape(PyObject *obj) *p++ = (char) ch; } - return _PyBytesWriter_Finish(&writer, p); + return PyBytesWriter_FinishWithEndPointer(writer, p); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } diff --git a/Modules/_struct.c b/Modules/_struct.c index f04805d9d6d1d7..b6f71eeef17198 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2272,7 +2272,6 @@ strings."); static PyObject * s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { - char *buf; PyStructObject *soself; _structmodulestate *state = get_struct_state_structinst(self); @@ -2288,21 +2287,19 @@ s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } /* Allocate a new string */ - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); - buf = _PyBytesWriter_Alloc(&writer, soself->s_size); - if (buf == NULL) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter *writer = PyBytesWriter_Create(soself->s_size); + if (writer == NULL) { return NULL; } + char *buf = PyBytesWriter_Data(writer); /* Call the guts */ if ( s_pack_internal(soself, args, 0, buf, state) != 0 ) { - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } - return _PyBytesWriter_Finish(&writer, buf + soself->s_size); + return PyBytesWriter_FinishWithSize(writer, soself->s_size); } PyDoc_STRVAR(s_pack_into__doc__, diff --git a/Modules/binascii.c b/Modules/binascii.c index 6bb01d148b6faa..7954198245972b 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -302,16 +302,13 @@ static PyObject * binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) /*[clinic end generated code: output=b1b99de62d9bbeb8 input=beb27822241095cd]*/ { - unsigned char *ascii_data; const unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; binascii_state *state; - Py_ssize_t bin_len, out_len; - _PyBytesWriter writer; + Py_ssize_t bin_len; - _PyBytesWriter_Init(&writer); bin_data = data->buf; bin_len = data->len; if ( bin_len > 45 ) { @@ -325,10 +322,12 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) } /* We're lazy and allocate to much (fixed up later) */ - out_len = 2 + (bin_len + 2) / 3 * 4; - ascii_data = _PyBytesWriter_Alloc(&writer, out_len); - if (ascii_data == NULL) + Py_ssize_t out_len = 2 + (bin_len + 2) / 3 * 4; + PyBytesWriter *writer = PyBytesWriter_Create(out_len); + if (writer == NULL) { return NULL; + } + unsigned char *ascii_data = PyBytesWriter_Data(writer); /* Store the length */ if (backtick && !bin_len) @@ -356,7 +355,7 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) } *ascii_data++ = '\n'; /* Append a courtesy newline */ - return _PyBytesWriter_Finish(&writer, ascii_data); + return PyBytesWriter_FinishWithEndPointer(writer, ascii_data); } /*[clinic input] @@ -387,12 +386,11 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) /* Allocate the buffer */ Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ - _PyBytesWriter writer; - _PyBytesWriter_Init(&writer); - unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len); - if (bin_data == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(bin_len); + if (writer == NULL) { return NULL; - unsigned char *bin_data_start = bin_data; + } + unsigned char *bin_data = PyBytesWriter_Data(writer); if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') { state = get_binascii_state(module); @@ -488,12 +486,14 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) state = get_binascii_state(module); if (state == NULL) { /* error already set, from get_binascii_state */ + assert(PyErr_Occurred()); } else if (quad_pos == 1) { /* ** There is exactly one extra valid, non-padding, base64 character. ** This is an invalid length, as there is no possible input that ** could encoded into such a base64 string. */ + unsigned char *bin_data_start = PyBytesWriter_Data(writer); PyErr_Format(state->Error, "Invalid base64-encoded string: " "number of data characters (%zd) cannot be 1 more " @@ -502,13 +502,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) } else { PyErr_SetString(state->Error, "Incorrect padding"); } - error_end: - _PyBytesWriter_Dealloc(&writer); - return NULL; + goto error_end; } done: - return _PyBytesWriter_Finish(&writer, bin_data); + return PyBytesWriter_FinishWithEndPointer(writer, bin_data); + +error_end: + PyBytesWriter_Discard(writer); + return NULL; } @@ -527,18 +529,15 @@ static PyObject * binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) /*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/ { - unsigned char *ascii_data; const unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; - Py_ssize_t bin_len, out_len; - _PyBytesWriter writer; + Py_ssize_t bin_len; binascii_state *state; bin_data = data->buf; bin_len = data->len; - _PyBytesWriter_Init(&writer); assert(bin_len >= 0); @@ -554,12 +553,15 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) /* We're lazy and allocate too much (fixed up later). "+2" leaves room for up to two pad characters. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */ - out_len = bin_len*2 + 2; - if (newline) + Py_ssize_t out_len = bin_len*2 + 2; + if (newline) { out_len++; - ascii_data = _PyBytesWriter_Alloc(&writer, out_len); - if (ascii_data == NULL) + } + PyBytesWriter *writer = PyBytesWriter_Create(out_len); + if (writer == NULL) { return NULL; + } + unsigned char *ascii_data = PyBytesWriter_Data(writer); for( ; bin_len > 0 ; bin_len--, bin_data++ ) { /* Shift the data into our buffer */ @@ -584,7 +586,7 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) if (newline) *ascii_data++ = '\n'; /* Append a courtesy newline */ - return _PyBytesWriter_Finish(&writer, ascii_data); + return PyBytesWriter_FinishWithEndPointer(writer, ascii_data); } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 31ba89ffb18379..244e4aa8950d08 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -7,6 +7,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_format.h" // F_LJUST +#include "pycore_freelist.h" // _Py_FREELIST_FREE() #include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _PyLong_DigitValue @@ -198,7 +199,6 @@ PyBytes_FromString(const char *str) PyObject * PyBytes_FromFormatV(const char *format, va_list vargs) { - char *s; const char *f; const char *p; Py_ssize_t prec; @@ -212,21 +212,30 @@ PyBytes_FromFormatV(const char *format, va_list vargs) Longest 64-bit pointer representation: "0xffffffffffffffff\0" (19 bytes). */ char buffer[21]; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - s = _PyBytesWriter_Alloc(&writer, strlen(format)); - if (s == NULL) + Py_ssize_t alloc = strlen(format); + PyBytesWriter *writer = PyBytesWriter_Create(alloc); + if (writer == NULL) { return NULL; - writer.overallocate = 1; + } + char *s = PyBytesWriter_Data(writer); -#define WRITE_BYTES(str) \ +#define WRITE_BYTES_LEN(str, len_expr) \ do { \ - s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \ - if (s == NULL) \ + size_t len = (len_expr); \ + alloc += len; \ + Py_ssize_t pos = s - (char*)PyBytesWriter_Data(writer); \ + if (PyBytesWriter_SetSize(writer, pos) < 0) { \ + goto error; \ + } \ + if (PyBytesWriter_Resize(writer, alloc) < 0) { \ goto error; \ + } \ + s = (char*)PyBytesWriter_Data(writer) + pos; \ + memcpy(s, (str), len); \ + s += len; \ } while (0) +#define WRITE_BYTES(str) WRITE_BYTES_LEN(str, strlen(str)) for (f = format; *f; f++) { if (*f != '%') { @@ -267,10 +276,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) ++f; } - /* subtract bytes preallocated for the format string - (ex: 2 for "%s") */ - writer.min_size -= (f - p + 1); - switch (*f) { case 'c': { @@ -281,7 +286,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) "expects an integer in range [0; 255]"); goto error; } - writer.min_size++; *s++ = (unsigned char)c; break; } @@ -340,9 +344,7 @@ PyBytes_FromFormatV(const char *format, va_list vargs) i++; } } - s = _PyBytesWriter_WriteBytes(&writer, s, p, i); - if (s == NULL) - goto error; + WRITE_BYTES_LEN(p, i); break; } @@ -361,28 +363,23 @@ PyBytes_FromFormatV(const char *format, va_list vargs) break; case '%': - writer.min_size++; *s++ = '%'; break; default: - if (*f == 0) { - /* fix min_size if we reached the end of the format string */ - writer.min_size++; - } - /* invalid format string: copy unformatted string and exit */ WRITE_BYTES(p); - return _PyBytesWriter_Finish(&writer, s); + return PyBytesWriter_FinishWithEndPointer(writer, s); } } #undef WRITE_BYTES +#undef WRITE_BYTES_LEN - return _PyBytesWriter_Finish(&writer, s); + return PyBytesWriter_FinishWithEndPointer(writer, s); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } @@ -1082,21 +1079,15 @@ PyObject *_PyBytes_DecodeEscape(const char *s, const char *errors, const char **first_invalid_escape) { - int c; - char *p; - const char *end; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - - p = _PyBytesWriter_Alloc(&writer, len); - if (p == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(len); + if (writer == NULL) { return NULL; - writer.overallocate = 1; + } + char *p = PyBytesWriter_Data(writer); *first_invalid_escape = NULL; - end = s + len; + const char *end = s + len; while (s < end) { if (*s != '\\') { *p++ = *s++; @@ -1125,7 +1116,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s, case 'a': *p++ = '\007'; break; /* BEL, not classic C */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - c = s[-1] - '0'; + { + int c = s[-1] - '0'; if (s < end && '0' <= *s && *s <= '7') { c = (c<<3) + *s++ - '0'; if (s < end && '0' <= *s && *s <= '7') @@ -1139,6 +1131,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s, } *p++ = c; break; + } case 'x': if (s+1 < end) { int digit1, digit2; @@ -1184,10 +1177,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, } } - return _PyBytesWriter_Finish(&writer, p); + return PyBytesWriter_FinishWithEndPointer(writer, p); failed: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } @@ -2858,23 +2851,18 @@ _PyBytes_FromBuffer(PyObject *x) static PyObject* _PyBytes_FromList(PyObject *x) { - Py_ssize_t i, size = PyList_GET_SIZE(x); - Py_ssize_t value; - char *str; - PyObject *item; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - str = _PyBytesWriter_Alloc(&writer, size); - if (str == NULL) + Py_ssize_t size = PyList_GET_SIZE(x); + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; - writer.overallocate = 1; - size = writer.allocated; + } + char *str = PyBytesWriter_Data(writer); + size = PyBytesWriter_Allocated(writer); - for (i = 0; i < PyList_GET_SIZE(x); i++) { - item = PyList_GET_ITEM(x, i); + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(x); i++) { + PyObject *item = PyList_GET_ITEM(x, i); Py_INCREF(item); - value = PyNumber_AsSsize_t(item, NULL); + Py_ssize_t value = PyNumber_AsSsize_t(item, NULL); Py_DECREF(item); if (value == -1 && PyErr_Occurred()) goto error; @@ -2886,17 +2874,22 @@ _PyBytes_FromList(PyObject *x) } if (i >= size) { - str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str == NULL) - return NULL; - size = writer.allocated; + Py_ssize_t pos = str - (char*)PyBytesWriter_Data(writer); + if (PyBytesWriter_SetSize(writer, pos) < 0) { + goto error; + } + if (PyBytesWriter_Resize(writer, size + 1) < 0) { + goto error; + } + str = (char*)PyBytesWriter_Data(writer) + pos; + size = PyBytesWriter_Allocated(writer); } *str++ = (char) value; } - return _PyBytesWriter_Finish(&writer, str); + return PyBytesWriter_FinishWithEndPointer(writer, str); - error: - _PyBytesWriter_Dealloc(&writer); +error: + PyBytesWriter_Discard(writer); return NULL; } @@ -3729,3 +3722,213 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, } } + +// --- PyBytesWriter API ----------------------------------------------------- + +struct PyBytesWriter { + char small_buffer[256]; + PyObject *obj; + Py_ssize_t size; +}; + + +static inline char* +byteswriter_data(PyBytesWriter *writer) +{ + if (writer->obj == NULL) { + return writer->small_buffer; + } + else { + return PyBytes_AS_STRING(writer->obj); + } +} + + +static inline Py_ssize_t +byteswriter_allocated(PyBytesWriter *writer) +{ + if (writer->obj == NULL) { + return sizeof(writer->small_buffer); + } + else { + return PyBytes_GET_SIZE(writer->obj); + } +} + + +#ifdef MS_WINDOWS + /* On Windows, overallocate by 50% is the best factor */ +# define OVERALLOCATE_FACTOR 2 +#else + /* On Linux, overallocate by 25% is the best factor */ +# define OVERALLOCATE_FACTOR 4 +#endif + + +static inline int +byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) +{ + assert(size >= 0); + + if (writer->obj == NULL) { + if ((size_t)size <= sizeof(writer->small_buffer)) { + return 0; + } + } + else { + if (size <= PyBytes_GET_SIZE(writer->obj)) { + return 0; + } + } + + if (overallocate) { + if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) { + size += size / OVERALLOCATE_FACTOR; + } + } + + if (writer->obj != NULL) { + if (_PyBytes_Resize(&writer->obj, size)) { + return -1; + } + assert(writer->obj != NULL); + } + else { + writer->obj = PyBytes_FromStringAndSize(NULL, size); + if (writer->obj == NULL) { + return -1; + } + if (writer->size) { + memcpy(PyBytes_AS_STRING(writer->obj), + writer->small_buffer, + writer->size); + } + } + return 0; +} + + +PyBytesWriter* +PyBytesWriter_Create(Py_ssize_t alloc) +{ + if (alloc < 0) { + PyErr_SetString(PyExc_ValueError, "alloc must be >= 0"); + return NULL; + } + + PyBytesWriter *writer = _Py_FREELIST_POP_MEM(bytes_writers); + if (writer == NULL) { + writer = (PyBytesWriter *)PyMem_Malloc(sizeof(PyBytesWriter)); + if (writer == NULL) { + PyErr_NoMemory(); + return NULL; + } + } + writer->obj = NULL; + writer->size = 0; + + if (alloc >= 1) { + if (byteswriter_resize(writer, alloc, 0) < 0) { + PyBytesWriter_Discard(writer); + return NULL; + } + } + return writer; +} + + +void +PyBytesWriter_Discard(PyBytesWriter *writer) +{ + if (writer == NULL) { + return; + } + + Py_XDECREF(writer->obj); + _Py_FREELIST_FREE(bytes_writers, writer, PyMem_Free); +} + + +PyObject* +PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size) +{ + PyObject *result; + if (size == 0) { + result = bytes_get_empty(); + } + else if (writer->obj != NULL) { + if (size != PyBytes_GET_SIZE(writer->obj)) { + if (_PyBytes_Resize(&writer->obj, size)) { + goto error; + } + } + result = writer->obj; + writer->obj = NULL; + } + else { + result = PyBytes_FromStringAndSize(writer->small_buffer, size); + } + PyBytesWriter_Discard(writer); + return result; + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + +PyObject* +PyBytesWriter_Finish(PyBytesWriter *writer) +{ + return PyBytesWriter_FinishWithSize(writer, writer->size); +} + + +PyObject* +PyBytesWriter_FinishWithEndPointer(PyBytesWriter *writer, void *data) +{ + Py_ssize_t size = (char*)data - byteswriter_data(writer); + if (size < 0 || size > byteswriter_allocated(writer)) { + PyBytesWriter_Discard(writer); + PyErr_SetString(PyExc_ValueError, "invalid end pointer"); + return NULL; + } + + return PyBytesWriter_FinishWithSize(writer, size); +} + + +void* +PyBytesWriter_Data(PyBytesWriter *writer) +{ + return byteswriter_data(writer); +} + + +Py_ssize_t +PyBytesWriter_Allocated(PyBytesWriter *writer) +{ + return byteswriter_allocated(writer); +} + + +int +PyBytesWriter_SetSize(PyBytesWriter *writer, Py_ssize_t size) +{ + if (size < 0 || size > byteswriter_allocated(writer)) { + PyErr_SetString(PyExc_ValueError, "invalid size"); + return -1; + } + writer->size = size; + return 0; +} + + +int +PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t alloc) +{ + if (alloc < 0) { + PyErr_SetString(PyExc_ValueError, "alloc must be >= 0"); + return -1; + } + return byteswriter_resize(writer, alloc, 1); +} diff --git a/Objects/object.c b/Objects/object.c index ecc5a86901a347..1bf5f11e326460 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -941,6 +941,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); + clear_freelist(&freelists->bytes_writers, is_finalization, PyMem_Free); clear_freelist(&freelists->ints, is_finalization, free_object); clear_freelist(&freelists->pymethodobjects, is_finalization, free_object); } From 8761a9bcf32ee2ce086af2782f9caa3e317e384e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Mar 2025 13:07:04 +0100 Subject: [PATCH 02/27] Remove PyBytesWriter_SetSize() --- Include/cpython/bytesobject.h | 5 +---- Modules/_pickle.c | 6 ------ Objects/bytesobject.c | 35 ++++++++++------------------------- 3 files changed, 11 insertions(+), 35 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index ed2d835913bb51..8e1b390a67f037 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -64,9 +64,6 @@ PyAPI_FUNC(void*) PyBytesWriter_Data( PyAPI_FUNC(Py_ssize_t) PyBytesWriter_Allocated( PyBytesWriter *writer); -PyAPI_FUNC(int) PyBytesWriter_SetSize( - PyBytesWriter *writer, - Py_ssize_t size); PyAPI_FUNC(int) PyBytesWriter_Resize( PyBytesWriter *writer, - Py_ssize_t alloc); + Py_ssize_t size); diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 60cd822c6f8ed6..995a6d3962188e 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2633,9 +2633,6 @@ raw_unicode_escape(PyObject *obj) /* -1: subtract 1 preallocated byte */ alloc += 10-1; Py_ssize_t pos = p - (char*)PyBytesWriter_Data(writer); - if (PyBytesWriter_SetSize(writer, pos) < 0) { - goto error; - } if (PyBytesWriter_Resize(writer, alloc) < 0) { goto error; } @@ -2660,9 +2657,6 @@ raw_unicode_escape(PyObject *obj) /* -1: subtract 1 preallocated byte */ alloc += 6-1; Py_ssize_t pos = p - (char*)PyBytesWriter_Data(writer); - if (PyBytesWriter_SetSize(writer, pos) < 0) { - goto error; - } if (PyBytesWriter_Resize(writer, alloc) < 0) { goto error; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 244e4aa8950d08..38887482cf75b1 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -225,9 +225,6 @@ PyBytes_FromFormatV(const char *format, va_list vargs) size_t len = (len_expr); \ alloc += len; \ Py_ssize_t pos = s - (char*)PyBytesWriter_Data(writer); \ - if (PyBytesWriter_SetSize(writer, pos) < 0) { \ - goto error; \ - } \ if (PyBytesWriter_Resize(writer, alloc) < 0) { \ goto error; \ } \ @@ -2875,9 +2872,6 @@ _PyBytes_FromList(PyObject *x) if (i >= size) { Py_ssize_t pos = str - (char*)PyBytesWriter_Data(writer); - if (PyBytesWriter_SetSize(writer, pos) < 0) { - goto error; - } if (PyBytesWriter_Resize(writer, size + 1) < 0) { goto error; } @@ -3798,11 +3792,10 @@ byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) if (writer->obj == NULL) { return -1; } - if (writer->size) { - memcpy(PyBytes_AS_STRING(writer->obj), - writer->small_buffer, - writer->size); - } + assert((size_t)size > sizeof(writer->small_buffer)); + memcpy(PyBytes_AS_STRING(writer->obj), + writer->small_buffer, + sizeof(writer->small_buffer)); } return 0; } @@ -3912,23 +3905,15 @@ PyBytesWriter_Allocated(PyBytesWriter *writer) int -PyBytesWriter_SetSize(PyBytesWriter *writer, Py_ssize_t size) +PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size) { - if (size < 0 || size > byteswriter_allocated(writer)) { - PyErr_SetString(PyExc_ValueError, "invalid size"); + if (size < 0) { + PyErr_SetString(PyExc_ValueError, "size must be >= 0"); return -1; } - writer->size = size; - return 0; -} - - -int -PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t alloc) -{ - if (alloc < 0) { - PyErr_SetString(PyExc_ValueError, "alloc must be >= 0"); + if (byteswriter_resize(writer, size, 1) < 0) { return -1; } - return byteswriter_resize(writer, alloc, 1); + writer->size = size; + return 0; } From 92e1294c1fdff9ca537a044d1ecbb60470adf2c3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Mar 2025 13:19:08 +0100 Subject: [PATCH 03/27] Add tests * Add PyBytesWriter_GetSize() * Rename: * PyBytesWriter_Data() => PyBytesWriter_GetData() * PyBytesWriter_Allocated() => PyBytesWriter_GetAllocated() --- Include/cpython/bytesobject.h | 8 +- Lib/test/test_capi/test_bytes.py | 76 ++++++++++++ Modules/_pickle.c | 10 +- Modules/_struct.c | 2 +- Modules/_testcapi/bytes.c | 203 +++++++++++++++++++++++++++++++ Modules/binascii.c | 8 +- Objects/bytesobject.c | 40 +++--- 7 files changed, 318 insertions(+), 29 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 8e1b390a67f037..1012ea4a31789a 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -47,7 +47,7 @@ _PyBytes_Join(PyObject *sep, PyObject *iterable) typedef struct PyBytesWriter PyBytesWriter; PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create( - Py_ssize_t alloc); + Py_ssize_t size); PyAPI_FUNC(void) PyBytesWriter_Discard( PyBytesWriter *writer); PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( @@ -59,9 +59,11 @@ PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithEndPointer( PyBytesWriter *writer, void *data); -PyAPI_FUNC(void*) PyBytesWriter_Data( +PyAPI_FUNC(void*) PyBytesWriter_GetData( + PyBytesWriter *writer); +PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetSize( PyBytesWriter *writer); -PyAPI_FUNC(Py_ssize_t) PyBytesWriter_Allocated( +PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetAllocated( PyBytesWriter *writer); PyAPI_FUNC(int) PyBytesWriter_Resize( diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 5b61c73381542d..7f2c98dad83f6f 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -291,5 +291,81 @@ def test_join(self): bytes_join(b'', NULL) +class PyBytesWriterTest(unittest.TestCase): + SMALL_BUFFER = 256 # bytes + + def create_writer(self, alloc=0, string=b''): + return _testcapi.PyBytesWriter(alloc, string) + + def test_empty(self): + # Test PyBytesWriter_Create() + writer = self.create_writer() + self.assertEqual(writer.get_size(), 0) + self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) + self.assertEqual(writer.finish(), b'') + + def test_abc(self): + # Test PyBytesWriter_Create() + writer = self.create_writer(3, b'abc') + self.assertEqual(writer.get_size(), 3) + self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) + self.assertEqual(writer.finish(), b'abc') + + writer = self.create_writer(10, b'abc') + self.assertEqual(writer.get_size(), 10) + self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) + self.assertEqual(writer.finish_with_size(3), b'abc') + + # def test_write_bytes(self): + # # Test PyBytesWriter_WriteBytes() + + # writer = self.create_writer() + # writer.write_bytes(b'Hello World!', -1) + # self.assertEqual(writer.finish(), b'Hello World!') + + # writer = self.create_writer() + # writer.write_bytes(b'Hello ', -1) + # writer.write_bytes(b'World! ', 6) + # self.assertEqual(writer.finish(), b'Hello World!') + + def test_resize(self): + # Test PyBytesWriter_Extend() + + writer = self.create_writer() + writer.resize(len(b'number=123456'), b'number=123456') + writer.resize(len(b'number=123456'), b'') + self.assertEqual(writer.get_size(), len(b'number=123456')) + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer() + writer.resize(0, b'') + writer.resize(len(b'number=123456'), b'number=123456') + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer() + writer.resize(len(b'number='), b'number=') + writer.resize(len(b'number=123456'), b'123456') + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer() + writer.resize(len(b'number='), b'number=') + writer.resize(len(b'number='), b'') + writer.resize(len(b'number=123456'), b'123456') + self.assertEqual(writer.finish(), b'number=123456') + + writer = self.create_writer() + writer.resize(len(b'number'), b'number') + writer.resize(len(b'number='), b'=') + writer.resize(len(b'number=123'), b'123') + writer.resize(len(b'number=123456'), b'456') + self.assertEqual(writer.finish(), b'number=123456') + + # def test_format(self): + # # Test PyBytesWriter_Format() + # writer = self.create_writer() + # writer.format_i(123456) + # self.assertEqual(writer.finish(), b'123456') + + if __name__ == "__main__": unittest.main() diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 995a6d3962188e..bba604ddecc127 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2624,7 +2624,7 @@ raw_unicode_escape(PyObject *obj) if (writer == NULL) { return NULL; } - char *p = PyBytesWriter_Data(writer); + char *p = PyBytesWriter_GetData(writer); for (Py_ssize_t i=0; i < size; i++) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); @@ -2632,11 +2632,11 @@ raw_unicode_escape(PyObject *obj) if (ch >= 0x10000) { /* -1: subtract 1 preallocated byte */ alloc += 10-1; - Py_ssize_t pos = p - (char*)PyBytesWriter_Data(writer); + Py_ssize_t pos = p - (char*)PyBytesWriter_GetData(writer); if (PyBytesWriter_Resize(writer, alloc) < 0) { goto error; } - p = (char*)PyBytesWriter_Data(writer) + pos; + p = (char*)PyBytesWriter_GetData(writer) + pos; *p++ = '\\'; *p++ = 'U'; @@ -2656,11 +2656,11 @@ raw_unicode_escape(PyObject *obj) { /* -1: subtract 1 preallocated byte */ alloc += 6-1; - Py_ssize_t pos = p - (char*)PyBytesWriter_Data(writer); + Py_ssize_t pos = p - (char*)PyBytesWriter_GetData(writer); if (PyBytesWriter_Resize(writer, alloc) < 0) { goto error; } - p = (char*)PyBytesWriter_Data(writer) + pos; + p = (char*)PyBytesWriter_GetData(writer) + pos; *p++ = '\\'; *p++ = 'u'; diff --git a/Modules/_struct.c b/Modules/_struct.c index b6f71eeef17198..8e84a8a949ad0a 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2291,7 +2291,7 @@ s_pack(PyObject *self, PyObject *const *args, Py_ssize_t nargs) if (writer == NULL) { return NULL; } - char *buf = PyBytesWriter_Data(writer); + char *buf = PyBytesWriter_GetData(writer); /* Call the guts */ if ( s_pack_internal(soself, args, 0, buf, state) != 0 ) { diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 33903de14ba68d..6ff3dfbea5ac2c 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -51,6 +51,199 @@ bytes_join(PyObject *Py_UNUSED(module), PyObject *args) } +// --- PyBytesWriter type --------------------------------------------------- + +typedef struct { + PyObject_HEAD + PyBytesWriter *writer; +} WriterObject; + + +static PyObject * +writer_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)type->tp_alloc(type, 0); + if (!self) { + return NULL; + } + self->writer = NULL; + return (PyObject*)self; +} + + +static int +writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs) +{ + WriterObject *self = (WriterObject *)self_raw; + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + + if (kwargs && PyDict_GET_SIZE(kwargs)) { + PyErr_Format(PyExc_TypeError, + "PyBytesWriter() takes exactly no keyword arguments"); + return -1; + } + + Py_ssize_t alloc; + char *str; + Py_ssize_t str_size; + if (!PyArg_ParseTuple(args, "ny#", &alloc, &str, &str_size)) { + return -1; + } + + self->writer = PyBytesWriter_Create(alloc); + if (self->writer == NULL) { + return -1; + } + + if (str_size) { + char *buf = PyBytesWriter_GetData(self->writer); + memcpy(buf, str, str_size); + } + + return 0; +} + + +static void +writer_dealloc(PyObject *self_raw) +{ + WriterObject *self = (WriterObject *)self_raw; + PyTypeObject *tp = Py_TYPE(self); + if (self->writer) { + PyBytesWriter_Discard(self->writer); + } + tp->tp_free(self); + Py_DECREF(tp); +} + + +static inline int +writer_check(WriterObject *self) +{ + if (self->writer == NULL) { + PyErr_SetString(PyExc_ValueError, "operation on finished writer"); + return -1; + } + return 0; +} + + +static PyObject* +writer_resize(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t size; + char *str; + Py_ssize_t str_size; + if (!PyArg_ParseTuple(args, + "ny#", + &size, &str, &str_size)) { + return NULL; + } + assert(size >= str_size); + + Py_ssize_t pos = PyBytesWriter_GetSize(self->writer); + if (PyBytesWriter_Resize(self->writer, size) < 0) { + return NULL; + } + + char *buf = PyBytesWriter_GetData(self->writer); + memcpy(buf + pos, str, str_size); + + Py_RETURN_NONE; +} + + +static PyObject* +writer_get_size(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t alloc = PyBytesWriter_GetSize(self->writer); + return PyLong_FromSsize_t(alloc); +} + + +static PyObject* +writer_get_allocated(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t alloc = PyBytesWriter_GetAllocated(self->writer); + return PyLong_FromSsize_t(alloc); +} + + +static PyObject* +writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + PyObject *str = PyBytesWriter_Finish(self->writer); + self->writer = NULL; + return str; +} + + +static PyObject* +writer_finish_with_size(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "n", &size)) { + return NULL; + } + + PyObject *str = PyBytesWriter_FinishWithSize(self->writer, size); + self->writer = NULL; + return str; +} + + +static PyMethodDef writer_methods[] = { + {"resize", _PyCFunction_CAST(writer_resize), METH_VARARGS}, + {"get_size", _PyCFunction_CAST(writer_get_size), METH_NOARGS}, + {"get_allocated", _PyCFunction_CAST(writer_get_allocated), METH_NOARGS}, + {"finish", _PyCFunction_CAST(writer_finish), METH_NOARGS}, + {"finish_with_size", _PyCFunction_CAST(writer_finish_with_size), METH_VARARGS}, + {NULL, NULL} /* sentinel */ +}; + +static PyType_Slot Writer_Type_slots[] = { + {Py_tp_new, writer_new}, + {Py_tp_init, writer_init}, + {Py_tp_dealloc, writer_dealloc}, + {Py_tp_methods, writer_methods}, + {0, 0}, /* sentinel */ +}; + +static PyType_Spec Writer_spec = { + .name = "_testcapi.PyBytesWriter", + .basicsize = sizeof(WriterObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = Writer_Type_slots, +}; + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, {"bytes_join", bytes_join, METH_VARARGS}, @@ -64,5 +257,15 @@ _PyTestCapi_Init_Bytes(PyObject *m) return -1; } + PyTypeObject *writer_type = (PyTypeObject *)PyType_FromSpec(&Writer_spec); + if (writer_type == NULL) { + return -1; + } + if (PyModule_AddType(m, writer_type) < 0) { + Py_DECREF(writer_type); + return -1; + } + Py_DECREF(writer_type); + return 0; } diff --git a/Modules/binascii.c b/Modules/binascii.c index 7954198245972b..02fbec04017ae0 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -327,7 +327,7 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) if (writer == NULL) { return NULL; } - unsigned char *ascii_data = PyBytesWriter_Data(writer); + unsigned char *ascii_data = PyBytesWriter_GetData(writer); /* Store the length */ if (backtick && !bin_len) @@ -390,7 +390,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) if (writer == NULL) { return NULL; } - unsigned char *bin_data = PyBytesWriter_Data(writer); + unsigned char *bin_data = PyBytesWriter_GetData(writer); if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') { state = get_binascii_state(module); @@ -493,7 +493,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) ** This is an invalid length, as there is no possible input that ** could encoded into such a base64 string. */ - unsigned char *bin_data_start = PyBytesWriter_Data(writer); + unsigned char *bin_data_start = PyBytesWriter_GetData(writer); PyErr_Format(state->Error, "Invalid base64-encoded string: " "number of data characters (%zd) cannot be 1 more " @@ -561,7 +561,7 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) if (writer == NULL) { return NULL; } - unsigned char *ascii_data = PyBytesWriter_Data(writer); + unsigned char *ascii_data = PyBytesWriter_GetData(writer); for( ; bin_len > 0 ; bin_len--, bin_data++ ) { /* Shift the data into our buffer */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 38887482cf75b1..2dca09d561bef9 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -218,17 +218,17 @@ PyBytes_FromFormatV(const char *format, va_list vargs) if (writer == NULL) { return NULL; } - char *s = PyBytesWriter_Data(writer); + char *s = PyBytesWriter_GetData(writer); #define WRITE_BYTES_LEN(str, len_expr) \ do { \ size_t len = (len_expr); \ alloc += len; \ - Py_ssize_t pos = s - (char*)PyBytesWriter_Data(writer); \ + Py_ssize_t pos = s - (char*)PyBytesWriter_GetData(writer); \ if (PyBytesWriter_Resize(writer, alloc) < 0) { \ goto error; \ } \ - s = (char*)PyBytesWriter_Data(writer) + pos; \ + s = (char*)PyBytesWriter_GetData(writer) + pos; \ memcpy(s, (str), len); \ s += len; \ } while (0) @@ -1080,7 +1080,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s, if (writer == NULL) { return NULL; } - char *p = PyBytesWriter_Data(writer); + char *p = PyBytesWriter_GetData(writer); *first_invalid_escape = NULL; @@ -2853,8 +2853,8 @@ _PyBytes_FromList(PyObject *x) if (writer == NULL) { return NULL; } - char *str = PyBytesWriter_Data(writer); - size = PyBytesWriter_Allocated(writer); + char *str = PyBytesWriter_GetData(writer); + size = PyBytesWriter_GetAllocated(writer); for (Py_ssize_t i = 0; i < PyList_GET_SIZE(x); i++) { PyObject *item = PyList_GET_ITEM(x, i); @@ -2871,12 +2871,12 @@ _PyBytes_FromList(PyObject *x) } if (i >= size) { - Py_ssize_t pos = str - (char*)PyBytesWriter_Data(writer); + Py_ssize_t pos = str - (char*)PyBytesWriter_GetData(writer); if (PyBytesWriter_Resize(writer, size + 1) < 0) { goto error; } - str = (char*)PyBytesWriter_Data(writer) + pos; - size = PyBytesWriter_Allocated(writer); + str = (char*)PyBytesWriter_GetData(writer) + pos; + size = PyBytesWriter_GetAllocated(writer); } *str++ = (char) value; } @@ -3802,10 +3802,10 @@ byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) PyBytesWriter* -PyBytesWriter_Create(Py_ssize_t alloc) +PyBytesWriter_Create(Py_ssize_t size) { - if (alloc < 0) { - PyErr_SetString(PyExc_ValueError, "alloc must be >= 0"); + if (size < 0) { + PyErr_SetString(PyExc_ValueError, "size must be >= 0"); return NULL; } @@ -3820,11 +3820,12 @@ PyBytesWriter_Create(Py_ssize_t alloc) writer->obj = NULL; writer->size = 0; - if (alloc >= 1) { - if (byteswriter_resize(writer, alloc, 0) < 0) { + if (size >= 1) { + if (byteswriter_resize(writer, size, 0) < 0) { PyBytesWriter_Discard(writer); return NULL; } + writer->size = size; } return writer; } @@ -3891,14 +3892,21 @@ PyBytesWriter_FinishWithEndPointer(PyBytesWriter *writer, void *data) void* -PyBytesWriter_Data(PyBytesWriter *writer) +PyBytesWriter_GetData(PyBytesWriter *writer) { return byteswriter_data(writer); } Py_ssize_t -PyBytesWriter_Allocated(PyBytesWriter *writer) +PyBytesWriter_GetSize(PyBytesWriter *writer) +{ + return writer->size; +} + + +Py_ssize_t +PyBytesWriter_GetAllocated(PyBytesWriter *writer) { return byteswriter_allocated(writer); } From eff71b52b22f3744edab7cf566588e1d0f43470b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Mar 2025 13:43:13 +0100 Subject: [PATCH 04/27] Add PyBytesWriter_WriteBytes() --- Include/cpython/bytesobject.h | 5 +++++ Lib/test/test_capi/test_bytes.py | 26 +++++++++++--------------- Modules/_testcapi/bytes.c | 22 ++++++++++++++++++++++ Objects/bytesobject.c | 24 ++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 1012ea4a31789a..5218f4535a1571 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -66,6 +66,11 @@ PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetSize( PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetAllocated( PyBytesWriter *writer); +PyAPI_FUNC(int) PyBytesWriter_WriteBytes( + PyBytesWriter *writer, + const void *bytes, + Py_ssize_t size); + PyAPI_FUNC(int) PyBytesWriter_Resize( PyBytesWriter *writer, Py_ssize_t size); diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 7f2c98dad83f6f..bd69f9bdcd1f16 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -297,15 +297,13 @@ class PyBytesWriterTest(unittest.TestCase): def create_writer(self, alloc=0, string=b''): return _testcapi.PyBytesWriter(alloc, string) - def test_empty(self): + def test_create(self): # Test PyBytesWriter_Create() writer = self.create_writer() self.assertEqual(writer.get_size(), 0) self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) self.assertEqual(writer.finish(), b'') - def test_abc(self): - # Test PyBytesWriter_Create() writer = self.create_writer(3, b'abc') self.assertEqual(writer.get_size(), 3) self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) @@ -316,21 +314,19 @@ def test_abc(self): self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) self.assertEqual(writer.finish_with_size(3), b'abc') - # def test_write_bytes(self): - # # Test PyBytesWriter_WriteBytes() + def test_write_bytes(self): + # Test PyBytesWriter_WriteBytes() + writer = self.create_writer() + writer.write_bytes(b'Hello World!', -1) + self.assertEqual(writer.finish(), b'Hello World!') - # writer = self.create_writer() - # writer.write_bytes(b'Hello World!', -1) - # self.assertEqual(writer.finish(), b'Hello World!') - - # writer = self.create_writer() - # writer.write_bytes(b'Hello ', -1) - # writer.write_bytes(b'World! ', 6) - # self.assertEqual(writer.finish(), b'Hello World!') + writer = self.create_writer() + writer.write_bytes(b'Hello ', -1) + writer.write_bytes(b'World! ', 6) + self.assertEqual(writer.finish(), b'Hello World!') def test_resize(self): - # Test PyBytesWriter_Extend() - + # Test PyBytesWriter_Resize() writer = self.create_writer() writer.resize(len(b'number=123456'), b'number=123456') writer.resize(len(b'number=123456'), b'') diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 6ff3dfbea5ac2c..0eeeec5c51ce7d 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -130,6 +130,27 @@ writer_check(WriterObject *self) } +static PyObject* +writer_write_bytes(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *bytes; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &bytes, &size)) { + return NULL; + } + + if (PyBytesWriter_WriteBytes(self->writer, bytes, size) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + static PyObject* writer_resize(PyObject *self_raw, PyObject *args) { @@ -220,6 +241,7 @@ writer_finish_with_size(PyObject *self_raw, PyObject *args) static PyMethodDef writer_methods[] = { + {"write_bytes", _PyCFunction_CAST(writer_write_bytes), METH_VARARGS}, {"resize", _PyCFunction_CAST(writer_resize), METH_VARARGS}, {"get_size", _PyCFunction_CAST(writer_get_size), METH_NOARGS}, {"get_allocated", _PyCFunction_CAST(writer_get_allocated), METH_NOARGS}, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2dca09d561bef9..7cf8e1a428fdae 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3925,3 +3925,27 @@ PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size) writer->size = size; return 0; } + + +int +PyBytesWriter_WriteBytes(PyBytesWriter *writer, + const void *bytes, Py_ssize_t size) +{ + if (size < 0) { + size = strlen(bytes); + } + + Py_ssize_t pos = writer->size; + if (size > PY_SSIZE_T_MAX - pos) { + PyErr_NoMemory(); + return -1; + } + Py_ssize_t total = pos + size; + + if (PyBytesWriter_Resize(writer, total) < 0) { + return -1; + } + char *buf = byteswriter_data(writer); + memcpy(buf + pos, bytes, size); + return 0; +} From 31c7ca78a820f1dc778069e03dfdf27078a860ee Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Mar 2025 13:59:28 +0100 Subject: [PATCH 05/27] Add PyBytesWriter_Format() --- Include/cpython/bytesobject.h | 4 ++ Lib/test/test_capi/test_bytes.py | 15 +++++--- Modules/_testcapi/bytes.c | 22 +++++++++++ Objects/bytesobject.c | 65 ++++++++++++++++++++++++++------ 4 files changed, 90 insertions(+), 16 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 5218f4535a1571..2560fb835e5393 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -70,6 +70,10 @@ PyAPI_FUNC(int) PyBytesWriter_WriteBytes( PyBytesWriter *writer, const void *bytes, Py_ssize_t size); +PyAPI_FUNC(int) PyBytesWriter_Format( + PyBytesWriter *writer, + const char *format, + ...); PyAPI_FUNC(int) PyBytesWriter_Resize( PyBytesWriter *writer, diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index bd69f9bdcd1f16..dbd7a456ee55a5 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -356,11 +356,16 @@ def test_resize(self): writer.resize(len(b'number=123456'), b'456') self.assertEqual(writer.finish(), b'number=123456') - # def test_format(self): - # # Test PyBytesWriter_Format() - # writer = self.create_writer() - # writer.format_i(123456) - # self.assertEqual(writer.finish(), b'123456') + def test_format_i(self): + # Test PyBytesWriter_Format() + writer = self.create_writer() + writer.format_i(b'x=%i', 123456) + self.assertEqual(writer.finish(), b'x=123456') + + writer = self.create_writer() + writer.format_i(b'x=%i, ', 123) + writer.format_i(b'y=%i', 456) + self.assertEqual(writer.finish(), b'x=123, y=456') if __name__ == "__main__": diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 0eeeec5c51ce7d..2ef4c15560e3df 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -151,6 +151,27 @@ writer_write_bytes(PyObject *self_raw, PyObject *args) } +static PyObject* +writer_format_i(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *format; + int value; + if (!PyArg_ParseTuple(args, "yi", &format, &value)) { + return NULL; + } + + if (PyBytesWriter_Format(self->writer, format, value) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + static PyObject* writer_resize(PyObject *self_raw, PyObject *args) { @@ -242,6 +263,7 @@ writer_finish_with_size(PyObject *self_raw, PyObject *args) static PyMethodDef writer_methods[] = { {"write_bytes", _PyCFunction_CAST(writer_write_bytes), METH_VARARGS}, + {"format_i", _PyCFunction_CAST(writer_format_i), METH_VARARGS}, {"resize", _PyCFunction_CAST(writer_resize), METH_VARARGS}, {"get_size", _PyCFunction_CAST(writer_get_size), METH_NOARGS}, {"get_allocated", _PyCFunction_CAST(writer_get_allocated), METH_NOARGS}, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 7cf8e1a428fdae..3261b88bd28bd5 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -196,8 +196,10 @@ PyBytes_FromString(const char *str) return (PyObject *) op; } -PyObject * -PyBytes_FromFormatV(const char *format, va_list vargs) + +static char* +bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos, + const char *format, va_list vargs) { const char *f; const char *p; @@ -213,12 +215,8 @@ PyBytes_FromFormatV(const char *format, va_list vargs) "0xffffffffffffffff\0" (19 bytes). */ char buffer[21]; - Py_ssize_t alloc = strlen(format); - PyBytesWriter *writer = PyBytesWriter_Create(alloc); - if (writer == NULL) { - return NULL; - } - char *s = PyBytesWriter_GetData(writer); + char *s = PyBytesWriter_GetData(writer) + writer_pos; + Py_ssize_t alloc = PyBytesWriter_GetSize(writer); #define WRITE_BYTES_LEN(str, len_expr) \ do { \ @@ -366,20 +364,39 @@ PyBytes_FromFormatV(const char *format, va_list vargs) default: /* invalid format string: copy unformatted string and exit */ WRITE_BYTES(p); - return PyBytesWriter_FinishWithEndPointer(writer, s); + return s; } } #undef WRITE_BYTES #undef WRITE_BYTES_LEN - return PyBytesWriter_FinishWithEndPointer(writer, s); + return s; error: - PyBytesWriter_Discard(writer); return NULL; } + +PyObject * +PyBytes_FromFormatV(const char *format, va_list vargs) +{ + Py_ssize_t alloc = strlen(format); + PyBytesWriter *writer = PyBytesWriter_Create(alloc); + if (writer == NULL) { + return NULL; + } + + char *s = bytes_fromformat(writer, 0, format, vargs); + if (s == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + + return PyBytesWriter_FinishWithEndPointer(writer, s); +} + + PyObject * PyBytes_FromFormat(const char *format, ...) { @@ -392,6 +409,7 @@ PyBytes_FromFormat(const char *format, ...) return ret; } + /* Helpers for formatstring */ Py_LOCAL_INLINE(PyObject *) @@ -3949,3 +3967,28 @@ PyBytesWriter_WriteBytes(PyBytesWriter *writer, memcpy(buf + pos, bytes, size); return 0; } + + +int +PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...) +{ + Py_ssize_t pos = writer->size; + Py_ssize_t format_len = strlen(format); + if (format_len > PY_SSIZE_T_MAX - pos) { + PyErr_NoMemory(); + return -1; + } + Py_ssize_t alloc = pos + format_len; + + if (PyBytesWriter_Resize(writer, alloc) < 0) { + return -1; + } + + va_list vargs; + va_start(vargs, format); + char *buf = bytes_fromformat(writer, pos, format, vargs); + va_end(vargs); + + Py_ssize_t size = buf - byteswriter_data(writer); + return PyBytesWriter_Resize(writer, size); +} From 86d0fd9a8ee7db837c222ae39e55a285e9c66b15 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Mar 2025 14:10:13 +0100 Subject: [PATCH 06/27] Fix build on Windows --- Objects/bytesobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 3261b88bd28bd5..ae10f600833739 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -215,7 +215,7 @@ bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos, "0xffffffffffffffff\0" (19 bytes). */ char buffer[21]; - char *s = PyBytesWriter_GetData(writer) + writer_pos; + char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos; Py_ssize_t alloc = PyBytesWriter_GetSize(writer); #define WRITE_BYTES_LEN(str, len_expr) \ From 79fa5f8f696b3a45f48f0d5e1f6cd78f3818d91c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Mar 2025 17:42:03 +0100 Subject: [PATCH 07/27] Add PyBytesWriter_ResizeAndUpdatePointer() function --- Include/cpython/bytesobject.h | 4 ++++ Modules/_pickle.c | 10 ++++------ Objects/bytesobject.c | 22 ++++++++++++++++------ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 2560fb835e5393..f914f808715df8 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -78,3 +78,7 @@ PyAPI_FUNC(int) PyBytesWriter_Format( PyAPI_FUNC(int) PyBytesWriter_Resize( PyBytesWriter *writer, Py_ssize_t size); +PyAPI_FUNC(void*) PyBytesWriter_ResizeAndUpdatePointer( + PyBytesWriter *writer, + Py_ssize_t size, + void *data); diff --git a/Modules/_pickle.c b/Modules/_pickle.c index bba604ddecc127..7e47eafa09f06b 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2632,11 +2632,10 @@ raw_unicode_escape(PyObject *obj) if (ch >= 0x10000) { /* -1: subtract 1 preallocated byte */ alloc += 10-1; - Py_ssize_t pos = p - (char*)PyBytesWriter_GetData(writer); - if (PyBytesWriter_Resize(writer, alloc) < 0) { + p = PyBytesWriter_ResizeAndUpdatePointer(writer, alloc, p); + if (p == NULL) { goto error; } - p = (char*)PyBytesWriter_GetData(writer) + pos; *p++ = '\\'; *p++ = 'U'; @@ -2656,11 +2655,10 @@ raw_unicode_escape(PyObject *obj) { /* -1: subtract 1 preallocated byte */ alloc += 6-1; - Py_ssize_t pos = p - (char*)PyBytesWriter_GetData(writer); - if (PyBytesWriter_Resize(writer, alloc) < 0) { + p = PyBytesWriter_ResizeAndUpdatePointer(writer, alloc, p); + if (p == NULL) { goto error; } - p = (char*)PyBytesWriter_GetData(writer) + pos; *p++ = '\\'; *p++ = 'u'; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ae10f600833739..4bb39b5d2ef070 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -222,11 +222,10 @@ bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos, do { \ size_t len = (len_expr); \ alloc += len; \ - Py_ssize_t pos = s - (char*)PyBytesWriter_GetData(writer); \ - if (PyBytesWriter_Resize(writer, alloc) < 0) { \ + s = PyBytesWriter_ResizeAndUpdatePointer(writer, alloc, s); \ + if (s == NULL) { \ goto error; \ } \ - s = (char*)PyBytesWriter_GetData(writer) + pos; \ memcpy(s, (str), len); \ s += len; \ } while (0) @@ -2889,11 +2888,10 @@ _PyBytes_FromList(PyObject *x) } if (i >= size) { - Py_ssize_t pos = str - (char*)PyBytesWriter_GetData(writer); - if (PyBytesWriter_Resize(writer, size + 1) < 0) { + str = PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str); + if (str == NULL) { goto error; } - str = (char*)PyBytesWriter_GetData(writer) + pos; size = PyBytesWriter_GetAllocated(writer); } *str++ = (char) value; @@ -3992,3 +3990,15 @@ PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...) Py_ssize_t size = buf - byteswriter_data(writer); return PyBytesWriter_Resize(writer, size); } + + +void* +PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, + void *data) +{ + Py_ssize_t pos = (char*)data - byteswriter_data(writer); + if (PyBytesWriter_Resize(writer, size) < 0) { + return NULL; + } + return byteswriter_data(writer) + pos; +} From bf60f7f4c0ac513a8c8f3bb3c8d32cd4b69744d3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Mar 2025 17:50:04 +0100 Subject: [PATCH 08/27] Convert _PyBytes_FromIterator() --- Objects/bytesobject.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 4bb39b5d2ef070..5effeebc343082 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2940,21 +2940,19 @@ _PyBytes_FromTuple(PyObject *x) static PyObject * _PyBytes_FromIterator(PyObject *it, PyObject *x) { - char *str; Py_ssize_t i, size; - _PyBytesWriter writer; /* For iterator version, create a bytes object and resize as needed */ size = PyObject_LengthHint(x, 64); if (size == -1 && PyErr_Occurred()) return NULL; - _PyBytesWriter_Init(&writer); - str = _PyBytesWriter_Alloc(&writer, size); - if (str == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; - writer.overallocate = 1; - size = writer.allocated; + } + char *str = PyBytesWriter_GetData(writer); + size = PyBytesWriter_GetAllocated(writer); /* Run the iterator to exhaustion */ for (i = 0; ; i++) { @@ -2984,18 +2982,18 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) /* Append the byte */ if (i >= size) { - str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str == NULL) - return NULL; - size = writer.allocated; + str = PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str); + if (str == NULL) { + goto error; + } + size = PyBytesWriter_GetAllocated(writer); } *str++ = (char) value; } - - return _PyBytesWriter_Finish(&writer, str); + return PyBytesWriter_FinishWithEndPointer(writer, str); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); return NULL; } From 62a15be6569a1f8b9b849592a514c5de2c32d016 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 25 Mar 2025 18:00:51 +0100 Subject: [PATCH 09/27] Add _PyBytesWriter_CreateByteArray() Convert _PyBytes_FromHex(). --- Include/cpython/bytesobject.h | 2 + Lib/test/test_capi/test_bytes.py | 35 +++++++------ Modules/_testcapi/bytes.c | 11 +++- Objects/bytesobject.c | 86 ++++++++++++++++++++++---------- 4 files changed, 93 insertions(+), 41 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index f914f808715df8..875e9644503fd7 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -48,6 +48,8 @@ typedef struct PyBytesWriter PyBytesWriter; PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create( Py_ssize_t size); +PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray( + Py_ssize_t size); PyAPI_FUNC(void) PyBytesWriter_Discard( PyBytesWriter *writer); PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index dbd7a456ee55a5..98af97cd740dd0 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -291,39 +291,40 @@ def test_join(self): bytes_join(b'', NULL) -class PyBytesWriterTest(unittest.TestCase): +class BytesWriterTest(unittest.TestCase): SMALL_BUFFER = 256 # bytes + result_type = bytes def create_writer(self, alloc=0, string=b''): - return _testcapi.PyBytesWriter(alloc, string) + return _testcapi.PyBytesWriter(alloc, string, 0) def test_create(self): # Test PyBytesWriter_Create() writer = self.create_writer() self.assertEqual(writer.get_size(), 0) self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) - self.assertEqual(writer.finish(), b'') + self.assertEqual(writer.finish(), self.result_type(b'')) writer = self.create_writer(3, b'abc') self.assertEqual(writer.get_size(), 3) self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) - self.assertEqual(writer.finish(), b'abc') + self.assertEqual(writer.finish(), self.result_type(b'abc')) writer = self.create_writer(10, b'abc') self.assertEqual(writer.get_size(), 10) self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) - self.assertEqual(writer.finish_with_size(3), b'abc') + self.assertEqual(writer.finish_with_size(3), self.result_type(b'abc')) def test_write_bytes(self): # Test PyBytesWriter_WriteBytes() writer = self.create_writer() writer.write_bytes(b'Hello World!', -1) - self.assertEqual(writer.finish(), b'Hello World!') + self.assertEqual(writer.finish(), self.result_type(b'Hello World!')) writer = self.create_writer() writer.write_bytes(b'Hello ', -1) writer.write_bytes(b'World! ', 6) - self.assertEqual(writer.finish(), b'Hello World!') + self.assertEqual(writer.finish(), self.result_type(b'Hello World!')) def test_resize(self): # Test PyBytesWriter_Resize() @@ -331,42 +332,48 @@ def test_resize(self): writer.resize(len(b'number=123456'), b'number=123456') writer.resize(len(b'number=123456'), b'') self.assertEqual(writer.get_size(), len(b'number=123456')) - self.assertEqual(writer.finish(), b'number=123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) writer = self.create_writer() writer.resize(0, b'') writer.resize(len(b'number=123456'), b'number=123456') - self.assertEqual(writer.finish(), b'number=123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) writer = self.create_writer() writer.resize(len(b'number='), b'number=') writer.resize(len(b'number=123456'), b'123456') - self.assertEqual(writer.finish(), b'number=123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) writer = self.create_writer() writer.resize(len(b'number='), b'number=') writer.resize(len(b'number='), b'') writer.resize(len(b'number=123456'), b'123456') - self.assertEqual(writer.finish(), b'number=123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) writer = self.create_writer() writer.resize(len(b'number'), b'number') writer.resize(len(b'number='), b'=') writer.resize(len(b'number=123'), b'123') writer.resize(len(b'number=123456'), b'456') - self.assertEqual(writer.finish(), b'number=123456') + self.assertEqual(writer.finish(), self.result_type(b'number=123456')) def test_format_i(self): # Test PyBytesWriter_Format() writer = self.create_writer() writer.format_i(b'x=%i', 123456) - self.assertEqual(writer.finish(), b'x=123456') + self.assertEqual(writer.finish(), self.result_type(b'x=123456')) writer = self.create_writer() writer.format_i(b'x=%i, ', 123) writer.format_i(b'y=%i', 456) - self.assertEqual(writer.finish(), b'x=123, y=456') + self.assertEqual(writer.finish(), self.result_type(b'x=123, y=456')) +class ByteArrayWriterTest(BytesWriterTest): + result_type = bytearray + + def create_writer(self, alloc=0, string=b''): + return _testcapi.PyBytesWriter(alloc, string, 1) + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 2ef4c15560e3df..3a3dc44b7e9b69 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -88,11 +88,18 @@ writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs) Py_ssize_t alloc; char *str; Py_ssize_t str_size; - if (!PyArg_ParseTuple(args, "ny#", &alloc, &str, &str_size)) { + int use_bytearray; + if (!PyArg_ParseTuple(args, "ny#i", + &alloc, &str, &str_size, &use_bytearray)) { return -1; } - self->writer = PyBytesWriter_Create(alloc); + if (use_bytearray) { + self->writer = _PyBytesWriter_CreateByteArray(alloc); + } + else { + self->writer = PyBytesWriter_Create(alloc); + } if (self->writer == NULL) { return -1; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 5effeebc343082..8ba49b61a958ef 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2515,17 +2515,13 @@ bytes_fromhex_impl(PyTypeObject *type, PyObject *string) PyObject* _PyBytes_FromHex(PyObject *string, int use_bytearray) { - char *buf; Py_ssize_t hexlen, invalid_char; unsigned int top, bot; const Py_UCS1 *str, *start, *end; - _PyBytesWriter writer; + PyBytesWriter *writer = NULL; Py_buffer view; view.obj = NULL; - _PyBytesWriter_Init(&writer); - writer.use_bytearray = use_bytearray; - if (PyUnicode_Check(string)) { hexlen = PyUnicode_GET_LENGTH(string); @@ -2561,10 +2557,11 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) } /* This overestimates if there are spaces */ - buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); - if (buf == NULL) { + writer = _PyBytesWriter_CreateByteArray(hexlen / 2); + if (writer == NULL) { goto release_buffer; } + char *buf = PyBytesWriter_GetData(writer); start = str; end = str + hexlen; @@ -2603,7 +2600,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) if (view.obj != NULL) { PyBuffer_Release(&view); } - return _PyBytesWriter_Finish(&writer, buf); + return PyBytesWriter_FinishWithEndPointer(writer, buf); error: if (invalid_char == -1) { @@ -2614,7 +2611,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) "non-hexadecimal number found in " "fromhex() arg at position %zd", invalid_char); } - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); release_buffer: if (view.obj != NULL) { @@ -3737,6 +3734,7 @@ struct PyBytesWriter { char small_buffer[256]; PyObject *obj; Py_ssize_t size; + int use_bytearray; }; @@ -3758,6 +3756,9 @@ byteswriter_allocated(PyBytesWriter *writer) if (writer->obj == NULL) { return sizeof(writer->small_buffer); } + else if (writer->use_bytearray) { + return PyByteArray_GET_SIZE(writer->obj); + } else { return PyBytes_GET_SIZE(writer->obj); } @@ -3778,15 +3779,8 @@ byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) { assert(size >= 0); - if (writer->obj == NULL) { - if ((size_t)size <= sizeof(writer->small_buffer)) { - return 0; - } - } - else { - if (size <= PyBytes_GET_SIZE(writer->obj)) { - return 0; - } + if (size <= byteswriter_allocated(writer)) { + return 0; } if (overallocate) { @@ -3796,11 +3790,28 @@ byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) } if (writer->obj != NULL) { - if (_PyBytes_Resize(&writer->obj, size)) { - return -1; + if (writer->use_bytearray) { + if (PyByteArray_Resize(writer->obj, size)) { + return -1; + } + } + else { + if (_PyBytes_Resize(&writer->obj, size)) { + return -1; + } } assert(writer->obj != NULL); } + else if (writer->use_bytearray) { + writer->obj = PyByteArray_FromStringAndSize(NULL, size); + if (writer->obj == NULL) { + return -1; + } + assert((size_t)size > sizeof(writer->small_buffer)); + memcpy(PyByteArray_AS_STRING(writer->obj), + writer->small_buffer, + sizeof(writer->small_buffer)); + } else { writer->obj = PyBytes_FromStringAndSize(NULL, size); if (writer->obj == NULL) { @@ -3815,8 +3826,8 @@ byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) } -PyBytesWriter* -PyBytesWriter_Create(Py_ssize_t size) +static PyBytesWriter* +byteswriter_create(Py_ssize_t size, int use_bytearray) { if (size < 0) { PyErr_SetString(PyExc_ValueError, "size must be >= 0"); @@ -3833,6 +3844,7 @@ PyBytesWriter_Create(Py_ssize_t size) } writer->obj = NULL; writer->size = 0; + writer->use_bytearray = use_bytearray; if (size >= 1) { if (byteswriter_resize(writer, size, 0) < 0) { @@ -3844,6 +3856,18 @@ PyBytesWriter_Create(Py_ssize_t size) return writer; } +PyBytesWriter* +PyBytesWriter_Create(Py_ssize_t size) +{ + return byteswriter_create(size, 0); +} + +PyBytesWriter* +_PyBytesWriter_CreateByteArray(Py_ssize_t size) +{ + return byteswriter_create(size, 1); +} + void PyBytesWriter_Discard(PyBytesWriter *writer) @@ -3865,14 +3889,26 @@ PyBytesWriter_FinishWithSize(PyBytesWriter *writer, Py_ssize_t size) result = bytes_get_empty(); } else if (writer->obj != NULL) { - if (size != PyBytes_GET_SIZE(writer->obj)) { - if (_PyBytes_Resize(&writer->obj, size)) { - goto error; + if (writer->use_bytearray) { + if (size != PyByteArray_GET_SIZE(writer->obj)) { + if (PyByteArray_Resize(writer->obj, size)) { + goto error; + } + } + } + else { + if (size != PyBytes_GET_SIZE(writer->obj)) { + if (_PyBytes_Resize(&writer->obj, size)) { + goto error; + } } } result = writer->obj; writer->obj = NULL; } + else if (writer->use_bytearray) { + result = PyByteArray_FromStringAndSize(writer->small_buffer, size); + } else { result = PyBytes_FromStringAndSize(writer->small_buffer, size); } From 0a70d70925314ec723d1928567328278062262c0 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 26 Mar 2025 17:53:32 +0100 Subject: [PATCH 10/27] Convert _PyBytes_FormatEx() --- Include/internal/pycore_long.h | 2 +- Objects/bytesobject.c | 49 +++++++++++++++------------------- Objects/longobject.c | 20 +++++++++----- 3 files changed, 35 insertions(+), 36 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ed6c435316708e..971d6031972b8a 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -135,7 +135,7 @@ extern int _PyLong_FormatWriter( int alternate); extern char* _PyLong_FormatBytesWriter( - _PyBytesWriter *writer, + PyBytesWriter *writer, char *str, PyObject *obj, int base, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 8ba49b61a958ef..f471fe9d537500 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -431,7 +431,7 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) static char* formatfloat(PyObject *v, int flags, int prec, int type, - PyObject **p_result, _PyBytesWriter *writer, char *str) + PyObject **p_result, PyBytesWriter *writer, char *str) { char *p; PyObject *result; @@ -459,7 +459,8 @@ formatfloat(PyObject *v, int flags, int prec, int type, len = strlen(p); if (writer != NULL) { - str = _PyBytesWriter_Prepare(writer, str, len); + Py_ssize_t resize = PyBytesWriter_GetSize(writer) + len; + str = PyBytesWriter_ResizeAndUpdatePointer(writer, resize, str); if (str == NULL) { PyMem_Free(p); return NULL; @@ -612,12 +613,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, PyObject *args, int use_bytearray) { const char *fmt; - char *res; Py_ssize_t arglen, argidx; Py_ssize_t fmtcnt; int args_owned = 0; PyObject *dict = NULL; - _PyBytesWriter writer; if (args == NULL) { PyErr_BadInternalCall(); @@ -626,14 +625,17 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, fmt = format; fmtcnt = format_len; - _PyBytesWriter_Init(&writer); - writer.use_bytearray = use_bytearray; - - res = _PyBytesWriter_Alloc(&writer, fmtcnt); - if (res == NULL) + PyBytesWriter *writer; + if (use_bytearray) { + writer = _PyBytesWriter_CreateByteArray(fmtcnt); + } + else { + writer = PyBytesWriter_Create(fmtcnt); + } + if (writer == NULL) { return NULL; - if (!use_bytearray) - writer.overallocate = 1; + } + char *res = PyBytesWriter_GetData(writer); if (PyTuple_Check(args)) { arglen = PyTuple_GET_SIZE(args); @@ -836,11 +838,6 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (v == NULL) goto error; - if (fmtcnt == 0) { - /* last write: disable writer overallocation */ - writer.overallocate = 0; - } - sign = 0; fill = ' '; switch (c) { @@ -901,8 +898,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } /* Fast path */ - writer.min_size -= 2; /* size preallocated for "%d" */ - res = _PyLong_FormatBytesWriter(&writer, res, + res = _PyLong_FormatBytesWriter(writer, res, v, base, alternate); if (res == NULL) goto error; @@ -930,8 +926,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, && !(flags & (F_SIGN | F_BLANK))) { /* Fast path */ - writer.min_size -= 2; /* size preallocated for "%f" */ - res = formatfloat(v, flags, prec, c, NULL, &writer, res); + res = formatfloat(v, flags, prec, c, NULL, writer, res); if (res == NULL) goto error; continue; @@ -987,9 +982,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, alloc++; /* 2: size preallocated for %s */ if (alloc > 2) { - res = _PyBytesWriter_Prepare(&writer, res, alloc - 2); - if (res == NULL) + Py_ssize_t resize = PyBytesWriter_GetSize(writer) + alloc - 2; + res = PyBytesWriter_ResizeAndUpdatePointer(writer, resize, res); + if (res == NULL) { goto error; + } } #ifndef NDEBUG char *before = res; @@ -1062,10 +1059,6 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, assert((res - before) == alloc); #endif } /* '%' */ - - /* If overallocation was disabled, ensure that it was the last - write. Otherwise, we missed an optimization */ - assert(writer.overallocate || fmtcnt == 0 || use_bytearray); } /* until end */ if (argidx < arglen && !dict) { @@ -1077,10 +1070,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (args_owned) { Py_DECREF(args); } - return _PyBytesWriter_Finish(&writer, res); + return PyBytesWriter_FinishWithEndPointer(writer, res); error: - _PyBytesWriter_Dealloc(&writer); + PyBytesWriter_Discard(writer); if (args_owned) { Py_DECREF(args); } diff --git a/Objects/longobject.c b/Objects/longobject.c index 984381ff4969d0..7867ae4ff003a4 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2016,7 +2016,7 @@ static int pylong_int_to_decimal_string(PyObject *aa, PyObject **p_output, _PyUnicodeWriter *writer, - _PyBytesWriter *bytes_writer, + PyBytesWriter *bytes_writer, char **bytes_str) { PyObject *s = NULL; @@ -2047,7 +2047,9 @@ pylong_int_to_decimal_string(PyObject *aa, Py_ssize_t size = PyUnicode_GET_LENGTH(s); const void *data = PyUnicode_DATA(s); int kind = PyUnicode_KIND(s); - *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, size); + Py_ssize_t resize = PyBytesWriter_GetSize(bytes_writer) + size; + *bytes_str = PyBytesWriter_ResizeAndUpdatePointer(bytes_writer, resize, + *bytes_str); if (*bytes_str == NULL) { goto error; } @@ -2084,7 +2086,7 @@ static int long_to_decimal_string_internal(PyObject *aa, PyObject **p_output, _PyUnicodeWriter *writer, - _PyBytesWriter *bytes_writer, + PyBytesWriter *bytes_writer, char **bytes_str) { PyLongObject *scratch, *a; @@ -2210,7 +2212,9 @@ long_to_decimal_string_internal(PyObject *aa, } } else if (bytes_writer) { - *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, strlen); + Py_ssize_t resize = PyBytesWriter_GetSize(bytes_writer) + strlen; + *bytes_str = PyBytesWriter_ResizeAndUpdatePointer(bytes_writer, resize, + *bytes_str); if (*bytes_str == NULL) { Py_DECREF(scratch); return -1; @@ -2320,7 +2324,7 @@ long_to_decimal_string(PyObject *aa) static int long_format_binary(PyObject *aa, int base, int alternate, PyObject **p_output, _PyUnicodeWriter *writer, - _PyBytesWriter *bytes_writer, char **bytes_str) + PyBytesWriter *bytes_writer, char **bytes_str) { PyLongObject *a = (PyLongObject *)aa; PyObject *v = NULL; @@ -2381,7 +2385,9 @@ long_format_binary(PyObject *aa, int base, int alternate, return -1; } else if (bytes_writer) { - *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, sz); + Py_ssize_t resize = PyBytesWriter_GetSize(bytes_writer) + sz; + *bytes_str = PyBytesWriter_ResizeAndUpdatePointer(bytes_writer, resize, + *bytes_str); if (*bytes_str == NULL) return -1; } @@ -2510,7 +2516,7 @@ _PyLong_FormatWriter(_PyUnicodeWriter *writer, } char* -_PyLong_FormatBytesWriter(_PyBytesWriter *writer, char *str, +_PyLong_FormatBytesWriter(PyBytesWriter *writer, char *str, PyObject *obj, int base, int alternate) { From 457e21a695f0befd4f9d810aa8d4ec1a39df258f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 26 Mar 2025 17:55:18 +0100 Subject: [PATCH 11/27] Rename PyBytesWriter_FinishWithPointer() --- Include/cpython/bytesobject.h | 2 +- Modules/_pickle.c | 2 +- Modules/binascii.c | 6 +++--- Objects/bytesobject.c | 14 +++++++------- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 875e9644503fd7..153ea653b04d43 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -57,7 +57,7 @@ PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithSize( PyBytesWriter *writer, Py_ssize_t size); -PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithEndPointer( +PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithPointer( PyBytesWriter *writer, void *data); diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 7e47eafa09f06b..f32d7de5f9081a 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2672,7 +2672,7 @@ raw_unicode_escape(PyObject *obj) *p++ = (char) ch; } - return PyBytesWriter_FinishWithEndPointer(writer, p); + return PyBytesWriter_FinishWithPointer(writer, p); error: PyBytesWriter_Discard(writer); diff --git a/Modules/binascii.c b/Modules/binascii.c index 02fbec04017ae0..a2e34829b97f4f 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -355,7 +355,7 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) } *ascii_data++ = '\n'; /* Append a courtesy newline */ - return PyBytesWriter_FinishWithEndPointer(writer, ascii_data); + return PyBytesWriter_FinishWithPointer(writer, ascii_data); } /*[clinic input] @@ -506,7 +506,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode) } done: - return PyBytesWriter_FinishWithEndPointer(writer, bin_data); + return PyBytesWriter_FinishWithPointer(writer, bin_data); error_end: PyBytesWriter_Discard(writer); @@ -586,7 +586,7 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) if (newline) *ascii_data++ = '\n'; /* Append a courtesy newline */ - return PyBytesWriter_FinishWithEndPointer(writer, ascii_data); + return PyBytesWriter_FinishWithPointer(writer, ascii_data); } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index f471fe9d537500..f94ea8c7e3e3db 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -392,7 +392,7 @@ PyBytes_FromFormatV(const char *format, va_list vargs) return NULL; } - return PyBytesWriter_FinishWithEndPointer(writer, s); + return PyBytesWriter_FinishWithPointer(writer, s); } @@ -1070,7 +1070,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (args_owned) { Py_DECREF(args); } - return PyBytesWriter_FinishWithEndPointer(writer, res); + return PyBytesWriter_FinishWithPointer(writer, res); error: PyBytesWriter_Discard(writer); @@ -1184,7 +1184,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s, } } - return PyBytesWriter_FinishWithEndPointer(writer, p); + return PyBytesWriter_FinishWithPointer(writer, p); failed: PyBytesWriter_Discard(writer); @@ -2593,7 +2593,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) if (view.obj != NULL) { PyBuffer_Release(&view); } - return PyBytesWriter_FinishWithEndPointer(writer, buf); + return PyBytesWriter_FinishWithPointer(writer, buf); error: if (invalid_char == -1) { @@ -2886,7 +2886,7 @@ _PyBytes_FromList(PyObject *x) } *str++ = (char) value; } - return PyBytesWriter_FinishWithEndPointer(writer, str); + return PyBytesWriter_FinishWithPointer(writer, str); error: PyBytesWriter_Discard(writer); @@ -2980,7 +2980,7 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) } *str++ = (char) value; } - return PyBytesWriter_FinishWithEndPointer(writer, str); + return PyBytesWriter_FinishWithPointer(writer, str); error: PyBytesWriter_Discard(writer); @@ -3921,7 +3921,7 @@ PyBytesWriter_Finish(PyBytesWriter *writer) PyObject* -PyBytesWriter_FinishWithEndPointer(PyBytesWriter *writer, void *data) +PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *data) { Py_ssize_t size = (char*)data - byteswriter_data(writer); if (size < 0 || size > byteswriter_allocated(writer)) { From 40ef4e1cc4b21b28ae452e98797c5e8dd40a9791 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 26 Mar 2025 18:10:08 +0100 Subject: [PATCH 12/27] Add PyBytesWriter_GrowAndUpdatePointer() --- Include/cpython/bytesobject.h | 7 ++++ Modules/_pickle.c | 6 ++-- Objects/bytesobject.c | 60 +++++++++++++++++++++++------------ Objects/longobject.c | 15 ++++----- 4 files changed, 54 insertions(+), 34 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 153ea653b04d43..40de7564b882b8 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -84,3 +84,10 @@ PyAPI_FUNC(void*) PyBytesWriter_ResizeAndUpdatePointer( PyBytesWriter *writer, Py_ssize_t size, void *data); +PyAPI_FUNC(int) PyBytesWriter_Grow( + PyBytesWriter *writer, + Py_ssize_t size); +PyAPI_FUNC(void*) PyBytesWriter_GrowAndUpdatePointer( + PyBytesWriter *writer, + Py_ssize_t size, + void *data); diff --git a/Modules/_pickle.c b/Modules/_pickle.c index f32d7de5f9081a..a1ca9875458b7a 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2631,8 +2631,7 @@ raw_unicode_escape(PyObject *obj) /* Map 32-bit characters to '\Uxxxxxxxx' */ if (ch >= 0x10000) { /* -1: subtract 1 preallocated byte */ - alloc += 10-1; - p = PyBytesWriter_ResizeAndUpdatePointer(writer, alloc, p); + p = PyBytesWriter_GrowAndUpdatePointer(writer, 10-1, p); if (p == NULL) { goto error; } @@ -2654,8 +2653,7 @@ raw_unicode_escape(PyObject *obj) ch == 0x1a) { /* -1: subtract 1 preallocated byte */ - alloc += 6-1; - p = PyBytesWriter_ResizeAndUpdatePointer(writer, alloc, p); + p = PyBytesWriter_GrowAndUpdatePointer(writer, 6-1, p); if (p == NULL) { goto error; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index f94ea8c7e3e3db..a00a249af2c38f 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -221,8 +221,7 @@ bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos, #define WRITE_BYTES_LEN(str, len_expr) \ do { \ size_t len = (len_expr); \ - alloc += len; \ - s = PyBytesWriter_ResizeAndUpdatePointer(writer, alloc, s); \ + s = PyBytesWriter_GrowAndUpdatePointer(writer, len, s); \ if (s == NULL) { \ goto error; \ } \ @@ -459,8 +458,7 @@ formatfloat(PyObject *v, int flags, int prec, int type, len = strlen(p); if (writer != NULL) { - Py_ssize_t resize = PyBytesWriter_GetSize(writer) + len; - str = PyBytesWriter_ResizeAndUpdatePointer(writer, resize, str); + str = PyBytesWriter_GrowAndUpdatePointer(writer, len, str); if (str == NULL) { PyMem_Free(p); return NULL; @@ -982,8 +980,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, alloc++; /* 2: size preallocated for %s */ if (alloc > 2) { - Py_ssize_t resize = PyBytesWriter_GetSize(writer) + alloc - 2; - res = PyBytesWriter_ResizeAndUpdatePointer(writer, resize, res); + res = PyBytesWriter_GrowAndUpdatePointer(writer, alloc - 2, res); if (res == NULL) { goto error; } @@ -3971,41 +3968,50 @@ PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size) int -PyBytesWriter_WriteBytes(PyBytesWriter *writer, - const void *bytes, Py_ssize_t size) +PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size) { if (size < 0) { - size = strlen(bytes); + PyErr_SetString(PyExc_ValueError, "size must be >= 0"); + return -1; } - Py_ssize_t pos = writer->size; - if (size > PY_SSIZE_T_MAX - pos) { + if (size > PY_SSIZE_T_MAX - writer->size) { PyErr_NoMemory(); return -1; } - Py_ssize_t total = pos + size; + size = writer->size + size; - if (PyBytesWriter_Resize(writer, total) < 0) { + if (byteswriter_resize(writer, size, 1) < 0) { return -1; } - char *buf = byteswriter_data(writer); - memcpy(buf + pos, bytes, size); + writer->size = size; return 0; } int -PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...) +PyBytesWriter_WriteBytes(PyBytesWriter *writer, + const void *bytes, Py_ssize_t size) { + if (size < 0) { + size = strlen(bytes); + } + Py_ssize_t pos = writer->size; - Py_ssize_t format_len = strlen(format); - if (format_len > PY_SSIZE_T_MAX - pos) { - PyErr_NoMemory(); + if (PyBytesWriter_Grow(writer, size) < 0) { return -1; } - Py_ssize_t alloc = pos + format_len; + char *buf = byteswriter_data(writer); + memcpy(buf + pos, bytes, size); + return 0; +} + - if (PyBytesWriter_Resize(writer, alloc) < 0) { +int +PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...) +{ + Py_ssize_t pos = writer->size; + if (PyBytesWriter_Grow(writer, strlen(format)) < 0) { return -1; } @@ -4029,3 +4035,15 @@ PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, } return byteswriter_data(writer) + pos; } + + +void* +PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, + void *data) +{ + Py_ssize_t pos = (char*)data - byteswriter_data(writer); + if (PyBytesWriter_Grow(writer, size) < 0) { + return NULL; + } + return byteswriter_data(writer) + pos; +} diff --git a/Objects/longobject.c b/Objects/longobject.c index 7867ae4ff003a4..ca3fddc3a593b7 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2047,9 +2047,8 @@ pylong_int_to_decimal_string(PyObject *aa, Py_ssize_t size = PyUnicode_GET_LENGTH(s); const void *data = PyUnicode_DATA(s); int kind = PyUnicode_KIND(s); - Py_ssize_t resize = PyBytesWriter_GetSize(bytes_writer) + size; - *bytes_str = PyBytesWriter_ResizeAndUpdatePointer(bytes_writer, resize, - *bytes_str); + *bytes_str = PyBytesWriter_GrowAndUpdatePointer(bytes_writer, size, + *bytes_str); if (*bytes_str == NULL) { goto error; } @@ -2212,9 +2211,8 @@ long_to_decimal_string_internal(PyObject *aa, } } else if (bytes_writer) { - Py_ssize_t resize = PyBytesWriter_GetSize(bytes_writer) + strlen; - *bytes_str = PyBytesWriter_ResizeAndUpdatePointer(bytes_writer, resize, - *bytes_str); + *bytes_str = PyBytesWriter_GrowAndUpdatePointer(bytes_writer, strlen, + *bytes_str); if (*bytes_str == NULL) { Py_DECREF(scratch); return -1; @@ -2385,9 +2383,8 @@ long_format_binary(PyObject *aa, int base, int alternate, return -1; } else if (bytes_writer) { - Py_ssize_t resize = PyBytesWriter_GetSize(bytes_writer) + sz; - *bytes_str = PyBytesWriter_ResizeAndUpdatePointer(bytes_writer, resize, - *bytes_str); + *bytes_str = PyBytesWriter_GrowAndUpdatePointer(bytes_writer, sz, + *bytes_str); if (*bytes_str == NULL) return -1; } From 0313087add58c8b06d7e6c3ad116fbef9ecf9645 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 10:38:46 +0100 Subject: [PATCH 13/27] Make PyBytesWriter_ResizeAndUpdatePointer() private --- Include/cpython/bytesobject.h | 4 ---- Objects/bytesobject.c | 13 +++++++------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 40de7564b882b8..2db8d384cc5b80 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -80,10 +80,6 @@ PyAPI_FUNC(int) PyBytesWriter_Format( PyAPI_FUNC(int) PyBytesWriter_Resize( PyBytesWriter *writer, Py_ssize_t size); -PyAPI_FUNC(void*) PyBytesWriter_ResizeAndUpdatePointer( - PyBytesWriter *writer, - Py_ssize_t size, - void *data); PyAPI_FUNC(int) PyBytesWriter_Grow( PyBytesWriter *writer, Py_ssize_t size); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a00a249af2c38f..f62a23af29e937 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -36,6 +36,8 @@ class bytes "PyBytesObject *" "&PyBytes_Type" /* Forward declaration */ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str); +static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, + Py_ssize_t size, void *data); #define CHARACTERS _Py_SINGLETON(bytes_characters) @@ -216,7 +218,6 @@ bytes_fromformat(PyBytesWriter *writer, Py_ssize_t writer_pos, char buffer[21]; char *s = (char*)PyBytesWriter_GetData(writer) + writer_pos; - Py_ssize_t alloc = PyBytesWriter_GetSize(writer); #define WRITE_BYTES_LEN(str, len_expr) \ do { \ @@ -2875,7 +2876,7 @@ _PyBytes_FromList(PyObject *x) } if (i >= size) { - str = PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str); + str = _PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str); if (str == NULL) { goto error; } @@ -2969,7 +2970,7 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) /* Append the byte */ if (i >= size) { - str = PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str); + str = _PyBytesWriter_ResizeAndUpdatePointer(writer, size + 1, str); if (str == NULL) { goto error; } @@ -4025,9 +4026,9 @@ PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...) } -void* -PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, - void *data) +static void* +_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, + void *data) { Py_ssize_t pos = (char*)data - byteswriter_data(writer); if (PyBytesWriter_Resize(writer, size) < 0) { From c8ac889798725a9084e6bac05789b29cc1ef01a8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 10:48:02 +0100 Subject: [PATCH 14/27] Make PyBytesWriter_GetAllocated() private --- Include/cpython/bytesobject.h | 2 -- Lib/test/test_capi/test_bytes.py | 4 ---- Modules/_testcapi/bytes.c | 14 -------------- Objects/bytesobject.c | 13 +++++++------ 4 files changed, 7 insertions(+), 26 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 2db8d384cc5b80..462f15b005be74 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -65,8 +65,6 @@ PyAPI_FUNC(void*) PyBytesWriter_GetData( PyBytesWriter *writer); PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetSize( PyBytesWriter *writer); -PyAPI_FUNC(Py_ssize_t) PyBytesWriter_GetAllocated( - PyBytesWriter *writer); PyAPI_FUNC(int) PyBytesWriter_WriteBytes( PyBytesWriter *writer, diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 98af97cd740dd0..4023e954ede37f 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -292,7 +292,6 @@ def test_join(self): class BytesWriterTest(unittest.TestCase): - SMALL_BUFFER = 256 # bytes result_type = bytes def create_writer(self, alloc=0, string=b''): @@ -302,17 +301,14 @@ def test_create(self): # Test PyBytesWriter_Create() writer = self.create_writer() self.assertEqual(writer.get_size(), 0) - self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) self.assertEqual(writer.finish(), self.result_type(b'')) writer = self.create_writer(3, b'abc') self.assertEqual(writer.get_size(), 3) - self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) self.assertEqual(writer.finish(), self.result_type(b'abc')) writer = self.create_writer(10, b'abc') self.assertEqual(writer.get_size(), 10) - self.assertEqual(writer.get_allocated(), self.SMALL_BUFFER) self.assertEqual(writer.finish_with_size(3), self.result_type(b'abc')) def test_write_bytes(self): diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 3a3dc44b7e9b69..a7e9c3a0f9650d 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -222,19 +222,6 @@ writer_get_size(PyObject *self_raw, PyObject *Py_UNUSED(args)) } -static PyObject* -writer_get_allocated(PyObject *self_raw, PyObject *Py_UNUSED(args)) -{ - WriterObject *self = (WriterObject *)self_raw; - if (writer_check(self) < 0) { - return NULL; - } - - Py_ssize_t alloc = PyBytesWriter_GetAllocated(self->writer); - return PyLong_FromSsize_t(alloc); -} - - static PyObject* writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) { @@ -273,7 +260,6 @@ static PyMethodDef writer_methods[] = { {"format_i", _PyCFunction_CAST(writer_format_i), METH_VARARGS}, {"resize", _PyCFunction_CAST(writer_resize), METH_VARARGS}, {"get_size", _PyCFunction_CAST(writer_get_size), METH_NOARGS}, - {"get_allocated", _PyCFunction_CAST(writer_get_allocated), METH_NOARGS}, {"finish", _PyCFunction_CAST(writer_finish), METH_NOARGS}, {"finish_with_size", _PyCFunction_CAST(writer_finish_with_size), METH_VARARGS}, {NULL, NULL} /* sentinel */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index f62a23af29e937..d4d585e0be9a67 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -38,6 +38,7 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str); static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, void *data); +static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer); #define CHARACTERS _Py_SINGLETON(bytes_characters) @@ -2859,7 +2860,7 @@ _PyBytes_FromList(PyObject *x) return NULL; } char *str = PyBytesWriter_GetData(writer); - size = PyBytesWriter_GetAllocated(writer); + size = _PyBytesWriter_GetAllocated(writer); for (Py_ssize_t i = 0; i < PyList_GET_SIZE(x); i++) { PyObject *item = PyList_GET_ITEM(x, i); @@ -2880,7 +2881,7 @@ _PyBytes_FromList(PyObject *x) if (str == NULL) { goto error; } - size = PyBytesWriter_GetAllocated(writer); + size = _PyBytesWriter_GetAllocated(writer); } *str++ = (char) value; } @@ -2940,7 +2941,7 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) return NULL; } char *str = PyBytesWriter_GetData(writer); - size = PyBytesWriter_GetAllocated(writer); + size = _PyBytesWriter_GetAllocated(writer); /* Run the iterator to exhaustion */ for (i = 0; ; i++) { @@ -2974,7 +2975,7 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) if (str == NULL) { goto error; } - size = PyBytesWriter_GetAllocated(writer); + size = _PyBytesWriter_GetAllocated(writer); } *str++ = (char) value; } @@ -3946,8 +3947,8 @@ PyBytesWriter_GetSize(PyBytesWriter *writer) } -Py_ssize_t -PyBytesWriter_GetAllocated(PyBytesWriter *writer) +static Py_ssize_t +_PyBytesWriter_GetAllocated(PyBytesWriter *writer) { return byteswriter_allocated(writer); } From 7095ac45c474089bf0ef8aecb2878340633048df Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 10:48:30 +0100 Subject: [PATCH 15/27] Don't overallocate for bytearray() --- Objects/bytesobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index d4d585e0be9a67..8847c36ae0a985 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3775,7 +3775,7 @@ byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int overallocate) return 0; } - if (overallocate) { + if (overallocate && !writer->use_bytearray) { if (size <= (PY_SSIZE_T_MAX - size / OVERALLOCATE_FACTOR)) { size += size / OVERALLOCATE_FACTOR; } From befd574acfb2eb4a403de8a0bb0d4458d1fd6d4b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 10:54:54 +0100 Subject: [PATCH 16/27] Move _PyBytesWriter_CreateByteArray() to the internal C API --- Include/cpython/bytesobject.h | 2 -- Include/internal/pycore_bytesobject.h | 4 ++++ Modules/_testcapi/bytes.c | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 462f15b005be74..17d6b04efb3553 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -48,8 +48,6 @@ typedef struct PyBytesWriter PyBytesWriter; PyAPI_FUNC(PyBytesWriter *) PyBytesWriter_Create( Py_ssize_t size); -PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray( - Py_ssize_t size); PyAPI_FUNC(void) PyBytesWriter_Discard( PyBytesWriter *writer); PyAPI_FUNC(PyObject*) PyBytesWriter_Finish( diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 300e7f4896a39e..f4fe10eb5ad952 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -142,6 +142,10 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, const void *bytes, Py_ssize_t size); +// Export for '_testcapi' shared extension. +PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray( + Py_ssize_t size); + #ifdef __cplusplus } #endif diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index a7e9c3a0f9650d..cad6fc3d610a92 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -1,6 +1,11 @@ +// Use pycore_bytes.h +#define PYTESTCAPI_NEED_INTERNAL_API + #include "parts.h" #include "util.h" +#include "pycore_bytesobject.h" // _PyBytesWriter_CreateByteArray() + /* Test _PyBytes_Resize() */ static PyObject * From 3ba1d1c985f91b069f47f1c8152e24ef092d185c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 10:59:23 +0100 Subject: [PATCH 17/27] Move code --- Objects/bytesobject.c | 48 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 8847c36ae0a985..5351d3d2616f2b 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3969,6 +3969,18 @@ PyBytesWriter_Resize(PyBytesWriter *writer, Py_ssize_t size) } +static void* +_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, + void *data) +{ + Py_ssize_t pos = (char*)data - byteswriter_data(writer); + if (PyBytesWriter_Resize(writer, size) < 0) { + return NULL; + } + return byteswriter_data(writer) + pos; +} + + int PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size) { @@ -3991,6 +4003,18 @@ PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size) } +void* +PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, + void *data) +{ + Py_ssize_t pos = (char*)data - byteswriter_data(writer); + if (PyBytesWriter_Grow(writer, size) < 0) { + return NULL; + } + return byteswriter_data(writer) + pos; +} + + int PyBytesWriter_WriteBytes(PyBytesWriter *writer, const void *bytes, Py_ssize_t size) @@ -4025,27 +4049,3 @@ PyBytesWriter_Format(PyBytesWriter *writer, const char *format, ...) Py_ssize_t size = buf - byteswriter_data(writer); return PyBytesWriter_Resize(writer, size); } - - -static void* -_PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, - void *data) -{ - Py_ssize_t pos = (char*)data - byteswriter_data(writer); - if (PyBytesWriter_Resize(writer, size) < 0) { - return NULL; - } - return byteswriter_data(writer) + pos; -} - - -void* -PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, - void *data) -{ - Py_ssize_t pos = (char*)data - byteswriter_data(writer); - if (PyBytesWriter_Grow(writer, size) < 0) { - return NULL; - } - return byteswriter_data(writer) + pos; -} From ede2776e435334bd355c2f852c3d838597414733 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 11:18:22 +0100 Subject: [PATCH 18/27] Add examples --- Lib/test/test_capi/test_bytes.py | 6 ++++ Modules/_testcapi/bytes.c | 47 ++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 4023e954ede37f..968042d3d51a87 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -364,6 +364,12 @@ def test_format_i(self): writer.format_i(b'y=%i', 456) self.assertEqual(writer.finish(), self.result_type(b'x=123, y=456')) + def test_abc(self): + self.assertEqual(_testcapi.byteswriter_abc(), b'abc') + + def test_resize(self): + self.assertEqual(_testcapi.byteswriter_resize(), b'Hello World') + class ByteArrayWriterTest(BytesWriterTest): result_type = bytearray diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index cad6fc3d610a92..8b6ae8a1a652f7 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -286,9 +286,56 @@ static PyType_Spec Writer_spec = { }; +static PyObject * +byteswriter_abc(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(3); + if (writer == NULL) { + return NULL; + } + + char *str = PyBytesWriter_GetData(writer); + memcpy(str, "abc", 3); + + return PyBytesWriter_Finish(writer); +} + + +static PyObject * +byteswriter_resize(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + // Allocate 10 bytes + PyBytesWriter *writer = PyBytesWriter_Create(10); + if (writer == NULL) { + return NULL; + } + char *buf = PyBytesWriter_GetData(writer); + + // Write some bytes + memcpy(buf, "Hello ", strlen("Hello ")); + buf += strlen("Hello "); + + // Allocate 10 more bytes + buf = PyBytesWriter_GrowAndUpdatePointer(writer, 10, buf); + if (buf == NULL) { + PyBytesWriter_Discard(writer); + return NULL; + } + + // Write more bytes + memcpy(buf, "World", strlen("World")); + buf += strlen("World"); + + // Truncate to the exact size and create a bytes object + return PyBytesWriter_FinishWithPointer(writer, buf); +} + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, {"bytes_join", bytes_join, METH_VARARGS}, + {"byteswriter_abc", byteswriter_abc, METH_NOARGS}, + {"byteswriter_resize", byteswriter_resize, METH_NOARGS}, {NULL}, }; From be56685fe3341d7b13f72eeb35315846fb969856 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 16:24:18 +0100 Subject: [PATCH 19/27] Add high-level API example --- Include/cpython/bytesobject.h | 4 ++-- Lib/test/test_capi/test_bytes.py | 3 +++ Modules/_testcapi/bytes.c | 22 ++++++++++++++++++++++ Objects/bytesobject.c | 8 ++++---- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 17d6b04efb3553..85bc2b827df8fb 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -57,7 +57,7 @@ PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithSize( Py_ssize_t size); PyAPI_FUNC(PyObject*) PyBytesWriter_FinishWithPointer( PyBytesWriter *writer, - void *data); + void *buf); PyAPI_FUNC(void*) PyBytesWriter_GetData( PyBytesWriter *writer); @@ -82,4 +82,4 @@ PyAPI_FUNC(int) PyBytesWriter_Grow( PyAPI_FUNC(void*) PyBytesWriter_GrowAndUpdatePointer( PyBytesWriter *writer, Py_ssize_t size, - void *data); + void *buf); diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 968042d3d51a87..4238bee5d313db 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -370,6 +370,9 @@ def test_abc(self): def test_resize(self): self.assertEqual(_testcapi.byteswriter_resize(), b'Hello World') + def test_highlevel(self): + self.assertEqual(_testcapi.byteswriter_highlevel(), b'Hello World!') + class ByteArrayWriterTest(BytesWriterTest): result_type = bytearray diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 8b6ae8a1a652f7..388e65456c3a8b 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -331,11 +331,33 @@ byteswriter_resize(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) } +static PyObject * +byteswriter_highlevel(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + PyBytesWriter *writer = PyBytesWriter_Create(0); + if (writer == NULL) { + goto error; + } + if (PyBytesWriter_WriteBytes(writer, "Hello", -1) < 0) { + goto error; + } + if (PyBytesWriter_Format(writer, " %s!", "World") < 0) { + goto error; + } + return PyBytesWriter_Finish(writer); + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, {"bytes_join", bytes_join, METH_VARARGS}, {"byteswriter_abc", byteswriter_abc, METH_NOARGS}, {"byteswriter_resize", byteswriter_resize, METH_NOARGS}, + {"byteswriter_highlevel", byteswriter_highlevel, METH_NOARGS}, {NULL}, }; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 5351d3d2616f2b..9affd7e6f4202f 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3920,9 +3920,9 @@ PyBytesWriter_Finish(PyBytesWriter *writer) PyObject* -PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *data) +PyBytesWriter_FinishWithPointer(PyBytesWriter *writer, void *buf) { - Py_ssize_t size = (char*)data - byteswriter_data(writer); + Py_ssize_t size = (char*)buf - byteswriter_data(writer); if (size < 0 || size > byteswriter_allocated(writer)) { PyBytesWriter_Discard(writer); PyErr_SetString(PyExc_ValueError, "invalid end pointer"); @@ -4005,9 +4005,9 @@ PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size) void* PyBytesWriter_GrowAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, - void *data) + void *buf) { - Py_ssize_t pos = (char*)data - byteswriter_data(writer); + Py_ssize_t pos = (char*)buf - byteswriter_data(writer); if (PyBytesWriter_Grow(writer, size) < 0) { return NULL; } From 1135390112c083640de2b28def7833f3f81fe006 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 16:28:04 +0100 Subject: [PATCH 20/27] Fix tests --- Objects/bytesobject.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 9affd7e6f4202f..7a629afd50b8be 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2549,7 +2549,12 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) } /* This overestimates if there are spaces */ - writer = _PyBytesWriter_CreateByteArray(hexlen / 2); + if (use_bytearray) { + writer = _PyBytesWriter_CreateByteArray(hexlen / 2); + } + else { + writer = PyBytesWriter_Create(hexlen / 2); + } if (writer == NULL) { goto release_buffer; } @@ -3736,6 +3741,9 @@ byteswriter_data(PyBytesWriter *writer) if (writer->obj == NULL) { return writer->small_buffer; } + else if (writer->use_bytearray) { + return PyByteArray_AS_STRING(writer->obj); + } else { return PyBytes_AS_STRING(writer->obj); } From 000ba585c284493df20afb116c0e4f4008e27016 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 16:36:55 +0100 Subject: [PATCH 21/27] fix linter --- Lib/test/test_capi/test_bytes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index 4238bee5d313db..be82a2985eb4cd 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -364,13 +364,13 @@ def test_format_i(self): writer.format_i(b'y=%i', 456) self.assertEqual(writer.finish(), self.result_type(b'x=123, y=456')) - def test_abc(self): + def test_example_abc(self): self.assertEqual(_testcapi.byteswriter_abc(), b'abc') - def test_resize(self): + def test_example_resize(self): self.assertEqual(_testcapi.byteswriter_resize(), b'Hello World') - def test_highlevel(self): + def test_example_highlevel(self): self.assertEqual(_testcapi.byteswriter_highlevel(), b'Hello World!') From b864c26eddc1f2d69f053b853220548381c1b3ed Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 16:41:16 +0100 Subject: [PATCH 22/27] Convert more functions Replace PyBytes_FromStringAndSize(NULL, 0) with Py_GetConstant(Py_CONSTANT_EMPTY_BYTES). --- Modules/_bz2module.c | 4 +- Modules/_codecsmodule.c | 67 +++++++++++++--------------- Modules/_dbmmodule.c | 5 +-- Modules/_hashopenssl.c | 12 +++--- Modules/_lzmamodule.c | 19 ++++---- Modules/binascii.c | 37 ++++++++-------- Modules/mmapmodule.c | 10 ++--- Modules/posixmodule.c | 96 +++++++++++++++-------------------------- Modules/socketmodule.c | 13 +++--- Modules/zlibmodule.c | 30 ++++++------- Objects/bytesobject.c | 60 ++++++++++++++------------ Objects/longobject.c | 13 +++--- Objects/memoryobject.c | 34 ++++++++------- Objects/unicodeobject.c | 8 ++-- 14 files changed, 185 insertions(+), 223 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 9e85e0de42cd8d..815cac652707b5 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -668,9 +668,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) self->bzs_avail_in_real = 0; self->input_buffer = NULL; self->input_buffer_size = 0; - self->unused_data = PyBytes_FromStringAndSize(NULL, 0); - if (self->unused_data == NULL) - goto error; + self->unused_data = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); if (catch_bz2_error(bzerror)) diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 7cf3f152eeecc6..7478ae20e83703 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -201,52 +201,45 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data, const char *errors) /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/ { - Py_ssize_t size; - Py_ssize_t newsize; - PyObject *v; - - size = PyBytes_GET_SIZE(data); + Py_ssize_t size = PyBytes_GET_SIZE(data); if (size > PY_SSIZE_T_MAX / 4) { PyErr_SetString(PyExc_OverflowError, "string is too large to encode"); return NULL; } - newsize = 4*size; - v = PyBytes_FromStringAndSize(NULL, newsize); + Py_ssize_t alloc_size = 4*size; - if (v == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(alloc_size); + if (writer == NULL) { return NULL; } - else { - Py_ssize_t i; - char c; - char *p = PyBytes_AS_STRING(v); - - for (i = 0; i < size; i++) { - /* There's at least enough room for a hex escape */ - assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4); - c = PyBytes_AS_STRING(data)[i]; - if (c == '\'' || c == '\\') - *p++ = '\\', *p++ = c; - else if (c == '\t') - *p++ = '\\', *p++ = 't'; - else if (c == '\n') - *p++ = '\\', *p++ = 'n'; - else if (c == '\r') - *p++ = '\\', *p++ = 'r'; - else if (c < ' ' || c >= 0x7f) { - *p++ = '\\'; - *p++ = 'x'; - *p++ = Py_hexdigits[(c & 0xf0) >> 4]; - *p++ = Py_hexdigits[c & 0xf]; - } - else - *p++ = c; - } - *p = '\0'; - if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) { - return NULL; + char *p = PyBytesWriter_GetData(writer); + + for (Py_ssize_t i = 0; i < size; i++) { + /* There's at least enough room for a hex escape */ + assert(alloc_size - (p - (char*)PyBytesWriter_GetData(writer)) >= 4); + char c = PyBytes_AS_STRING(data)[i]; + if (c == '\'' || c == '\\') + *p++ = '\\', *p++ = c; + else if (c == '\t') + *p++ = '\\', *p++ = 't'; + else if (c == '\n') + *p++ = '\\', *p++ = 'n'; + else if (c == '\r') + *p++ = '\\', *p++ = 'r'; + else if (c < ' ' || c >= 0x7f) { + *p++ = '\\'; + *p++ = 'x'; + *p++ = Py_hexdigits[(c & 0xf0) >> 4]; + *p++ = Py_hexdigits[c & 0xf]; } + else + *p++ = c; + } + + PyObject *v = PyBytesWriter_FinishWithPointer(writer, p); + if (v == NULL) { + return NULL; } return codec_tuple(v, size); diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c index cc65cbd98d71dc..99b321d46e2830 100644 --- a/Modules/_dbmmodule.c +++ b/Modules/_dbmmodule.c @@ -401,10 +401,7 @@ _dbm_dbm_setdefault_impl(dbmobject *self, PyTypeObject *cls, const char *key, return PyBytes_FromStringAndSize(val.dptr, val.dsize); } if (default_value == NULL) { - default_value = PyBytes_FromStringAndSize(NULL, 0); - if (default_value == NULL) { - return NULL; - } + default_value = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); val.dptr = NULL; val.dsize = 0; } diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 08f5c0ece0a18c..4ceb7f528f4514 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -806,15 +806,15 @@ EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length) /*[clinic end generated code: output=ef9320c23280efad input=816a6537cea3d1db]*/ { EVP_MD_CTX *temp_ctx; - PyObject *retval = PyBytes_FromStringAndSize(NULL, length); - if (retval == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; } temp_ctx = EVP_MD_CTX_new(); if (temp_ctx == NULL) { - Py_DECREF(retval); + PyBytesWriter_Discard(writer); PyErr_NoMemory(); return NULL; } @@ -823,17 +823,17 @@ EVPXOF_digest_impl(EVPobject *self, Py_ssize_t length) goto error; } if (!EVP_DigestFinalXOF(temp_ctx, - (unsigned char*)PyBytes_AS_STRING(retval), + (unsigned char*)PyBytesWriter_GetData(writer), length)) { goto error; } EVP_MD_CTX_free(temp_ctx); - return retval; + return PyBytesWriter_Finish(writer); error: - Py_DECREF(retval); + PyBytesWriter_Discard(writer); EVP_MD_CTX_free(temp_ctx); notify_ssl_error_occurred(); return NULL; diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index 0058e2eec2ef16..422696f3103ff4 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -1259,10 +1259,7 @@ _lzma_LZMADecompressor_impl(PyTypeObject *type, int format, self->needs_input = 1; self->input_buffer = NULL; self->input_buffer_size = 0; - Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0)); - if (self->unused_data == NULL) { - goto error; - } + Py_XSETREF(self->unused_data, Py_GetConstant(Py_CONSTANT_EMPTY_BYTES)); switch (format) { case FORMAT_AUTO: @@ -1441,7 +1438,7 @@ _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter) { lzma_ret lzret; uint32_t encoded_size; - PyObject *result = NULL; + PyBytesWriter *writer = NULL; _lzma_state *state = get_lzma_state(module); assert(state != NULL); @@ -1449,20 +1446,20 @@ _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter) if (catch_lzma_error(state, lzret)) goto error; - result = PyBytes_FromStringAndSize(NULL, encoded_size); - if (result == NULL) + writer = PyBytesWriter_Create(encoded_size); + if (writer == NULL) { goto error; + } - lzret = lzma_properties_encode( - &filter, (uint8_t *)PyBytes_AS_STRING(result)); + lzret = lzma_properties_encode(&filter, PyBytesWriter_GetData(writer)); if (catch_lzma_error(state, lzret)) { goto error; } - return result; + return PyBytesWriter_Finish(writer); error: - Py_XDECREF(result); + PyBytesWriter_Discard(writer); return NULL; } diff --git a/Modules/binascii.c b/Modules/binascii.c index a2e34829b97f4f..04dc37602a7572 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -205,11 +205,9 @@ binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/ { const unsigned char *ascii_data; - unsigned char *bin_data; int leftbits = 0; unsigned char this_ch; unsigned int leftchar = 0; - PyObject *rv; Py_ssize_t ascii_len, bin_len; binascii_state *state; @@ -223,9 +221,11 @@ binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) ascii_len--; /* Allocate the buffer */ - if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) + PyBytesWriter *writer = PyBytesWriter_Create(bin_len); + if (writer == NULL) { return NULL; - bin_data = (unsigned char *)PyBytes_AS_STRING(rv); + } + unsigned char *bin_data = PyBytesWriter_GetData(writer); for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) { /* XXX is it really best to add NULs if there's no more data */ @@ -248,8 +248,7 @@ binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) return NULL; } PyErr_SetString(state->Error, "Illegal char"); - Py_DECREF(rv); - return NULL; + goto error; } this_ch = (this_ch - ' ') & 077; } @@ -280,11 +279,14 @@ binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) return NULL; } PyErr_SetString(state->Error, "Trailing garbage"); - Py_DECREF(rv); - return NULL; + goto error; } } - return rv; + return PyBytesWriter_Finish(writer); + +error: + PyBytesWriter_Discard(writer); + return NULL; } /*[clinic input] @@ -888,8 +890,6 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) { const char* argbuf; Py_ssize_t arglen; - PyObject *retval; - char* retbuf; Py_ssize_t i, j; binascii_state *state; @@ -911,10 +911,11 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) return NULL; } - retval = PyBytes_FromStringAndSize(NULL, (arglen/2)); - if (!retval) + PyBytesWriter *writer = PyBytesWriter_Create(arglen/2); + if (writer == NULL) { return NULL; - retbuf = PyBytes_AS_STRING(retval); + } + char *retbuf = PyBytesWriter_GetData(writer); for (i=j=0; i < arglen; i += 2) { unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])]; @@ -926,14 +927,14 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) } PyErr_SetString(state->Error, "Non-hexadecimal digit found"); - goto finally; + goto error; } retbuf[j++] = (top << 4) + bot; } - return retval; + return PyBytesWriter_Finish(writer); - finally: - Py_DECREF(retval); +error: + PyBytesWriter_Discard(writer); return NULL; } diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 67fd6db2f361d6..5dced092eb74d9 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -451,14 +451,14 @@ _safe_PyBytes_FromStringAndSize(char *start, size_t num_bytes) { } } else { - PyObject *result = PyBytes_FromStringAndSize(NULL, num_bytes); - if (result == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(num_bytes); + if (writer == NULL) { return NULL; } - if (safe_memcpy(PyBytes_AS_STRING(result), start, num_bytes) < 0) { - Py_CLEAR(result); + if (safe_memcpy(PyBytesWriter_GetData(writer), start, num_bytes) < 0) { + PyBytesWriter_Discard(writer); } - return result; + return PyBytesWriter_Finish(writer); } } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f3ce1fb632226e..7b9d34024a71b3 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -11436,9 +11436,6 @@ static PyObject * os_read_impl(PyObject *module, int fd, Py_ssize_t length) /*[clinic end generated code: output=dafbe9a5cddb987b input=1df2eaa27c0bf1d3]*/ { - Py_ssize_t n; - PyObject *buffer; - if (length < 0) { errno = EINVAL; return posix_error(); @@ -11446,20 +11443,18 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length) length = Py_MIN(length, _PY_READ_MAX); - buffer = PyBytes_FromStringAndSize((char *)NULL, length); - if (buffer == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; + } - n = _Py_read(fd, PyBytes_AS_STRING(buffer), length); + Py_ssize_t n = _Py_read(fd, PyBytesWriter_GetData(writer), length); if (n == -1) { - Py_DECREF(buffer); + PyBytesWriter_Discard(writer); return NULL; } - if (n != length) - _PyBytes_Resize(&buffer, n); - - return buffer; + return PyBytesWriter_FinishWithSize(writer, n); } /*[clinic input] @@ -11635,20 +11630,20 @@ os_pread_impl(PyObject *module, int fd, Py_ssize_t length, Py_off_t offset) { Py_ssize_t n; int async_err = 0; - PyObject *buffer; if (length < 0) { errno = EINVAL; return posix_error(); } - buffer = PyBytes_FromStringAndSize((char *)NULL, length); - if (buffer == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; + } do { Py_BEGIN_ALLOW_THREADS _Py_BEGIN_SUPPRESS_IPH - n = pread(fd, PyBytes_AS_STRING(buffer), length, offset); + n = pread(fd, PyBytesWriter_GetData(writer), length, offset); _Py_END_SUPPRESS_IPH Py_END_ALLOW_THREADS } while (n < 0 && errno == EINTR && !(async_err = PyErr_CheckSignals())); @@ -11657,12 +11652,10 @@ os_pread_impl(PyObject *module, int fd, Py_ssize_t length, Py_off_t offset) if (!async_err) { posix_error(); } - Py_DECREF(buffer); + PyBytesWriter_Discard(writer); return NULL; } - if (n != length) - _PyBytes_Resize(&buffer, n); - return buffer; + return PyBytesWriter_FinishWithSize(writer, n); } #endif /* HAVE_PREAD */ @@ -14864,9 +14857,6 @@ os_getxattr_impl(PyObject *module, path_t *path, path_t *attribute, int follow_symlinks) /*[clinic end generated code: output=5f2f44200a43cff2 input=025789491708f7eb]*/ { - Py_ssize_t i; - PyObject *buffer = NULL; - if (fd_and_follow_symlinks_invalid("getxattr", path->fd, follow_symlinks)) return NULL; @@ -14874,8 +14864,7 @@ os_getxattr_impl(PyObject *module, path_t *path, path_t *attribute, return NULL; } - for (i = 0; ; i++) { - void *ptr; + for (Py_ssize_t i = 0; ; i++) { ssize_t result; static const Py_ssize_t buffer_sizes[] = {128, XATTR_SIZE_MAX, 0}; Py_ssize_t buffer_size = buffer_sizes[i]; @@ -14883,10 +14872,11 @@ os_getxattr_impl(PyObject *module, path_t *path, path_t *attribute, path_error(path); return NULL; } - buffer = PyBytes_FromStringAndSize(NULL, buffer_size); - if (!buffer) + PyBytesWriter *writer = PyBytesWriter_Create(buffer_size); + if (writer == NULL) { return NULL; - ptr = PyBytes_AS_STRING(buffer); + } + void *ptr = PyBytesWriter_GetData(writer); Py_BEGIN_ALLOW_THREADS; if (path->fd >= 0) @@ -14898,23 +14888,16 @@ os_getxattr_impl(PyObject *module, path_t *path, path_t *attribute, Py_END_ALLOW_THREADS; if (result < 0) { + PyBytesWriter_Discard(writer); if (errno == ERANGE) { - Py_DECREF(buffer); continue; } path_error(path); - Py_DECREF(buffer); return NULL; } - if (result != buffer_size) { - /* Can only shrink. */ - _PyBytes_Resize(&buffer, result); - } - break; + return PyBytesWriter_FinishWithSize(writer, result); } - - return buffer; } @@ -15138,22 +15121,22 @@ static PyObject * os_urandom_impl(PyObject *module, Py_ssize_t size) /*[clinic end generated code: output=42c5cca9d18068e9 input=4067cdb1b6776c29]*/ { - PyObject *bytes; - int result; - - if (size < 0) + if (size < 0) { return PyErr_Format(PyExc_ValueError, "negative argument not allowed"); - bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) + } + + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; + } - result = _PyOS_URandom(PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes)); + int result = _PyOS_URandom(PyBytesWriter_GetData(writer), size); if (result == -1) { - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } - return bytes; + return PyBytesWriter_Finish(writer); } #ifdef HAVE_MEMFD_CREATE @@ -16616,25 +16599,20 @@ static PyObject * os_getrandom_impl(PyObject *module, Py_ssize_t size, int flags) /*[clinic end generated code: output=b3a618196a61409c input=59bafac39c594947]*/ { - PyObject *bytes; - Py_ssize_t n; - if (size < 0) { errno = EINVAL; return posix_error(); } - bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) { - PyErr_NoMemory(); + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; } + void *data = PyBytesWriter_GetData(writer); + Py_ssize_t n; while (1) { - n = syscall(SYS_getrandom, - PyBytes_AS_STRING(bytes), - PyBytes_GET_SIZE(bytes), - flags); + n = syscall(SYS_getrandom, data, size, flags); if (n < 0 && errno == EINTR) { if (PyErr_CheckSignals() < 0) { goto error; @@ -16651,14 +16629,10 @@ os_getrandom_impl(PyObject *module, Py_ssize_t size, int flags) goto error; } - if (n != size) { - _PyBytes_Resize(&bytes, n); - } - - return bytes; + return PyBytesWriter_FinishWithSize(writer, n); error: - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } #endif /* HAVE_GETRANDOM_SYSCALL */ diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 2e48a72a72f27c..92b8fc6e8d10c8 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -3313,7 +3313,6 @@ sock_getsockopt(PyObject *self, PyObject *args) int level; int optname; int res; - PyObject *buf; socklen_t buflen = 0; int flag = 0; socklen_t flagsize; @@ -3358,17 +3357,17 @@ sock_getsockopt(PyObject *self, PyObject *args) "getsockopt buflen out of range"); return NULL; } - buf = PyBytes_FromStringAndSize((char *)NULL, buflen); - if (buf == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(buflen); + if (writer == NULL) { return NULL; + } res = getsockopt(get_sock_fd(s), level, optname, - (void *)PyBytes_AS_STRING(buf), &buflen); + PyBytesWriter_GetData(writer), &buflen); if (res < 0) { - Py_DECREF(buf); + PyBytesWriter_Discard(writer); return s->errorhandler(); } - _PyBytes_Resize(&buf, buflen); - return buf; + return PyBytesWriter_FinishWithSize(writer, buflen); } PyDoc_STRVAR(getsockopt_doc, diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index d4b4b91697c08e..1a8acfada325d4 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -816,22 +816,24 @@ save_unconsumed_input(compobject *self, Py_buffer *data, int err) input data in self->unused_data. */ if (self->zst.avail_in > 0) { Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data); - Py_ssize_t new_size, left_size; - PyObject *new_data; + Py_ssize_t left_size; left_size = (Byte *)data->buf + data->len - self->zst.next_in; if (left_size > (PY_SSIZE_T_MAX - old_size)) { PyErr_NoMemory(); return -1; } - new_size = old_size + left_size; - new_data = PyBytes_FromStringAndSize(NULL, new_size); - if (new_data == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(old_size + left_size); + if (writer == NULL) { return -1; - memcpy(PyBytes_AS_STRING(new_data), - PyBytes_AS_STRING(self->unused_data), old_size); - memcpy(PyBytes_AS_STRING(new_data) + old_size, - self->zst.next_in, left_size); - Py_SETREF(self->unused_data, new_data); + } + char *new_data = PyBytesWriter_GetData(writer); + memcpy(new_data, PyBytes_AS_STRING(self->unused_data), old_size); + memcpy(new_data + old_size, self->zst.next_in, left_size); + PyObject *new_unused_data = PyBytesWriter_Finish(writer); + if (new_unused_data == NULL) { + return -1; + } + Py_SETREF(self->unused_data, new_unused_data); self->zst.avail_in = 0; } } @@ -994,7 +996,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) /* Flushing with Z_NO_FLUSH is a no-op, so there's no point in doing any work at all; just return an empty string. */ if (mode == Z_NO_FLUSH) { - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } ENTER_ZLIB(self); @@ -1744,11 +1746,7 @@ ZlibDecompressor__new__(PyTypeObject *cls, self->zst.zfree = PyZlib_Free; self->zst.next_in = NULL; self->zst.avail_in = 0; - self->unused_data = PyBytes_FromStringAndSize(NULL, 0); - if (self->unused_data == NULL) { - Py_CLEAR(self); - return NULL; - } + self->unused_data = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); self->lock = PyThread_allocate_lock(); if (self->lock == NULL) { Py_DECREF(self); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 7a629afd50b8be..7019a73672d6e0 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1464,12 +1464,16 @@ bytes_concat(PyObject *a, PyObject *b) goto done; } - result = PyBytes_FromStringAndSize(NULL, va.len + vb.len); - if (result != NULL) { - memcpy(PyBytes_AS_STRING(result), va.buf, va.len); - memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len); + PyBytesWriter *writer = PyBytesWriter_Create(va.len + vb.len); + if (writer == NULL) { + goto done; } + char *data = PyBytesWriter_GetData(writer); + memcpy(data, va.buf, va.len); + memcpy(data + va.len, vb.buf, vb.len); + result = PyBytesWriter_Finish(writer); + done: if (va.len != -1) PyBuffer_Release(&va); @@ -1646,8 +1650,6 @@ bytes_subscript(PyObject *op, PyObject* item) Py_ssize_t start, stop, step, slicelength, i; size_t cur; const char* source_buf; - char* result_buf; - PyObject* result; if (PySlice_Unpack(item, &start, &stop, &step) < 0) { return NULL; @@ -1670,17 +1672,18 @@ bytes_subscript(PyObject *op, PyObject* item) } else { source_buf = PyBytes_AS_STRING(self); - result = PyBytes_FromStringAndSize(NULL, slicelength); - if (result == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(slicelength); + if (writer == NULL) { return NULL; + } + char *buf = PyBytesWriter_GetData(writer); - result_buf = PyBytes_AS_STRING(result); for (cur = start, i = 0; i < slicelength; cur += step, i++) { - result_buf[i] = source_buf[cur]; + buf[i] = source_buf[cur]; } - return result; + return PyBytesWriter_Finish(writer); } } else { @@ -2763,7 +2766,7 @@ bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, "errors without a string argument"); return NULL; } - bytes = PyBytes_FromStringAndSize(NULL, 0); + bytes = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else if (encoding != NULL) { /* Encode via the codec registry */ @@ -2835,23 +2838,25 @@ bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, static PyObject* _PyBytes_FromBuffer(PyObject *x) { - PyObject *new; Py_buffer view; - if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) return NULL; - new = PyBytes_FromStringAndSize(NULL, view.len); - if (!new) + PyBytesWriter *writer = PyBytesWriter_Create(view.len); + if (writer == NULL) { goto fail; - if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval, - &view, view.len, 'C') < 0) + } + + if (PyBuffer_ToContiguous(PyBytesWriter_GetData(writer), + &view, view.len, 'C') < 0) { goto fail; + } + PyBuffer_Release(&view); - return new; + return PyBytesWriter_Finish(writer); fail: - Py_XDECREF(new); + PyBytesWriter_Discard(writer); PyBuffer_Release(&view); return NULL; } @@ -2900,16 +2905,15 @@ _PyBytes_FromList(PyObject *x) static PyObject* _PyBytes_FromTuple(PyObject *x) { - PyObject *bytes; Py_ssize_t i, size = PyTuple_GET_SIZE(x); Py_ssize_t value; - char *str; PyObject *item; - bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(size); + if (writer == NULL) { return NULL; - str = ((PyBytesObject *)bytes)->ob_sval; + } + char *str = PyBytesWriter_GetData(writer); for (i = 0; i < size; i++) { item = PyTuple_GET_ITEM(x, i); @@ -2924,10 +2928,10 @@ _PyBytes_FromTuple(PyObject *x) } *str++ = (char) value; } - return bytes; + return PyBytesWriter_Finish(writer); error: - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } @@ -3652,7 +3656,7 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str) if (size == 0 && !writer->use_bytearray) { Py_CLEAR(writer->buffer); /* Get the empty byte string singleton */ - result = PyBytes_FromStringAndSize(NULL, 0); + result = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else if (writer->use_small_buffer) { if (writer->use_bytearray) { diff --git a/Objects/longobject.c b/Objects/longobject.c index ca3fddc3a593b7..c9736dbab24e48 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6379,8 +6379,6 @@ int_to_bytes_impl(PyObject *self, Py_ssize_t length, PyObject *byteorder, /*[clinic end generated code: output=89c801df114050a3 input=a0103d0e9ad85c2b]*/ { int little_endian; - PyObject *bytes; - if (byteorder == NULL) little_endian = 0; else if (_PyUnicode_Equal(byteorder, &_Py_ID(little))) @@ -6399,18 +6397,19 @@ int_to_bytes_impl(PyObject *self, Py_ssize_t length, PyObject *byteorder, return NULL; } - bytes = PyBytes_FromStringAndSize(NULL, length); - if (bytes == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(length); + if (writer == NULL) { return NULL; + } if (_PyLong_AsByteArray((PyLongObject *)self, - (unsigned char *)PyBytes_AS_STRING(bytes), + PyBytesWriter_GetData(writer), length, little_endian, is_signed, 1) < 0) { - Py_DECREF(bytes); + PyBytesWriter_Discard(writer); return NULL; } - return bytes; + return PyBytesWriter_Finish(writer); } /*[clinic input] diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index cf673fb379edcd..1e66ae062ddc58 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -2284,7 +2284,6 @@ memoryview_tobytes_impl(PyMemoryViewObject *self, const char *order) { Py_buffer *src = VIEW_ADDR(self); char ord = 'C'; - PyObject *bytes; CHECK_RELEASED(self); @@ -2302,16 +2301,18 @@ memoryview_tobytes_impl(PyMemoryViewObject *self, const char *order) } } - bytes = PyBytes_FromStringAndSize(NULL, src->len); - if (bytes == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(src->len); + if (writer == NULL) { return NULL; + } - if (PyBuffer_ToContiguous(PyBytes_AS_STRING(bytes), src, src->len, ord) < 0) { - Py_DECREF(bytes); + if (PyBuffer_ToContiguous(PyBytesWriter_GetData(writer), + src, src->len, ord) < 0) { + PyBytesWriter_Discard(writer); return NULL; } - return bytes; + return PyBytesWriter_Finish(writer); } /*[clinic input] @@ -2343,8 +2344,6 @@ memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep, /*[clinic end generated code: output=430ca760f94f3ca7 input=539f6a3a5fb56946]*/ { Py_buffer *src = VIEW_ADDR(self); - PyObject *bytes; - PyObject *ret; CHECK_RELEASED(self); @@ -2352,19 +2351,22 @@ memoryview_hex_impl(PyMemoryViewObject *self, PyObject *sep, return _Py_strhex_with_sep(src->buf, src->len, sep, bytes_per_sep); } - bytes = PyBytes_FromStringAndSize(NULL, src->len); - if (bytes == NULL) + PyBytesWriter *writer = PyBytesWriter_Create(src->len); + if (writer == NULL) { return NULL; + } - if (PyBuffer_ToContiguous(PyBytes_AS_STRING(bytes), src, src->len, 'C') < 0) { - Py_DECREF(bytes); + if (PyBuffer_ToContiguous(PyBytesWriter_GetData(writer), + src, src->len, 'C') < 0) { + PyBytesWriter_Discard(writer); return NULL; } - ret = _Py_strhex_with_sep( - PyBytes_AS_STRING(bytes), PyBytes_GET_SIZE(bytes), - sep, bytes_per_sep); - Py_DECREF(bytes); + PyObject *ret = _Py_strhex_with_sep( + PyBytesWriter_GetData(writer), + PyBytesWriter_GetSize(writer), + sep, bytes_per_sep); + PyBytesWriter_Discard(writer); return ret; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3e0bd90c17995f..7e722d0ab718b4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4931,7 +4931,7 @@ _PyUnicode_EncodeUTF7(PyObject *str, len = PyUnicode_GET_LENGTH(str); if (len == 0) - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); /* It might be possible to tighten this worst case */ if (len > PY_SSIZE_T_MAX / 8) @@ -6922,7 +6922,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode) len = PyUnicode_GET_LENGTH(unicode); if (len == 0) { - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } kind = PyUnicode_KIND(unicode); @@ -7372,7 +7372,7 @@ unicode_encode_ucs1(PyObject *unicode, /* allocate enough for a simple encoding without replacements, if we need more, we'll resize */ if (size == 0) - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); _PyBytesWriter_Init(&writer); str = _PyBytesWriter_Alloc(&writer, size); @@ -8317,7 +8317,7 @@ encode_code_page(int code_page, } if (len == 0) - return PyBytes_FromStringAndSize(NULL, 0); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); offset = 0; do From 6d7e37dd9c6d14d8d2e8ea94077e577b39edf6fe Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 27 Mar 2025 17:53:42 +0100 Subject: [PATCH 23/27] Convert _hashopenssl function --- Modules/_hashopenssl.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 4ceb7f528f4514..b0f4b0906e833f 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -1414,8 +1414,6 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, long maxmem, long dklen) /*[clinic end generated code: output=14849e2aa2b7b46c input=48a7d63bf3f75c42]*/ { - PyObject *key_obj = NULL; - char *key; int retval; unsigned long n, r, p; @@ -1486,27 +1484,27 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, return NULL; } - key_obj = PyBytes_FromStringAndSize(NULL, dklen); - if (key_obj == NULL) { + PyBytesWriter *writer = PyBytesWriter_Create(dklen); + if (writer == NULL) { return NULL; } - key = PyBytes_AS_STRING(key_obj); + unsigned char *key = PyBytesWriter_GetData(writer); Py_BEGIN_ALLOW_THREADS retval = EVP_PBE_scrypt( (const char*)password->buf, (size_t)password->len, (const unsigned char *)salt->buf, (size_t)salt->len, n, r, p, maxmem, - (unsigned char *)key, (size_t)dklen + key, (size_t)dklen ); Py_END_ALLOW_THREADS if (!retval) { - Py_CLEAR(key_obj); + PyBytesWriter_Discard(writer); notify_ssl_error_occurred(); return NULL; } - return key_obj; + return PyBytesWriter_Finish(writer); } #endif /* PY_OPENSSL_HAS_SCRYPT */ From d8a4659ced24a2544f158ae4bb8e17a30b44a5b3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 31 Mar 2025 14:10:31 +0200 Subject: [PATCH 24/27] Detect strlen() overflow --- Objects/bytesobject.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 7019a73672d6e0..e02564563d3cf4 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -4032,7 +4032,12 @@ PyBytesWriter_WriteBytes(PyBytesWriter *writer, const void *bytes, Py_ssize_t size) { if (size < 0) { - size = strlen(bytes); + size_t len = strlen(bytes); + if (len > (size_t)PY_SSIZE_T_MAX) { + PyErr_NoMemory(); + return NULL; + } + size = (Py_ssize_t)len; } Py_ssize_t pos = writer->size; From ed00f952146bbcb3c3afda90730bf420ebb40214 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 31 Mar 2025 18:48:59 +0200 Subject: [PATCH 25/27] Fix mmap --- Modules/mmapmodule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 5dced092eb74d9..697ca88c07c34f 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -457,6 +457,7 @@ _safe_PyBytes_FromStringAndSize(char *start, size_t num_bytes) { } if (safe_memcpy(PyBytesWriter_GetData(writer), start, num_bytes) < 0) { PyBytesWriter_Discard(writer); + return NULL; } return PyBytesWriter_Finish(writer); } From 630789529cfe58842abe677207434478b9ef43ac Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 31 Mar 2025 18:49:04 +0200 Subject: [PATCH 26/27] Grow() can now shrink the buffer --- Objects/bytesobject.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index e02564563d3cf4..1c11b22f973b8b 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3996,11 +3996,10 @@ _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, Py_ssize_t size, int PyBytesWriter_Grow(PyBytesWriter *writer, Py_ssize_t size) { - if (size < 0) { - PyErr_SetString(PyExc_ValueError, "size must be >= 0"); + if (size < 0 && writer->size + size < 0) { + PyErr_SetString(PyExc_ValueError, "invalid size"); return -1; } - if (size > PY_SSIZE_T_MAX - writer->size) { PyErr_NoMemory(); return -1; From 18d41ffdbfd1d167b0ebf55fad145113e69e7847 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 31 Mar 2025 18:51:15 +0200 Subject: [PATCH 27/27] Fix WriteBytes() --- Objects/bytesobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 1c11b22f973b8b..d160491aa263c0 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -4034,7 +4034,7 @@ PyBytesWriter_WriteBytes(PyBytesWriter *writer, size_t len = strlen(bytes); if (len > (size_t)PY_SSIZE_T_MAX) { PyErr_NoMemory(); - return NULL; + return -1; } size = (Py_ssize_t)len; }