mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	Commit strict str/bytes distinction.
From now on, trying to write str to a binary stream is an error (I'm still working on the reverse).

There are still (at least) two failing tests:
- test_asynchat
- test_urllib2_localnet

but I'm sure these will be fixed by someone.
This commit is contained in:
		
							parent
							
								
									245b42ec4b
								
							
						
					
					
						commit
						a74184eb1d
					
				
					 3 changed files with 59 additions and 119 deletions
				
			
		
							
								
								
									
										13
									
								
								Lib/io.py
									
										
									
									
									
								
							
							
						
						
									
										13
									
								
								Lib/io.py
									
										
									
									
									
								
							| 
						 | 
					@ -659,12 +659,14 @@ def read1(self, n):
 | 
				
			||||||
    def write(self, b):
 | 
					    def write(self, b):
 | 
				
			||||||
        if self.closed:
 | 
					        if self.closed:
 | 
				
			||||||
            raise ValueError("write to closed file")
 | 
					            raise ValueError("write to closed file")
 | 
				
			||||||
 | 
					        if isinstance(b, str):
 | 
				
			||||||
 | 
					            raise TypeError("can't write str to binary stream")
 | 
				
			||||||
        n = len(b)
 | 
					        n = len(b)
 | 
				
			||||||
        newpos = self._pos + n
 | 
					        newpos = self._pos + n
 | 
				
			||||||
        if newpos > len(self._buffer):
 | 
					        if newpos > len(self._buffer):
 | 
				
			||||||
            # Inserts null bytes between the current end of the file
 | 
					            # Inserts null bytes between the current end of the file
 | 
				
			||||||
            # and the new write position.
 | 
					            # and the new write position.
 | 
				
			||||||
            padding = '\x00' * (newpos - len(self._buffer) - n)
 | 
					            padding = b'\x00' * (newpos - len(self._buffer) - n)
 | 
				
			||||||
            self._buffer[self._pos:newpos - n] = padding
 | 
					            self._buffer[self._pos:newpos - n] = padding
 | 
				
			||||||
        self._buffer[self._pos:newpos] = b
 | 
					        self._buffer[self._pos:newpos] = b
 | 
				
			||||||
        self._pos = newpos
 | 
					        self._pos = newpos
 | 
				
			||||||
| 
						 | 
					@ -801,11 +803,8 @@ def __init__(self, raw,
 | 
				
			||||||
    def write(self, b):
 | 
					    def write(self, b):
 | 
				
			||||||
        if self.closed:
 | 
					        if self.closed:
 | 
				
			||||||
            raise ValueError("write to closed file")
 | 
					            raise ValueError("write to closed file")
 | 
				
			||||||
        if not isinstance(b, bytes):
 | 
					        if isinstance(b, str):
 | 
				
			||||||
            if hasattr(b, "__index__"):
 | 
					            raise TypeError("can't write str to binary stream")
 | 
				
			||||||
                raise TypeError("Can't write object of type %s" %
 | 
					 | 
				
			||||||
                                type(b).__name__)
 | 
					 | 
				
			||||||
            b = bytes(b)
 | 
					 | 
				
			||||||
        # XXX we can implement some more tricks to try and avoid partial writes
 | 
					        # XXX we can implement some more tricks to try and avoid partial writes
 | 
				
			||||||
        if len(self._write_buf) > self.buffer_size:
 | 
					        if len(self._write_buf) > self.buffer_size:
 | 
				
			||||||
            # We're full, so let's pre-flush the buffer
 | 
					            # We're full, so let's pre-flush the buffer
 | 
				
			||||||
| 
						 | 
					@ -1099,8 +1098,6 @@ def write(self, s: str):
 | 
				
			||||||
            s = s.replace("\n", self._writenl)
 | 
					            s = s.replace("\n", self._writenl)
 | 
				
			||||||
        # XXX What if we were just reading?
 | 
					        # XXX What if we were just reading?
 | 
				
			||||||
        b = s.encode(self._encoding)
 | 
					        b = s.encode(self._encoding)
 | 
				
			||||||
        if isinstance(b, str):
 | 
					 | 
				
			||||||
            b = bytes(b)
 | 
					 | 
				
			||||||
        self.buffer.write(b)
 | 
					        self.buffer.write(b)
 | 
				
			||||||
        if haslf and self.isatty():
 | 
					        if haslf and self.isatty():
 | 
				
			||||||
            self.flush()
 | 
					            self.flush()
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -82,7 +82,13 @@ _getbuffer(PyObject *obj, PyBuffer *view)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (buffer == NULL ||
 | 
					    if (buffer == NULL ||
 | 
				
			||||||
        PyUnicode_Check(obj) ||
 | 
					        PyUnicode_Check(obj) ||
 | 
				
			||||||
        buffer->bf_getbuffer == NULL) return -1;
 | 
					        buffer->bf_getbuffer == NULL)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        PyErr_Format(PyExc_TypeError,
 | 
				
			||||||
 | 
					                     "Type %.100s doesn't support the buffer API",
 | 
				
			||||||
 | 
					                     Py_Type(obj)->tp_name);
 | 
				
			||||||
 | 
					        return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
 | 
					    if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
 | 
				
			||||||
            return -1;
 | 
					            return -1;
 | 
				
			||||||
| 
						 | 
					@ -167,7 +173,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
 | 
				
			||||||
    else if (size < alloc) {
 | 
					    else if (size < alloc) {
 | 
				
			||||||
        /* Within allocated size; quick exit */
 | 
					        /* Within allocated size; quick exit */
 | 
				
			||||||
        Py_Size(self) = size;
 | 
					        Py_Size(self) = size;
 | 
				
			||||||
	((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
 | 
					        ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
 | 
				
			||||||
        return 0;
 | 
					        return 0;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    else if (size <= alloc * 1.125) {
 | 
					    else if (size <= alloc * 1.125) {
 | 
				
			||||||
| 
						 | 
					@ -181,7 +187,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (((PyBytesObject *)self)->ob_exports > 0) {
 | 
					    if (((PyBytesObject *)self)->ob_exports > 0) {
 | 
				
			||||||
            /*
 | 
					            /*
 | 
				
			||||||
            fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, ((PyBytesObject *)self)->ob_bytes);
 | 
					            fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
 | 
				
			||||||
 | 
					                    ((PyBytesObject *)self)->ob_bytes);
 | 
				
			||||||
            */
 | 
					            */
 | 
				
			||||||
            PyErr_SetString(PyExc_BufferError,
 | 
					            PyErr_SetString(PyExc_BufferError,
 | 
				
			||||||
                    "Existing exports of data: object cannot be re-sized");
 | 
					                    "Existing exports of data: object cannot be re-sized");
 | 
				
			||||||
| 
						 | 
					@ -262,8 +269,8 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
 | 
				
			||||||
    PyBuffer vo;
 | 
					    PyBuffer vo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (_getbuffer(other, &vo) < 0) {
 | 
					    if (_getbuffer(other, &vo) < 0) {
 | 
				
			||||||
            PyErr_Format(PyExc_TypeError,
 | 
					        PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
 | 
				
			||||||
                         "can't concat bytes to %.100s", Py_Type(self)->tp_name);
 | 
					                     Py_Type(self)->tp_name);
 | 
				
			||||||
        return NULL;
 | 
					        return NULL;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -757,8 +764,11 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
 | 
				
			||||||
    if (PyUnicode_Check(arg)) {
 | 
					    if (PyUnicode_Check(arg)) {
 | 
				
			||||||
        /* Encode via the codec registry */
 | 
					        /* Encode via the codec registry */
 | 
				
			||||||
        PyObject *encoded, *new;
 | 
					        PyObject *encoded, *new;
 | 
				
			||||||
        if (encoding == NULL)
 | 
					        if (encoding == NULL) {
 | 
				
			||||||
            encoding = PyUnicode_GetDefaultEncoding();
 | 
					            PyErr_SetString(PyExc_TypeError,
 | 
				
			||||||
 | 
					                            "string argument without an encoding");
 | 
				
			||||||
 | 
					            return -1;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        encoded = PyCodec_Encode(arg, encoding, errors);
 | 
					        encoded = PyCodec_Encode(arg, encoding, errors);
 | 
				
			||||||
        if (encoded == NULL)
 | 
					        if (encoded == NULL)
 | 
				
			||||||
            return -1;
 | 
					            return -1;
 | 
				
			||||||
| 
						 | 
					@ -954,12 +964,14 @@ bytes_richcompare(PyObject *self, PyObject *other, int op)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    self_size = _getbuffer(self, &self_bytes);
 | 
					    self_size = _getbuffer(self, &self_bytes);
 | 
				
			||||||
    if (self_size < 0) {
 | 
					    if (self_size < 0) {
 | 
				
			||||||
 | 
					        PyErr_Clear();
 | 
				
			||||||
        Py_INCREF(Py_NotImplemented);
 | 
					        Py_INCREF(Py_NotImplemented);
 | 
				
			||||||
        return Py_NotImplemented;
 | 
					        return Py_NotImplemented;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    other_size = _getbuffer(other, &other_bytes);
 | 
					    other_size = _getbuffer(other, &other_bytes);
 | 
				
			||||||
    if (other_size < 0) {
 | 
					    if (other_size < 0) {
 | 
				
			||||||
 | 
					        PyErr_Clear();
 | 
				
			||||||
        PyObject_ReleaseBuffer(self, &self_bytes);
 | 
					        PyObject_ReleaseBuffer(self, &self_bytes);
 | 
				
			||||||
        Py_INCREF(Py_NotImplemented);
 | 
					        Py_INCREF(Py_NotImplemented);
 | 
				
			||||||
        return Py_NotImplemented;
 | 
					        return Py_NotImplemented;
 | 
				
			||||||
| 
						 | 
					@ -1061,10 +1073,11 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
 | 
				
			||||||
        sub_len = PyBytes_GET_SIZE(subobj);
 | 
					        sub_len = PyBytes_GET_SIZE(subobj);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    /* XXX --> use the modern buffer interface */
 | 
					    /* XXX --> use the modern buffer interface */
 | 
				
			||||||
    else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
 | 
					    else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) {
 | 
				
			||||||
        /* XXX - the "expected a character buffer object" is pretty
 | 
					        /* XXX - the "expected a character buffer object" is pretty
 | 
				
			||||||
           confusing for a non-expert.  remap to something else ? */
 | 
					           confusing for a non-expert.  remap to something else ? */
 | 
				
			||||||
        return -2;
 | 
					        return -2;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (dir > 0)
 | 
					    if (dir > 0)
 | 
				
			||||||
        return stringlib_find_slice(
 | 
					        return stringlib_find_slice(
 | 
				
			||||||
| 
						 | 
					@ -2021,48 +2034,23 @@ bytes_replace(PyBytesObject *self, PyObject *args)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    Py_ssize_t count = -1;
 | 
					    Py_ssize_t count = -1;
 | 
				
			||||||
    PyObject *from, *to, *res;
 | 
					    PyObject *from, *to, *res;
 | 
				
			||||||
    const char *from_s, *to_s;
 | 
					 | 
				
			||||||
    Py_ssize_t from_len, to_len;
 | 
					 | 
				
			||||||
    int relfrom=0, relto=0;
 | 
					 | 
				
			||||||
    PyBuffer vfrom, vto;
 | 
					    PyBuffer vfrom, vto;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
 | 
					    if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
 | 
				
			||||||
        return NULL;
 | 
					        return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (PyBytes_Check(from)) {
 | 
					    if (_getbuffer(from, &vfrom) < 0)
 | 
				
			||||||
        from_s = PyBytes_AS_STRING(from);
 | 
					 | 
				
			||||||
        from_len = PyBytes_GET_SIZE(from);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    else {
 | 
					 | 
				
			||||||
            if (PyObject_GetBuffer(from, &vfrom, PyBUF_CHARACTER) < 0)
 | 
					 | 
				
			||||||
        return NULL;
 | 
					        return NULL;
 | 
				
			||||||
            from_s = vfrom.buf;
 | 
					    if (_getbuffer(to, &vto) < 0) {
 | 
				
			||||||
            from_len = vfrom.len;
 | 
					 | 
				
			||||||
            relfrom = 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (PyBytes_Check(to)) {
 | 
					 | 
				
			||||||
        to_s = PyBytes_AS_STRING(to);
 | 
					 | 
				
			||||||
        to_len = PyBytes_GET_SIZE(to);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    else {
 | 
					 | 
				
			||||||
            if (PyObject_GetBuffer(to, &vto, PyBUF_CHARACTER) < 0) {
 | 
					 | 
				
			||||||
                    if (relfrom)
 | 
					 | 
				
			||||||
        PyObject_ReleaseBuffer(from, &vfrom);
 | 
					        PyObject_ReleaseBuffer(from, &vfrom);
 | 
				
			||||||
        return NULL;
 | 
					        return NULL;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
            to_s = vto.buf;
 | 
					 | 
				
			||||||
            to_len = vto.len;
 | 
					 | 
				
			||||||
            relto = 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    res = (PyObject *)replace((PyBytesObject *) self,
 | 
					    res = (PyObject *)replace((PyBytesObject *) self,
 | 
				
			||||||
                              from_s, from_len,
 | 
					                              vfrom.buf, vfrom.len,
 | 
				
			||||||
                              to_s, to_len, count);
 | 
					                              vto.buf, vto.len, count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (relfrom)
 | 
					 | 
				
			||||||
    PyObject_ReleaseBuffer(from, &vfrom);
 | 
					    PyObject_ReleaseBuffer(from, &vfrom);
 | 
				
			||||||
    if (relto)
 | 
					 | 
				
			||||||
    PyObject_ReleaseBuffer(to, &vto);
 | 
					    PyObject_ReleaseBuffer(to, &vto);
 | 
				
			||||||
    return res;
 | 
					    return res;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -965,31 +965,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if 0
 | 
					 | 
				
			||||||
    /* For b/w compatibility we also accept Unicode objects provided
 | 
					 | 
				
			||||||
       that no encodings is given and then redirect to
 | 
					 | 
				
			||||||
       PyObject_Unicode() which then applies the additional logic for
 | 
					 | 
				
			||||||
       Unicode subclasses.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
       NOTE: This API should really only be used for object which
 | 
					 | 
				
			||||||
             represent *encoded* Unicode !
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    */
 | 
					 | 
				
			||||||
	if (PyUnicode_Check(obj)) {
 | 
					 | 
				
			||||||
	    if (encoding) {
 | 
					 | 
				
			||||||
		PyErr_SetString(PyExc_TypeError,
 | 
					 | 
				
			||||||
				"decoding Unicode is not supported");
 | 
					 | 
				
			||||||
	    return NULL;
 | 
					 | 
				
			||||||
	    }
 | 
					 | 
				
			||||||
	return PyObject_Unicode(obj);
 | 
					 | 
				
			||||||
	    }
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
    if (PyUnicode_Check(obj)) {
 | 
					    if (PyUnicode_Check(obj)) {
 | 
				
			||||||
	PyErr_SetString(PyExc_TypeError,
 | 
						PyErr_SetString(PyExc_TypeError,
 | 
				
			||||||
			"decoding Unicode is not supported");
 | 
								"decoding Unicode is not supported");
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /* Coerce object */
 | 
					    /* Coerce object */
 | 
				
			||||||
    if (PyString_Check(obj)) {
 | 
					    if (PyString_Check(obj)) {
 | 
				
			||||||
| 
						 | 
					@ -6440,26 +6420,7 @@ able to handle UnicodeDecodeErrors.");
 | 
				
			||||||
static PyObject *
 | 
					static PyObject *
 | 
				
			||||||
unicode_decode(PyUnicodeObject *self, PyObject *args)
 | 
					unicode_decode(PyUnicodeObject *self, PyObject *args)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    char *encoding = NULL;
 | 
					    PyErr_Format(PyExc_TypeError, "decoding str is not supported");
 | 
				
			||||||
    char *errors = NULL;
 | 
					 | 
				
			||||||
    PyObject *v;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
 | 
					 | 
				
			||||||
        return NULL;
 | 
					 | 
				
			||||||
    v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
 | 
					 | 
				
			||||||
    if (v == NULL)
 | 
					 | 
				
			||||||
        goto onError;
 | 
					 | 
				
			||||||
    if (!PyString_Check(v) && !PyUnicode_Check(v)) {
 | 
					 | 
				
			||||||
        PyErr_Format(PyExc_TypeError,
 | 
					 | 
				
			||||||
                     "decoder did not return a string/unicode object "
 | 
					 | 
				
			||||||
                     "(type=%.400s)",
 | 
					 | 
				
			||||||
                     Py_Type(v)->tp_name);
 | 
					 | 
				
			||||||
        Py_DECREF(v);
 | 
					 | 
				
			||||||
        return NULL;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    return v;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 onError:
 | 
					 | 
				
			||||||
    return NULL;
 | 
					    return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8136,17 +8097,11 @@ unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (flags & PyBUF_CHARACTER) {
 | 
					    if (flags & PyBUF_CHARACTER) {
 | 
				
			||||||
        PyObject *str;
 | 
					        PyErr_SetString(PyExc_SystemError, "can't use str as char buffer");
 | 
				
			||||||
        
 | 
					        return -1;
 | 
				
			||||||
        str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
 | 
					 | 
				
			||||||
        if (str == NULL) return -1;
 | 
					 | 
				
			||||||
        return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
 | 
					 | 
				
			||||||
                                 PyString_GET_SIZE(str), 1, flags);
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    else {
 | 
					 | 
				
			||||||
    return PyBuffer_FillInfo(view, (void *)self->str,
 | 
					    return PyBuffer_FillInfo(view, (void *)self->str,
 | 
				
			||||||
                             PyUnicode_GET_DATA_SIZE(self), 1, flags);
 | 
					                             PyUnicode_GET_DATA_SIZE(self), 1, flags);
 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue