GH-134453: Fix subprocess memoryview input handling on POSIX (GH-134949)

Fix inconsistent subprocess.Popen.communicate() behavior between Windows
and POSIX when using memoryview objects with non-byte elements as input.

On POSIX systems, the code was incorrectly comparing bytes written against
element count instead of byte count, causing data truncation for large
inputs with non-byte element types.

Changes:
- Cast memoryview inputs to byte view when input is already a memoryview
- Fix progress tracking to use len(input_view) instead of len(self._input)
- Add comprehensive test coverage for memoryview inputs

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* old-man-yells-at-ReST
* Update 2025-05-30-18-37-44.gh-issue-134453.kxkA-o.rst
* assertIsNone review feedback
* fix memoryview_nonbytes test to fail without our fix on main, and have a nicer error.

Thanks to Peter Bierma @ZeroIntensity for the code review.
This commit is contained in:
Gregory P. Smith 2025-11-28 20:25:06 -08:00 committed by GitHub
parent 526d7a8bb4
commit cc6bc4c97f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 51 additions and 2 deletions

View file

@ -2102,7 +2102,10 @@ def _communicate(self, input, endtime, orig_timeout):
self._save_input(input) self._save_input(input)
if self._input: if self._input:
input_view = memoryview(self._input) if not isinstance(self._input, memoryview):
input_view = memoryview(self._input)
else:
input_view = self._input.cast("b") # byte input required
with _PopenSelector() as selector: with _PopenSelector() as selector:
if self.stdin and not self.stdin.closed and self._input: if self.stdin and not self.stdin.closed and self._input:
@ -2138,7 +2141,7 @@ def _communicate(self, input, endtime, orig_timeout):
selector.unregister(key.fileobj) selector.unregister(key.fileobj)
key.fileobj.close() key.fileobj.close()
else: else:
if self._input_offset >= len(self._input): if self._input_offset >= len(input_view):
selector.unregister(key.fileobj) selector.unregister(key.fileobj)
key.fileobj.close() key.fileobj.close()
elif key.fileobj in (self.stdout, self.stderr): elif key.fileobj in (self.stdout, self.stderr):

View file

@ -957,6 +957,48 @@ def test_communicate(self):
self.assertEqual(stdout, b"banana") self.assertEqual(stdout, b"banana")
self.assertEqual(stderr, b"pineapple") self.assertEqual(stderr, b"pineapple")
def test_communicate_memoryview_input(self):
# Test memoryview input with byte elements
test_data = b"Hello, memoryview!"
mv = memoryview(test_data)
p = subprocess.Popen([sys.executable, "-c",
'import sys; sys.stdout.write(sys.stdin.read())'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
self.addCleanup(p.stdout.close)
self.addCleanup(p.stdin.close)
(stdout, stderr) = p.communicate(mv)
self.assertEqual(stdout, test_data)
self.assertIsNone(stderr)
def test_communicate_memoryview_input_nonbyte(self):
# Test memoryview input with non-byte elements (e.g., int32)
# This tests the fix for gh-134453 where non-byte memoryviews
# had incorrect length tracking on POSIX
import array
# Create an array of 32-bit integers that's large enough to trigger
# the chunked writing behavior (> PIPE_BUF)
pipe_buf = getattr(select, 'PIPE_BUF', 512)
# Each 'i' element is 4 bytes, so we need more than pipe_buf/4 elements
# Add some extra to ensure we exceed the buffer size
num_elements = pipe_buf + 1
test_array = array.array('i', [0x64306f66 for _ in range(num_elements)])
expected_bytes = test_array.tobytes()
mv = memoryview(test_array)
p = subprocess.Popen([sys.executable, "-c",
'import sys; '
'data = sys.stdin.buffer.read(); '
'sys.stdout.buffer.write(data)'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
self.addCleanup(p.stdout.close)
self.addCleanup(p.stdin.close)
(stdout, stderr) = p.communicate(mv)
self.assertEqual(stdout, expected_bytes,
msg=f"{len(stdout)=} =? {len(expected_bytes)=}")
self.assertIsNone(stderr)
def test_communicate_timeout(self): def test_communicate_timeout(self):
p = subprocess.Popen([sys.executable, "-c", p = subprocess.Popen([sys.executable, "-c",
'import sys,os,time;' 'import sys,os,time;'

View file

@ -0,0 +1,4 @@
Fixed :func:`subprocess.Popen.communicate` ``input=`` handling of :class:`memoryview`
instances that were non-byte shaped on POSIX platforms. Those are now properly
cast to a byte shaped view instead of truncating the input. Windows platforms
did not have this bug.