mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
Simplify _communicate_streams() to only accept file objects
Remove support for raw file descriptors in _communicate_streams(), requiring all streams to be file objects. This simplifies both the Windows and POSIX implementations by removing isinstance() checks and fd-wrapping logic. The run_pipeline() function now wraps the stderr pipe's read end with os.fdopen() immediately after creation. This change makes _communicate_streams() more compatible with Popen.communicate() which already uses file objects, enabling potential future refactoring to share the multiplexed I/O logic. Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
2470e14a70
commit
e22d1da9bc
1 changed files with 31 additions and 49 deletions
|
|
@ -333,18 +333,19 @@ def _communicate_streams(stdin=None, input_data=None, read_streams=None,
|
||||||
"""
|
"""
|
||||||
Multiplex I/O: write input_data to stdin, read from read_streams.
|
Multiplex I/O: write input_data to stdin, read from read_streams.
|
||||||
|
|
||||||
Works with both file objects and raw file descriptors.
|
All streams must be file objects (not raw file descriptors).
|
||||||
All I/O is done in binary mode; caller handles text encoding.
|
All I/O is done in binary mode; caller handles text encoding.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
stdin: Writable file object for input, or None
|
stdin: Writable binary file object for input, or None
|
||||||
input_data: Bytes to write to stdin, or None
|
input_data: Bytes to write to stdin, or None
|
||||||
read_streams: List of readable file objects or raw fds to read from
|
read_streams: List of readable binary file objects to read from
|
||||||
timeout: Timeout in seconds, or None for no timeout
|
timeout: Timeout in seconds, or None for no timeout
|
||||||
cmd_for_timeout: Value to use for TimeoutExpired.cmd
|
cmd_for_timeout: Value to use for TimeoutExpired.cmd
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict mapping each item in read_streams to its bytes data
|
Dict mapping each file object in read_streams to its bytes data.
|
||||||
|
All file objects in read_streams will be closed.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
TimeoutExpired: If timeout expires (with partial data)
|
TimeoutExpired: If timeout expires (with partial data)
|
||||||
|
|
@ -377,22 +378,15 @@ def _communicate_streams_windows(stdin, input_data, read_streams,
|
||||||
"""Windows implementation using threads."""
|
"""Windows implementation using threads."""
|
||||||
threads = []
|
threads = []
|
||||||
buffers = {}
|
buffers = {}
|
||||||
fds_to_close = []
|
|
||||||
|
|
||||||
# Start reader threads
|
# Start reader threads for each stream
|
||||||
for stream in read_streams:
|
for stream in read_streams:
|
||||||
buf = []
|
buf = []
|
||||||
buffers[stream] = buf
|
buffers[stream] = buf
|
||||||
# Wrap raw fds in file objects
|
t = threading.Thread(target=_reader_thread_func, args=(stream, buf))
|
||||||
if isinstance(stream, int):
|
|
||||||
fobj = os.fdopen(os.dup(stream), 'rb')
|
|
||||||
fds_to_close.append(stream)
|
|
||||||
else:
|
|
||||||
fobj = stream
|
|
||||||
t = threading.Thread(target=_reader_thread_func, args=(fobj, buf))
|
|
||||||
t.daemon = True
|
t.daemon = True
|
||||||
t.start()
|
t.start()
|
||||||
threads.append((stream, t, fobj))
|
threads.append((stream, t))
|
||||||
|
|
||||||
# Write stdin
|
# Write stdin
|
||||||
if stdin and input_data:
|
if stdin and input_data:
|
||||||
|
|
@ -413,7 +407,7 @@ def _communicate_streams_windows(stdin, input_data, read_streams,
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# Join threads with timeout
|
# Join threads with timeout
|
||||||
for stream, t, fobj in threads:
|
for stream, t in threads:
|
||||||
remaining = _remaining_time_helper(endtime)
|
remaining = _remaining_time_helper(endtime)
|
||||||
if remaining is not None and remaining < 0:
|
if remaining is not None and remaining < 0:
|
||||||
remaining = 0
|
remaining = 0
|
||||||
|
|
@ -425,13 +419,6 @@ def _communicate_streams_windows(stdin, input_data, read_streams,
|
||||||
cmd_for_timeout, orig_timeout,
|
cmd_for_timeout, orig_timeout,
|
||||||
output=results.get(read_streams[0]) if read_streams else None)
|
output=results.get(read_streams[0]) if read_streams else None)
|
||||||
|
|
||||||
# Close any raw fds we duped
|
|
||||||
for fd in fds_to_close:
|
|
||||||
try:
|
|
||||||
os.close(fd)
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Collect results
|
# Collect results
|
||||||
return {stream: (buf[0] if buf else b'') for stream, buf in buffers.items()}
|
return {stream: (buf[0] if buf else b'') for stream, buf in buffers.items()}
|
||||||
|
|
||||||
|
|
@ -439,14 +426,10 @@ def _communicate_streams_windows(stdin, input_data, read_streams,
|
||||||
def _communicate_streams_posix(stdin, input_data, read_streams,
|
def _communicate_streams_posix(stdin, input_data, read_streams,
|
||||||
endtime, orig_timeout, cmd_for_timeout):
|
endtime, orig_timeout, cmd_for_timeout):
|
||||||
"""POSIX implementation using selectors."""
|
"""POSIX implementation using selectors."""
|
||||||
# Normalize read_streams: build mapping of fd -> (original_key, chunks)
|
# Build mapping of fd -> (file_object, chunks_list)
|
||||||
fd_info = {} # fd -> (original_stream, chunks_list)
|
fd_info = {}
|
||||||
for stream in read_streams:
|
for stream in read_streams:
|
||||||
if isinstance(stream, int):
|
fd_info[stream.fileno()] = (stream, [])
|
||||||
fd = stream
|
|
||||||
else:
|
|
||||||
fd = stream.fileno()
|
|
||||||
fd_info[fd] = (stream, [])
|
|
||||||
|
|
||||||
# Prepare stdin
|
# Prepare stdin
|
||||||
stdin_fd = None
|
stdin_fd = None
|
||||||
|
|
@ -477,8 +460,8 @@ def _communicate_streams_posix(stdin, input_data, read_streams,
|
||||||
remaining = _remaining_time_helper(endtime)
|
remaining = _remaining_time_helper(endtime)
|
||||||
if remaining is not None and remaining < 0:
|
if remaining is not None and remaining < 0:
|
||||||
# Timed out - collect partial results
|
# Timed out - collect partial results
|
||||||
results = {orig: b''.join(chunks)
|
results = {stream: b''.join(chunks)
|
||||||
for fd, (orig, chunks) in fd_info.items()}
|
for fd, (stream, chunks) in fd_info.items()}
|
||||||
raise TimeoutExpired(
|
raise TimeoutExpired(
|
||||||
cmd_for_timeout, orig_timeout,
|
cmd_for_timeout, orig_timeout,
|
||||||
output=results.get(read_streams[0]) if read_streams else None)
|
output=results.get(read_streams[0]) if read_streams else None)
|
||||||
|
|
@ -487,8 +470,8 @@ def _communicate_streams_posix(stdin, input_data, read_streams,
|
||||||
|
|
||||||
# Check timeout after select
|
# Check timeout after select
|
||||||
if endtime is not None and _time() > endtime:
|
if endtime is not None and _time() > endtime:
|
||||||
results = {orig: b''.join(chunks)
|
results = {stream: b''.join(chunks)
|
||||||
for fd, (orig, chunks) in fd_info.items()}
|
for fd, (stream, chunks) in fd_info.items()}
|
||||||
raise TimeoutExpired(
|
raise TimeoutExpired(
|
||||||
cmd_for_timeout, orig_timeout,
|
cmd_for_timeout, orig_timeout,
|
||||||
output=results.get(read_streams[0]) if read_streams else None)
|
output=results.get(read_streams[0]) if read_streams else None)
|
||||||
|
|
@ -520,16 +503,14 @@ def _communicate_streams_posix(stdin, input_data, read_streams,
|
||||||
else:
|
else:
|
||||||
fd_info[key.fd][1].append(data)
|
fd_info[key.fd][1].append(data)
|
||||||
|
|
||||||
# Build results: map original stream keys to joined data
|
# Build results and close all file objects
|
||||||
results = {}
|
results = {}
|
||||||
for fd, (orig_stream, chunks) in fd_info.items():
|
for fd, (stream, chunks) in fd_info.items():
|
||||||
results[orig_stream] = b''.join(chunks)
|
results[stream] = b''.join(chunks)
|
||||||
# Close file objects (but not raw fds - caller manages those)
|
try:
|
||||||
if not isinstance(orig_stream, int):
|
stream.close()
|
||||||
try:
|
except OSError:
|
||||||
orig_stream.close()
|
pass
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
@ -942,13 +923,14 @@ def run_pipeline(*commands, input=None, capture_output=False, timeout=None,
|
||||||
stdout_arg = kwargs.pop('stdout', None)
|
stdout_arg = kwargs.pop('stdout', None)
|
||||||
|
|
||||||
processes = []
|
processes = []
|
||||||
stderr_read_fd = None # Read end of shared stderr pipe (for parent)
|
stderr_reader = None # File object for reading shared stderr (for parent)
|
||||||
stderr_write_fd = None # Write end of shared stderr pipe (for children)
|
stderr_write_fd = None # Write end of shared stderr pipe (for children)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Create a single stderr pipe that all processes will share
|
# Create a single stderr pipe that all processes will share
|
||||||
if capture_stderr:
|
if capture_stderr:
|
||||||
stderr_read_fd, stderr_write_fd = os.pipe()
|
stderr_read_fd, stderr_write_fd = os.pipe()
|
||||||
|
stderr_reader = os.fdopen(stderr_read_fd, 'rb')
|
||||||
|
|
||||||
for i, cmd in enumerate(commands):
|
for i, cmd in enumerate(commands):
|
||||||
is_first = (i == 0)
|
is_first = (i == 0)
|
||||||
|
|
@ -1017,8 +999,8 @@ def run_pipeline(*commands, input=None, capture_output=False, timeout=None,
|
||||||
read_streams = []
|
read_streams = []
|
||||||
if last_proc.stdout is not None:
|
if last_proc.stdout is not None:
|
||||||
read_streams.append(last_proc.stdout)
|
read_streams.append(last_proc.stdout)
|
||||||
if stderr_read_fd is not None:
|
if stderr_reader is not None:
|
||||||
read_streams.append(stderr_read_fd)
|
read_streams.append(stderr_reader)
|
||||||
|
|
||||||
# Use multiplexed I/O to handle stdin/stdout/stderr concurrently
|
# Use multiplexed I/O to handle stdin/stdout/stderr concurrently
|
||||||
# This avoids deadlocks from pipe buffer limits
|
# This avoids deadlocks from pipe buffer limits
|
||||||
|
|
@ -1043,7 +1025,7 @@ def run_pipeline(*commands, input=None, capture_output=False, timeout=None,
|
||||||
|
|
||||||
# Extract results
|
# Extract results
|
||||||
stdout = results.get(last_proc.stdout)
|
stdout = results.get(last_proc.stdout)
|
||||||
stderr = results.get(stderr_read_fd)
|
stderr = results.get(stderr_reader)
|
||||||
|
|
||||||
# Decode stdout if in text mode (Popen text mode only applies to
|
# Decode stdout if in text mode (Popen text mode only applies to
|
||||||
# streams it creates, but we read via _communicate_streams which
|
# streams it creates, but we read via _communicate_streams which
|
||||||
|
|
@ -1087,10 +1069,10 @@ def run_pipeline(*commands, input=None, capture_output=False, timeout=None,
|
||||||
proc.stdin.close()
|
proc.stdin.close()
|
||||||
if proc.stdout and not proc.stdout.closed:
|
if proc.stdout and not proc.stdout.closed:
|
||||||
proc.stdout.close()
|
proc.stdout.close()
|
||||||
# Close stderr pipe file descriptor
|
# Close stderr pipe (reader is a file object, writer is a raw fd)
|
||||||
if stderr_read_fd is not None:
|
if stderr_reader is not None and not stderr_reader.closed:
|
||||||
try:
|
try:
|
||||||
os.close(stderr_read_fd)
|
stderr_reader.close()
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
if stderr_write_fd is not None:
|
if stderr_write_fd is not None:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue