From 78799b971bf73c8cc70e208c63a7400081bc3ec4 Mon Sep 17 00:00:00 2001 From: Maanik Garg Date: Thu, 7 May 2026 15:39:38 +0200 Subject: [PATCH] fix: default encoding_error_handler to replace in stdio_client for UTF-8 resilience The stdio_client transport previously defaulted to encoding_error_handler=strict, causing the transport to crash when the child process emits invalid UTF-8 bytes. This is asymmetric with the server-side fix in PR #2302, which already uses errors=replace for stdio_server. Changes: - Default StdioServerParameters.encoding_error_handler to replace - Invalid bytes are now substituted with U+FFFD; a line that no longer parses as a JSON-RPC message fails validation and surfaces as an in-stream Exception - The transport stays alive for subsequent valid messages - Changed logger.exception to logger.warning for parse failures (avoids noisy tracebacks for expected validation errors) - Removed pragma: no cover from the now-reachable exception handling path Add regression test that spawns a child emitting invalid UTF-8 followed by a valid JSON-RPC message, asserting that the parse error and then the valid message are both delivered, in that order. Fixes #2454 --- src/mcp/client/stdio.py | 11 ++++++--- tests/client/test_stdio.py | 47 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/mcp/client/stdio.py b/src/mcp/client/stdio.py index 902dc8576..cfdbfab4b 100644 --- a/src/mcp/client/stdio.py +++ b/src/mcp/client/stdio.py @@ -92,10 +92,15 @@ class StdioServerParameters(BaseModel): Defaults to utf-8. """ - encoding_error_handler: Literal["strict", "ignore", "replace"] = "strict" + encoding_error_handler: Literal["strict", "ignore", "replace"] = "replace" """ The text encoding error handler. + Defaults to "replace" so that malformed bytes from the child process are + substituted with U+FFFD rather than crashing the transport. The invalid + line will then fail JSON validation and be surfaced as an in-stream + exception, keeping the transport alive for subsequent valid messages. 
+ See https://docs.python.org/3/library/codecs.html#codec-base-classes for explanations of possible values. """ @@ -151,8 +156,8 @@ async def stdout_reader(): for line in lines: try: message = types.jsonrpc_message_adapter.validate_json(line, by_name=False) - except Exception as exc: # pragma: no cover - logger.exception("Failed to parse JSONRPC message from server") + except Exception as exc: + logger.warning("Failed to parse JSONRPC message from server: %s", exc) await read_stream_writer.send(exc) continue diff --git a/tests/client/test_stdio.py b/tests/client/test_stdio.py index 06e2cba4b..691fad7fb 100644 --- a/tests/client/test_stdio.py +++ b/tests/client/test_stdio.py @@ -103,6 +103,53 @@ async def test_stdio_client_nonexistent_command(): assert exc_info.value.errno == errno.ENOENT +@pytest.mark.anyio +async def test_stdio_client_invalid_utf8_resilience(): + """Malformed UTF-8 from child stdout must not crash the transport. + + With encoding_error_handler="replace" (now the default), invalid bytes + are replaced with U+FFFD. The resulting line fails JSON validation and + is surfaced as an in-stream Exception. Subsequent valid messages must + still be delivered. 
+ """ + # Child script writes one malformed line, then one valid JSON-RPC line + child_script = textwrap.dedent( + """ + import sys + # Write invalid UTF-8 bytes followed by newline + sys.stdout.buffer.write(b"\\xff\\xfe\\n") + sys.stdout.buffer.flush() + # Write a valid JSON-RPC message + sys.stdout.buffer.write(b'{"jsonrpc":"2.0","id":1,"method":"ping"}\\n') + sys.stdout.buffer.flush() + # Exit cleanly + """ + ) + + server_params = StdioServerParameters( + command=sys.executable, + args=["-c", child_script], + ) + + async with stdio_client(server_params) as (read_stream, write_stream): + received: list[SessionMessage | Exception] = [] + async with read_stream: + async for item in read_stream: + received.append(item) + if len(received) == 2: + break + + # First item: the malformed line should surface as a parse exception + assert isinstance(received[0], Exception), ( + f"Expected Exception for malformed UTF-8 line, got {type(received[0])}" + ) + # Second item: the valid JSON-RPC message should come through + assert isinstance(received[1], SessionMessage), ( + f"Expected SessionMessage for valid line, got {type(received[1])}" + ) + assert received[1].message == JSONRPCRequest(jsonrpc="2.0", id=1, method="ping") + + @pytest.mark.anyio async def test_stdio_client_universal_cleanup(): """Test that stdio_client completes cleanup within reasonable time