ladybird/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.cpp
Andreas Kling 7b2bb4d49c LibWeb: Fix race in MIME sniff bytes when response completes early
When a streaming HTTP response completes before set_body() is called
on the FetchedDataReceiver, the sniff bytes would never be marked as
complete, causing navigation to hang intermittently.

The sequence that triggers this:

1. handle_network_bytes(data, Ongoing) runs, but m_body is null,
   so bytes only go into the FetchedDataReceiver's own m_buffer.
2. handle_network_bytes({}, Complete) runs, but m_body is still
   null, so the m_body->set_sniff_bytes_complete() call is skipped.
3. set_body(body) is called, which flushes m_buffer into the body
   via append_sniff_bytes(), but never marks them as complete.
4. populate_session_history_entry_document() calls
   sniff_bytes_if_available() on the body. Since the source is
   Empty (streaming) and m_sniff_bytes_complete is false, it
   returns no value.
5. The async path registers a callback via wait_for_sniff_bytes(),
   but since the stream already completed, no more data arrives,
   and the callback never fires. Navigation hangs.

Fix this by checking the lifecycle state in set_body(). If we have
already moved past the Receiving state, the Complete was already
processed and we need to mark sniff bytes as complete now.
2026-03-08 11:39:41 +01:00

201 lines
7.7 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2024-2026, Tim Flynn <trflynn89@ladybird.org>
* Copyright (c) 2025, Aliaksandr Kalenik <kalenik.aliaksandr@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibGC/Function.h>
#include <LibHTTP/Cache/MemoryCache.h>
#include <LibWeb/Bindings/ExceptionOrUtils.h>
#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
#include <LibWeb/Fetch/Infrastructure/FetchParams.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Bodies.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Responses.h>
#include <LibWeb/Fetch/Infrastructure/Task.h>
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
#include <LibWeb/Streams/ReadableStream.h>
#include <LibWeb/WebIDL/Promise.h>
namespace Web::Fetch::Fetching {
// LibGC allocator boilerplate for FetchedDataReceiver.
// NOTE(review): presumably pairs with a matching allocator declaration in the class header -- confirm.
GC_DEFINE_ALLOCATOR(FetchedDataReceiver);
FetchedDataReceiver::FetchedDataReceiver(GC::Ref<Infrastructure::FetchParams const> fetch_params, GC::Ref<Streams::ReadableStream> stream, RefPtr<HTTP::MemoryCache> http_cache)
: m_fetch_params(fetch_params)
, m_stream(stream)
, m_http_cache(move(http_cache))
{
}
// Defaulted out of line; the receiver performs no custom teardown here.
FetchedDataReceiver::~FetchedDataReceiver() = default;
// Attaches the response body that should receive MIME sniff bytes.
//
// The network may have delivered data (or even finished) before the body was attached, so this catches the body up
// with everything handle_network_bytes() could not forward at the time.
void FetchedDataReceiver::set_body(GC::Ref<Fetch::Infrastructure::Body> body)
{
    m_body = body;

    // Replay whatever was buffered while no body was attached.
    bool const has_buffered_bytes = !m_buffer.is_empty();
    if (has_buffered_bytes)
        body->append_sniff_bytes(m_buffer);

    // Any state other than Receiving means the completion signal has already been processed, and with it the only
    // chance handle_network_bytes() had to mark the sniff bytes as complete. Do it now instead.
    bool const completion_already_seen = m_lifecycle_state != LifecycleState::Receiving;
    if (completion_already_seen)
        body->set_sniff_bytes_complete();
}
// Reports every GC-managed member to the visitor, after letting the base class report its own.
void FetchedDataReceiver::visit_edges(Visitor& visitor)
{
    Base::visit_edges(visitor);

    // Fetch state this receiver was created for / is feeding.
    visitor.visit(m_fetch_params);
    visitor.visit(m_response);
    visitor.visit(m_body);

    // Stream plumbing: the stream being filled and the promise of the pull currently waiting for data.
    visitor.visit(m_stream);
    visitor.visit(m_pending_promise);
}
// Registers the promise of a new pull request on the stream.
//
// Only one pull may be outstanding at a time: there must be neither a stored pending promise nor a queued pull task
// that has not run yet. If unpulled bytes are already buffered they are delivered right away; if the buffer is
// drained and the receiver is ready to close, the stream is closed instead. Otherwise the promise simply waits for
// the next handle_network_bytes() call.
void FetchedDataReceiver::set_pending_promise(GC::Ref<WebIDL::Promise> promise)
{
    VERIFY(!m_pending_promise);
    VERIFY(!m_has_unfulfilled_promise);

    m_pending_promise = promise;

    if (!buffer_is_eof()) {
        pull_bytes_into_stream();
        return;
    }

    if (m_lifecycle_state == LifecycleState::ReadyToClose)
        close_stream();
}
// Handles one notification from the network: either a chunk of body bytes (NetworkState::Ongoing) or the end-of-body
// signal (NetworkState::Complete, which must not carry any bytes).
//
// This implements the parallel steps of the pullAlgorithm in HTTP-network-fetch.
// https://fetch.spec.whatwg.org/#ref-for-in-parallel⑤
void FetchedDataReceiver::handle_network_bytes(ReadonlyBytes bytes, NetworkState state)
{
    // Once the completion signal has been processed, no further network notifications are expected.
    VERIFY(m_lifecycle_state == LifecycleState::Receiving);

    if (state == NetworkState::Complete) {
        VERIFY(bytes.is_empty());
        m_lifecycle_state = LifecycleState::CompletePending;

        // The stream has ended, so the MIME sniff bytes are final. If no body is attached yet, set_body() takes
        // care of this later.
        if (m_body)
            m_body->set_sniff_bytes_complete();
    } else if (state == NetworkState::Ongoing) {
        // NB: This is spec step 1.7 ("Append bytes to buffer"), performed up front so that bytes are retained even
        //     when no pull is currently waiting for them.
        m_buffer.append(bytes);

        // Mirror the bytes into the body for MIME sniffing.
        if (m_body)
            m_body->append_sniff_bytes(bytes);
    }

    // Without a pending pull there is nobody to deliver to. Just record whether the stream can be closed as soon as
    // the next pull arrives: the network is done, the buffer is drained, and no pull task is still in flight.
    if (!m_pending_promise) {
        if (m_lifecycle_state == LifecycleState::CompletePending && buffer_is_eof() && !m_has_unfulfilled_promise)
            m_lifecycle_state = LifecycleState::ReadyToClose;
        return;
    }

    // 1. If one or more bytes have been transmitted from response's message body, then:
    bool const received_bytes = !bytes.is_empty();
    if (received_bytes) {
        // 1. Let bytes be the transmitted bytes.

        // FIXME: 2. Let codings be the result of extracting header list values given `Content-Encoding` and
        //           response's header list.

        // FIXME: 3. Increase response's body info's encoded size by bytes's length.

        // FIXME: 4. Set bytes to the result of handling content codings given codings and bytes.

        // FIXME: 5. Increase response's body info's decoded size by bytes's length.

        // FIXME: 6. If bytes is failure, then terminate fetchParams's controller.

        // 7. Append bytes to buffer.
        // NB: Already done above; what remains is to hand the buffered bytes to the waiting pull.
        pull_bytes_into_stream();

        // FIXME: 8. If the size of buffer is larger than an upper limit chosen by the user agent, ask the user agent
        //           to suspend the ongoing fetch.
        return;
    }

    // 2. Otherwise, if the bytes transmission for response's message body is done normally and stream is readable,
    //    then close stream, and abort these in-parallel steps.
    if (!m_stream->is_readable())
        return;

    VERIFY(m_lifecycle_state == LifecycleState::CompletePending);
    close_stream();
}
// Hands all buffered-but-not-yet-pulled bytes to the stream by queueing a fetch task, and consumes the pending pull
// promise in the process (the task resolves it once the bytes have been pulled into the stream).
//
// This implements the parallel steps of the pullAlgorithm in HTTP-network-fetch.
// https://fetch.spec.whatwg.org/#ref-for-in-parallel④
void FetchedDataReceiver::pull_bytes_into_stream()
{
VERIFY(m_lifecycle_state == LifecycleState::Receiving || m_lifecycle_state == LifecycleState::CompletePending);
// FIXME: 1. If the size of buffer is smaller than a lower limit chosen by the user agent and the ongoing fetch
// is suspended, resume the fetch.
// 2. Wait until buffer is not empty.
// NB: There is no waiting here; the callers in this file only get here when unpulled bytes are available, which
// the VERIFY below enforces.
// NB: It would be nice to avoid a copy here, but ReadableStream::pull_from_bytes currently requires an allocated
// ByteBuffer to create a JS::ArrayBuffer.
auto bytes = copy_unpulled_bytes();
VERIFY(!bytes.is_empty());
// 3. Queue a fetch task to run the following steps, with fetchParams's task destination.
// NB: m_has_unfulfilled_promise stays set from here until the queued task starts running, so that at most one
// pull task is in flight at a time.
VERIFY(!m_has_unfulfilled_promise);
m_has_unfulfilled_promise = true;
Infrastructure::queue_fetch_task(
m_fetch_params->controller(),
m_fetch_params->task_destination(),
// NB: The promise is captured by value because m_pending_promise is reset right after the task is queued.
GC::create_function(heap(), [this, bytes = move(bytes), pending_promise = m_pending_promise]() mutable {
m_has_unfulfilled_promise = false;
VERIFY(m_lifecycle_state == LifecycleState::Receiving || m_lifecycle_state == LifecycleState::CompletePending);
HTML::TemporaryExecutionContext execution_context { m_stream->realm(), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };
// 1. Pull from bytes buffer into stream.
// NOTE(review): On failure the exception is reported and the task returns early, which leaves the captured
// promise unresolved and skips the ReadyToClose transition below -- confirm this is intended.
if (auto result = m_stream->pull_from_bytes(move(bytes)); result.is_error()) {
auto throw_completion = Bindings::exception_to_throw_completion(m_stream->vm(), result.release_error());
dbgln("FetchedDataReceiver: Stream error pulling bytes");
HTML::report_exception(throw_completion, m_stream->realm());
return;
}
// 2. If stream is errored, then terminate fetchParams's controller.
if (m_stream->is_errored())
m_fetch_params->controller()->terminate();
// 3. Resolve promise with undefined.
WebIDL::resolve_promise(m_stream->realm(), *pending_promise, JS::js_undefined());
// NB: If the network already completed and this pull drained the buffer, the next pull request closes the
// stream (see set_pending_promise()).
if (m_lifecycle_state == LifecycleState::CompletePending && buffer_is_eof())
m_lifecycle_state = LifecycleState::ReadyToClose;
}));
// The queued task now owns the promise; clear the member so a new pull request can be registered.
m_pending_promise = {};
}
// Finishes the body stream once every buffered byte has been pulled into it: resolves the outstanding pull promise
// with undefined, moves the receiver to the Closed state, closes the stream, and finally offers the complete body to
// the HTTP memory cache (if one was provided).
//
// Preconditions: a pull promise is pending, no queued pull task is still in flight, and the buffer has been fully
// pulled.
void FetchedDataReceiver::close_stream()
{
    // The promise is dereferenced below, so make the requirement explicit. Both callers in this file only get here
    // with a promise set.
    VERIFY(m_pending_promise);
    VERIFY(!m_has_unfulfilled_promise);
    VERIFY(buffer_is_eof());

    WebIDL::resolve_promise(m_stream->realm(), *m_pending_promise, JS::js_undefined());
    m_pending_promise = {};

    m_lifecycle_state = LifecycleState::Closed;
    m_stream->close();

    if (m_http_cache) {
        auto request = m_fetch_params->request();

        // Only finalize a cache entry when a response is known and the request did not opt out via "no-store".
        // NB: This moves m_buffer into the cache; the buffer must not be read again afterwards.
        if (m_response && request->cache_mode() != HTTP::CacheMode::NoStore)
            m_http_cache->finalize_entry(request->current_url(), request->method(), request->header_list(), m_response->status(), m_response->header_list(), move(m_buffer));

        // Release the cache handle whether or not an entry was finalized; it is not needed after closing.
        m_http_cache.clear();
    }
}
// Returns a copy of the tail of m_buffer that has not been pulled into the stream yet, and advances the pulled-bytes
// cursor past it, so that an immediately repeated call yields an empty buffer.
ByteBuffer FetchedDataReceiver::copy_unpulled_bytes()
{
    auto const unpulled_length = m_buffer.size() - m_pulled_bytes;
    auto unpulled = MUST(m_buffer.slice(m_pulled_bytes, unpulled_length));

    m_pulled_bytes += unpulled.size();
    return unpulled;
}
}