LibWeb: Use unbuffered network requests for all Fetch requests

Previously, unbuffered requests were only available as a special mode for EventSource. With this change, they are enabled by default, which means chunks can be read from the stream as soon as they arrive. This unlocks some interesting possibilities, such as starting to parse HTML documents before the entire response has been received (that, in turn, allows us to initiate subresource fetches earlier or begin executing scripts sooner), or starting to render videos before they are fully downloaded.

Co-authored-by: Timothy Flynn <trflynn89@pm.me>
Author: https://github.com/kalenikaliaksandr Commit: 3058274386 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6839 Reviewed-by: https://github.com/trflynn89 ✅
2025-12-08 06:09:58 +00:00 · 2025-04-17 07:58:24 -04:00 · 2025-04-17 07:58:24 -04:00 · 3058274386 · 2025-11-20 11:30:52 +00:00
commit 3058274386
parent f942fef39b
4 changed files with 185 additions and 184 deletions
--- a/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.cpp
+++ b/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.cpp
@@ -1,5 +1,6 @@
 /*
 * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2025, Aliaksandr Kalenik <kalenik.aliaksandr@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
@@ -36,35 +37,82 @@ void FetchedDataReceiver::visit_edges(Visitor& visitor)

 void FetchedDataReceiver::set_pending_promise(GC::Ref<WebIDL::Promise> promise)
 {
-    auto had_pending_promise = m_pending_promise != nullptr;
+    VERIFY(!m_pending_promise);
+    VERIFY(!m_has_unfulfilled_promise);
    m_pending_promise = promise;

-    if (!had_pending_promise && !m_buffer.is_empty()) {
-        on_data_received(m_buffer);
-        m_buffer.clear();
+    if (!m_buffer.is_empty()) {
+        pull_bytes_into_stream(move(m_buffer));
+    } else if (m_lifecycle_state == LifecycleState::ReadyToClose) {
+        close_stream();
+    }
+}
+
+// This implements the parallel steps of the pullAlgorithm in HTTP-network-fetch.
+// https://fetch.spec.whatwg.org/#ref-for-in-parallel⑤
+void FetchedDataReceiver::handle_network_bytes(ReadonlyBytes bytes, NetworkState state)
+{
+    VERIFY(m_lifecycle_state == LifecycleState::Receiving);
+
+    if (state == NetworkState::Complete) {
+        VERIFY(bytes.is_empty());
+        m_lifecycle_state = LifecycleState::CompletePending;
+    }
+
+    if (!m_pending_promise) {
+        if (state == NetworkState::Ongoing)
+            m_buffer.append(bytes);
+        if (m_lifecycle_state == LifecycleState::CompletePending && m_buffer.is_empty() && !m_has_unfulfilled_promise)
+            m_lifecycle_state = LifecycleState::ReadyToClose;
+        return;
+    }
+
+    // 1. If one or more bytes have been transmitted from response’s message body, then:
+    if (!bytes.is_empty()) {
+        // 1. Let bytes be the transmitted bytes.
+
+        // FIXME: 2. Let codings be the result of extracting header list values given `Content-Encoding` and response’s header list.
+        // FIXME: 3. Increase response’s body info’s encoded size by bytes’s length.
+        // FIXME: 4. Set bytes to the result of handling content codings given codings and bytes.
+        // FIXME: 5. Increase response’s body info’s decoded size by bytes’s length.
+        // FIXME: 6. If bytes is failure, then terminate fetchParams’s controller.
+
+        // 7. Append bytes to buffer.
+        pull_bytes_into_stream(MUST(ByteBuffer::copy(bytes)));
+
+        // FIXME: 8. If the size of buffer is larger than an upper limit chosen by the user agent, ask the user agent
+        //           to suspend the ongoing fetch.
+        return;
+    }
+    // 2. Otherwise, if the bytes transmission for response’s message body is done normally and stream is readable,
+    //    then close stream, and abort these in-parallel steps.
+    if (m_stream->is_readable()) {
+        VERIFY(m_lifecycle_state == LifecycleState::CompletePending);
+        close_stream();
    }
 }

 // This implements the parallel steps of the pullAlgorithm in HTTP-network-fetch.
 // https://fetch.spec.whatwg.org/#ref-for-in-parallel④
-void FetchedDataReceiver::on_data_received(ReadonlyBytes bytes)
+void FetchedDataReceiver::pull_bytes_into_stream(ByteBuffer&& bytes)
 {
    // FIXME: 1. If the size of buffer is smaller than a lower limit chosen by the user agent and the ongoing fetch
    //           is suspended, resume the fetch.
-    // FIXME: 2. Wait until buffer is not empty.

-    // If the remote end sends data immediately after we receive headers, we will often get that data here before the
-    // stream tasks have all been queued internally. Just hold onto that data.
-    if (!m_pending_promise) {
-        m_buffer.append(bytes);
-        return;
-    }
+    // 2. Wait until buffer is not empty.
+    VERIFY(!bytes.is_empty());
+    VERIFY(m_lifecycle_state == LifecycleState::Receiving || m_lifecycle_state == LifecycleState::CompletePending);

    // 3. Queue a fetch task to run the following steps, with fetchParams’s task destination.
+    VERIFY(!m_has_unfulfilled_promise);
+    m_has_unfulfilled_promise = true;
    Infrastructure::queue_fetch_task(
        m_fetch_params->controller(),
        m_fetch_params->task_destination(),
-        GC::create_function(heap(), [this, bytes = MUST(ByteBuffer::copy(bytes))]() mutable {
+        GC::create_function(heap(), [this, bytes = move(bytes), pending_promise = m_pending_promise]() mutable {
+            m_has_unfulfilled_promise = false;
+            VERIFY(m_lifecycle_state == LifecycleState::Receiving || m_lifecycle_state == LifecycleState::CompletePending);
+
            HTML::TemporaryExecutionContext execution_context { m_stream->realm(), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

            // 1. Pull from bytes buffer into stream.
@@ -82,8 +130,22 @@ void FetchedDataReceiver::on_data_received(ReadonlyBytes bytes)
                m_fetch_params->controller()->terminate();

            // 3. Resolve promise with undefined.
-            WebIDL::resolve_promise(m_stream->realm(), *m_pending_promise, JS::js_undefined());
+            WebIDL::resolve_promise(m_stream->realm(), *pending_promise, JS::js_undefined());
+
+            if (m_lifecycle_state == LifecycleState::CompletePending && m_buffer.is_empty())
+                m_lifecycle_state = LifecycleState::ReadyToClose;
        }));
+
+    m_pending_promise = {};
+}
+
+void FetchedDataReceiver::close_stream()
+{
+    VERIFY(m_has_unfulfilled_promise == 0);
+    WebIDL::resolve_promise(m_stream->realm(), *m_pending_promise, JS::js_undefined());
+    m_pending_promise = {};
+    m_lifecycle_state = LifecycleState::Closed;
+    m_stream->close();
 }

 }
--- a/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.h
+++ b/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.h
@@ -1,5 +1,6 @@
 /*
 * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2025, Aliaksandr Kalenik <kalenik.aliaksandr@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
@@ -21,17 +22,35 @@ public:
    virtual ~FetchedDataReceiver() override;

    void set_pending_promise(GC::Ref<WebIDL::Promise>);
-    void on_data_received(ReadonlyBytes);
+
+    enum class NetworkState {
+        Ongoing,
+        Complete,
+        Error,
+    };
+    void handle_network_bytes(ReadonlyBytes, NetworkState);

 private:
    FetchedDataReceiver(GC::Ref<Infrastructure::FetchParams const>, GC::Ref<Streams::ReadableStream>);

    virtual void visit_edges(Visitor& visitor) override;

+    void pull_bytes_into_stream(ByteBuffer&&);
+    void close_stream();
+
    GC::Ref<Infrastructure::FetchParams const> m_fetch_params;
    GC::Ref<Streams::ReadableStream> m_stream;
    GC::Ptr<WebIDL::Promise> m_pending_promise;
+
    ByteBuffer m_buffer;
+    enum class LifecycleState {
+        Receiving,
+        CompletePending,
+        ReadyToClose,
+        Closed,
+    };
+    LifecycleState m_lifecycle_state { LifecycleState::Receiving };
+    bool m_has_unfulfilled_promise { false };
 };

 }
--- a/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp
+++ b/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp
@@ -2309,6 +2309,8 @@ GC::Ref<PendingResponse> nonstandard_resource_loader_file_or_http_network_fetch(

    (void)include_credentials;
    (void)is_new_connection_fetch;
+    (void)fetch_timing_info;
+    (void)cross_origin_isolated_capability;

    auto request = fetch_params.request();

@@ -2344,15 +2346,12 @@ GC::Ref<PendingResponse> nonstandard_resource_loader_file_or_http_network_fetch(
        log_load_request(load_request);
    }

-    // FIXME: This check should be removed and all HTTP requests should go through the `ResourceLoader::load_unbuffered`
-    //        path. The buffer option should then be supplied to the steps below that allow us to buffer data up to a
-    //        user-agent-defined limit (or not). However, we will need to fully use stream operations throughout the
-    //        fetch process to enable this (e.g. Body::fully_read must use streams for this to work).
-    if (request->buffer_policy() == Infrastructure::Request::BufferPolicy::DoNotBufferResponse) {
    HTML::TemporaryExecutionContext execution_context { realm, HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

    // 10. Let stream be a new ReadableStream.
    auto stream = realm.create<Streams::ReadableStream>(realm);
+
+    // 9. Let buffer be an empty byte sequence.
    auto fetched_data_receiver = realm.create<FetchedDataReceiver>(fetch_params, stream);

    // 11. Let pullAlgorithm be the following steps:
@@ -2377,8 +2376,7 @@ GC::Ref<PendingResponse> nonstandard_resource_loader_file_or_http_network_fetch(
    // 13. Set up stream with byte reading support with pullAlgorithm set to pullAlgorithm, cancelAlgorithm set to cancelAlgorithm.
    stream->set_up_with_byte_reading_support(pull_algorithm, cancel_algorithm);

-        auto on_headers_received = GC::create_function(vm.heap(), [&vm, request, pending_response, stream](HTTP::HeaderMap const& response_headers, Optional<u32> status_code, Optional<String> const& reason_phrase) {
-            (void)request;
+    auto on_headers_received = GC::create_function(vm.heap(), [&vm, pending_response, stream, request](HTTP::HeaderMap const& response_headers, Optional<u32> status_code, Optional<String> const& reason_phrase) {
        if (pending_response->is_resolved()) {
            // RequestServer will send us the response headers twice, the second time being for HTTP trailers. This
            // fetch algorithm is not interested in trailers, so just drop them here.
@@ -2391,6 +2389,7 @@ GC::Ref<PendingResponse> nonstandard_resource_loader_file_or_http_network_fetch(
        if (reason_phrase.has_value())
            response->set_status_message(MUST(ByteBuffer::copy(reason_phrase.value().bytes())));

+        (void)request;
        if constexpr (WEB_FETCH_DEBUG) {
            dbgln("Fetch: ResourceLoader load for '{}' {}: (status {})",
                request->url(),
@@ -2415,37 +2414,18 @@ GC::Ref<PendingResponse> nonstandard_resource_loader_file_or_http_network_fetch(
    // 16. Run these steps in parallel:
    //     FIXME: 1. Run these steps, but abort when fetchParams is canceled:
    auto on_data_received = GC::create_function(vm.heap(), [fetched_data_receiver](ReadonlyBytes bytes) {
-            // 1. If one or more bytes have been transmitted from response’s message body, then:
-            if (!bytes.is_empty()) {
-                // 1. Let bytes be the transmitted bytes.
-
-                // FIXME: 2. Let codings be the result of extracting header list values given `Content-Encoding` and response’s header list.
-                // FIXME: 3. Increase response’s body info’s encoded size by bytes’s length.
-                // FIXME: 4. Set bytes to the result of handling content codings given codings and bytes.
-                // FIXME: 5. Increase response’s body info’s decoded size by bytes’s length.
-                // FIXME: 6. If bytes is failure, then terminate fetchParams’s controller.
-
-                // 7. Append bytes to buffer.
-                fetched_data_receiver->on_data_received(bytes);
-
-                // FIXME: 8. If the size of buffer is larger than an upper limit chosen by the user agent, ask the user agent
-                //           to suspend the ongoing fetch.
-            }
+        fetched_data_receiver->handle_network_bytes(bytes, FetchedDataReceiver::NetworkState::Ongoing);
    });

-        auto on_complete = GC::create_function(vm.heap(), [&vm, &realm, pending_response, stream](bool success, Requests::RequestTimingInfo const&, Optional<StringView> error_message) {
-            dbgln("FIXME: Implement on_complete timing info for unbuffered requests");
+    auto on_complete = GC::create_function(vm.heap(), [&vm, &realm, pending_response, stream, fetched_data_receiver](bool success, Requests::RequestTimingInfo const&, Optional<StringView> error_message) {
+        // FIXME: Implement on_complete timing info for unbuffered requests
        HTML::TemporaryExecutionContext execution_context { realm, HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

-            // 16.1.1.2. Otherwise, if the bytes transmission for response’s message body is done normally and stream is readable,
-            //           then close stream, and abort these in-parallel steps.
        if (success) {
-                if (stream->is_readable())
-                    stream->close();
-            }
+            fetched_data_receiver->handle_network_bytes({}, FetchedDataReceiver::NetworkState::Complete);
+        } else {
            // 16.1.2.2. Otherwise, if stream is readable, error stream with a TypeError.
-            else {
-                auto error = MUST(String::formatted("Load failed: {}", error_message));
+            auto error = MUST(String::formatted("Load failed: {}", error_message.value_or("Unknown error"sv)));

            if (stream->is_readable())
                stream->error(JS::TypeError::create(realm, error));
@@ -2456,67 +2436,6 @@ GC::Ref<PendingResponse> nonstandard_resource_loader_file_or_http_network_fetch(
    });

    ResourceLoader::the().load_unbuffered(load_request, on_headers_received, on_data_received, on_complete);
-    } else {
-        auto on_load_success = GC::create_function(vm.heap(), [&realm, &vm, request, pending_response, fetch_timing_info, cross_origin_isolated_capability](ReadonlyBytes data, Requests::RequestTimingInfo const& timing_info, HTTP::HeaderMap const& response_headers, Optional<u32> status_code, Optional<String> const& reason_phrase) {
-            (void)request;
-            dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' complete", request->url());
-            if constexpr (WEB_FETCH_DEBUG)
-                log_response(status_code, response_headers, data);
-            auto [body, _] = TRY_OR_IGNORE(extract_body(realm, data));
-            auto response = Infrastructure::Response::create(vm);
-            response->set_status(status_code.value_or(200));
-            response->set_body(move(body));
-            auto body_info = response->body_info();
-            body_info.encoded_size = timing_info.encoded_body_size;
-            body_info.decoded_size = data.size();
-            response->set_body_info(body_info);
-            for (auto const& [name, value] : response_headers.headers()) {
-                auto header = Infrastructure::Header::from_latin1_pair(name, value);
-                response->header_list()->append(move(header));
-            }
-
-            if (reason_phrase.has_value())
-                response->set_status_message(MUST(ByteBuffer::copy(reason_phrase.value().bytes())));
-
-            fetch_timing_info->update_final_timings(timing_info, cross_origin_isolated_capability);
-
-            pending_response->resolve(response);
-        });
-
-        auto on_load_error = GC::create_function(vm.heap(), [&realm, &vm, request, pending_response, fetch_timing_info, cross_origin_isolated_capability](ByteString const& error, Requests::RequestTimingInfo const& timing_info, Optional<u32> status_code, Optional<String> const& reason_phrase, ReadonlyBytes data, HTTP::HeaderMap const& response_headers) {
-            (void)request;
-            dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' failed: {} (status {})", request->url(), error, status_code.value_or(0));
-            if constexpr (WEB_FETCH_DEBUG)
-                log_response(status_code, response_headers, data);
-            auto response = Infrastructure::Response::create(vm);
-            // FIXME: This is ugly, ResourceLoader should tell us.
-            if (status_code.value_or(0) == 0) {
-                response = Infrastructure::Response::network_error(vm, TRY_OR_IGNORE(String::from_byte_string(error)));
-            } else {
-                response->set_type(Infrastructure::Response::Type::Error);
-                response->set_status(status_code.value_or(400));
-                auto [body, _] = TRY_OR_IGNORE(extract_body(realm, data));
-                response->set_body(move(body));
-                auto body_info = response->body_info();
-                body_info.encoded_size = timing_info.encoded_body_size;
-                body_info.decoded_size = data.size();
-                response->set_body_info(body_info);
-                for (auto const& [name, value] : response_headers.headers()) {
-                    auto header = Infrastructure::Header::from_latin1_pair(name, value);
-                    response->header_list()->append(move(header));
-                }
-
-                if (reason_phrase.has_value())
-                    response->set_status_message(MUST(ByteBuffer::copy(reason_phrase.value().bytes())));
-            }
-
-            fetch_timing_info->update_final_timings(timing_info, cross_origin_isolated_capability);
-
-            pending_response->resolve(response);
-        });
-
-        ResourceLoader::the().load(load_request, on_load_success, on_load_error);
-    }

    return pending_response;
 }
--- a/Libraries/LibWeb/Loader/ResourceLoader.cpp
+++ b/Libraries/LibWeb/Loader/ResourceLoader.cpp
@@ -544,8 +544,9 @@ void ResourceLoader::load_unbuffered(LoadRequest& request, GC::Root<OnHeadersRec
    }

    if (!url.scheme().is_one_of("http"sv, "https"sv)) {
-        // FIXME: Non-network requests from fetch should not go through this path.
-        on_complete->function()(false, {}, "Cannot establish connection non-network scheme"sv);
+        auto not_implemented_error = ByteString::formatted("Protocol not implemented: {}", url.scheme());
+        log_failure(request, not_implemented_error);
+        on_complete->function()(false, {}, not_implemented_error);
        return;
    }