2022-07-12 18:04:24 +01:00
|
|
|
|
/*
|
2023-03-02 23:26:35 +00:00
|
|
|
|
* Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
|
2024-11-04 14:11:07 +01:00
|
|
|
|
* Copyright (c) 2024, Jelle Raaijmakers <jelle@ladybird.org>
|
2022-07-12 18:04:24 +01:00
|
|
|
|
*
|
|
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
2022-10-02 10:59:22 +01:00
|
|
|
|
#include <LibJS/Runtime/PromiseCapability.h>
|
2024-11-04 14:11:07 +01:00
|
|
|
|
#include <LibWeb/Bindings/ExceptionOrUtils.h>
|
2022-09-25 19:25:53 +01:00
|
|
|
|
#include <LibWeb/Bindings/MainThreadVM.h>
|
2022-10-30 14:39:32 +00:00
|
|
|
|
#include <LibWeb/Fetch/BodyInit.h>
|
2022-07-12 18:04:24 +01:00
|
|
|
|
#include <LibWeb/Fetch/Infrastructure/HTTP/Bodies.h>
|
2024-05-17 22:06:27 +02:00
|
|
|
|
#include <LibWeb/Fetch/Infrastructure/IncrementalReadLoopReadRequest.h>
|
2023-02-28 18:12:44 +00:00
|
|
|
|
#include <LibWeb/Fetch/Infrastructure/Task.h>
|
2024-01-27 09:16:09 -05:00
|
|
|
|
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
|
2025-04-17 15:47:53 -04:00
|
|
|
|
#include <LibWeb/Streams/ReadableStream.h>
|
2022-07-12 18:04:24 +01:00
|
|
|
|
|
2022-07-17 23:52:02 +01:00
|
|
|
|
namespace Web::Fetch::Infrastructure {
|
2022-07-12 18:04:24 +01:00
|
|
|
|
|
2024-11-15 04:01:23 +13:00
|
|
|
|
// NOTE(review): presumably emits the GC allocator definition for Body - confirm against the GC_DEFINE_ALLOCATOR macro.
GC_DEFINE_ALLOCATOR(Body);
|
2023-11-19 19:47:52 +01:00
|
|
|
|
|
LibWeb: Support MIME type sniffing for streaming HTTP responses
Previously, when loading a document, we would try to sniff the MIME
type by reading from the response body's source. However, for streaming
HTTP responses, the body source is Empty (the data comes through the
stream instead), so we had no bytes to sniff.
This caused pages like hypr.land (which sends no Content-Type header)
to be misidentified as plain text instead of HTML, since the MIME
sniffing algorithm would receive zero bytes and fall back to the
default type.
The fix captures the first bytes of the response body during fetch,
storing them on the Body object. These bytes are the "resource header"
defined by the MIME Sniffing spec - up to 1445 bytes, which is enough
to identify any MIME type the spec can detect.
Since bytes may arrive asynchronously during streaming, we use a
callback mechanism: if bytes aren't ready yet when load_document()
needs them, it registers a callback that fires once enough bytes have
been captured (or the stream ends).
The flow is:
1. FetchedDataReceiver receives network bytes, buffers them
2. When Body is created, buffered bytes are flushed to Body's sniff
buffer, and subsequent bytes are appended as they arrive
3. Before calling load_document(), Navigable waits for sniff bytes
4. load_document() passes the bytes to MimeSniff::Resource::sniff()
2026-01-24 12:25:01 +01:00
|
|
|
|
// https://mimesniff.spec.whatwg.org/#reading-the-resource-header
|
|
|
|
|
|
// To read the resource header, a user agent MUST read bytes of the resource until one of the following conditions is met:
|
|
|
|
|
|
// - the end of the resource is reached
|
|
|
|
|
|
// - 1445 or more bytes have been read
|
|
|
|
|
|
// Upper bound on how many leading body bytes are kept (or exposed) for MIME sniffing.
static constexpr size_t MAX_SNIFF_BYTES = 1445;
|
|
|
|
|
|
|
2026-02-12 16:11:55 +00:00
|
|
|
|
// Converts the public source variant into the internal one stored on Body:
// - an empty source stays empty,
// - a byte buffer is moved across,
// - a rooted blob handle becomes a plain GC::Ref to the same blob.
static Body::SourceTypeInternal to_source_type_internal(Body::SourceType&& source_type)
{
    using Internal = Body::SourceTypeInternal;

    return source_type.visit(
        [](GC::Root<FileAPI::Blob> const& rooted_blob) -> Internal { return GC::Ref { *rooted_blob }; },
        [](ByteBuffer& bytes) -> Internal { return move(bytes); },
        [](Empty) -> Internal { return Empty {}; });
}
|
|
|
|
|
|
|
2024-11-15 04:01:23 +13:00
|
|
|
|
// Allocates a Body on the VM's heap that only wraps the given stream.
GC::Ref<Body> Body::create(JS::VM& vm, GC::Ref<Streams::ReadableStream> stream)
{
    auto& heap = vm.heap();
    return heap.allocate<Body>(stream);
}
|
|
|
|
|
|
|
2024-11-15 04:01:23 +13:00
|
|
|
|
// Allocates a Body from the public source representation by first converting it to the internal
// representation and then delegating to the SourceTypeInternal overload.
GC::Ref<Body> Body::create(JS::VM& vm, GC::Ref<Streams::ReadableStream> stream, SourceType source, Optional<u64> length)
{
    auto internal_source = to_source_type_internal(move(source));
    return create(vm, stream, move(internal_source), length);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Allocates a Body on the VM's heap with the given stream, source and length.
GC::Ref<Body> Body::create(JS::VM& vm, GC::Ref<Streams::ReadableStream> stream, SourceTypeInternal source, Optional<u64> length)
{
    // `source` is our own by-value copy and may hold a ByteBuffer. Hand it on as an rvalue so the
    // constructor's by-value parameter is move-constructed instead of copying the buffer again.
    // NOTE(review): relies on Heap::allocate forwarding its arguments to the constructor - confirm.
    return vm.heap().allocate<Body>(stream, move(source), length);
}
|
|
|
|
|
|
|
2024-11-15 04:01:23 +13:00
|
|
|
|
Body::Body(GC::Ref<Streams::ReadableStream> stream)
|
2025-04-17 15:47:53 -04:00
|
|
|
|
: m_stream(stream)
|
2022-07-12 18:04:24 +01:00
|
|
|
|
{
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-12 16:11:55 +00:00
|
|
|
|
Body::Body(GC::Ref<Streams::ReadableStream> stream, SourceTypeInternal source, Optional<u64> length)
|
2025-04-17 15:47:53 -04:00
|
|
|
|
: m_stream(stream)
|
2022-07-12 18:04:24 +01:00
|
|
|
|
, m_source(move(source))
|
|
|
|
|
|
, m_length(move(length))
|
|
|
|
|
|
{
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2023-08-18 19:38:13 +02:00
|
|
|
|
// Reports every GC-managed member to the visitor: the stream, the pending sniff-bytes callback,
// and the blob when the source currently holds one.
void Body::visit_edges(Cell::Visitor& visitor)
{
    Base::visit_edges(visitor);

    visitor.visit(m_stream);
    visitor.visit(m_sniff_bytes_callback);

    // Only the blob alternative of the source is a GC reference; the other alternatives need no visiting.
    if (m_source.has<GC::Ref<FileAPI::Blob>>())
        visitor.visit(m_source.get<GC::Ref<FileAPI::Blob>>());
}
|
|
|
|
|
|
|
|
|
|
|
|
// Appends as many of the given bytes as still fit below MAX_SNIFF_BYTES to the sniff buffer and
// marks the buffer complete once the limit has been reached. Does nothing if already complete.
void Body::append_sniff_bytes(ReadonlyBytes bytes)
{
    if (m_sniff_bytes_complete)
        return;

    auto const capacity_left = MAX_SNIFF_BYTES - m_sniff_bytes.size();

    // With no room left there is nothing to copy; the size check below then finalizes the buffer.
    if (capacity_left != 0) {
        auto const take_count = min(bytes.size(), capacity_left);
        m_sniff_bytes.append(bytes.slice(0, take_count));
    }

    if (m_sniff_bytes.size() >= MAX_SNIFF_BYTES)
        set_sniff_bytes_complete();
}
|
|
|
|
|
|
|
|
|
|
|
|
void Body::set_sniff_bytes_complete()
|
|
|
|
|
|
{
|
|
|
|
|
|
if (m_sniff_bytes_complete)
|
|
|
|
|
|
return;
|
|
|
|
|
|
m_sniff_bytes_complete = true;
|
|
|
|
|
|
if (m_sniff_bytes_callback) {
|
|
|
|
|
|
auto callback = exchange(m_sniff_bytes_callback, nullptr);
|
|
|
|
|
|
callback->function()(m_sniff_bytes);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Returns the bytes to use for MIME sniffing if they can be provided right now, or an empty
// Optional if the sniff buffer is still being filled.
Optional<ReadonlyBytes> Body::sniff_bytes_if_available() const
{
    // Limits a byte span to at most MAX_SNIFF_BYTES leading bytes.
    auto const leading_bytes = [](ReadonlyBytes all_bytes) -> Optional<ReadonlyBytes> {
        auto const count = min(all_bytes.size(), MAX_SNIFF_BYTES);
        return all_bytes.slice(0, count);
    };

    // Source holds a byte buffer: sniff straight from it.
    if (m_source.has<ByteBuffer>())
        return leading_bytes(m_source.get<ByteBuffer>().bytes());

    // Source holds a blob: sniff from the blob's raw bytes.
    if (m_source.has<GC::Ref<FileAPI::Blob>>())
        return leading_bytes(m_source.get<GC::Ref<FileAPI::Blob>>()->raw_bytes());

    // Otherwise the bytes come from the sniff buffer, which is only handed out once it is complete.
    if (!m_sniff_bytes_complete)
        return {};

    return m_sniff_bytes;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Invokes on_ready immediately if the sniff bytes are already available; otherwise stores it so
// that set_sniff_bytes_complete() can invoke it later. A previously stored callback is replaced.
void Body::wait_for_sniff_bytes(SniffBytesCallback on_ready)
{
    auto const available_bytes = sniff_bytes_if_available();

    if (!available_bytes.has_value()) {
        // Nothing to hand out yet: remember the callback until the buffer is completed.
        m_sniff_bytes_callback = on_ready;
        return;
    }

    on_ready->function()(available_bytes.value());
}
|
|
|
|
|
|
|
2022-09-25 19:25:53 +01:00
|
|
|
|
// https://fetch.spec.whatwg.org/#concept-body-clone
|
2024-11-15 04:01:23 +13:00
|
|
|
|
// Tees this body's stream, keeps one branch for this body and returns a new body that owns the
// other branch together with copies of this body's source and length.
GC::Ref<Body> Body::clone(JS::Realm& realm)
{
    HTML::TemporaryExecutionContext execution_context { realm, HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

    // To clone a body body, run these steps:
    // 1. Let « out1, out2 » be the result of teeing body’s stream.
    auto [retained_branch, cloned_branch] = m_stream->tee(&realm).release_value_but_fixme_should_propagate_errors();

    // 2. Set body’s stream to out1.
    m_stream = retained_branch;

    // 3. Return a body whose stream is out2 and other members are copied from body.
    // NOTE(review): only the source and length are handed to the new body; the sniff buffer and its
    //               completion flag are not - confirm that this is intended.
    auto& vm = realm.vm();
    return Body::create(vm, *cloned_branch, m_source, m_length);
}
|
|
|
|
|
|
|
2023-02-28 18:12:44 +00:00
|
|
|
|
// https://fetch.spec.whatwg.org/#body-fully-read
|
2024-04-26 14:57:40 -04:00
|
|
|
|
// Reads the whole body stream and reports the outcome through fetch tasks queued on
// task_destination: process_body receives the collected bytes on success, process_body_error
// receives the exception value on failure.
void Body::fully_read(JS::Realm& realm, Web::Fetch::Infrastructure::Body::ProcessBodyCallback process_body, Web::Fetch::Infrastructure::Body::ProcessBodyErrorCallback process_body_error, TaskDestination task_destination) const
{
    HTML::TemporaryExecutionContext execution_context { realm, HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

    // 1. If taskDestination is null, then set taskDestination to the result of starting a new parallel queue.
    if (task_destination.has<Empty>())
        task_destination = HTML::ParallelQueue::create();

    // 2. Let successSteps given a byte sequence bytes be to queue a fetch task to run processBody given bytes, with taskDestination.
    auto success_steps = [&realm, process_body, task_destination](ByteBuffer bytes) {
        auto deliver_bytes = GC::create_function(realm.heap(), [process_body, bytes = move(bytes)]() mutable {
            process_body->function()(move(bytes));
        });
        queue_fetch_task(task_destination, deliver_bytes);
    };

    // 3. Let errorSteps optionally given an exception exception be to queue a fetch task to run processBodyError given
    //    exception, with taskDestination.
    auto error_steps = [&realm, process_body_error, task_destination](JS::Value exception) {
        auto deliver_error = GC::create_function(realm.heap(), [process_body_error, exception]() {
            process_body_error->function()(exception);
        });
        queue_fetch_task(task_destination, deliver_error);
    };

    // 4. Let reader be the result of getting a reader for body’s stream. If that threw an exception, then run errorSteps
    //    with that exception and return.
    auto reader_or_exception = m_stream->get_a_reader();
    if (reader_or_exception.is_exception()) {
        auto completion = Bindings::exception_to_throw_completion(realm.vm(), reader_or_exception.release_error());
        error_steps(completion.release_value());
        return;
    }

    // 5. Read all bytes from reader, given successSteps and errorSteps.
    auto on_success = GC::create_function(realm.heap(), move(success_steps));
    auto on_failure = GC::create_function(realm.heap(), move(error_steps));
    reader_or_exception.value()->read_all_bytes(on_success, on_failure);
}
|
|
|
|
|
|
|
2024-05-17 22:06:27 +02:00
|
|
|
|
// https://fetch.spec.whatwg.org/#body-incrementally-read
|
|
|
|
|
|
// Starts reading the body stream chunk by chunk. Acquires a reader for the stream and hands it,
// together with the callbacks and the task destination, to incrementally_read_loop().
// A null (Empty) task_destination is replaced by a freshly started parallel queue.
void Body::incrementally_read(ProcessBodyChunkCallback process_body_chunk, ProcessEndOfBodyCallback process_end_of_body, ProcessBodyErrorCallback process_body_error, TaskDestination task_destination)
{
    HTML::TemporaryExecutionContext const execution_context { m_stream->realm(), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

    // 1. If taskDestination is null, then set taskDestination to the result of starting a new parallel queue.
    if (task_destination.has<Empty>())
        task_destination = HTML::ParallelQueue::create();

    // 2. Let reader be the result of getting a reader for body’s stream.
    // NOTE: This operation will not throw an exception.
    auto reader = MUST(m_stream->get_a_reader());

    // 3. Perform the incrementally-read loop given reader, taskDestination, processBodyChunk, processEndOfBody, and processBodyError.
    VERIFY(!task_destination.has<Empty>());

    // Forward the destination variant as-is. incrementally_read_loop() takes a TaskDestination, and
    // extracting a GC::Ref<JS::Object> here would assert whenever the destination is the parallel
    // queue created in step 1 (or one supplied by the caller).
    incrementally_read_loop(reader, move(task_destination), process_body_chunk, process_end_of_body, process_body_error);
}
|
|
|
|
|
|
|
|
|
|
|
|
// https://fetch.spec.whatwg.org/#incrementally-read-loop
|
2025-07-16 12:29:26 +02:00
|
|
|
|
// Performs one iteration of the incrementally-read loop: builds a read request bound to this
// body, the reader, the task destination and the callbacks, then asks the reader for one chunk.
void Body::incrementally_read_loop(Streams::ReadableStreamDefaultReader& reader, TaskDestination task_destination, ProcessBodyChunkCallback process_body_chunk, ProcessEndOfBodyCallback process_end_of_body, ProcessBodyErrorCallback process_body_error)
{
    // 1. Let readRequest be the following read request:
    auto chunk_request = reader.realm().create<IncrementalReadLoopReadRequest>(
        *this,
        reader,
        task_destination,
        process_body_chunk,
        process_end_of_body,
        process_body_error);

    // 2. Read a chunk from reader given readRequest.
    reader.read_a_chunk(chunk_request);
}
|
|
|
|
|
|
|
2022-10-30 14:39:32 +00:00
|
|
|
|
// https://fetch.spec.whatwg.org/#byte-sequence-as-a-body
|
2024-11-04 16:06:01 +01:00
|
|
|
|
// Wraps a byte sequence in a Body by safely extracting it and keeping only the resulting body.
GC::Ref<Body> byte_sequence_as_body(JS::Realm& realm, ReadonlyBytes bytes)
{
    // To get a byte sequence bytes as a body, return the body of the result of safely extracting bytes.
    auto [extracted_body, unused_remainder] = safely_extract_body(realm, bytes);

    // Only the first element of the extraction result is needed here.
    return extracted_body;
}
|
|
|
|
|
|
|
2022-07-12 18:04:24 +01:00
|
|
|
|
}
|