LibHTTP: Parse token-list headers according to their ABNF

The previous implementation did not fully align with each
header's ABNF, so it did not reject some header values that
should have been rejected.

Fixes 6 WPT subtests for

https://wpt.live/cors/access-control-expose-headers-parsing.window.html
This commit is contained in:
Shannon Booth 2026-02-28 18:36:44 +01:00 committed by Luke Wilde
parent 65b08e7d9f
commit db5f16f042
Notes: github-actions[bot] 2026-03-01 18:17:39 +00:00
2 changed files with 128 additions and 15 deletions

View file

@ -1,6 +1,7 @@
/*
* Copyright (c) 2024, Andreas Kling <andreas@ladybird.org>
* Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
* Copyright (c) 2026, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -23,30 +24,55 @@ Header Header::isomorphic_encode(StringView name, StringView value)
return { TextCodec::isomorphic_encode(name), TextCodec::isomorphic_encode(value) };
}
// Parses a comma-separated list of tokens ("#token" in ABNF list notation).
//
// Returns the whitespace-trimmed list items in their original order, or an empty Optional (failure)
// as soon as any item is empty after trimming or is not accepted by is_header_name().
// A value that yields no items at all produces an empty list, not a failure; callers that need
// "1#token" semantics have to reject the empty list themselves.
static Optional<Vector<ByteString>> extract_token_headers(ByteString const& value)
{
    Vector<ByteString> tokens;

    for (auto const& item : value.split(',', SplitBehavior::Nothing)) {
        auto const token = item.trim(HTTP_WHITESPACE, TrimMode::Both);

        // An item consisting solely of whitespace, or containing a non-token character, is invalid.
        if (token.is_empty() || !is_header_name(token))
            return {};

        tokens.append(token);
    }

    return tokens;
}
// https://fetch.spec.whatwg.org/#extract-header-values
Optional<Vector<ByteString>> Header::extract_header_values() const
{
// FIXME: 1. If parsing header's value, per the ABNF for header's name, fails, then return failure.
// FIXME: 2. Return one or more values resulting from parsing header's value, per the ABNF for header's name.
// NB: There is some specification work to try and rework this function, see: https://github.com/whatwg/fetch/issues/814
// For now we only parse some headers that are of the ABNF list form "#something"
// 1. If parsing header's value, per the ABNF for header's name, fails, then return failure.
// 2. Return one or more values resulting from parsing header's value, per the ABNF for header's name.
// ABNF taken from:
// * https://fetch.spec.whatwg.org/#http-new-header-syntax
// * https://httpwg.org/specs/rfc9110.html#field.accept-ranges
// Access-Control-Expose-Headers = #field-name (field-name = token)
// Access-Control-Allow-Headers = #field-name (field-name = token)
// Access-Control-Allow-Methods = #method (method = token)
if (name.is_one_of_ignoring_ascii_case(
"Accept-Ranges"sv,
"Access-Control-Request-Headers"sv,
"Access-Control-Expose-Headers"sv,
"Access-Control-Allow-Headers"sv,
"Access-Control-Allow-Methods"sv)
&& !value.is_empty()) {
Vector<ByteString> trimmed_values;
value.view().for_each_split_view(',', SplitBehavior::Nothing, [&](auto value) {
trimmed_values.append(value.trim(" \t"sv));
});
return trimmed_values;
"Access-Control-Allow-Methods"sv)) {
return extract_token_headers(value);
}
// This always ignores the ABNF rules for now and returns the header value as a single list item.
// Access-Control-Request-Headers = 1#field-name (field-name = token)
// Accept-Ranges = acceptable-ranges (acceptable-ranges = 1#range-unit, range-unit = token)
if (name.is_one_of_ignoring_ascii_case(
"Access-Control-Request-Headers"sv,
"Accept-Ranges"sv)) {
if (auto headers = extract_token_headers(value); headers.has_value()) {
if (headers->is_empty())
return {};
return headers;
}
return {};
}
// FIXME: What other headers should we handle here (or elsewhere?)
return Vector { value };
}

View file

@ -10,6 +10,7 @@
#include <AK/String.h>
#include <LibHTTP/Cache/Utilities.h>
#include <LibHTTP/HTTP.h>
#include <LibHTTP/Header.h>
TEST_CASE(collect_an_http_quoted_string)
{
@ -125,3 +126,89 @@ TEST_CASE(extract_cache_control_directive)
EXPECT_EQ(HTTP::extract_cache_control_directive("max-age=4="sv, "max-age"sv), "4="sv);
EXPECT(!HTTP::contains_cache_control_directive("=4"sv, "max-age"sv));
}
TEST_CASE(extract_header_values)
{
    // A header whose value is parsed as a comma-separated list of tokens.
    struct TokenListHeader {
        StringView name;
        bool must_be_non_empty; // true = 1#token, false = #token
    };

    // The lowercase spelling is listed as well; it is expected to behave exactly like the canonical one.
    TokenListHeader const token_list_headers[] = {
        { "Access-Control-Expose-Headers"sv, false },
        { "access-control-expose-headers"sv, false },
        { "Access-Control-Allow-Headers"sv, false },
        { "Access-Control-Allow-Methods"sv, false },
        { "Access-Control-Request-Headers"sv, true },
        { "Accept-Ranges"sv, true },
    };

    // Inputs that contain no token at all.
    StringView const values_without_tokens[] = { ""sv, ",,,"sv };

    for (auto const& token_list_header : token_list_headers) {
        // Builds a header with the current name and the given value, then extracts its values.
        auto const extract = [&](StringView value) {
            return HTTP::Header { token_list_header.name, value }.extract_header_values();
        };

        // --- Values that must parse successfully ---

        // A lone token.
        EXPECT_EQ(extract("bb-8"sv), (Vector<ByteString> { "bb-8" }));

        // Several tokens; whitespace around each item is stripped.
        EXPECT_EQ(extract("bb-8, no"sv), (Vector<ByteString> { "bb-8", "no" }));

        // "*" counts as a token.
        EXPECT_EQ(extract("*"sv), (Vector<ByteString> { "*" }));

        // The apostrophe is a tchar, so single-quoted items are tokens too.
        EXPECT_EQ(extract("'bb-8',bb-8"sv), (Vector<ByteString> { "'bb-8'", "bb-8" }));

        // Empty items caused by leading/trailing commas are dropped.
        EXPECT_EQ(extract(",bb-8,"sv), (Vector<ByteString> { "bb-8" }));

        // --- Values without any token: failure for 1#token, empty list for #token ---

        for (auto const value_without_tokens : values_without_tokens) {
            if (token_list_header.must_be_non_empty) {
                EXPECT_EQ(extract(value_without_tokens), OptionalNone {});
            } else {
                EXPECT_EQ(extract(value_without_tokens), (Vector<ByteString> {}));
            }
        }

        // --- Values that must be rejected ---

        // A space in the middle of an item.
        EXPECT_EQ(extract("no no"sv), OptionalNone {});

        // Double quotes are not token characters.
        EXPECT_EQ(extract("\"bb-8\",bb-8"sv), OptionalNone {});

        // Neither is '@'.
        EXPECT_EQ(extract("@invalid,bb-8"sv), OptionalNone {});

        // Vertical tab (0x0B) is not trimmed and is not a token character.
        EXPECT_EQ(extract("bb-8\x0B"sv), OptionalNone {});

        // Same for form feed (0x0C).
        EXPECT_EQ(extract("bb-8\x0C"sv), OptionalNone {});

        // One bad item poisons the whole list, even next to a good one.
        EXPECT_EQ(extract("bb-8,no no"sv), OptionalNone {});

        // An item made up of whitespace only is rejected rather than skipped.
        EXPECT_EQ(extract("bb-8, ,no"sv), OptionalNone {});
    }

    // Any other header is handed back untouched as a one-element list.
    auto const content_type_values = HTTP::Header { "Content-Type"sv, "text/html; charset=utf-8"sv }.extract_header_values();
    EXPECT_EQ(content_type_values, (Vector<ByteString> { "text/html; charset=utf-8" }));
}