mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2026-04-19 02:10:26 +00:00
It turns out that the validation of header values in db5f16f042
was a bit over aggressive. extract_token_headers previously treated
empty list elements (empty or whitespace-only after trimming) as parse
failures. This is incorrect per RFC 9110, which specifies that
recipients must ignore empty list elements in comma-separated header
values.
> A recipient MUST parse and ignore a reasonable number of empty
> list elements
368 lines
14 KiB
C++
368 lines
14 KiB
C++
/*
|
||
* Copyright (c) 2024, Andreas Kling <andreas@ladybird.org>
|
||
* Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
|
||
* Copyright (c) 2026, Shannon Booth <shannon@serenityos.org>
|
||
*
|
||
* SPDX-License-Identifier: BSD-2-Clause
|
||
*/
|
||
|
||
#include <AK/GenericLexer.h>
|
||
#include <AK/QuickSort.h>
|
||
#include <LibHTTP/HTTP.h>
|
||
#include <LibHTTP/Header.h>
|
||
#include <LibHTTP/Method.h>
|
||
#include <LibIPC/Decoder.h>
|
||
#include <LibIPC/Encoder.h>
|
||
#include <LibRegex/Regex.h>
|
||
#include <LibTextCodec/Decoder.h>
|
||
#include <LibTextCodec/Encoder.h>
|
||
|
||
namespace HTTP {
|
||
|
||
Header Header::isomorphic_encode(StringView name, StringView value)
|
||
{
|
||
return { TextCodec::isomorphic_encode(name), TextCodec::isomorphic_encode(value) };
|
||
}
|
||
|
||
// https://www.rfc-editor.org/rfc/rfc9110.html#name-recipient-requirements
|
||
static Optional<Vector<ByteString>> extract_token_headers(ByteString const& value)
|
||
{
|
||
Vector<ByteString> result;
|
||
for (auto& part : value.split(',', SplitBehavior::Nothing)) {
|
||
auto trimmed = part.trim(HTTP_WHITESPACE, TrimMode::Both);
|
||
if (trimmed.is_empty())
|
||
continue;
|
||
if (!is_header_name(trimmed))
|
||
return {};
|
||
result.append(move(trimmed));
|
||
}
|
||
return result;
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#extract-header-values
|
||
Optional<Vector<ByteString>> Header::extract_header_values() const
|
||
{
|
||
// NB: There is some specification work to try and rework this function, see: https://github.com/whatwg/fetch/issues/814
|
||
|
||
// 1. If parsing header’s value, per the ABNF for header’s name, fails, then return failure.
|
||
// 2. Return one or more values resulting from parsing header’s value, per the ABNF for header’s name.
|
||
|
||
// ABNF taken from:
|
||
// * https://fetch.spec.whatwg.org/#http-new-header-syntax
|
||
// * https://httpwg.org/specs/rfc9110.html#field.accept-ranges
|
||
|
||
// Access-Control-Expose-Headers = #field-name (field-name = token)
|
||
// Access-Control-Allow-Headers = #field-name (field-name = token)
|
||
// Access-Control-Allow-Methods = #method (method = token)
|
||
if (name.is_one_of_ignoring_ascii_case(
|
||
"Access-Control-Expose-Headers"sv,
|
||
"Access-Control-Allow-Headers"sv,
|
||
"Access-Control-Allow-Methods"sv)) {
|
||
return extract_token_headers(value);
|
||
}
|
||
|
||
// Access-Control-Request-Headers = 1#field-name (field-name = token)
|
||
// Accept-Ranges = acceptable-ranges (acceptable-ranges = 1#range-unit, range-unit = token)
|
||
if (name.is_one_of_ignoring_ascii_case(
|
||
"Access-Control-Request-Headers"sv,
|
||
"Accept-Ranges"sv)) {
|
||
if (auto headers = extract_token_headers(value); headers.has_value()) {
|
||
if (headers->is_empty())
|
||
return {};
|
||
return headers;
|
||
}
|
||
return {};
|
||
}
|
||
|
||
// FIXME: What other headers should we handle here (or elsewhere?)
|
||
return Vector { value };
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#header-name
|
||
bool is_header_name(StringView header_name)
|
||
{
|
||
// A header name is a byte sequence that matches the field-name token production.
|
||
Regex<ECMA262Parser> regex { R"~~~(^[A-Za-z0-9!#$%&'*+\-.^_`|~]+$)~~~" };
|
||
return regex.has_match(header_name);
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#header-value
|
||
bool is_header_value(StringView header_value)
|
||
{
|
||
// A header value is a byte sequence that matches the following conditions:
|
||
// - Has no leading or trailing HTTP tab or space bytes.
|
||
// - Contains no 0x00 (NUL) or HTTP newline bytes.
|
||
if (header_value.is_empty())
|
||
return true;
|
||
|
||
auto first_byte = header_value[0];
|
||
auto last_byte = header_value[header_value.length() - 1];
|
||
|
||
if (is_http_tab_or_space(first_byte) || is_http_tab_or_space(last_byte))
|
||
return false;
|
||
|
||
return !any_of(header_value, [](auto byte) {
|
||
return byte == 0x00 || is_http_newline(byte);
|
||
});
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#concept-header-value-normalize
|
||
StringView normalize_header_value(StringView potential_value)
|
||
{
|
||
// To normalize a byte sequence potentialValue, remove any leading and trailing HTTP whitespace bytes from
|
||
// potentialValue.
|
||
if (potential_value.is_empty())
|
||
return {};
|
||
return potential_value.trim(HTTP_WHITESPACE, TrimMode::Both);
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#forbidden-header-name
|
||
bool is_forbidden_request_header(Header const& header)
|
||
{
|
||
auto const& [name, value] = header;
|
||
|
||
// 1. If name is a byte-case-insensitive match for one of:
|
||
// [...]
|
||
// then return true.
|
||
if (name.is_one_of_ignoring_ascii_case(
|
||
"Accept-Charset"sv,
|
||
"Accept-Encoding"sv,
|
||
"Access-Control-Request-Headers"sv,
|
||
"Access-Control-Request-Method"sv,
|
||
"Connection"sv,
|
||
"Content-Length"sv,
|
||
"Cookie"sv,
|
||
"Cookie2"sv,
|
||
"Date"sv,
|
||
"DNT"sv,
|
||
"Expect"sv,
|
||
"Host"sv,
|
||
"Keep-Alive"sv,
|
||
"Origin"sv,
|
||
"Referer"sv,
|
||
"Set-Cookie"sv,
|
||
"TE"sv,
|
||
"Trailer"sv,
|
||
"Transfer-Encoding"sv,
|
||
"Upgrade"sv,
|
||
"Via"sv)) {
|
||
return true;
|
||
}
|
||
|
||
// 2. If name when byte-lowercased starts with `proxy-` or `sec-`, then return true.
|
||
if (name.starts_with("proxy-"sv, CaseSensitivity::CaseInsensitive)
|
||
|| name.starts_with("sec-"sv, CaseSensitivity::CaseInsensitive)) {
|
||
return true;
|
||
}
|
||
|
||
// 3. If name is a byte-case-insensitive match for one of:
|
||
// - `X-HTTP-Method`
|
||
// - `X-HTTP-Method-Override`
|
||
// - `X-Method-Override`
|
||
// then:
|
||
if (name.is_one_of_ignoring_ascii_case(
|
||
"X-HTTP-Method"sv,
|
||
"X-HTTP-Method-Override"sv,
|
||
"X-Method-Override"sv)) {
|
||
// 1. Let parsedValues be the result of getting, decoding, and splitting value.
|
||
auto parsed_values = get_decode_and_split_header_value(value);
|
||
|
||
// 2. For each method of parsedValues: if the isomorphic encoding of method is a forbidden method, then return true.
|
||
// NB: The values returned from get_decode_and_split_header_value have already been decoded.
|
||
if (any_of(parsed_values, [](auto const& method) { return is_forbidden_method(method); }))
|
||
return true;
|
||
}
|
||
|
||
// 4. Return false.
|
||
return false;
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#forbidden-response-header-name
|
||
bool is_forbidden_response_header_name(StringView header_name)
|
||
{
|
||
// A forbidden response-header name is a header name that is a byte-case-insensitive match for one of:
|
||
// - `Set-Cookie`
|
||
// - `Set-Cookie2`
|
||
return header_name.is_one_of_ignoring_ascii_case(
|
||
"Set-Cookie"sv,
|
||
"Set-Cookie2"sv);
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#header-value-get-decode-and-split
|
||
Vector<String> get_decode_and_split_header_value(StringView value)
|
||
{
|
||
// 1. Let input be the result of isomorphic decoding value.
|
||
auto input = TextCodec::isomorphic_decode(value);
|
||
|
||
// 2. Let position be a position variable for input, initially pointing at the start of input.
|
||
GenericLexer lexer { input };
|
||
|
||
// 3. Let values be a list of strings, initially « ».
|
||
Vector<String> values;
|
||
|
||
// 4. Let temporaryValue be the empty string.
|
||
StringBuilder temporary_value_builder;
|
||
|
||
// 5. While true:
|
||
while (true) {
|
||
// 1. Append the result of collecting a sequence of code points that are not U+0022 (") or U+002C (,) from
|
||
// input, given position, to temporaryValue.
|
||
// NOTE: The result might be the empty string.
|
||
temporary_value_builder.append(lexer.consume_until(is_any_of("\","sv)));
|
||
|
||
// 2. If position is not past the end of input and the code point at position within input is U+0022 ("):
|
||
if (!lexer.is_eof() && lexer.peek() == '"') {
|
||
// 1. Append the result of collecting an HTTP quoted string from input, given position, to temporaryValue.
|
||
temporary_value_builder.append(collect_an_http_quoted_string(lexer));
|
||
|
||
// 2. If position is not past the end of input, then continue.
|
||
if (!lexer.is_eof())
|
||
continue;
|
||
}
|
||
|
||
// 3. Remove all HTTP tab or space from the start and end of temporaryValue.
|
||
auto temporary_value = MUST(String::from_utf8(temporary_value_builder.string_view().trim(HTTP_TAB_OR_SPACE, TrimMode::Both)));
|
||
|
||
// 4. Append temporaryValue to values.
|
||
values.append(move(temporary_value));
|
||
|
||
// 5. Set temporaryValue to the empty string.
|
||
temporary_value_builder.clear();
|
||
|
||
// 6. If position is past the end of input, then return values.
|
||
if (lexer.is_eof())
|
||
return values;
|
||
|
||
// 7. Assert: the code point at position within input is U+002C (,).
|
||
VERIFY(lexer.peek() == ',');
|
||
|
||
// 8. Advance position by 1.
|
||
lexer.ignore(1);
|
||
}
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#convert-header-names-to-a-sorted-lowercase-set
|
||
Vector<ByteString> convert_header_names_to_a_sorted_lowercase_set(ReadonlySpan<ByteString> header_names)
|
||
{
|
||
// 1. Let headerNamesSet be a new ordered set.
|
||
HashTable<StringView, CaseInsensitiveASCIIStringTraits> header_names_seen;
|
||
Vector<ByteString> header_names_set;
|
||
|
||
// 2. For each name of headerNames, append the result of byte-lowercasing name to headerNamesSet.
|
||
for (auto const& name : header_names) {
|
||
if (header_names_seen.contains(name))
|
||
continue;
|
||
|
||
header_names_seen.set(name);
|
||
header_names_set.append(name.to_lowercase());
|
||
}
|
||
|
||
// 3. Return the result of sorting headerNamesSet in ascending order with byte less than.
|
||
quick_sort(header_names_set);
|
||
return header_names_set;
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#build-a-content-range
|
||
ByteString build_content_range(u64 range_start, u64 range_end, u64 full_length)
|
||
{
|
||
// 1. Let contentRange be `bytes `.
|
||
// 2. Append rangeStart, serialized and isomorphic encoded, to contentRange.
|
||
// 3. Append 0x2D (-) to contentRange.
|
||
// 4. Append rangeEnd, serialized and isomorphic encoded to contentRange.
|
||
// 5. Append 0x2F (/) to contentRange.
|
||
// 6. Append fullLength, serialized and isomorphic encoded to contentRange.
|
||
// 7. Return contentRange.
|
||
return ByteString::formatted("bytes {}-{}/{}", range_start, range_end, full_length);
|
||
}
|
||
|
||
// https://fetch.spec.whatwg.org/#simple-range-header-value
|
||
Optional<RangeHeaderValue> parse_single_range_header_value(StringView value, bool allow_whitespace)
|
||
{
|
||
// 1. Let data be the isomorphic decoding of value.
|
||
auto data = TextCodec::isomorphic_decode(value);
|
||
|
||
// 2. If data does not start with "bytes", then return failure.
|
||
if (!data.starts_with_bytes("bytes"sv))
|
||
return {};
|
||
|
||
// 3. Let position be a position variable for data, initially pointing at the 5th code point of data.
|
||
GenericLexer lexer { data };
|
||
lexer.ignore(5);
|
||
|
||
// 4. If allowWhitespace is true, collect a sequence of code points that are HTTP tab or space, from data given position.
|
||
if (allow_whitespace)
|
||
lexer.consume_while(is_http_tab_or_space);
|
||
|
||
// 5. If the code point at position within data is not U+003D (=), then return failure.
|
||
// 6. Advance position by 1.
|
||
if (!lexer.consume_specific('='))
|
||
return {};
|
||
|
||
// 7. If allowWhitespace is true, collect a sequence of code points that are HTTP tab or space, from data given position.
|
||
if (allow_whitespace)
|
||
lexer.consume_while(is_http_tab_or_space);
|
||
|
||
// 8. Let rangeStart be the result of collecting a sequence of code points that are ASCII digits, from data given position.
|
||
auto range_start = lexer.consume_while(is_ascii_digit);
|
||
|
||
// 9. Let rangeStartValue be rangeStart, interpreted as decimal number, if rangeStart is not the empty string;
|
||
// otherwise null.
|
||
auto range_start_value = range_start.to_number<u64>();
|
||
|
||
// 10. If allowWhitespace is true, collect a sequence of code points that are HTTP tab or space, from data given position.
|
||
if (allow_whitespace)
|
||
lexer.consume_while(is_http_tab_or_space);
|
||
|
||
// 11. If the code point at position within data is not U+002D (-), then return failure.
|
||
// 12. Advance position by 1.
|
||
if (!lexer.consume_specific('-'))
|
||
return {};
|
||
|
||
// 13. If allowWhitespace is true, collect a sequence of code points that are HTTP tab or space, from data given position.
|
||
if (allow_whitespace)
|
||
lexer.consume_while(is_http_tab_or_space);
|
||
|
||
// 14. Let rangeEnd be the result of collecting a sequence of code points that are ASCII digits, from data given position.
|
||
auto range_end = lexer.consume_while(is_ascii_digit);
|
||
|
||
// 15. Let rangeEndValue be rangeEnd, interpreted as decimal number, if rangeEnd is not the empty string; otherwise null.
|
||
auto range_end_value = range_end.to_number<u64>();
|
||
|
||
// 16. If position is not past the end of data, then return failure.
|
||
if (!lexer.is_eof())
|
||
return {};
|
||
|
||
// 17. If rangeEndValue and rangeStartValue are null, then return failure.
|
||
if (!range_end_value.has_value() && !range_start_value.has_value())
|
||
return {};
|
||
|
||
// 18. If rangeStartValue and rangeEndValue are numbers, and rangeStartValue is greater than rangeEndValue, then
|
||
// return failure.
|
||
if (range_start_value.has_value() && range_end_value.has_value() && *range_start_value > *range_end_value)
|
||
return {};
|
||
|
||
// 19. Return (rangeStartValue, rangeEndValue).
|
||
return RangeHeaderValue { range_start_value, range_end_value };
|
||
}
|
||
|
||
}
|
||
|
||
namespace IPC {
|
||
|
||
template<>
|
||
ErrorOr<void> encode(Encoder& encoder, HTTP::Header const& header)
|
||
{
|
||
TRY(encoder.encode(header.name));
|
||
TRY(encoder.encode(header.value));
|
||
return {};
|
||
}
|
||
|
||
template<>
|
||
ErrorOr<HTTP::Header> decode(Decoder& decoder)
|
||
{
|
||
auto name = TRY(decoder.decode<ByteString>());
|
||
auto value = TRY(decoder.decode<ByteString>());
|
||
return HTTP::Header { move(name), move(value) };
|
||
}
|
||
|
||
}
|