ladybird/Libraries/LibWeb/CSS/Parser/Helpers.cpp
2025-10-16 16:44:42 +02:00

214 lines
9.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2018-2025, Andreas Kling <andreas@ladybird.org>
* Copyright (c) 2020-2023, the SerenityOS developers.
* Copyright (c) 2021-2024, Sam Atkins <atkinssj@serenityos.org>
* Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
* Copyright (c) 2022, MacDue <macdue@dueutil.tech>
* Copyright (c) 2025, Lorenz Ackermann <me@lorenzackermann.xyz>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTextCodec/Decoder.h>
#include <LibWeb/Bindings/MainThreadVM.h>
#include <LibWeb/Bindings/PrincipalHostDefined.h>
#include <LibWeb/CSS/CSSMediaRule.h>
#include <LibWeb/CSS/CSSRuleList.h>
#include <LibWeb/CSS/CSSStyleSheet.h>
#include <LibWeb/CSS/Parser/Parser.h>
#include <LibWeb/HTML/Window.h>
namespace Web {
// Returns the realm used for internal CSS work.
// The realm is built on the first call, with a freshly created HTML::Window serving as both its
// global object and its global `this` value, and the same realm is handed back on every later call.
GC::Ref<JS::Realm> internal_css_realm()
{
    // Function-local statics: they are populated once below and then reused by every subsequent call.
    static GC::Root<JS::Realm> s_realm;
    static GC::Root<HTML::Window> s_window;
    static OwnPtr<JS::ExecutionContext> s_execution_context;

    // Already set up by an earlier call.
    if (s_realm)
        return *s_realm;

    s_execution_context = Bindings::create_a_new_javascript_realm(
        Bindings::main_thread_vm(),
        // Global object: create the Window inside the new realm and remember it.
        [&](JS::Realm& new_realm) -> JS::Object* {
            s_window = HTML::Window::create(new_realm);
            return s_window;
        },
        // Global `this` value: the Window created by the callback above.
        [&](JS::Realm&) -> JS::Object* {
            return s_window;
        });
    s_realm = *s_execution_context->realm;

    // Attach intrinsics to the realm through its host-defined slot.
    auto intrinsics = s_realm->create<Bindings::Intrinsics>(*s_realm);
    s_realm->set_host_defined(make<Bindings::HostDefined>(intrinsics));

    return *s_realm;
}
// Parses `css` into a CSSStyleSheet allocated in `context.realm`.
// - context: parsing parameters; its realm is used for every object created here.
// - css: the stylesheet source text.
// - location: optional URL, passed through to the created style sheet.
// - media_query_list: media queries to associate with the style sheet.
// The returned style sheet has its source text set to a copy of `css` (empty for empty input).
GC::Ref<CSS::CSSStyleSheet> parse_css_stylesheet(CSS::Parser::ParsingParams const& context, StringView css, Optional<::URL::URL> location, Vector<NonnullRefPtr<CSS::MediaQuery>> media_query_list)
{
    if (css.is_empty()) {
        // Fast path: nothing to parse, so build an empty style sheet directly without creating a parser.
        auto rule_list = CSS::CSSRuleList::create(*context.realm);
        // Hand the caller-supplied media queries to the sheet's MediaList rather than discarding them,
        // so the empty-input path agrees with the parsing path below, which forwards them to the parser.
        auto media_list = CSS::MediaList::create(*context.realm, move(media_query_list));
        auto style_sheet = CSS::CSSStyleSheet::create(*context.realm, rule_list, media_list, location);
        style_sheet->set_source_text({});
        return style_sheet;
    }

    auto style_sheet = CSS::Parser::Parser::create(context, css).parse_as_css_stylesheet(location, move(media_query_list));
    // FIXME: Avoid this copy
    style_sheet->set_source_text(MUST(String::from_utf8(css)));
    return style_sheet;
}
// Parses `css` as a block of property declarations.
// Empty input yields a default-constructed result without creating a parser.
CSS::Parser::Parser::PropertiesAndCustomProperties parse_css_property_declaration_block(CSS::Parser::ParsingParams const& context, StringView css)
{
    if (css.is_empty()) {
        return {};
    }

    auto parser = CSS::Parser::Parser::create(context, css);
    return parser.parse_as_property_declaration_block();
}
// Parses `css` as a block of descriptor declarations for the at-rule identified by `at_rule_id`.
// Empty input yields an empty vector without creating a parser.
Vector<CSS::Descriptor> parse_css_descriptor_declaration_block(CSS::Parser::ParsingParams const& parsing_params, CSS::AtRuleID at_rule_id, StringView css)
{
    if (css.is_empty()) {
        return {};
    }

    auto parser = CSS::Parser::Parser::create(parsing_params, css);
    return parser.parse_as_descriptor_declaration_block(at_rule_id);
}
// Parses `string` as a value for the property `property_id`.
// Returns a null RefPtr for empty input; otherwise returns whatever Parser::parse_as_css_value() produces.
RefPtr<CSS::StyleValue const> parse_css_value(CSS::Parser::ParsingParams const& context, StringView string, CSS::PropertyID property_id)
{
    if (string.is_empty()) {
        return {};
    }

    auto parser = CSS::Parser::Parser::create(context, string);
    return parser.parse_as_css_value(property_id);
}
// Parses `string` as the value of descriptor `descriptor_id` belonging to the at-rule `at_rule_id`.
// Returns a null RefPtr for empty input; otherwise returns whatever Parser::parse_as_descriptor_value() produces.
RefPtr<CSS::StyleValue const> parse_css_descriptor(CSS::Parser::ParsingParams const& parsing_params, CSS::AtRuleID at_rule_id, CSS::DescriptorID descriptor_id, StringView string)
{
    if (string.is_empty()) {
        return {};
    }

    auto parser = CSS::Parser::Parser::create(parsing_params, string);
    return parser.parse_as_descriptor_value(at_rule_id, descriptor_id);
}
// Parses `css_text` as a single CSS rule and returns the result of Parser::parse_as_css_rule().
// NOTE(review): the raw-pointer return presumably means null signals a parse failure; confirm against the parser.
CSS::CSSRule* parse_css_rule(CSS::Parser::ParsingParams const& context, StringView css_text)
{
    auto parser = CSS::Parser::Parser::create(context, css_text);
    return parser.parse_as_css_rule();
}
// Parses `selector_text` as a selector list and returns the result of Parser::parse_as_selector().
Optional<CSS::SelectorList> parse_selector(CSS::Parser::ParsingParams const& context, StringView selector_text)
{
    auto parser = CSS::Parser::Parser::create(context, selector_text);
    return parser.parse_as_selector();
}
// Parses `selector_text` as a relative selector list (in Standard parsing mode) for use on a nested style rule.
// On success the parsed list is passed through adapt_nested_relative_selector_list() before being returned;
// if parsing produces no value, an empty Optional is returned.
Optional<CSS::SelectorList> parse_selector_for_nested_style_rule(CSS::Parser::ParsingParams const& context, StringView selector_text)
{
    auto selector_parser = CSS::Parser::Parser::create(context, selector_text);
    auto relative_selectors = selector_parser.parse_as_relative_selector(CSS::Parser::Parser::SelectorParsingMode::Standard);

    if (relative_selectors.has_value()) {
        return adapt_nested_relative_selector_list(*relative_selectors);
    }
    return {};
}
// Parses `selector_text` as a page selector list and returns the result of Parser::parse_as_page_selector_list().
Optional<CSS::PageSelectorList> parse_page_selector_list(CSS::Parser::ParsingParams const& params, StringView selector_text)
{
    auto parser = CSS::Parser::Parser::create(params, selector_text);
    return parser.parse_as_page_selector_list();
}
// Parses `selector_text` as a pseudo-element selector and returns the result of
// Parser::parse_as_pseudo_element_selector().
Optional<CSS::Selector::PseudoElementSelector> parse_pseudo_element_selector(CSS::Parser::ParsingParams const& context, StringView selector_text)
{
    auto parser = CSS::Parser::Parser::create(context, selector_text);
    return parser.parse_as_pseudo_element_selector();
}
// Parses `string` as a single media query and returns the result of Parser::parse_as_media_query().
RefPtr<CSS::MediaQuery> parse_media_query(CSS::Parser::ParsingParams const& context, StringView string)
{
    auto parser = CSS::Parser::Parser::create(context, string);
    return parser.parse_as_media_query();
}
// Parses `string` as a list of media queries and returns the result of Parser::parse_as_media_query_list().
Vector<NonnullRefPtr<CSS::MediaQuery>> parse_media_query_list(CSS::Parser::ParsingParams const& context, StringView string)
{
    auto parser = CSS::Parser::Parser::create(context, string);
    return parser.parse_as_media_query_list();
}
// Parses `string` as a supports condition.
// Returns a null RefPtr for empty input; otherwise returns whatever Parser::parse_as_supports() produces.
RefPtr<CSS::Supports> parse_css_supports(CSS::Parser::ParsingParams const& context, StringView string)
{
    if (string.is_empty()) {
        return nullptr;
    }

    auto parser = CSS::Parser::Parser::create(context, string);
    return parser.parse_as_supports();
}
// Parses `string` as a list of component values and returns the result of
// Parser::parse_as_list_of_component_values().
Vector<CSS::Parser::ComponentValue> parse_component_values_list(CSS::Parser::ParsingParams const& parsing_params, StringView string)
{
    auto parser = CSS::Parser::Parser::create(parsing_params, string);
    return parser.parse_as_list_of_component_values();
}
// https://drafts.csswg.org/css-syntax/#css-decode-bytes
// Decodes the raw bytes of a stylesheet into a String.
// - environment_encoding: encoding provided by the referring document, if any.
// - mime_type_charset: encoding label provided by HTTP or an equivalent protocol, if any.
// - encoded_string: the undecoded stylesheet bytes.
// Returns an error if no decoder is available for the determined fallback encoding; otherwise returns
// the result of the TextCodec conversion (which honors a byte order mark over the fallback).
ErrorOr<String> css_decode_bytes(Optional<StringView> const& environment_encoding, Optional<String> mime_type_charset, ByteBuffer const& encoded_string)
{
    // https://drafts.csswg.org/css-syntax/#determine-the-fallback-encoding
    auto determine_the_fallback_encoding = [&mime_type_charset, &environment_encoding, &encoded_string]() -> StringView {
        // 1. If HTTP or equivalent protocol provides an encoding label (e.g. via the charset parameter of the Content-Type header) for the stylesheet,
        //    get an encoding from encoding label. If that does not return failure, return it.
        if (mime_type_charset.has_value()) {
            if (auto encoding = TextCodec::get_standardized_encoding(mime_type_charset.value()); encoding.has_value())
                return encoding.value();
        }

        // 2. Otherwise, check stylesheet's byte stream. If the first 1024 bytes of the stream begin with the hex sequence
        //        40 63 68 61 72 73 65 74 20 22 XX* 22 3B
        //    where each XX byte is a value between 0x00 and 0x21 inclusive or a value between 0x23 and 0x7F inclusive,
        //    then get an encoding from a string formed out of the sequence of XX bytes, interpreted as ASCII.
        auto check_stylesheets_byte_stream = [&encoded_string]() -> Optional<StringView> {
            size_t scan_length = min(encoded_string.size(), 1024);
            auto pattern_start = "@charset \""sv;
            auto pattern_end = "\";"sv;

            if (scan_length < pattern_start.length())
                return {};

            StringView buffer_view = encoded_string.bytes().slice(0, scan_length);
            if (!buffer_view.starts_with(pattern_start))
                return {};

            auto encoding_start = pattern_start.length();
            auto end_index = buffer_view.find(pattern_end, encoding_start);
            if (!end_index.has_value())
                return {};

            size_t encoding_length = end_index.value() - encoding_start;
            auto encoding_view = buffer_view.substring_view(encoding_start, encoding_length);

            // Every XX byte must be ASCII (0x00-0x7F) other than the quote character 0x22.
            // The search above stops at the first `";`, so an earlier stray quote can still end up inside
            // the candidate label; this check is what rejects it.
            for (char c : encoding_view) {
                u8 byte = static_cast<u8>(c);
                bool is_allowed_byte = byte <= 0x21 || (byte >= 0x23 && byte <= 0x7F);
                if (!is_allowed_byte)
                    return {};
            }

            return TextCodec::get_standardized_encoding(encoding_view);
        };

        // If the return value was utf-16be or utf-16le, return utf-8; if it was anything else except failure, return it.
        auto byte_stream_value = check_stylesheets_byte_stream();
        if (byte_stream_value.has_value()) {
            if (byte_stream_value == "UTF-16BE"sv || byte_stream_value == "UTF-16LE"sv)
                return "utf-8"sv;
            return byte_stream_value.value();
        }

        // 3. Otherwise, if an environment encoding is provided by the referring document, return it.
        if (environment_encoding.has_value())
            return environment_encoding.value();

        // 4. Otherwise, return utf-8.
        return "utf-8"sv;
    };

    // 1. Determine the fallback encoding of stylesheet, and let fallback be the result.
    auto fallback = determine_the_fallback_encoding();
    auto decoder = TextCodec::decoder_for(fallback);
    if (!decoder.has_value()) {
        // If we don't support the encoding yet, let's error out instead of trying to decode it as something it's most likely not.
        dbgln("FIXME: Style sheet encoding '{}' is not supported yet", fallback);
        return Error::from_string_literal("No Decoder found");
    }

    // 2. Decode stylesheet's stream of bytes with fallback encoding fallback, and return the result.
    return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, encoded_string);
}
}