ladybird/Libraries/LibWeb/CSS/Serialize.cpp
Sam Atkins e026c98d64 LibWeb/CSS: Insert required comments when serializing lists of tokens
Certain pairs of tokens are required to have `/**/` inserted between
them to prevent eg two `<ident>`s getting merged together when
round-tripping.
2025-11-04 14:05:31 +01:00

310 lines
14 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2021-2025, Sam Atkins <sam@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/GenericShorthands.h>
#include <AK/StringBuilder.h>
#include <AK/Utf8View.h>
#include <LibWeb/CSS/Parser/ComponentValue.h>
#include <LibWeb/CSS/Parser/TokenStream.h>
#include <LibWeb/CSS/Serialize.h>
#include <LibWeb/Infra/Strings.h>
namespace Web::CSS {
// https://www.w3.org/TR/cssom-1/#escape-a-character
void escape_a_character(StringBuilder& builder, u32 character)
{
builder.append('\\');
builder.append_code_point(character);
}
// https://www.w3.org/TR/cssom-1/#escape-a-character-as-code-point
void escape_a_character_as_code_point(StringBuilder& builder, u32 character)
{
builder.appendff("\\{:x} ", character);
}
// https://www.w3.org/TR/cssom-1/#serialize-an-identifier
void serialize_an_identifier(StringBuilder& builder, StringView ident)
{
Utf8View characters { ident };
auto first_character = characters.is_empty() ? 0 : *characters.begin();
// To serialize an identifier means to create a string represented by the concatenation of,
// for each character of the identifier:
for (auto character : characters) {
// If the character is NULL (U+0000), then the REPLACEMENT CHARACTER (U+FFFD).
if (character == 0) {
builder.append_code_point(0xFFFD);
continue;
}
// If the character is in the range [\1-\1f] (U+0001 to U+001F) or is U+007F,
// then the character escaped as code point.
if ((character >= 0x0001 && character <= 0x001F) || (character == 0x007F)) {
escape_a_character_as_code_point(builder, character);
continue;
}
// If the character is the first character and is in the range [0-9] (U+0030 to U+0039),
// then the character escaped as code point.
if (builder.is_empty() && character >= '0' && character <= '9') {
escape_a_character_as_code_point(builder, character);
continue;
}
// If the character is the second character and is in the range [0-9] (U+0030 to U+0039)
// and the first character is a "-" (U+002D), then the character escaped as code point.
if (builder.length() == 1 && first_character == '-' && character >= '0' && character <= '9') {
escape_a_character_as_code_point(builder, character);
continue;
}
// If the character is the first character and is a "-" (U+002D), and there is no second
// character, then the escaped character.
if (builder.is_empty() && character == '-' && characters.length() == 1) {
escape_a_character(builder, character);
continue;
}
// If the character is not handled by one of the above rules and is greater than or equal to U+0080, is "-" (U+002D) or "_" (U+005F), or is in one of the ranges [0-9] (U+0030 to U+0039), [A-Z] (U+0041 to U+005A), or \[a-z] (U+0061 to U+007A), then the character itself.
if ((character >= 0x0080)
|| (character == '-') || (character == '_')
|| (character >= '0' && character <= '9')
|| (character >= 'A' && character <= 'Z')
|| (character >= 'a' && character <= 'z')) {
builder.append_code_point(character);
continue;
}
// Otherwise, the escaped character.
escape_a_character(builder, character);
}
}
// https://www.w3.org/TR/cssom-1/#serialize-a-string
void serialize_a_string(StringBuilder& builder, StringView string)
{
Utf8View characters { string };
// To serialize a string means to create a string represented by '"' (U+0022), followed by the result
// of applying the rules below to each character of the given string, followed by '"' (U+0022):
builder.append('"');
for (auto character : characters) {
// If the character is NULL (U+0000), then the REPLACEMENT CHARACTER (U+FFFD).
if (character == 0) {
builder.append_code_point(0xFFFD);
continue;
}
// If the character is in the range [\1-\1f] (U+0001 to U+001F) or is U+007F, the character escaped as code point.
if ((character >= 0x0001 && character <= 0x001F) || (character == 0x007F)) {
escape_a_character_as_code_point(builder, character);
continue;
}
// If the character is '"' (U+0022) or "\" (U+005C), the escaped character.
if (character == 0x0022 || character == 0x005C) {
escape_a_character(builder, character);
continue;
}
// Otherwise, the character itself.
builder.append_code_point(character);
}
builder.append('"');
}
// https://www.w3.org/TR/cssom-1/#serialize-a-url
void serialize_a_url(StringBuilder& builder, StringView url)
{
// To serialize a URL means to create a string represented by "url(",
// followed by the serialization of the URL as a string, followed by ")".
builder.append("url("sv);
serialize_a_string(builder, url);
builder.append(')');
}
// NOTE: No spec currently exists for serializing a <'unicode-range'>.
void serialize_unicode_ranges(StringBuilder& builder, Vector<Gfx::UnicodeRange> const& unicode_ranges)
{
serialize_a_comma_separated_list(builder, unicode_ranges, [](auto& builder, Gfx::UnicodeRange unicode_range) -> void {
return serialize_a_string(builder, unicode_range.to_string());
});
}
// https://drafts.csswg.org/cssom/#serialize-a-css-value
void serialize_a_number(StringBuilder& builder, double value)
{
// -> <number>
// A base-ten number using digits 0-9 (U+0030 to U+0039) in the shortest form possible, using "." to separate
// decimals (if any), rounding the value if necessary to not produce more than 6 decimals, preceded by "-" (U+002D)
// if it is negative.
builder.appendff("{:.6}", value);
}
String serialize_an_identifier(StringView ident)
{
StringBuilder builder;
serialize_an_identifier(builder, ident);
return builder.to_string_without_validation();
}
String serialize_a_string(StringView string)
{
StringBuilder builder;
serialize_a_string(builder, string);
return builder.to_string_without_validation();
}
String serialize_a_url(StringView url)
{
StringBuilder builder;
serialize_a_url(builder, url);
return builder.to_string_without_validation();
}
String serialize_a_number(double value)
{
StringBuilder builder;
serialize_a_number(builder, value);
return builder.to_string_without_validation();
}
// https://drafts.csswg.org/cssom/#serialize-a-css-declaration
String serialize_a_css_declaration(StringView property, StringView value, Important important)
{
// 1. Let s be the empty string.
StringBuilder builder;
// 2. Append property to s.
// AD-HOC: There's no place currently on the spec where the property name properly escaped,
// and this needs to be done when custom properties have special characters.
// Related spec issues:
// - https://github.com/w3c/csswg-drafts/issues/11729
// - https://github.com/w3c/csswg-drafts/issues/12258
serialize_an_identifier(builder, property);
// 3. Append ": " (U+003A U+0020) to s.
builder.append(": "sv);
// 4. If value contains any non-whitespace characters, append value to s.
if (!value.is_whitespace())
builder.append(value);
// 5. If the important flag is set, append " !important" (U+0020 U+0021 U+0069 U+006D U+0070 U+006F U+0072 U+0074
// U+0061 U+006E U+0074) to s.
if (important == Important::Yes)
builder.append(" !important"sv);
// 6. Append ";" (U+003B) to s.
builder.append(';');
// 7. Return s.
return builder.to_string_without_validation();
}
// https://drafts.csswg.org/css-syntax/#serialization
static bool needs_comment_between(Parser::ComponentValue const& first, Parser::ComponentValue const& second)
{
// For any consecutive pair of tokens, if the first token shows up in the row headings of the following table, and
// the second token shows up in the column headings, and theres a ✗ in the cell denoted by the intersection of the
// chosen row and column, the pair of tokens must be serialized with a comment between them.
//
// If the tokenizer preserves comments, and there were comments originally between the token pair, the preserved
// comment(s) should be used; otherwise, an empty comment (/**/) must be inserted. (Preserved comments may be
// reinserted even if the following tables dont require a comment between two tokens.)
//
// Single characters in the row and column headings represent a <delim-token> with that value, except for "(",
// which represents a (-token.
//
// │ ident │ function │ url │ bad url │ - │ number │ percentage │ dimension │ CDC │ ( │ * │ %
// ───────────┼───────┼──────────┼─────┼─────────┼───┼────────┼────────────┼───────────┼─────┼───┼───┼───
// ident │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ │
// at-keyword │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ │ │
// hash │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ │ │
// dimension │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ │ │
// # │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ │ │
// - │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ │ │
// number │ ✗ │ ✗ │ ✗ │ ✗ │ │ ✗ │ ✗ │ ✗ │ ✗ │ │ │ ✗
// @ │ ✗ │ ✗ │ ✗ │ ✗ │ ✗ │ │ │ │ ✗ │ │ │
// . │ │ │ │ │ │ ✗ │ ✗ │ ✗ │ │ │ │
// + │ │ │ │ │ │ ✗ │ ✗ │ ✗ │ │ │ │
// / │ │ │ │ │ │ │ │ │ │ │ ✗ │
if (first.is(Parser::Token::Type::Ident)) {
if (second.is_function())
return true;
// NB: ( may also be part of a block.
if (second.is_block() && second.block().is_paren())
return true;
if (!second.is_token())
return false;
if (second.token().type() == Parser::Token::Type::Delim)
return second.is_delim('-') || second.is_delim('(');
return first_is_one_of(second.token().type(),
Parser::Token::Type::Ident, Parser::Token::Type::Url, Parser::Token::Type::BadUrl, Parser::Token::Type::Number, Parser::Token::Type::Percentage, Parser::Token::Type::Dimension, Parser::Token::Type::CDC);
}
if (first.is(Parser::Token::Type::AtKeyword)
|| first.is(Parser::Token::Type::Hash)
|| first.is(Parser::Token::Type::Dimension)
|| first.is_delim('#')
|| first.is_delim('-')) {
if (second.is_function())
return true;
if (!second.is_token())
return false;
if (second.token().type() == Parser::Token::Type::Delim)
return second.token().delim() == '-';
return first_is_one_of(second.token().type(),
Parser::Token::Type::Ident, Parser::Token::Type::Url, Parser::Token::Type::BadUrl, Parser::Token::Type::Number, Parser::Token::Type::Percentage, Parser::Token::Type::Dimension, Parser::Token::Type::CDC);
}
if (first.is(Parser::Token::Type::Number)) {
if (second.is_function())
return true;
if (!second.is_token())
return false;
if (second.token().type() == Parser::Token::Type::Delim)
return second.token().delim() == '%';
return first_is_one_of(second.token().type(),
Parser::Token::Type::Ident, Parser::Token::Type::Url, Parser::Token::Type::BadUrl, Parser::Token::Type::Number, Parser::Token::Type::Percentage, Parser::Token::Type::Dimension, Parser::Token::Type::CDC);
}
if (first.is_delim('@')) {
if (second.is_function())
return true;
if (!second.is_token())
return false;
if (second.token().type() == Parser::Token::Type::Delim)
return second.token().delim() == '-';
return first_is_one_of(second.token().type(),
Parser::Token::Type::Ident, Parser::Token::Type::Url, Parser::Token::Type::BadUrl, Parser::Token::Type::CDC);
}
if (first.is_delim('.') || first.is_delim('+')) {
return second.is(Parser::Token::Type::Number) || second.is(Parser::Token::Type::Percentage) || second.is(Parser::Token::Type::Dimension);
}
if (first.is_delim('/')) {
return second.is_delim('*');
}
return false;
}
// https://drafts.csswg.org/css-syntax/#serialization
String serialize_a_series_of_component_values(ReadonlySpan<Parser::ComponentValue> component_values)
{
Parser::TokenStream tokens { component_values };
StringBuilder builder;
while (tokens.has_next_token()) {
auto const& current_token = tokens.consume_a_token();
auto const& next_token = tokens.next_token();
builder.append(current_token.to_string());
if (needs_comment_between(current_token, next_token))
builder.append("/**/"sv);
}
return builder.to_string_without_validation();
}
}