Compare commits

...

6 commits

Author SHA1 Message Date
Tim Ledbetter
62c00712fa LibWeb: Update HTMLScriptElement::prepare_script() spec text 2025-10-16 16:46:48 +02:00
Tim Ledbetter
24a7eac4ab LibWeb: Delay module script execution until current script has ended 2025-10-16 16:46:48 +02:00
Lorenz A
e73e0b3c92 LibWeb: Implement CSS decode bytes algo 2025-10-16 16:44:42 +02:00
aplefull
4b989b8efd LibRegex: Add support for forward references to named capture groups
This commit implements support for forward references to named capture
groups. We now allow patterns like \k<name>(?<name>x) and
self-references like (?<name>\k<name>x).
2025-10-16 16:37:54 +02:00
aplefull
25a47ceb1b LibRegex+LibJS: Include all named capture groups in source order
Previously, named capture groups in RegExp results did not always follow
their source order, and unmatched groups were omitted. According to the
spec, all named capture groups must appear in the result object in the
order they are defined, even if they did not participate in the match.
This commit makes sure we follow this requirement.
2025-10-16 16:37:54 +02:00
aplefull
c4eef822de LibRegex: Fix backreferences to undefined capture groups
Fixes handling of backreferences when the referenced capture group is
undefined or hasn't participated in the match.
CharacterCompareType::NamedReference is added to distinguish numbered
(\1) from named (\k<name>) backreferences. Numbered backreferences use
exact group lookup. Named backreferences search for participating
groups among duplicates.
2025-10-16 16:37:54 +02:00
20 changed files with 702 additions and 144 deletions

View file

@ -271,21 +271,23 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
// 22. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)).
MUST(array->create_data_property_or_throw(vm.names.index, Value(match_index)));
// 24. Let match be the Match { [[StartIndex]]: lastIndex, [[EndIndex]]: e }.
// 23. Perform ! CreateDataPropertyOrThrow(A, "input", S).
MUST(array->create_data_property_or_throw(vm.names.input, string));
// 24. Let match be the Match Record { [[StartIndex]]: lastIndex, [[EndIndex]]: e }.
auto match_indices = Match::create(match);
// 25. Let indices be a new empty List.
Vector<Optional<Match>> indices;
Vector<Utf16String> captured_values;
// 26. Let groupNames be a new empty List.
HashMap<Utf16FlyString, Match> group_names;
Vector<Utf16String> group_names;
// 27. Add match as the last element of indices.
// 27. Append match to indices.
indices.append(move(match_indices));
// 28. Let matchedValue be ! GetMatchString(S, match).
// 29. Perform ! CreateDataPropertyOrThrow(A, "0", matchedValue).
// 28. Let matchedSubstr be GetMatchString(S, match).
// 29. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr).
MUST(array->create_data_property_or_throw(0, PrimitiveString::create(vm, match.view.u16_view())));
// 30. If R contains any GroupName, then
@ -295,11 +297,19 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
// a. Let groups be undefined.
// b. Let hasGroups be false.
bool has_groups = result.n_named_capture_groups != 0;
auto groups_object = has_groups ? Object::create(realm, nullptr) : GC::Ptr<Object> {};
auto groups = has_groups ? Object::create(realm, nullptr) : js_undefined();
// 33. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
// 32. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
MUST(array->create_data_property_or_throw(vm.names.groups, groups));
// 33. Let matchedGroupNames be a new empty List.
Vector<Utf16FlyString> matched_group_names;
Vector<Utf16String> captured_values;
// 34. For each integer i such that 1 ≤ i ≤ n, in ascending order, do
for (size_t i = 1; i <= result.n_capture_groups; ++i) {
// a. Let captureI be ith element of r's captures List.
// a. Let captureI be ith element of r.[[Captures]].
auto& capture = result.capture_group_matches[0][i - 1];
Value captured_value;
@ -310,23 +320,21 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
captured_value = js_undefined();
// ii. Append undefined to indices.
indices.append({});
// iii. Append capture to indices.
captured_values.append({});
}
// c. Else,
else {
// i. Let captureStart be captureI's startIndex.
// ii. Let captureEnd be captureI's endIndex.
// i. Let captureStart be captureI.[[StartIndex]].
// ii. Let captureEnd be captureI.[[EndIndex]].
// iii. If fullUnicode is true, then
// 1. Set captureStart to ! GetStringIndex(S, Input, captureStart).
// 2. Set captureEnd to ! GetStringIndex(S, Input, captureEnd).
// iv. Let capture be the Match { [[StartIndex]]: captureStart, [[EndIndex]: captureEnd }.
// v. Let capturedValue be ! GetMatchString(S, capture).
// 1. Set captureStart to GetStringIndex(S, captureStart).
// 2. Set captureEnd to GetStringIndex(S, captureEnd).
// iv. Let capture be the Match Record { [[StartIndex]]: captureStart, [[EndIndex]]: captureEnd }.
// v. Let capturedValue be GetMatchString(S, capture).
auto capture_as_utf16_string = Utf16String::from_utf16(capture.view.u16_view());
captured_value = PrimitiveString::create(vm, capture_as_utf16_string);
// vi. Append capture to indices.
indices.append(Match::create(capture));
// vii. Append capturedValue to the end of capturedValues.
captured_values.append(capture_as_utf16_string);
}
@ -335,22 +343,51 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
// e. If the ith capture of R was defined with a GroupName, then
if (capture.capture_group_name >= 0) {
// i. Let s be the CapturingGroupName of the corresponding RegExpIdentifierName.
// i. Let s be the CapturingGroupName of that GroupName.
auto group_name = Utf16FlyString::from_utf8(regex.parser_result.bytecode.get_string(capture.capture_group_name));
// ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
MUST(groups_object->create_data_property_or_throw(group_name, captured_value));
// iii. Append s to groupNames.
group_names.set(move(group_name), Match::create(capture));
// ii. If matchedGroupNames contains s, then
if (matched_group_names.contains_slow(group_name)) {
// 1. Assert: capturedValue is undefined.
VERIFY(captured_value.is_undefined());
// 2. Append undefined to groupNames.
group_names.append({});
}
// iii. Else,
else {
// 1. If capturedValue is not undefined, append s to matchedGroupNames.
if (!captured_value.is_undefined())
matched_group_names.append(group_name);
// 2. NOTE: If there are multiple groups named s, groups may already have an s property at this point.
// However, because groups is an ordinary object whose properties are all writable data properties,
// the call to CreateDataPropertyOrThrow is nevertheless guaranteed to succeed.
// 3. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
MUST(groups.as_object().create_data_property_or_throw(group_name, captured_value));
// 4. Append s to groupNames.
group_names.append(group_name.to_utf16_string());
}
}
// f. Else,
else {
// i. Append undefined to groupNames.
// See the note in MakeIndicesArray for why this step is skipped.
group_names.append({});
}
}
// Ensure named groups are enumerated in source order
if (has_groups) {
auto original_groups = groups;
groups = Object::create(realm, nullptr);
for (auto const& group_name_str : regex.parser_result.capture_groups) {
auto group_name = Utf16FlyString::from_utf8(group_name_str);
auto value = original_groups.as_object().get_without_side_effects(group_name);
MUST(groups.as_object().create_data_property_or_throw(group_name, value));
}
MUST(array->set(vm.names.groups, groups, Object::ShouldThrowExceptions::Yes));
}
// https://github.com/tc39/proposal-regexp-legacy-features#regexpbuiltinexec--r-s-
// 5. Let thisRealm be the current Realm Record.
auto* this_realm = &realm;
@ -370,24 +407,39 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
}
}
// 32. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
// NOTE: This step must be performed after the above loop in order for groups to be populated.
Value groups = has_groups ? groups_object : js_undefined();
MUST(array->create_data_property_or_throw(vm.names.groups, groups));
// 34. If hasIndices is true, then
// 35. If hasIndices is true, then
if (has_indices) {
// a. Let indicesArray be MakeMatchIndicesIndexPairArray(S, indices, groupNames, hasGroups).
auto indices_array = make_match_indices_index_pair_array(vm, string->utf16_string_view(), indices, group_names, has_groups);
// b. Perform ! CreateDataProperty(A, "indices", indicesArray).
MUST(array->create_data_property(vm.names.indices, indices_array));
HashMap<Utf16FlyString, Match> indices_group_names;
for (size_t i = 0; i < group_names.size(); ++i) {
if (!group_names[i].is_empty()) {
auto& capture = result.capture_group_matches[0][i];
if (!capture.view.is_null()) {
indices_group_names.set(Utf16FlyString { group_names[i] }, Match::create(capture));
}
}
}
auto indices_array = make_match_indices_index_pair_array(vm, string->utf16_string_view(), indices, indices_group_names, has_groups);
// Make sure indices.groups includes all named groups in source order
if (has_groups) {
auto& indices_groups_object = indices_array.as_object().get_without_side_effects(vm.names.groups).as_object();
auto ordered_indices_groups_object = Object::create(realm, nullptr);
for (auto const& group_name_str : regex.parser_result.capture_groups) {
auto group_name = Utf16FlyString::from_utf8(group_name_str);
auto value = indices_groups_object.get_without_side_effects(group_name);
MUST(ordered_indices_groups_object->create_data_property_or_throw(group_name, value));
}
MUST(indices_array.as_object().set(vm.names.groups, ordered_indices_groups_object, Object::ShouldThrowExceptions::Yes));
}
// b. Perform ! CreateDataPropertyOrThrow(A, "indices", indicesArray).
MUST(array->create_data_property_or_throw(vm.names.indices, indices_array));
}
// 23. Perform ! CreateDataPropertyOrThrow(A, "input", S).
// NOTE: This step is performed last to allow the string to be moved into the PrimitiveString::create() invocation.
MUST(array->create_data_property_or_throw(vm.names.input, string));
// 35. Return A.
// 36. Return A.
return array;
}

View file

@ -226,3 +226,105 @@ test("cached UTF-16 code point length", () => {
expect(match.codePointAt(0)).toBe(0x1f600);
});
test("named groups source order", () => {
// Test that named groups appear in source order, not match order
let re = /(?<y>a)(?<x>a)|(?<x>b)(?<y>b)/;
let result1 = re.exec("aa");
expect(Object.keys(result1.groups)).toEqual(["y", "x"]);
expect(result1.groups.y).toBe("a");
expect(result1.groups.x).toBe("a");
let result2 = re.exec("bb");
expect(Object.keys(result2.groups)).toEqual(["y", "x"]);
expect(result2.groups.y).toBe("b");
expect(result2.groups.x).toBe("b");
});
test("named groups all present in groups object", () => {
    // A named group that did not participate in the match must still show up
    // as an own property of `groups`, holding undefined.
    const { groups } = /(?<fst>.)|(?<snd>.)/u.exec("abcd");

    expect(Object.getOwnPropertyNames(groups)).toEqual(["fst", "snd"]);
    expect(groups.fst).toBe("a");
    expect(groups.snd).toBe(undefined);
});
test("named groups with hasIndices flag", () => {
    // With the `d` flag, `indices.groups` must list every named group in
    // source order as well, with undefined for groups that did not match.
    const { indices } = /(?<fst>.)|(?<snd>.)/du.exec("abcd");
    const indexGroups = indices.groups;

    expect(Object.getOwnPropertyNames(indexGroups)).toEqual(["fst", "snd"]);
    expect(indexGroups.fst).toEqual([0, 1]);
    expect(indexGroups.snd).toBe(undefined);
});
test("complex named groups ordering", () => {
    // Three alternatives, each with its own named group. Whichever one matches,
    // the key order stays the definition order: third, first, second.
    const pattern = /(?<third>c)|(?<first>a)|(?<second>b)/;
    const cases = [
        { input: "a", third: undefined, first: "a", second: undefined },
        { input: "b", third: undefined, first: undefined, second: "b" },
        { input: "c", third: "c", first: undefined, second: undefined },
    ];

    for (const { input, third, first, second } of cases) {
        const { groups } = pattern.exec(input);
        expect(Object.keys(groups)).toEqual(["third", "first", "second"]);
        expect(groups.third).toBe(third);
        expect(groups.first).toBe(first);
        expect(groups.second).toBe(second);
    }
});
test("forward references to named groups", () => {
    // Each case lists the expected numbered captures (by index, starting at 0)
    // followed by the expected named groups, checked in that order.
    const cases = [
        // Self-reference inside group
        { pattern: /(?<a>\k<a>\w)../, input: "bab", captures: ["bab", "b"], groups: { a: "b" } },
        // Reference before group definition
        { pattern: /\k<a>(?<a>b)\w\k<a>/, input: "bab", captures: ["bab", "b"], groups: { a: "b" } },
        {
            pattern: /(?<b>b)\k<a>(?<a>a)\k<b>/,
            input: "bab",
            captures: ["bab", "b", "a"],
            groups: { a: "a", b: "b" },
        },
        // Backward reference
        { pattern: /(?<a>a)(?<b>b)\k<a>/, input: "aba", captures: ["aba"], groups: { a: "a", b: "b" } },
        // Mixed forward/backward with alternation
        {
            pattern: /(?<a>a)(?<b>b)\k<a>|(?<c>c)/,
            input: "aba",
            captures: [],
            groups: { a: "a", b: "b", c: undefined },
        },
    ];

    for (const { pattern, input, captures, groups } of cases) {
        const match = pattern.exec(input);
        expect(match).not.toBe(null);

        captures.forEach((expected, index) => {
            expect(match[index]).toBe(expected);
        });

        for (const [name, expected] of Object.entries(groups)) {
            expect(match.groups[name]).toBe(expected);
        }
    }
});
test("invalid named group references", () => {
    // \k<nonexistent> names a group that is never defined anywhere in the
    // pattern, so constructing the RegExp must throw.
    const compileWithUnknownReference = function () {
        new RegExp("(?<a>x)\\k<nonexistent>");
    };

    expect(compileWithUnknownReference).toThrow();
});

View file

@ -609,12 +609,21 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
}
case CharacterCompareType::Reference: {
auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
if (input.match_index >= state.capture_group_matches_size())
return ExecutionResult::Failed_ExecuteLowPrioForks;
if (input.match_index >= state.capture_group_matches_size()) {
had_zero_length_match = true;
if (current_inversion_state())
inverse_matched = true;
break;
}
auto groups = state.capture_group_matches(input.match_index);
if (groups.size() <= reference_number)
return ExecutionResult::Failed_ExecuteLowPrioForks;
if (groups.size() <= reference_number) {
had_zero_length_match = true;
if (current_inversion_state())
inverse_matched = true;
break;
}
auto str = groups.at(reference_number).view;
@ -628,6 +637,59 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
}
break;
}
case CharacterCompareType::NamedReference: {
auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
if (input.match_index >= state.capture_group_matches_size()) {
had_zero_length_match = true;
if (current_inversion_state())
inverse_matched = true;
break;
}
auto groups = state.capture_group_matches(input.match_index);
if (groups.size() <= reference_number) {
had_zero_length_match = true;
if (current_inversion_state())
inverse_matched = true;
break;
}
RegexStringView str {};
auto reference_name_index = m_bytecode->get_group_name_index(reference_number);
if (reference_name_index.has_value()) {
auto target_name_string = m_bytecode->get_string(reference_name_index.value());
for (size_t i = 0; i < groups.size(); ++i) {
if (groups[i].view.is_null())
continue;
auto group_name_index = m_bytecode->get_group_name_index(i);
if (group_name_index.has_value()) {
auto group_name_string = m_bytecode->get_string(group_name_index.value());
if (group_name_string == target_name_string) {
str = groups[i].view;
break;
}
}
}
}
if (input.view.length() < state.string_position + str.length()) {
return ExecutionResult::Failed_ExecuteLowPrioForks;
}
if (compare_string(input, state, str, had_zero_length_match)) {
if (current_inversion_state())
inverse_matched = true;
}
break;
}
case CharacterCompareType::Property: {
auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
compare_property(input, state, property, current_inversion_state(), inverse_matched);
@ -946,6 +1008,9 @@ Vector<CompareTypeAndValuePair> OpCode_Compare::flat_compares() const
} else if (compare_type == CharacterCompareType::Reference) {
auto ref = m_bytecode->at(offset++);
result.append({ compare_type, ref });
} else if (compare_type == CharacterCompareType::NamedReference) {
auto ref = m_bytecode->at(offset++);
result.append({ compare_type, ref });
} else if (compare_type == CharacterCompareType::String) {
auto& length = m_bytecode->at(offset++);
for (size_t k = 0; k < length; ++k)
@ -1028,6 +1093,24 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
}
}
} else if (compare_type == CharacterCompareType::NamedReference) {
auto ref = m_bytecode->at(offset++);
result.empend(ByteString::formatted(" named_number={}", ref));
if (input.has_value()) {
if (state().capture_group_matches_size() > input->match_index) {
auto match = state().capture_group_matches(input->match_index);
if (match.size() > ref) {
auto& group = match[ref];
result.empend(ByteString::formatted(" left={}", group.left_column));
result.empend(ByteString::formatted(" right={}", group.left_column + group.view.length_in_code_units()));
result.empend(ByteString::formatted(" contents='{}'", group.view));
} else {
result.empend(ByteString::formatted(" (invalid ref {}, max={})", ref, match.size() - 1));
}
} else {
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
}
}
} else if (compare_type == CharacterCompareType::String) {
auto& length = m_bytecode->at(offset++);
StringBuilder str_builder;

View file

@ -69,6 +69,7 @@ enum class OpCodeId : ByteCodeValueType {
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(NamedReference) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
@ -261,6 +262,11 @@ public:
FlyString get_string(size_t index) const { return m_string_table.get(index); }
auto const& string_table() const { return m_string_table; }
// Looks up the string-table index of the name recorded for the capture group
// `group_index`. Returns an empty Optional when no name was recorded for it.
Optional<size_t> get_group_name_index(size_t group_index) const
{
    auto mapping = m_group_name_mappings.find(group_index);
    if (mapping == m_group_name_mappings.end())
        return {};
    return mapping->value;
}
void last_chunk() const = delete;
void first_chunk() const = delete;
@ -279,6 +285,10 @@ public:
m_string_table.m_table.set(entry.key, entry.value);
}
m_string_table.m_inverse_table.update(other.m_string_table.m_inverse_table);
for (auto const& mapping : other.m_group_name_mappings) {
m_group_name_mappings.set(mapping.key, mapping.value);
}
}
}
@ -326,8 +336,11 @@ public:
void insert_bytecode_group_capture_right(size_t capture_groups_count, FlyString name)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
empend(m_string_table.set(move(name)));
auto name_string_index = m_string_table.set(move(name));
empend(name_string_index);
empend(capture_groups_count);
m_group_name_mappings.set(capture_groups_count - 1, name_string_index);
}
enum class LookAroundType {
@ -618,6 +631,7 @@ private:
static bool s_opcodes_initialized;
static size_t s_next_checkpoint_serial_id;
StringTable m_string_table;
HashMap<size_t, size_t> m_group_name_mappings;
};
#define ENUMERATE_EXECUTION_RESULTS \

View file

@ -131,6 +131,7 @@ static bool interpret_compares(Vector<CompareTypeAndValuePair> const& lhs, Stati
// We've transformed this into a series of ranges in flat_compares(), so bail out if we see it.
return false;
case CharacterCompareType::Reference:
case CharacterCompareType::NamedReference:
// We've handled this before coming here.
break;
case CharacterCompareType::Property:
@ -512,6 +513,7 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
// We've transformed this into a series of ranges in flat_compares(), so bail out if we see it.
return true;
case CharacterCompareType::Reference:
case CharacterCompareType::NamedReference:
// We've handled this before coming here.
break;
case CharacterCompareType::Property:
@ -755,7 +757,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
break;
if (any_of(compares, [&](auto& compare) {
return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference;
return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference || compare.type == CharacterCompareType::NamedReference;
}))
return AtomicRewritePreconditionResult::NotSatisfied;
@ -1835,6 +1837,7 @@ static LookupTableInsertionOutcome insert_into_lookup_table(RedBlackTree<ByteCod
case CharacterCompareType::And:
return LookupTableInsertionOutcome::FlushOnInsertion;
case CharacterCompareType::Reference:
case CharacterCompareType::NamedReference:
case CharacterCompareType::Property:
case CharacterCompareType::GeneralCategory:
case CharacterCompareType::Script:

View file

@ -173,6 +173,7 @@ ALWAYS_INLINE void Parser::reset()
m_parser_state.capture_groups_count = 0;
m_parser_state.named_capture_groups_count = 0;
m_parser_state.named_capture_groups.clear();
m_parser_state.unresolved_named_references.clear();
}
Parser::Result Parser::parse(Optional<AllOptions> regex_options)
@ -182,10 +183,15 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
reset();
if (regex_options.has_value())
m_parser_state.regex_options = regex_options.value();
if (parse_internal(m_parser_state.bytecode, m_parser_state.match_length_minimum))
if (parse_internal(m_parser_state.bytecode, m_parser_state.match_length_minimum)) {
consume(TokenType::Eof, Error::InvalidPattern);
else
if (!resolve_forward_named_references())
set_error(Error::InvalidNameForCaptureGroup);
} else {
set_error(Error::InvalidPattern);
}
auto capture_groups = m_parser_state.named_capture_groups.keys();
dbgln_if(REGEX_DEBUG, "[PARSER] Produced bytecode with {} entries (opcodes + arguments)", m_parser_state.bytecode.size());
return {
@ -195,7 +201,7 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
move(m_parser_state.match_length_minimum),
move(m_parser_state.error),
move(m_parser_state.error_token),
m_parser_state.named_capture_groups.keys(),
move(capture_groups),
m_parser_state.regex_options,
};
}
@ -496,7 +502,6 @@ bool PosixBasicParser::parse_nonduplicating_re(ByteCode& bytecode, size_t& match
if (try_skip({ backref_name, 2 })) {
if (!m_capture_group_seen[i - 1])
return set_error(Error::InvalidNumber);
match_length_minimum += m_capture_group_minimum_lengths[i - 1];
bytecode.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)i } });
return true;
}
@ -1640,24 +1645,32 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
}
auto it = m_parser_state.named_capture_groups.find(name);
if (it == m_parser_state.named_capture_groups.end()) {
set_error(Error::InvalidNameForCaptureGroup);
return false;
if (it != m_parser_state.named_capture_groups.end()) {
// Use the first occurrence of the named group for the backreference
// This follows ECMAScript behavior where \k<name> refers to the first
// group with that name in left-to-right order, regardless of alternative
auto group_index = it->value.first().group_index;
auto maybe_length = m_parser_state.capture_group_minimum_lengths.get(group_index);
if (maybe_length.has_value()) {
// Backward reference
stack.insert_bytecode_compare_values({ { CharacterCompareType::NamedReference, static_cast<ByteCodeValueType>(group_index) } });
} else {
// Self-reference or forward reference
auto placeholder_index = 0;
auto bytecode_offset = stack.size();
stack.insert_bytecode_compare_values({ { CharacterCompareType::NamedReference, static_cast<ByteCodeValueType>(placeholder_index) } });
m_parser_state.unresolved_named_references.append({ name, bytecode_offset + 1 });
}
} else {
// Forward reference
auto placeholder_index = 0;
auto bytecode_offset = stack.size();
stack.insert_bytecode_compare_values({ { CharacterCompareType::NamedReference, static_cast<ByteCodeValueType>(placeholder_index) } });
m_parser_state.unresolved_named_references.append({ name, bytecode_offset + 1 });
}
// Use the first occurrence of the named group for the backreference
// This follows ECMAScript behavior where \k<name> refers to the first
// group with that name in left-to-right order, regardless of alternative
auto group_index = it->value.first().group_index;
auto maybe_length = m_parser_state.capture_group_minimum_lengths.get(group_index);
if (!maybe_length.has_value()) {
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
match_length_minimum += maybe_length.value();
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)group_index } });
return true;
}
@ -2706,7 +2719,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
return false;
}
m_parser_state.named_capture_groups.ensure(name).append({ group_index, m_current_alternative_id });
auto& group_vector = m_parser_state.named_capture_groups.ensure(name);
group_vector.append({ group_index, m_current_alternative_id });
ByteCode capture_group_bytecode;
size_t length = 0;
@ -2816,4 +2830,20 @@ size_t ECMA262Parser::ensure_total_number_of_capturing_parenthesis()
return count;
}
bool Parser::resolve_forward_named_references()
{
for (auto const& unresolved_ref : m_parser_state.unresolved_named_references) {
auto it = m_parser_state.named_capture_groups.find(unresolved_ref.name);
if (it == m_parser_state.named_capture_groups.end()) {
return false;
}
auto group_index = it->value.first().group_index;
m_parser_state.bytecode.at(unresolved_ref.bytecode_offset) = (ByteCodeValueType)group_index;
}
return true;
}
}

View file

@ -90,6 +90,7 @@ public:
protected:
virtual bool parse_internal(ByteCode&, size_t& match_length_minimum) = 0;
bool resolve_forward_named_references();
ALWAYS_INLINE bool match(TokenType type) const;
ALWAYS_INLINE bool match(char ch) const;
@ -120,7 +121,13 @@ protected:
size_t repetition_mark_count { 0 };
AllOptions regex_options;
HashMap<size_t, size_t> capture_group_minimum_lengths;
HashMap<FlyString, Vector<NamedCaptureGroup>> named_capture_groups;
OrderedHashMap<FlyString, Vector<NamedCaptureGroup>> named_capture_groups;
// A named reference that was emitted with a placeholder group index
// and still has to be patched by resolve_forward_named_references().
struct UnresolvedNamedReference {
// Name of the capture group the reference points at.
FlyString name;
// Index of the placeholder value that gets overwritten with the resolved group index.
size_t bytecode_offset;
};
Vector<UnresolvedNamedReference> unresolved_named_references;
explicit ParserState(Lexer& lexer)
: lexer(lexer)

View file

@ -2,6 +2,7 @@
* Copyright (c) 2021, the SerenityOS developers.
* Copyright (c) 2021-2024, Sam Atkins <sam@ladybird.org>
* Copyright (c) 2022-2024, Andreas Kling <andreas@ladybird.org>
* Copyright (c) 2025, Lorenz Ackermann <me@lorenzackermann.xyz>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -146,18 +147,17 @@ void CSSImportRule::fetch()
// 4. Let importedStylesheet be the result of parsing byteStream given parsedUrl.
// FIXME: Tidy up our parsing API. For now, do the decoding here.
// FIXME: Get the encoding from the response somehow.
auto encoding = "utf-8"sv;
auto maybe_decoder = TextCodec::decoder_for(encoding);
if (!maybe_decoder.has_value()) {
dbgln_if(CSS_LOADER_DEBUG, "CSSImportRule: Failed to decode CSS file: {} Unsupported encoding: {}", parsed_url, encoding);
return;
Optional<String> mime_type_charset;
if (auto extracted_mime_type = response->header_list()->extract_mime_type(); extracted_mime_type.has_value()) {
if (auto charset = extracted_mime_type->parameters().get("charset"sv); charset.has_value())
mime_type_charset = charset.value();
}
auto& decoder = maybe_decoder.release_value();
auto decoded_or_error = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(decoder, *byte_stream);
// The environment encoding of an imported style sheet is the encoding of the style sheet that imported it. [css-syntax-3]
// FIXME: Save encoding on Stylesheet to get it here
Optional<StringView> environment_encoding;
auto decoded_or_error = css_decode_bytes(environment_encoding, mime_type_charset, *byte_stream);
if (decoded_or_error.is_error()) {
dbgln_if(CSS_LOADER_DEBUG, "CSSImportRule: Failed to decode CSS file: {} Encoding was: {}", parsed_url, encoding);
dbgln_if(CSS_LOADER_DEBUG, "CSSImportRule: Failed to decode CSS file: {}", parsed_url);
return;
}
auto decoded = decoded_or_error.release_value();

View file

@ -4,10 +4,12 @@
* Copyright (c) 2021-2024, Sam Atkins <atkinssj@serenityos.org>
* Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
* Copyright (c) 2022, MacDue <macdue@dueutil.tech>
* Copyright (c) 2025, Lorenz Ackermann <me@lorenzackermann.xyz>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTextCodec/Decoder.h>
#include <LibWeb/Bindings/MainThreadVM.h>
#include <LibWeb/Bindings/PrincipalHostDefined.h>
#include <LibWeb/CSS/CSSMediaRule.h>
@ -138,4 +140,75 @@ Vector<CSS::Parser::ComponentValue> parse_component_values_list(CSS::Parser::Par
return CSS::Parser::Parser::create(parsing_params, string).parse_as_list_of_component_values();
}
// https://drafts.csswg.org/css-syntax/#css-decode-bytes
ErrorOr<String> css_decode_bytes(Optional<StringView> const& environment_encoding, Optional<String> mime_type_charset, ByteBuffer const& encoded_string)
{
// https://drafts.csswg.org/css-syntax/#determine-the-fallback-encoding
auto determine_the_fallback_encoding = [&mime_type_charset, &environment_encoding, &encoded_string]() -> StringView {
// 1. If HTTP or equivalent protocol provides an encoding label (e.g. via the charset parameter of the Content-Type header) for the stylesheet,
// get an encoding from encoding label. If that does not return failure, return it.
if (mime_type_charset.has_value()) {
if (auto encoding = TextCodec::get_standardized_encoding(mime_type_charset.value()); encoding.has_value())
return encoding.value();
}
// 2. Otherwise, check stylesheets byte stream. If the first 1024 bytes of the stream begin with the hex sequence
// 40 63 68 61 72 73 65 74 20 22 XX* 22 3B
// where each XX byte is a value between 0x16 and 0x21 inclusive or a value between 0x23 and 0x7F inclusive,
// then get an encoding from a string formed out of the sequence of XX bytes, interpreted as ASCII.
auto check_stylesheets_byte_stream = [&encoded_string]() -> Optional<StringView> {
size_t scan_length = min(encoded_string.size(), 1024);
auto pattern_start = "@charset \""sv;
auto pattern_end = "\";"sv;
if (scan_length < pattern_start.length())
return {};
StringView buffer_view = encoded_string.bytes().slice(0, scan_length);
if (!buffer_view.starts_with(pattern_start))
return {};
auto encoding_start = pattern_start.length();
auto end_index = buffer_view.find(pattern_end, encoding_start);
if (!end_index.has_value())
return {};
size_t encoding_length = end_index.value() - encoding_start;
auto encoding_view = buffer_view.substring_view(encoding_start, encoding_length);
for (char c : encoding_view) {
u8 byte = static_cast<u8>(c);
if ((byte < 0x01 || byte > 0x21) && (byte < 0x23 || byte > 0x7F)) {
return {};
}
}
return TextCodec::get_standardized_encoding(encoding_view);
};
// If the return value was utf-16be or utf-16le, return utf-8; if it was anything else except failure, return it.
auto byte_stream_value = check_stylesheets_byte_stream();
if (byte_stream_value.has_value() && (byte_stream_value == "UTF-16BE"sv || byte_stream_value == "UTF-16LE"))
return "utf-8"sv;
if (byte_stream_value.has_value())
return byte_stream_value.value();
// 3. Otherwise, if an environment encoding is provided by the referring document, return it.
if (environment_encoding.has_value())
return environment_encoding.value();
// 4. Otherwise, return utf-8.
return "utf-8"sv;
};
// 1. Determine the fallback encoding of stylesheet, and let fallback be the result.
auto fallback = determine_the_fallback_encoding();
auto decoder = TextCodec::decoder_for(fallback);
if (!decoder.has_value()) {
// If we don't support the encoding yet, let's error out instead of trying to decode it as something it's most likely not.
dbgln("FIXME: Style sheet encoding '{}' is not supported yet", fallback);
return Error::from_string_literal("No Decoder found");
}
// 2. Decode stylesheet's stream of bytes with fallback encoding fallback, and return the result.
return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, encoded_string);
}
}

View file

@ -602,5 +602,6 @@ Vector<NonnullRefPtr<CSS::MediaQuery>> parse_media_query_list(CSS::Parser::Parsi
RefPtr<CSS::Supports> parse_css_supports(CSS::Parser::ParsingParams const&, StringView);
Vector<CSS::Parser::ComponentValue> parse_component_values_list(CSS::Parser::ParsingParams const&, StringView);
GC::Ref<JS::Realm> internal_css_realm();
ErrorOr<String> css_decode_bytes(Optional<StringView> const& environment_encoding, Optional<String> mime_type_charset, ByteBuffer const& encoded_string);
}

View file

@ -422,10 +422,13 @@ void HTMLLinkElement::process_stylesheet_resource(bool success, Fetch::Infrastru
{
// 1. If the resource's Content-Type metadata is not text/css, then set success to false.
auto mime_type_string = m_mime_type;
if (!mime_type_string.has_value()) {
auto extracted_mime_type = response.header_list()->extract_mime_type();
if (extracted_mime_type.has_value())
Optional<String> mime_type_charset;
auto extracted_mime_type = response.header_list()->extract_mime_type();
if (extracted_mime_type.has_value()) {
if (!mime_type_string.has_value())
mime_type_string = extracted_mime_type->essence();
if (auto charset = extracted_mime_type->parameters().get("charset"sv); charset.has_value())
mime_type_charset = charset.value();
}
if (mime_type_string.has_value() && mime_type_string != "text/css"sv) {
@ -469,43 +472,34 @@ void HTMLLinkElement::process_stylesheet_resource(bool success, Fetch::Infrastru
// The CSS environment encoding is the result of running the following steps: [CSSSYNTAX]
// 1. If the element has a charset attribute, get an encoding from that attribute's value. If that succeeds, return the resulting encoding. [ENCODING]
// 2. Otherwise, return the document's character encoding. [DOM]
Optional<StringView> environment_encoding;
if (auto charset = attribute(HTML::AttributeNames::charset); charset.has_value()) {
if (auto environment_encoding = TextCodec::get_standardized_encoding(charset.release_value()); environment_encoding.has_value())
environment_encoding = environment_encoding.value();
}
if (!environment_encoding.has_value() && document().encoding().has_value())
environment_encoding = document().encoding().value();
Optional<String> encoding;
if (auto charset = attribute(HTML::AttributeNames::charset); charset.has_value())
encoding = charset.release_value();
if (!encoding.has_value())
encoding = document().encoding_or_default();
auto decoder = TextCodec::decoder_for(*encoding);
if (!decoder.has_value()) {
// If we don't support the encoding yet, let's error out instead of trying to decode it as something it's most likely not.
dbgln("FIXME: Style sheet encoding '{}' is not supported yet", encoding);
auto maybe_decoded_string = css_decode_bytes(environment_encoding, mime_type_charset, body_bytes.get<ByteBuffer>());
if (maybe_decoded_string.is_error()) {
dbgln("Failed to decode CSS file: {}", response.url().value_or(URL::URL()));
dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::error));
} else {
auto const& encoded_string = body_bytes.get<ByteBuffer>();
auto maybe_decoded_string = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, encoded_string);
if (maybe_decoded_string.is_error()) {
dbgln("Style sheet {} claimed to be '{}' but decoding failed", response.url().value_or(URL::URL()), encoding);
dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::error));
} else {
VERIFY(!response.url_list().is_empty());
m_loaded_style_sheet = document_or_shadow_root_style_sheets().create_a_css_style_sheet(
maybe_decoded_string.release_value(),
"text/css"_string,
this,
attribute(HTML::AttributeNames::media).value_or({}),
in_a_document_tree() ? attribute(HTML::AttributeNames::title).value_or({}) : String {},
(m_relationship & Relationship::Alternate && !m_explicitly_enabled) ? CSS::StyleSheetList::Alternate::Yes : CSS::StyleSheetList::Alternate::No,
CSS::StyleSheetList::OriginClean::Yes,
response.url_list().first(),
nullptr,
nullptr);
VERIFY(!response.url_list().is_empty());
m_loaded_style_sheet = document_or_shadow_root_style_sheets().create_a_css_style_sheet(
maybe_decoded_string.release_value(),
"text/css"_string,
this,
attribute(HTML::AttributeNames::media).value_or({}),
in_a_document_tree() ? attribute(HTML::AttributeNames::title).value_or({}) : String {},
(m_relationship & Relationship::Alternate && !m_explicitly_enabled) ? CSS::StyleSheetList::Alternate::Yes : CSS::StyleSheetList::Alternate::No,
CSS::StyleSheetList::OriginClean::Yes,
response.url_list().first(),
nullptr,
nullptr);
// 2. Fire an event named load at el.
dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::load));
}
// 2. Fire an event named load at el.
dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::load));
}
}
// 5. Otherwise, fire an event named error at el.

View file

@ -242,51 +242,54 @@ void HTMLScriptElement::prepare_script()
// then set el's type to "importmap".
m_script_type = ScriptType::ImportMap;
}
// 12. Otherwise, return. (No script is executed, and el's type is left as null.)
// FIXME: 12. Otherwise, if the script block's type string is an ASCII case-insensitive match for the string "speculationrules", then set el's type to "speculationrules".
// 13. Otherwise, return. (No script is executed, and el's type is left as null.)
else {
VERIFY(m_script_type == ScriptType::Null);
return;
}
// 13. If parser document is non-null, then set el's parser document back to parser document and set el's force async to false.
// 14. If parser document is non-null, then set el's parser document back to parser document and set el's force async to false.
if (parser_document) {
m_parser_document = parser_document;
m_force_async = false;
}
// 14. Set el's already started to true.
// 15. Set el's already started to true.
m_already_started = true;
// 15. Set el's preparation-time document to its node document.
// 16. Set el's preparation-time document to its node document.
m_preparation_time_document = &document();
// 16. If parser document is non-null, and parser document is not equal to el's preparation-time document, then return.
// 17. If parser document is non-null, and parser document is not equal to el's preparation-time document, then return.
if (parser_document != nullptr && parser_document != m_preparation_time_document) {
dbgln("HTMLScriptElement: Refusing to run script because the parser document is not the same as the preparation time document.");
return;
}
// 17. If scripting is disabled for el, then return.
// 18. If scripting is disabled for el, then return.
if (is_scripting_disabled()) {
dbgln("HTMLScriptElement: Refusing to run script because scripting is disabled.");
return;
}
// 18. If el has a nomodule content attribute and its type is "classic", then return.
// 19. If el has a nomodule content attribute and its type is "classic", then return.
if (m_script_type == ScriptType::Classic && has_attribute(HTML::AttributeNames::nomodule)) {
dbgln("HTMLScriptElement: Refusing to run classic script because it has the nomodule attribute.");
return;
}
// 19. If el does not have a src content attribute, and the Should element's inline behavior be blocked by Content Security Policy?
// algorithm returns "Blocked" when given el, "script", and source text, then return. [CSP]
// FIXME: 20. Let cspType be "script speculationrules" if el's type is "speculationrules"; otherwise, "script".
// 21. If el does not have a src content attribute, and the Should element's inline behavior be blocked by Content
// Security Policy? algorithm returns "Blocked" when given el, cspType, and source text, then return. [CSP]
if (!has_attribute(AttributeNames::src)
&& ContentSecurityPolicy::should_elements_inline_type_behavior_be_blocked_by_content_security_policy(realm(), *this, ContentSecurityPolicy::Directives::Directive::InlineType::Script, source_text_utf8) == ContentSecurityPolicy::Directives::Directive::Result::Blocked) {
dbgln("HTMLScriptElement: Refusing to run inline script because it violates the Content Security Policy.");
return;
}
// 20. If el has an event attribute and a for attribute, and el's type is "classic", then:
// 22. If el has an event attribute and a for attribute, and el's type is "classic", then:
if (m_script_type == ScriptType::Classic && has_attribute(HTML::AttributeNames::event) && has_attribute(HTML::AttributeNames::for_)) {
// 1. Let for be the value of el's for attribute.
auto for_ = get_attribute_value(HTML::AttributeNames::for_);
@ -312,7 +315,7 @@ void HTMLScriptElement::prepare_script()
}
}
// 21. If el has a charset attribute, then let encoding be the result of getting an encoding from the value of the charset attribute.
// 23. If el has a charset attribute, then let encoding be the result of getting an encoding from the value of the charset attribute.
// If el does not have a charset attribute, or if getting an encoding failed, then let encoding be el's node document's the encoding.
Optional<String> encoding;
@ -328,34 +331,34 @@ void HTMLScriptElement::prepare_script()
VERIFY(encoding.has_value());
// 22. Let classic script CORS setting be the current state of el's crossorigin content attribute.
// 24. Let classic script CORS setting be the current state of el's crossorigin content attribute.
auto classic_script_cors_setting = m_crossorigin;
// 23. Let module script credentials mode be the CORS settings attribute credentials mode for el's crossorigin content attribute.
// 25. Let module script credentials mode be the CORS settings attribute credentials mode for el's crossorigin content attribute.
auto module_script_credential_mode = cors_settings_attribute_credentials_mode(m_crossorigin);
// 24. Let cryptographic nonce be el's [[CryptographicNonce]] internal slot's value.
// 26. Let cryptographic nonce be el's [[CryptographicNonce]] internal slot's value.
auto cryptographic_nonce = m_cryptographic_nonce;
// 25. If el has an integrity attribute, then let integrity metadata be that attribute's value.
// 27. If el has an integrity attribute, then let integrity metadata be that attribute's value.
// Otherwise, let integrity metadata be the empty string.
String integrity_metadata;
if (auto maybe_integrity = attribute(HTML::AttributeNames::integrity); maybe_integrity.has_value()) {
integrity_metadata = *maybe_integrity;
}
// 26. Let referrer policy be the current state of el's referrerpolicy content attribute.
// 28. Let referrer policy be the current state of el's referrerpolicy content attribute.
auto referrer_policy = m_referrer_policy;
// 27. Let fetch priority be the current state of el's fetchpriority content attribute.
// 29. Let fetch priority be the current state of el's fetchpriority content attribute.
auto fetch_priority = Fetch::Infrastructure::request_priority_from_string(get_attribute_value(HTML::AttributeNames::fetchpriority)).value_or(Fetch::Infrastructure::Request::Priority::Auto);
// 28. Let parser metadata be "parser-inserted" if el is parser-inserted, and "not-parser-inserted" otherwise.
// 30. Let parser metadata be "parser-inserted" if el is parser-inserted, and "not-parser-inserted" otherwise.
auto parser_metadata = is_parser_inserted()
? Fetch::Infrastructure::Request::ParserMetadata::ParserInserted
: Fetch::Infrastructure::Request::ParserMetadata::NotParserInserted;
// 29. Let options be a script fetch options whose cryptographic nonce is cryptographic nonce,
// 31. Let options be a script fetch options whose cryptographic nonce is cryptographic nonce,
// integrity metadata is integrity metadata, parser metadata is parser metadata,
// credentials mode is module script credentials mode, referrer policy is referrer policy,
// and fetch priority is fetch priority.
@ -368,12 +371,13 @@ void HTMLScriptElement::prepare_script()
.fetch_priority = move(fetch_priority),
};
// 30. Let settings object be el's node document's relevant settings object.
// 32. Let settings object be el's node document's relevant settings object.
auto& settings_object = document().relevant_settings_object();
// 31. If el has a src content attribute, then:
// 33. If el has a src content attribute, then:
if (has_attribute(HTML::AttributeNames::src)) {
// 1. If el's type is "importmap",
// 1. If el's type is "importmap" or "speculationrules", then:
// FIXME: Add "speculationrules" support.
if (m_script_type == ScriptType::ImportMap) {
// then queue an element task on the DOM manipulation task source given el to fire an event named error at el, and return.
queue_an_element_task(HTML::Task::Source::DOMManipulation, [this] {
@ -445,9 +449,9 @@ void HTMLScriptElement::prepare_script()
}
}
// 32. If el does not have a src content attribute:
// 34. If el does not have a src content attribute:
if (!has_attribute(HTML::AttributeNames::src)) {
// Let base URL be el's node document's document base URL.
// 1. Let base URL be el's node document's document base URL.
auto base_url = document().base_url();
// 2. Switch on el's type:
@ -466,11 +470,14 @@ void HTMLScriptElement::prepare_script()
begin_delaying_document_load_event(*m_preparation_time_document);
auto steps = create_on_fetch_script_complete(heap(), [this](auto result) {
// 1. Mark as ready el given result.
if (!result)
mark_as_ready(ResultState::Null {});
else
mark_as_ready(Result(*result));
// 1. Queue an element task on the networking task source given el to perform the following steps:
queue_an_element_task(Task::Source::Networking, [this, result = move(result)] {
// 1. Mark as ready el given result.
if (!result)
mark_as_ready(ResultState::Null {});
else
mark_as_ready(Result(*result));
});
});
// 2. Fetch an inline module script graph, given source text, base URL, settings object, options, and with the following steps given result:
@ -485,9 +492,10 @@ void HTMLScriptElement::prepare_script()
// 2. Mark as ready el given result.
mark_as_ready(Result(move(result)));
}
// FIXME: -> "speculationrules"
}
// 33. If el's type is "classic" and el has a src attribute, or el's type is "module":
// 35. If el's type is "classic" and el has a src attribute, or el's type is "module":
if ((m_script_type == ScriptType::Classic && has_attribute(HTML::AttributeNames::src)) || m_script_type == ScriptType::Module) {
// 1. Assert: el's result is "uninitialized".
// FIXME: I believe this step to be a spec bug, and it should be removed: https://github.com/whatwg/html/issues/8534
@ -561,7 +569,7 @@ void HTMLScriptElement::prepare_script()
}
}
// 34. Otherwise:
// 36. Otherwise:
else {
// 1. Assert: el's result is not "uninitialized".
VERIFY(!m_result.has<ResultState::Uninitialized>());

View file

@ -1379,3 +1379,93 @@ TEST_CASE(account_for_opcode_size_calculating_incoming_jump_edges)
EXPECT_EQ(result.matches.first().view.to_byte_string(), "aa"sv);
}
}
// Exercises named (\k<name>) and numbered (\N) backreferences whose target capture group is
// duplicated across alternatives and/or ends up without a captured value.
TEST_CASE(backreference_to_undefined_capture_groups)
{
    {
        // Two alternatives both named "x"; the input takes the second one, so \k<x> has to follow it.
        Regex<ECMA262> regex("(?:(?<x>a)|(?<x>b))\\k<x>"sv);
        auto match_result = regex.match("bb"sv);
        EXPECT_EQ(match_result.success, true);
        EXPECT_EQ(match_result.matches.size(), 1u);
        EXPECT_EQ(match_result.matches.first().view.to_byte_string(), "bb"sv);

        auto& groups = match_result.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT_EQ(groups[1].view.to_byte_string(), "b"sv);
    }

    {
        // Same duplicate-name pattern wrapped in a {2} quantifier.
        Regex<ECMA262> regex("(?:(?:(?<x>a)|(?<x>b))\\k<x>){2}"sv);
        auto match_result = regex.match("aabb"sv);
        EXPECT_EQ(match_result.success, true);
        EXPECT_EQ(match_result.matches.size(), 1u);
        EXPECT_EQ(match_result.matches.first().view.to_byte_string(), "aabb"sv);

        auto& groups = match_result.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT_EQ(groups[1].view.to_byte_string(), "b"sv);
    }

    {
        // Mirror of the first case: the input takes the first alternative instead.
        Regex<ECMA262> regex("(?:(?<x>a)|(?<x>b))\\k<x>"sv);
        auto match_result = regex.match("aa"sv);
        EXPECT_EQ(match_result.success, true);
        EXPECT_EQ(match_result.matches.size(), 1u);
        EXPECT_EQ(match_result.matches.first().view.to_byte_string(), "aa"sv);

        auto& groups = match_result.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT_EQ(groups[0].view.to_byte_string(), "a"sv);
        EXPECT(groups[1].view.is_null());
    }

    {
        // Numbered backreference: group 2 is only written inside the negative lookahead, and the
        // \2 that follows the lookahead is expected to see it as undefined.
        Regex<ECMA262> regex("(.*?)a(?!(a+)b\\2c)\\2(.*)"sv);
        auto match_result = regex.match("baaabaac"sv);
        EXPECT_EQ(match_result.success, true);
        EXPECT_EQ(match_result.matches.size(), 1u);
        EXPECT_EQ(match_result.matches.first().view.to_byte_string(), "baaabaac"sv);

        auto& groups = match_result.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 3u);
        EXPECT_EQ(groups[0].view.to_byte_string(), "ba"sv);
        EXPECT(groups[1].view.is_null());
        EXPECT_EQ(groups[2].view.to_byte_string(), "abaac"sv);
    }

    {
        Regex<ECMA262> regex("^(?:(?<a>x)|(?<a>y)|z)\\k<a>$"sv);

        // The input takes the unnamed third alternative, leaving both "a" groups (and thus \k<a>) undefined.
        auto z_result = regex.match("z"sv);
        EXPECT_EQ(z_result.success, true);
        EXPECT_EQ(z_result.matches.size(), 1u);
        EXPECT_EQ(z_result.matches.first().view.to_byte_string(), "z"sv);

        auto& groups = z_result.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT(groups[1].view.is_null());
    }

    {
        // The previous pattern with the alternation repeated twice; both inputs end on "z".
        Regex<ECMA262> regex("^(?:(?<a>x)|(?<a>y)|z){2}\\k<a>$"sv);

        auto xz_result = regex.match("xz"sv);
        EXPECT_EQ(xz_result.success, true);
        EXPECT_EQ(xz_result.matches.size(), 1u);
        EXPECT_EQ(xz_result.matches.first().view.to_byte_string(), "xz"sv);

        auto& xz_groups = xz_result.capture_group_matches.first();
        EXPECT_EQ(xz_groups.size(), 2u);
        EXPECT(xz_groups[0].view.is_null());
        EXPECT(xz_groups[1].view.is_null());

        auto yz_result = regex.match("yz"sv);
        EXPECT_EQ(yz_result.success, true);
        EXPECT_EQ(yz_result.matches.size(), 1u);
        EXPECT_EQ(yz_result.matches.first().view.to_byte_string(), "yz"sv);

        auto& yz_groups = yz_result.capture_group_matches.first();
        EXPECT_EQ(yz_groups.size(), 2u);
        EXPECT(yz_groups[0].view.is_null());
        EXPECT(yz_groups[1].view.is_null());
    }
}

View file

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="us-ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<title>CSS Test: Stylesheet encodings: KOI8-R</title>
<link rel="author" title="Ian Hickson" href="mailto:ian@hixie.ch"/>
<link rel="alternate" href="http://www.hixie.ch/tests/adhoc/css/parsing/encoding/007.html" type="text/html"/>
<link rel="help" href="http://www.w3.org/TR/CSS21/syndata.html#charset" />
<link rel="match" href="../../../../../expected/wpt-import/css/CSS2/syntax/../reference/ref-green-background.xht"/>
<meta name="flags" content="http" />
<style type="text/css">
p { background: red; color: yellow; }
</style>
<link rel="stylesheet" href="support/at-charset-077.css"/>
</head>
<body>
<p class="t&#x0418;st">This should have a green background.</p>
</body>
</html>

View file

@ -0,0 +1,2 @@
@charset "koi8-r";
.tést { color: white; background: green; }

View file

@ -0,0 +1,6 @@
Harness status: OK
Found 1 tests
1 Pass
Pass Module scripts with no imports always execute asynchronously

View file

@ -0,0 +1,6 @@
Harness status: OK
Found 1 tests
1 Pass
Pass The character encoding of the page can be set by a meta element with charset attribute.

View file

@ -0,0 +1,27 @@
<!DOCTYPE html>
<html>
<head>
<title>Module scripts with no imports always execute asynchronously</title>
<script src="../../../../../resources/testharness.js"></script>
<script src="../../../../../resources/testharnessreport.js"></script>
<link rel="help" href="https://github.com/whatwg/html/issues/3746">
</head>
<body>
<script>
async_test(t => {
window.results = [];
window.logExecution = msg => window.results.push(msg);
const script = document.createElement('script');
script.type = 'module';
script.textContent = "window.logExecution('module')";
document.body.append(script);
window.logExecution('classic');
window.onload = t.step_func_done(e => {
assert_array_equals(window.results, ['classic', 'module']);
});
});
</script>
</body>
</html>

View file

@ -0,0 +1,4 @@
@charset "utf-8";
.test div.ÜÃÚ {
width: 100px;
}

View file

@ -0,0 +1,37 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta charset="iso-8859-15"> <title>meta charset attribute</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='https://html.spec.whatwg.org/multipage/#the-input-byte-stream'>
<script src="../../../resources/testharness.js"></script>
<script src="../../../resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="support/encodingtests-15.css">
</head>
<body>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<!--Notes:
The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.
The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.
-->
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, "The character encoding of the page can be set by a meta element with charset attribute.");
</script>
<div id='log'></div>
</body>
</html>