mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-19 07:33:20 +00:00
Compare commits
6 commits
9b8f6b8108
...
62c00712fa
Author | SHA1 | Date | |
---|---|---|---|
![]() |
62c00712fa | ||
![]() |
24a7eac4ab | ||
![]() |
e73e0b3c92 | ||
![]() |
4b989b8efd | ||
![]() |
25a47ceb1b | ||
![]() |
c4eef822de |
20 changed files with 702 additions and 144 deletions
|
@ -271,21 +271,23 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
// 22. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)).
|
||||
MUST(array->create_data_property_or_throw(vm.names.index, Value(match_index)));
|
||||
|
||||
// 24. Let match be the Match { [[StartIndex]]: lastIndex, [[EndIndex]]: e }.
|
||||
// 23. Perform ! CreateDataPropertyOrThrow(A, "input", S).
|
||||
MUST(array->create_data_property_or_throw(vm.names.input, string));
|
||||
|
||||
// 24. Let match be the Match Record { [[StartIndex]]: lastIndex, [[EndIndex]]: e }.
|
||||
auto match_indices = Match::create(match);
|
||||
|
||||
// 25. Let indices be a new empty List.
|
||||
Vector<Optional<Match>> indices;
|
||||
Vector<Utf16String> captured_values;
|
||||
|
||||
// 26. Let groupNames be a new empty List.
|
||||
HashMap<Utf16FlyString, Match> group_names;
|
||||
Vector<Utf16String> group_names;
|
||||
|
||||
// 27. Add match as the last element of indices.
|
||||
// 27. Append match to indices.
|
||||
indices.append(move(match_indices));
|
||||
|
||||
// 28. Let matchedValue be ! GetMatchString(S, match).
|
||||
// 29. Perform ! CreateDataPropertyOrThrow(A, "0", matchedValue).
|
||||
// 28. Let matchedSubstr be GetMatchString(S, match).
|
||||
// 29. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr).
|
||||
MUST(array->create_data_property_or_throw(0, PrimitiveString::create(vm, match.view.u16_view())));
|
||||
|
||||
// 30. If R contains any GroupName, then
|
||||
|
@ -295,11 +297,19 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
// a. Let groups be undefined.
|
||||
// b. Let hasGroups be false.
|
||||
bool has_groups = result.n_named_capture_groups != 0;
|
||||
auto groups_object = has_groups ? Object::create(realm, nullptr) : GC::Ptr<Object> {};
|
||||
auto groups = has_groups ? Object::create(realm, nullptr) : js_undefined();
|
||||
|
||||
// 33. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
|
||||
// 32. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
|
||||
MUST(array->create_data_property_or_throw(vm.names.groups, groups));
|
||||
|
||||
// 33. Let matchedGroupNames be a new empty List.
|
||||
Vector<Utf16FlyString> matched_group_names;
|
||||
Vector<Utf16String> captured_values;
|
||||
|
||||
// 34. For each integer i such that 1 ≤ i ≤ n, in ascending order, do
|
||||
for (size_t i = 1; i <= result.n_capture_groups; ++i) {
|
||||
// a. Let captureI be ith element of r's captures List.
|
||||
|
||||
// a. Let captureI be ith element of r.[[Captures]].
|
||||
auto& capture = result.capture_group_matches[0][i - 1];
|
||||
|
||||
Value captured_value;
|
||||
|
@ -310,23 +320,21 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
captured_value = js_undefined();
|
||||
// ii. Append undefined to indices.
|
||||
indices.append({});
|
||||
// iii. Append capture to indices.
|
||||
captured_values.append({});
|
||||
}
|
||||
// c. Else,
|
||||
else {
|
||||
// i. Let captureStart be captureI's startIndex.
|
||||
// ii. Let captureEnd be captureI's endIndex.
|
||||
// i. Let captureStart be captureI.[[StartIndex]].
|
||||
// ii. Let captureEnd be captureI.[[EndIndex]].
|
||||
// iii. If fullUnicode is true, then
|
||||
// 1. Set captureStart to ! GetStringIndex(S, Input, captureStart).
|
||||
// 2. Set captureEnd to ! GetStringIndex(S, Input, captureEnd).
|
||||
// iv. Let capture be the Match { [[StartIndex]]: captureStart, [[EndIndex]: captureEnd }.
|
||||
// v. Let capturedValue be ! GetMatchString(S, capture).
|
||||
// 1. Set captureStart to GetStringIndex(S, captureStart).
|
||||
// 2. Set captureEnd to GetStringIndex(S, captureEnd).
|
||||
// iv. Let capture be the Match Record { [[StartIndex]]: captureStart, [[EndIndex]]: captureEnd }.
|
||||
// v. Let capturedValue be GetMatchString(S, capture).
|
||||
auto capture_as_utf16_string = Utf16String::from_utf16(capture.view.u16_view());
|
||||
captured_value = PrimitiveString::create(vm, capture_as_utf16_string);
|
||||
// vi. Append capture to indices.
|
||||
indices.append(Match::create(capture));
|
||||
// vii. Append capturedValue to the end of capturedValues.
|
||||
captured_values.append(capture_as_utf16_string);
|
||||
}
|
||||
|
||||
|
@ -335,22 +343,51 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
|
||||
// e. If the ith capture of R was defined with a GroupName, then
|
||||
if (capture.capture_group_name >= 0) {
|
||||
// i. Let s be the CapturingGroupName of the corresponding RegExpIdentifierName.
|
||||
// i. Let s be the CapturingGroupName of that GroupName.
|
||||
auto group_name = Utf16FlyString::from_utf8(regex.parser_result.bytecode.get_string(capture.capture_group_name));
|
||||
|
||||
// ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
|
||||
MUST(groups_object->create_data_property_or_throw(group_name, captured_value));
|
||||
|
||||
// iii. Append s to groupNames.
|
||||
group_names.set(move(group_name), Match::create(capture));
|
||||
// ii. If matchedGroupNames contains s, then
|
||||
if (matched_group_names.contains_slow(group_name)) {
|
||||
// 1. Assert: capturedValue is undefined.
|
||||
VERIFY(captured_value.is_undefined());
|
||||
// 2. Append undefined to groupNames.
|
||||
group_names.append({});
|
||||
}
|
||||
// iii. Else,
|
||||
else {
|
||||
// 1. If capturedValue is not undefined, append s to matchedGroupNames.
|
||||
if (!captured_value.is_undefined())
|
||||
matched_group_names.append(group_name);
|
||||
// 2. NOTE: If there are multiple groups named s, groups may already have an s property at this point.
|
||||
// However, because groups is an ordinary object whose properties are all writable data properties,
|
||||
// the call to CreateDataPropertyOrThrow is nevertheless guaranteed to succeed.
|
||||
// 3. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
|
||||
MUST(groups.as_object().create_data_property_or_throw(group_name, captured_value));
|
||||
// 4. Append s to groupNames.
|
||||
group_names.append(group_name.to_utf16_string());
|
||||
}
|
||||
}
|
||||
// f. Else,
|
||||
else {
|
||||
// i. Append undefined to groupNames.
|
||||
// See the note in MakeIndicesArray for why this step is skipped.
|
||||
group_names.append({});
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure named groups are enumerated in source order
|
||||
if (has_groups) {
|
||||
auto original_groups = groups;
|
||||
groups = Object::create(realm, nullptr);
|
||||
|
||||
for (auto const& group_name_str : regex.parser_result.capture_groups) {
|
||||
auto group_name = Utf16FlyString::from_utf8(group_name_str);
|
||||
auto value = original_groups.as_object().get_without_side_effects(group_name);
|
||||
MUST(groups.as_object().create_data_property_or_throw(group_name, value));
|
||||
}
|
||||
|
||||
MUST(array->set(vm.names.groups, groups, Object::ShouldThrowExceptions::Yes));
|
||||
}
|
||||
|
||||
// https://github.com/tc39/proposal-regexp-legacy-features#regexpbuiltinexec--r-s-
|
||||
// 5. Let thisRealm be the current Realm Record.
|
||||
auto* this_realm = &realm;
|
||||
|
@ -370,24 +407,39 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
}
|
||||
}
|
||||
|
||||
// 32. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
|
||||
// NOTE: This step must be performed after the above loop in order for groups to be populated.
|
||||
Value groups = has_groups ? groups_object : js_undefined();
|
||||
MUST(array->create_data_property_or_throw(vm.names.groups, groups));
|
||||
|
||||
// 34. If hasIndices is true, then
|
||||
// 35. If hasIndices is true, then
|
||||
if (has_indices) {
|
||||
// a. Let indicesArray be MakeMatchIndicesIndexPairArray(S, indices, groupNames, hasGroups).
|
||||
auto indices_array = make_match_indices_index_pair_array(vm, string->utf16_string_view(), indices, group_names, has_groups);
|
||||
// b. Perform ! CreateDataProperty(A, "indices", indicesArray).
|
||||
MUST(array->create_data_property(vm.names.indices, indices_array));
|
||||
HashMap<Utf16FlyString, Match> indices_group_names;
|
||||
for (size_t i = 0; i < group_names.size(); ++i) {
|
||||
if (!group_names[i].is_empty()) {
|
||||
auto& capture = result.capture_group_matches[0][i];
|
||||
if (!capture.view.is_null()) {
|
||||
indices_group_names.set(Utf16FlyString { group_names[i] }, Match::create(capture));
|
||||
}
|
||||
}
|
||||
}
|
||||
auto indices_array = make_match_indices_index_pair_array(vm, string->utf16_string_view(), indices, indices_group_names, has_groups);
|
||||
|
||||
// Make sure indices.groups includes all named groups in source order
|
||||
if (has_groups) {
|
||||
auto& indices_groups_object = indices_array.as_object().get_without_side_effects(vm.names.groups).as_object();
|
||||
auto ordered_indices_groups_object = Object::create(realm, nullptr);
|
||||
|
||||
for (auto const& group_name_str : regex.parser_result.capture_groups) {
|
||||
auto group_name = Utf16FlyString::from_utf8(group_name_str);
|
||||
auto value = indices_groups_object.get_without_side_effects(group_name);
|
||||
MUST(ordered_indices_groups_object->create_data_property_or_throw(group_name, value));
|
||||
}
|
||||
|
||||
// 23. Perform ! CreateDataPropertyOrThrow(A, "input", S).
|
||||
// NOTE: This step is performed last to allow the string to be moved into the PrimitiveString::create() invocation.
|
||||
MUST(array->create_data_property_or_throw(vm.names.input, string));
|
||||
MUST(indices_array.as_object().set(vm.names.groups, ordered_indices_groups_object, Object::ShouldThrowExceptions::Yes));
|
||||
}
|
||||
|
||||
// 35. Return A.
|
||||
// b. Perform ! CreateDataPropertyOrThrow(A, "indices", indicesArray).
|
||||
MUST(array->create_data_property_or_throw(vm.names.indices, indices_array));
|
||||
}
|
||||
|
||||
// 36. Return A.
|
||||
return array;
|
||||
}
|
||||
|
||||
|
|
|
@ -226,3 +226,105 @@ test("cached UTF-16 code point length", () => {
|
|||
|
||||
expect(match.codePointAt(0)).toBe(0x1f600);
|
||||
});
|
||||
|
||||
test("named groups source order", () => {
|
||||
// Test that named groups appear in source order, not match order
|
||||
let re = /(?<y>a)(?<x>a)|(?<x>b)(?<y>b)/;
|
||||
|
||||
let result1 = re.exec("aa");
|
||||
expect(Object.keys(result1.groups)).toEqual(["y", "x"]);
|
||||
expect(result1.groups.y).toBe("a");
|
||||
expect(result1.groups.x).toBe("a");
|
||||
|
||||
let result2 = re.exec("bb");
|
||||
expect(Object.keys(result2.groups)).toEqual(["y", "x"]);
|
||||
expect(result2.groups.y).toBe("b");
|
||||
expect(result2.groups.x).toBe("b");
|
||||
});
|
||||
|
||||
test("named groups all present in groups object", () => {
|
||||
// Test that all named groups appear in groups object, even unmatched ones
|
||||
let re = /(?<fst>.)|(?<snd>.)/u;
|
||||
|
||||
let result = re.exec("abcd");
|
||||
expect(Object.getOwnPropertyNames(result.groups)).toEqual(["fst", "snd"]);
|
||||
expect(result.groups.fst).toBe("a");
|
||||
expect(result.groups.snd).toBe(undefined);
|
||||
});
|
||||
|
||||
test("named groups with hasIndices flag", () => {
|
||||
// Test that indices.groups also contains all named groups in source order
|
||||
let re = /(?<fst>.)|(?<snd>.)/du;
|
||||
|
||||
let result = re.exec("abcd");
|
||||
expect(Object.getOwnPropertyNames(result.indices.groups)).toEqual(["fst", "snd"]);
|
||||
expect(result.indices.groups.fst).toEqual([0, 1]);
|
||||
expect(result.indices.groups.snd).toBe(undefined);
|
||||
});
|
||||
|
||||
test("complex named groups ordering", () => {
|
||||
// Test multiple groups in different order
|
||||
let re = /(?<third>c)|(?<first>a)|(?<second>b)/;
|
||||
|
||||
let result1 = re.exec("a");
|
||||
expect(Object.keys(result1.groups)).toEqual(["third", "first", "second"]);
|
||||
expect(result1.groups.third).toBe(undefined);
|
||||
expect(result1.groups.first).toBe("a");
|
||||
expect(result1.groups.second).toBe(undefined);
|
||||
|
||||
let result2 = re.exec("b");
|
||||
expect(Object.keys(result2.groups)).toEqual(["third", "first", "second"]);
|
||||
expect(result2.groups.third).toBe(undefined);
|
||||
expect(result2.groups.first).toBe(undefined);
|
||||
expect(result2.groups.second).toBe("b");
|
||||
|
||||
let result3 = re.exec("c");
|
||||
expect(Object.keys(result3.groups)).toEqual(["third", "first", "second"]);
|
||||
expect(result3.groups.third).toBe("c");
|
||||
expect(result3.groups.first).toBe(undefined);
|
||||
expect(result3.groups.second).toBe(undefined);
|
||||
});
|
||||
|
||||
test("forward references to named groups", () => {
|
||||
// Self-reference inside group
|
||||
let result1 = /(?<a>\k<a>\w)../.exec("bab");
|
||||
expect(result1).not.toBe(null);
|
||||
expect(result1[0]).toBe("bab");
|
||||
expect(result1[1]).toBe("b");
|
||||
expect(result1.groups.a).toBe("b");
|
||||
|
||||
// Reference before group definition
|
||||
let result2 = /\k<a>(?<a>b)\w\k<a>/.exec("bab");
|
||||
expect(result2).not.toBe(null);
|
||||
expect(result2[0]).toBe("bab");
|
||||
expect(result2[1]).toBe("b");
|
||||
expect(result2.groups.a).toBe("b");
|
||||
|
||||
let result3 = /(?<b>b)\k<a>(?<a>a)\k<b>/.exec("bab");
|
||||
expect(result3).not.toBe(null);
|
||||
expect(result3[0]).toBe("bab");
|
||||
expect(result3[1]).toBe("b");
|
||||
expect(result3[2]).toBe("a");
|
||||
expect(result3.groups.a).toBe("a");
|
||||
expect(result3.groups.b).toBe("b");
|
||||
|
||||
// Backward reference
|
||||
let result4 = /(?<a>a)(?<b>b)\k<a>/.exec("aba");
|
||||
expect(result4).not.toBe(null);
|
||||
expect(result4[0]).toBe("aba");
|
||||
expect(result4.groups.a).toBe("a");
|
||||
expect(result4.groups.b).toBe("b");
|
||||
|
||||
// Mixed forward/backward with alternation
|
||||
let result5 = /(?<a>a)(?<b>b)\k<a>|(?<c>c)/.exec("aba");
|
||||
expect(result5).not.toBe(null);
|
||||
expect(result5.groups.a).toBe("a");
|
||||
expect(result5.groups.b).toBe("b");
|
||||
expect(result5.groups.c).toBe(undefined);
|
||||
});
|
||||
|
||||
test("invalid named group references", () => {
|
||||
expect(() => {
|
||||
new RegExp("(?<a>x)\\k<nonexistent>");
|
||||
}).toThrow();
|
||||
});
|
||||
|
|
|
@ -609,12 +609,21 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
}
|
||||
case CharacterCompareType::Reference: {
|
||||
auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
|
||||
if (input.match_index >= state.capture_group_matches_size())
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
if (input.match_index >= state.capture_group_matches_size()) {
|
||||
had_zero_length_match = true;
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto groups = state.capture_group_matches(input.match_index);
|
||||
if (groups.size() <= reference_number)
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
|
||||
if (groups.size() <= reference_number) {
|
||||
had_zero_length_match = true;
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto str = groups.at(reference_number).view;
|
||||
|
||||
|
@ -628,6 +637,59 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
}
|
||||
break;
|
||||
}
|
||||
case CharacterCompareType::NamedReference: {
|
||||
auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
|
||||
|
||||
if (input.match_index >= state.capture_group_matches_size()) {
|
||||
had_zero_length_match = true;
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto groups = state.capture_group_matches(input.match_index);
|
||||
|
||||
if (groups.size() <= reference_number) {
|
||||
had_zero_length_match = true;
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
RegexStringView str {};
|
||||
|
||||
auto reference_name_index = m_bytecode->get_group_name_index(reference_number);
|
||||
|
||||
if (reference_name_index.has_value()) {
|
||||
auto target_name_string = m_bytecode->get_string(reference_name_index.value());
|
||||
|
||||
for (size_t i = 0; i < groups.size(); ++i) {
|
||||
if (groups[i].view.is_null())
|
||||
continue;
|
||||
|
||||
auto group_name_index = m_bytecode->get_group_name_index(i);
|
||||
|
||||
if (group_name_index.has_value()) {
|
||||
auto group_name_string = m_bytecode->get_string(group_name_index.value());
|
||||
|
||||
if (group_name_string == target_name_string) {
|
||||
str = groups[i].view;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (input.view.length() < state.string_position + str.length()) {
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
}
|
||||
|
||||
if (compare_string(input, state, str, had_zero_length_match)) {
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CharacterCompareType::Property: {
|
||||
auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
|
||||
compare_property(input, state, property, current_inversion_state(), inverse_matched);
|
||||
|
@ -946,6 +1008,9 @@ Vector<CompareTypeAndValuePair> OpCode_Compare::flat_compares() const
|
|||
} else if (compare_type == CharacterCompareType::Reference) {
|
||||
auto ref = m_bytecode->at(offset++);
|
||||
result.append({ compare_type, ref });
|
||||
} else if (compare_type == CharacterCompareType::NamedReference) {
|
||||
auto ref = m_bytecode->at(offset++);
|
||||
result.append({ compare_type, ref });
|
||||
} else if (compare_type == CharacterCompareType::String) {
|
||||
auto& length = m_bytecode->at(offset++);
|
||||
for (size_t k = 0; k < length; ++k)
|
||||
|
@ -1028,6 +1093,24 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
|
|||
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
|
||||
}
|
||||
}
|
||||
} else if (compare_type == CharacterCompareType::NamedReference) {
|
||||
auto ref = m_bytecode->at(offset++);
|
||||
result.empend(ByteString::formatted(" named_number={}", ref));
|
||||
if (input.has_value()) {
|
||||
if (state().capture_group_matches_size() > input->match_index) {
|
||||
auto match = state().capture_group_matches(input->match_index);
|
||||
if (match.size() > ref) {
|
||||
auto& group = match[ref];
|
||||
result.empend(ByteString::formatted(" left={}", group.left_column));
|
||||
result.empend(ByteString::formatted(" right={}", group.left_column + group.view.length_in_code_units()));
|
||||
result.empend(ByteString::formatted(" contents='{}'", group.view));
|
||||
} else {
|
||||
result.empend(ByteString::formatted(" (invalid ref {}, max={})", ref, match.size() - 1));
|
||||
}
|
||||
} else {
|
||||
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
|
||||
}
|
||||
}
|
||||
} else if (compare_type == CharacterCompareType::String) {
|
||||
auto& length = m_bytecode->at(offset++);
|
||||
StringBuilder str_builder;
|
||||
|
|
|
@ -69,6 +69,7 @@ enum class OpCodeId : ByteCodeValueType {
|
|||
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
|
||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
|
||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
|
||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(NamedReference) \
|
||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \
|
||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \
|
||||
__ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
|
||||
|
@ -261,6 +262,11 @@ public:
|
|||
FlyString get_string(size_t index) const { return m_string_table.get(index); }
|
||||
auto const& string_table() const { return m_string_table; }
|
||||
|
||||
Optional<size_t> get_group_name_index(size_t group_index) const
|
||||
{
|
||||
return m_group_name_mappings.get(group_index);
|
||||
}
|
||||
|
||||
void last_chunk() const = delete;
|
||||
void first_chunk() const = delete;
|
||||
|
||||
|
@ -279,6 +285,10 @@ public:
|
|||
m_string_table.m_table.set(entry.key, entry.value);
|
||||
}
|
||||
m_string_table.m_inverse_table.update(other.m_string_table.m_inverse_table);
|
||||
|
||||
for (auto const& mapping : other.m_group_name_mappings) {
|
||||
m_group_name_mappings.set(mapping.key, mapping.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -326,8 +336,11 @@ public:
|
|||
void insert_bytecode_group_capture_right(size_t capture_groups_count, FlyString name)
|
||||
{
|
||||
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
|
||||
empend(m_string_table.set(move(name)));
|
||||
auto name_string_index = m_string_table.set(move(name));
|
||||
empend(name_string_index);
|
||||
empend(capture_groups_count);
|
||||
|
||||
m_group_name_mappings.set(capture_groups_count - 1, name_string_index);
|
||||
}
|
||||
|
||||
enum class LookAroundType {
|
||||
|
@ -618,6 +631,7 @@ private:
|
|||
static bool s_opcodes_initialized;
|
||||
static size_t s_next_checkpoint_serial_id;
|
||||
StringTable m_string_table;
|
||||
HashMap<size_t, size_t> m_group_name_mappings;
|
||||
};
|
||||
|
||||
#define ENUMERATE_EXECUTION_RESULTS \
|
||||
|
|
|
@ -131,6 +131,7 @@ static bool interpret_compares(Vector<CompareTypeAndValuePair> const& lhs, Stati
|
|||
// We've transformed this into a series of ranges in flat_compares(), so bail out if we see it.
|
||||
return false;
|
||||
case CharacterCompareType::Reference:
|
||||
case CharacterCompareType::NamedReference:
|
||||
// We've handled this before coming here.
|
||||
break;
|
||||
case CharacterCompareType::Property:
|
||||
|
@ -512,6 +513,7 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
|
|||
// We've transformed this into a series of ranges in flat_compares(), so bail out if we see it.
|
||||
return true;
|
||||
case CharacterCompareType::Reference:
|
||||
case CharacterCompareType::NamedReference:
|
||||
// We've handled this before coming here.
|
||||
break;
|
||||
case CharacterCompareType::Property:
|
||||
|
@ -755,7 +757,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
|
|||
break;
|
||||
|
||||
if (any_of(compares, [&](auto& compare) {
|
||||
return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference;
|
||||
return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference || compare.type == CharacterCompareType::NamedReference;
|
||||
}))
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
|
||||
|
@ -1835,6 +1837,7 @@ static LookupTableInsertionOutcome insert_into_lookup_table(RedBlackTree<ByteCod
|
|||
case CharacterCompareType::And:
|
||||
return LookupTableInsertionOutcome::FlushOnInsertion;
|
||||
case CharacterCompareType::Reference:
|
||||
case CharacterCompareType::NamedReference:
|
||||
case CharacterCompareType::Property:
|
||||
case CharacterCompareType::GeneralCategory:
|
||||
case CharacterCompareType::Script:
|
||||
|
|
|
@ -173,6 +173,7 @@ ALWAYS_INLINE void Parser::reset()
|
|||
m_parser_state.capture_groups_count = 0;
|
||||
m_parser_state.named_capture_groups_count = 0;
|
||||
m_parser_state.named_capture_groups.clear();
|
||||
m_parser_state.unresolved_named_references.clear();
|
||||
}
|
||||
|
||||
Parser::Result Parser::parse(Optional<AllOptions> regex_options)
|
||||
|
@ -182,10 +183,15 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
|
|||
reset();
|
||||
if (regex_options.has_value())
|
||||
m_parser_state.regex_options = regex_options.value();
|
||||
if (parse_internal(m_parser_state.bytecode, m_parser_state.match_length_minimum))
|
||||
if (parse_internal(m_parser_state.bytecode, m_parser_state.match_length_minimum)) {
|
||||
consume(TokenType::Eof, Error::InvalidPattern);
|
||||
else
|
||||
if (!resolve_forward_named_references())
|
||||
set_error(Error::InvalidNameForCaptureGroup);
|
||||
} else {
|
||||
set_error(Error::InvalidPattern);
|
||||
}
|
||||
|
||||
auto capture_groups = m_parser_state.named_capture_groups.keys();
|
||||
|
||||
dbgln_if(REGEX_DEBUG, "[PARSER] Produced bytecode with {} entries (opcodes + arguments)", m_parser_state.bytecode.size());
|
||||
return {
|
||||
|
@ -195,7 +201,7 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
|
|||
move(m_parser_state.match_length_minimum),
|
||||
move(m_parser_state.error),
|
||||
move(m_parser_state.error_token),
|
||||
m_parser_state.named_capture_groups.keys(),
|
||||
move(capture_groups),
|
||||
m_parser_state.regex_options,
|
||||
};
|
||||
}
|
||||
|
@ -496,7 +502,6 @@ bool PosixBasicParser::parse_nonduplicating_re(ByteCode& bytecode, size_t& match
|
|||
if (try_skip({ backref_name, 2 })) {
|
||||
if (!m_capture_group_seen[i - 1])
|
||||
return set_error(Error::InvalidNumber);
|
||||
match_length_minimum += m_capture_group_minimum_lengths[i - 1];
|
||||
bytecode.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)i } });
|
||||
return true;
|
||||
}
|
||||
|
@ -1640,24 +1645,32 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
|||
}
|
||||
|
||||
auto it = m_parser_state.named_capture_groups.find(name);
|
||||
if (it == m_parser_state.named_capture_groups.end()) {
|
||||
set_error(Error::InvalidNameForCaptureGroup);
|
||||
return false;
|
||||
}
|
||||
if (it != m_parser_state.named_capture_groups.end()) {
|
||||
|
||||
// Use the first occurrence of the named group for the backreference
|
||||
// This follows ECMAScript behavior where \k<name> refers to the first
|
||||
// group with that name in left-to-right order, regardless of alternative
|
||||
auto group_index = it->value.first().group_index;
|
||||
|
||||
auto maybe_length = m_parser_state.capture_group_minimum_lengths.get(group_index);
|
||||
if (!maybe_length.has_value()) {
|
||||
set_error(Error::InvalidNameForCaptureGroup);
|
||||
return false;
|
||||
}
|
||||
if (maybe_length.has_value()) {
|
||||
// Backward reference
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::NamedReference, static_cast<ByteCodeValueType>(group_index) } });
|
||||
} else {
|
||||
// Self-reference or forward reference
|
||||
auto placeholder_index = 0;
|
||||
auto bytecode_offset = stack.size();
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::NamedReference, static_cast<ByteCodeValueType>(placeholder_index) } });
|
||||
|
||||
match_length_minimum += maybe_length.value();
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)group_index } });
|
||||
m_parser_state.unresolved_named_references.append({ name, bytecode_offset + 1 });
|
||||
}
|
||||
} else {
|
||||
// Forward reference
|
||||
auto placeholder_index = 0;
|
||||
auto bytecode_offset = stack.size();
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::NamedReference, static_cast<ByteCodeValueType>(placeholder_index) } });
|
||||
|
||||
m_parser_state.unresolved_named_references.append({ name, bytecode_offset + 1 });
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2706,7 +2719,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
|
|||
return false;
|
||||
}
|
||||
|
||||
m_parser_state.named_capture_groups.ensure(name).append({ group_index, m_current_alternative_id });
|
||||
auto& group_vector = m_parser_state.named_capture_groups.ensure(name);
|
||||
group_vector.append({ group_index, m_current_alternative_id });
|
||||
|
||||
ByteCode capture_group_bytecode;
|
||||
size_t length = 0;
|
||||
|
@ -2816,4 +2830,20 @@ size_t ECMA262Parser::ensure_total_number_of_capturing_parenthesis()
|
|||
return count;
|
||||
}
|
||||
|
||||
bool Parser::resolve_forward_named_references()
|
||||
{
|
||||
for (auto const& unresolved_ref : m_parser_state.unresolved_named_references) {
|
||||
auto it = m_parser_state.named_capture_groups.find(unresolved_ref.name);
|
||||
if (it == m_parser_state.named_capture_groups.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto group_index = it->value.first().group_index;
|
||||
|
||||
m_parser_state.bytecode.at(unresolved_ref.bytecode_offset) = (ByteCodeValueType)group_index;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -90,6 +90,7 @@ public:
|
|||
|
||||
protected:
|
||||
virtual bool parse_internal(ByteCode&, size_t& match_length_minimum) = 0;
|
||||
bool resolve_forward_named_references();
|
||||
|
||||
ALWAYS_INLINE bool match(TokenType type) const;
|
||||
ALWAYS_INLINE bool match(char ch) const;
|
||||
|
@ -120,7 +121,13 @@ protected:
|
|||
size_t repetition_mark_count { 0 };
|
||||
AllOptions regex_options;
|
||||
HashMap<size_t, size_t> capture_group_minimum_lengths;
|
||||
HashMap<FlyString, Vector<NamedCaptureGroup>> named_capture_groups;
|
||||
OrderedHashMap<FlyString, Vector<NamedCaptureGroup>> named_capture_groups;
|
||||
|
||||
struct UnresolvedNamedReference {
|
||||
FlyString name;
|
||||
size_t bytecode_offset;
|
||||
};
|
||||
Vector<UnresolvedNamedReference> unresolved_named_references;
|
||||
|
||||
explicit ParserState(Lexer& lexer)
|
||||
: lexer(lexer)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* Copyright (c) 2021, the SerenityOS developers.
|
||||
* Copyright (c) 2021-2024, Sam Atkins <sam@ladybird.org>
|
||||
* Copyright (c) 2022-2024, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2025, Lorenz Ackermann <me@lorenzackermann.xyz>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -146,18 +147,17 @@ void CSSImportRule::fetch()
|
|||
|
||||
// 4. Let importedStylesheet be the result of parsing byteStream given parsedUrl.
|
||||
// FIXME: Tidy up our parsing API. For now, do the decoding here.
|
||||
// FIXME: Get the encoding from the response somehow.
|
||||
auto encoding = "utf-8"sv;
|
||||
auto maybe_decoder = TextCodec::decoder_for(encoding);
|
||||
if (!maybe_decoder.has_value()) {
|
||||
dbgln_if(CSS_LOADER_DEBUG, "CSSImportRule: Failed to decode CSS file: {} Unsupported encoding: {}", parsed_url, encoding);
|
||||
return;
|
||||
Optional<String> mime_type_charset;
|
||||
if (auto extracted_mime_type = response->header_list()->extract_mime_type(); extracted_mime_type.has_value()) {
|
||||
if (auto charset = extracted_mime_type->parameters().get("charset"sv); charset.has_value())
|
||||
mime_type_charset = charset.value();
|
||||
}
|
||||
auto& decoder = maybe_decoder.release_value();
|
||||
|
||||
auto decoded_or_error = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(decoder, *byte_stream);
|
||||
// The environment encoding of an imported style sheet is the encoding of the style sheet that imported it. [css-syntax-3]
|
||||
// FIXME: Save encoding on Stylesheet to get it here
|
||||
Optional<StringView> environment_encoding;
|
||||
auto decoded_or_error = css_decode_bytes(environment_encoding, mime_type_charset, *byte_stream);
|
||||
if (decoded_or_error.is_error()) {
|
||||
dbgln_if(CSS_LOADER_DEBUG, "CSSImportRule: Failed to decode CSS file: {} Encoding was: {}", parsed_url, encoding);
|
||||
dbgln_if(CSS_LOADER_DEBUG, "CSSImportRule: Failed to decode CSS file: {}", parsed_url);
|
||||
return;
|
||||
}
|
||||
auto decoded = decoded_or_error.release_value();
|
||||
|
|
|
@ -4,10 +4,12 @@
|
|||
* Copyright (c) 2021-2024, Sam Atkins <atkinssj@serenityos.org>
|
||||
* Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
|
||||
* Copyright (c) 2022, MacDue <macdue@dueutil.tech>
|
||||
* Copyright (c) 2025, Lorenz Ackermann <me@lorenzackermann.xyz>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/Bindings/MainThreadVM.h>
|
||||
#include <LibWeb/Bindings/PrincipalHostDefined.h>
|
||||
#include <LibWeb/CSS/CSSMediaRule.h>
|
||||
|
@ -138,4 +140,75 @@ Vector<CSS::Parser::ComponentValue> parse_component_values_list(CSS::Parser::Par
|
|||
return CSS::Parser::Parser::create(parsing_params, string).parse_as_list_of_component_values();
|
||||
}
|
||||
|
||||
// https://drafts.csswg.org/css-syntax/#css-decode-bytes
|
||||
ErrorOr<String> css_decode_bytes(Optional<StringView> const& environment_encoding, Optional<String> mime_type_charset, ByteBuffer const& encoded_string)
|
||||
{
|
||||
// https://drafts.csswg.org/css-syntax/#determine-the-fallback-encoding
|
||||
auto determine_the_fallback_encoding = [&mime_type_charset, &environment_encoding, &encoded_string]() -> StringView {
|
||||
// 1. If HTTP or equivalent protocol provides an encoding label (e.g. via the charset parameter of the Content-Type header) for the stylesheet,
|
||||
// get an encoding from encoding label. If that does not return failure, return it.
|
||||
if (mime_type_charset.has_value()) {
|
||||
if (auto encoding = TextCodec::get_standardized_encoding(mime_type_charset.value()); encoding.has_value())
|
||||
return encoding.value();
|
||||
}
|
||||
// 2. Otherwise, check stylesheet’s byte stream. If the first 1024 bytes of the stream begin with the hex sequence
|
||||
// 40 63 68 61 72 73 65 74 20 22 XX* 22 3B
|
||||
// where each XX byte is a value between 0x16 and 0x21 inclusive or a value between 0x23 and 0x7F inclusive,
|
||||
// then get an encoding from a string formed out of the sequence of XX bytes, interpreted as ASCII.
|
||||
auto check_stylesheets_byte_stream = [&encoded_string]() -> Optional<StringView> {
|
||||
size_t scan_length = min(encoded_string.size(), 1024);
|
||||
auto pattern_start = "@charset \""sv;
|
||||
auto pattern_end = "\";"sv;
|
||||
|
||||
if (scan_length < pattern_start.length())
|
||||
return {};
|
||||
|
||||
StringView buffer_view = encoded_string.bytes().slice(0, scan_length);
|
||||
if (!buffer_view.starts_with(pattern_start))
|
||||
return {};
|
||||
|
||||
auto encoding_start = pattern_start.length();
|
||||
auto end_index = buffer_view.find(pattern_end, encoding_start);
|
||||
if (!end_index.has_value())
|
||||
return {};
|
||||
|
||||
size_t encoding_length = end_index.value() - encoding_start;
|
||||
auto encoding_view = buffer_view.substring_view(encoding_start, encoding_length);
|
||||
|
||||
for (char c : encoding_view) {
|
||||
u8 byte = static_cast<u8>(c);
|
||||
if ((byte < 0x01 || byte > 0x21) && (byte < 0x23 || byte > 0x7F)) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
return TextCodec::get_standardized_encoding(encoding_view);
|
||||
};
|
||||
// If the return value was utf-16be or utf-16le, return utf-8; if it was anything else except failure, return it.
|
||||
auto byte_stream_value = check_stylesheets_byte_stream();
|
||||
if (byte_stream_value.has_value() && (byte_stream_value == "UTF-16BE"sv || byte_stream_value == "UTF-16LE"))
|
||||
return "utf-8"sv;
|
||||
if (byte_stream_value.has_value())
|
||||
return byte_stream_value.value();
|
||||
|
||||
// 3. Otherwise, if an environment encoding is provided by the referring document, return it.
|
||||
if (environment_encoding.has_value())
|
||||
return environment_encoding.value();
|
||||
|
||||
// 4. Otherwise, return utf-8.
|
||||
return "utf-8"sv;
|
||||
};
|
||||
|
||||
// 1. Determine the fallback encoding of stylesheet, and let fallback be the result.
|
||||
auto fallback = determine_the_fallback_encoding();
|
||||
auto decoder = TextCodec::decoder_for(fallback);
|
||||
if (!decoder.has_value()) {
|
||||
// If we don't support the encoding yet, let's error out instead of trying to decode it as something it's most likely not.
|
||||
dbgln("FIXME: Style sheet encoding '{}' is not supported yet", fallback);
|
||||
return Error::from_string_literal("No Decoder found");
|
||||
}
|
||||
// 2. Decode stylesheet’s stream of bytes with fallback encoding fallback, and return the result.
|
||||
return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, encoded_string);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -602,5 +602,6 @@ Vector<NonnullRefPtr<CSS::MediaQuery>> parse_media_query_list(CSS::Parser::Parsi
|
|||
RefPtr<CSS::Supports> parse_css_supports(CSS::Parser::ParsingParams const&, StringView);
|
||||
Vector<CSS::Parser::ComponentValue> parse_component_values_list(CSS::Parser::ParsingParams const&, StringView);
|
||||
GC::Ref<JS::Realm> internal_css_realm();
|
||||
ErrorOr<String> css_decode_bytes(Optional<StringView> const& environment_encoding, Optional<String> mime_type_charset, ByteBuffer const& encoded_string);
|
||||
|
||||
}
|
||||
|
|
|
@ -422,10 +422,13 @@ void HTMLLinkElement::process_stylesheet_resource(bool success, Fetch::Infrastru
|
|||
{
|
||||
// 1. If the resource's Content-Type metadata is not text/css, then set success to false.
|
||||
auto mime_type_string = m_mime_type;
|
||||
if (!mime_type_string.has_value()) {
|
||||
Optional<String> mime_type_charset;
|
||||
auto extracted_mime_type = response.header_list()->extract_mime_type();
|
||||
if (extracted_mime_type.has_value())
|
||||
if (extracted_mime_type.has_value()) {
|
||||
if (!mime_type_string.has_value())
|
||||
mime_type_string = extracted_mime_type->essence();
|
||||
if (auto charset = extracted_mime_type->parameters().get("charset"sv); charset.has_value())
|
||||
mime_type_charset = charset.value();
|
||||
}
|
||||
|
||||
if (mime_type_string.has_value() && mime_type_string != "text/css"sv) {
|
||||
|
@ -469,25 +472,17 @@ void HTMLLinkElement::process_stylesheet_resource(bool success, Fetch::Infrastru
|
|||
// The CSS environment encoding is the result of running the following steps: [CSSSYNTAX]
|
||||
// 1. If the element has a charset attribute, get an encoding from that attribute's value. If that succeeds, return the resulting encoding. [ENCODING]
|
||||
// 2. Otherwise, return the document's character encoding. [DOM]
|
||||
Optional<StringView> environment_encoding;
|
||||
if (auto charset = attribute(HTML::AttributeNames::charset); charset.has_value()) {
|
||||
if (auto environment_encoding = TextCodec::get_standardized_encoding(charset.release_value()); environment_encoding.has_value())
|
||||
environment_encoding = environment_encoding.value();
|
||||
}
|
||||
if (!environment_encoding.has_value() && document().encoding().has_value())
|
||||
environment_encoding = document().encoding().value();
|
||||
|
||||
Optional<String> encoding;
|
||||
if (auto charset = attribute(HTML::AttributeNames::charset); charset.has_value())
|
||||
encoding = charset.release_value();
|
||||
|
||||
if (!encoding.has_value())
|
||||
encoding = document().encoding_or_default();
|
||||
|
||||
auto decoder = TextCodec::decoder_for(*encoding);
|
||||
|
||||
if (!decoder.has_value()) {
|
||||
// If we don't support the encoding yet, let's error out instead of trying to decode it as something it's most likely not.
|
||||
dbgln("FIXME: Style sheet encoding '{}' is not supported yet", encoding);
|
||||
dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::error));
|
||||
} else {
|
||||
auto const& encoded_string = body_bytes.get<ByteBuffer>();
|
||||
auto maybe_decoded_string = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, encoded_string);
|
||||
auto maybe_decoded_string = css_decode_bytes(environment_encoding, mime_type_charset, body_bytes.get<ByteBuffer>());
|
||||
if (maybe_decoded_string.is_error()) {
|
||||
dbgln("Style sheet {} claimed to be '{}' but decoding failed", response.url().value_or(URL::URL()), encoding);
|
||||
dbgln("Failed to decode CSS file: {}", response.url().value_or(URL::URL()));
|
||||
dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::error));
|
||||
} else {
|
||||
VERIFY(!response.url_list().is_empty());
|
||||
|
@ -507,7 +502,6 @@ void HTMLLinkElement::process_stylesheet_resource(bool success, Fetch::Infrastru
|
|||
dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::load));
|
||||
}
|
||||
}
|
||||
}
|
||||
// 5. Otherwise, fire an event named error at el.
|
||||
else {
|
||||
dispatch_event(*DOM::Event::create(realm(), HTML::EventNames::error));
|
||||
|
|
|
@ -242,51 +242,54 @@ void HTMLScriptElement::prepare_script()
|
|||
// then set el's type to "importmap".
|
||||
m_script_type = ScriptType::ImportMap;
|
||||
}
|
||||
// 12. Otherwise, return. (No script is executed, and el's type is left as null.)
|
||||
// FIXME: 12. Otherwise, if the script block's type string is an ASCII case-insensitive match for the string "speculationrules", then set el's type to "speculationrules".
|
||||
// 13. Otherwise, return. (No script is executed, and el's type is left as null.)
|
||||
else {
|
||||
VERIFY(m_script_type == ScriptType::Null);
|
||||
return;
|
||||
}
|
||||
|
||||
// 13. If parser document is non-null, then set el's parser document back to parser document and set el's force async to false.
|
||||
// 14. If parser document is non-null, then set el's parser document back to parser document and set el's force async to false.
|
||||
if (parser_document) {
|
||||
m_parser_document = parser_document;
|
||||
m_force_async = false;
|
||||
}
|
||||
|
||||
// 14. Set el's already started to true.
|
||||
// 15. Set el's already started to true.
|
||||
m_already_started = true;
|
||||
|
||||
// 15. Set el's preparation-time document to its node document.
|
||||
// 16. Set el's preparation-time document to its node document.
|
||||
m_preparation_time_document = &document();
|
||||
|
||||
// 16. If parser document is non-null, and parser document is not equal to el's preparation-time document, then return.
|
||||
// 17. If parser document is non-null, and parser document is not equal to el's preparation-time document, then return.
|
||||
if (parser_document != nullptr && parser_document != m_preparation_time_document) {
|
||||
dbgln("HTMLScriptElement: Refusing to run script because the parser document is not the same as the preparation time document.");
|
||||
return;
|
||||
}
|
||||
|
||||
// 17. If scripting is disabled for el, then return.
|
||||
// 18. If scripting is disabled for el, then return.
|
||||
if (is_scripting_disabled()) {
|
||||
dbgln("HTMLScriptElement: Refusing to run script because scripting is disabled.");
|
||||
return;
|
||||
}
|
||||
|
||||
// 18. If el has a nomodule content attribute and its type is "classic", then return.
|
||||
// 19. If el has a nomodule content attribute and its type is "classic", then return.
|
||||
if (m_script_type == ScriptType::Classic && has_attribute(HTML::AttributeNames::nomodule)) {
|
||||
dbgln("HTMLScriptElement: Refusing to run classic script because it has the nomodule attribute.");
|
||||
return;
|
||||
}
|
||||
|
||||
// 19. If el does not have a src content attribute, and the Should element's inline behavior be blocked by Content Security Policy?
|
||||
// algorithm returns "Blocked" when given el, "script", and source text, then return. [CSP]
|
||||
// FIXME: 20. Let cspType be "script speculationrules" if el's type is "speculationrules"; otherwise, "script".
|
||||
|
||||
// 21. If el does not have a src content attribute, and the Should element's inline behavior be blocked by Content
|
||||
// Security Policy? algorithm returns "Blocked" when given el, cspType, and source text, then return [CSP]
|
||||
if (!has_attribute(AttributeNames::src)
|
||||
&& ContentSecurityPolicy::should_elements_inline_type_behavior_be_blocked_by_content_security_policy(realm(), *this, ContentSecurityPolicy::Directives::Directive::InlineType::Script, source_text_utf8) == ContentSecurityPolicy::Directives::Directive::Result::Blocked) {
|
||||
dbgln("HTMLScriptElement: Refusing to run inline script because it violates the Content Security Policy.");
|
||||
return;
|
||||
}
|
||||
|
||||
// 20. If el has an event attribute and a for attribute, and el's type is "classic", then:
|
||||
// 22. If el has an event attribute and a for attribute, and el's type is "classic", then:
|
||||
if (m_script_type == ScriptType::Classic && has_attribute(HTML::AttributeNames::event) && has_attribute(HTML::AttributeNames::for_)) {
|
||||
// 1. Let for be the value of el's for attribute.
|
||||
auto for_ = get_attribute_value(HTML::AttributeNames::for_);
|
||||
|
@ -312,7 +315,7 @@ void HTMLScriptElement::prepare_script()
|
|||
}
|
||||
}
|
||||
|
||||
// 21. If el has a charset attribute, then let encoding be the result of getting an encoding from the value of the charset attribute.
|
||||
// 23. If el has a charset attribute, then let encoding be the result of getting an encoding from the value of the charset attribute.
|
||||
// If el does not have a charset attribute, or if getting an encoding failed, then let encoding be el's node document's the encoding.
|
||||
Optional<String> encoding;
|
||||
|
||||
|
@ -328,34 +331,34 @@ void HTMLScriptElement::prepare_script()
|
|||
|
||||
VERIFY(encoding.has_value());
|
||||
|
||||
// 22. Let classic script CORS setting be the current state of el's crossorigin content attribute.
|
||||
// 24. Let classic script CORS setting be the current state of el's crossorigin content attribute.
|
||||
auto classic_script_cors_setting = m_crossorigin;
|
||||
|
||||
// 23. Let module script credentials mode be the CORS settings attribute credentials mode for el's crossorigin content attribute.
|
||||
// 25. Let module script credentials mode be the CORS settings attribute credentials mode for el's crossorigin content attribute.
|
||||
auto module_script_credential_mode = cors_settings_attribute_credentials_mode(m_crossorigin);
|
||||
|
||||
// 24. Let cryptographic nonce be el's [[CryptographicNonce]] internal slot's value.
|
||||
// 26. Let cryptographic nonce be el's [[CryptographicNonce]] internal slot's value.
|
||||
auto cryptographic_nonce = m_cryptographic_nonce;
|
||||
|
||||
// 25. If el has an integrity attribute, then let integrity metadata be that attribute's value.
|
||||
// 27. If el has an integrity attribute, then let integrity metadata be that attribute's value.
|
||||
// Otherwise, let integrity metadata be the empty string.
|
||||
String integrity_metadata;
|
||||
if (auto maybe_integrity = attribute(HTML::AttributeNames::integrity); maybe_integrity.has_value()) {
|
||||
integrity_metadata = *maybe_integrity;
|
||||
}
|
||||
|
||||
// 26. Let referrer policy be the current state of el's referrerpolicy content attribute.
|
||||
// 28. Let referrer policy be the current state of el's referrerpolicy content attribute.
|
||||
auto referrer_policy = m_referrer_policy;
|
||||
|
||||
// 27. Let fetch priority be the current state of el's fetchpriority content attribute.
|
||||
// 29. Let fetch priority be the current state of el's fetchpriority content attribute.
|
||||
auto fetch_priority = Fetch::Infrastructure::request_priority_from_string(get_attribute_value(HTML::AttributeNames::fetchpriority)).value_or(Fetch::Infrastructure::Request::Priority::Auto);
|
||||
|
||||
// 28. Let parser metadata be "parser-inserted" if el is parser-inserted, and "not-parser-inserted" otherwise.
|
||||
// 30. Let parser metadata be "parser-inserted" if el is parser-inserted, and "not-parser-inserted" otherwise.
|
||||
auto parser_metadata = is_parser_inserted()
|
||||
? Fetch::Infrastructure::Request::ParserMetadata::ParserInserted
|
||||
: Fetch::Infrastructure::Request::ParserMetadata::NotParserInserted;
|
||||
|
||||
// 29. Let options be a script fetch options whose cryptographic nonce is cryptographic nonce,
|
||||
// 31. Let options be a script fetch options whose cryptographic nonce is cryptographic nonce,
|
||||
// integrity metadata is integrity metadata, parser metadata is parser metadata,
|
||||
// credentials mode is module script credentials mode, referrer policy is referrer policy,
|
||||
// and fetch priority is fetch priority.
|
||||
|
@ -368,12 +371,13 @@ void HTMLScriptElement::prepare_script()
|
|||
.fetch_priority = move(fetch_priority),
|
||||
};
|
||||
|
||||
// 30. Let settings object be el's node document's relevant settings object.
|
||||
// 32. Let settings object be el's node document's relevant settings object.
|
||||
auto& settings_object = document().relevant_settings_object();
|
||||
|
||||
// 31. If el has a src content attribute, then:
|
||||
// 33. If el has a src content attribute, then:
|
||||
if (has_attribute(HTML::AttributeNames::src)) {
|
||||
// 1. If el's type is "importmap",
|
||||
// 1. If el's type is "importmap" or "speculationrules", then:
|
||||
// FIXME: Add "speculationrules" support.
|
||||
if (m_script_type == ScriptType::ImportMap) {
|
||||
// then queue an element task on the DOM manipulation task source given el to fire an event named error at el, and return.
|
||||
queue_an_element_task(HTML::Task::Source::DOMManipulation, [this] {
|
||||
|
@ -445,9 +449,9 @@ void HTMLScriptElement::prepare_script()
|
|||
}
|
||||
}
|
||||
|
||||
// 32. If el does not have a src content attribute:
|
||||
// 34. If el does not have a src content attribute:
|
||||
if (!has_attribute(HTML::AttributeNames::src)) {
|
||||
// Let base URL be el's node document's document base URL.
|
||||
// 1. Let base URL be el's node document's document base URL.
|
||||
auto base_url = document().base_url();
|
||||
|
||||
// 2. Switch on el's type:
|
||||
|
@ -466,12 +470,15 @@ void HTMLScriptElement::prepare_script()
|
|||
begin_delaying_document_load_event(*m_preparation_time_document);
|
||||
|
||||
auto steps = create_on_fetch_script_complete(heap(), [this](auto result) {
|
||||
// 1. Queue an element task on the networking task source given el to perform the following steps:
|
||||
queue_an_element_task(Task::Source::Networking, [this, result = move(result)] {
|
||||
// 1. Mark as ready el given result.
|
||||
if (!result)
|
||||
mark_as_ready(ResultState::Null {});
|
||||
else
|
||||
mark_as_ready(Result(*result));
|
||||
});
|
||||
});
|
||||
|
||||
// 2. Fetch an inline module script graph, given source text, base URL, settings object, options, and with the following steps given result:
|
||||
// FIXME: Pass options
|
||||
|
@ -485,9 +492,10 @@ void HTMLScriptElement::prepare_script()
|
|||
// 2. Mark as ready el given result.
|
||||
mark_as_ready(Result(move(result)));
|
||||
}
|
||||
// FIXME: -> "speculationrules"
|
||||
}
|
||||
|
||||
// 33. If el's type is "classic" and el has a src attribute, or el's type is "module":
|
||||
// 35. If el's type is "classic" and el has a src attribute, or el's type is "module":
|
||||
if ((m_script_type == ScriptType::Classic && has_attribute(HTML::AttributeNames::src)) || m_script_type == ScriptType::Module) {
|
||||
// 1. Assert: el's result is "uninitialized".
|
||||
// FIXME: I believe this step to be a spec bug, and it should be removed: https://github.com/whatwg/html/issues/8534
|
||||
|
@ -561,7 +569,7 @@ void HTMLScriptElement::prepare_script()
|
|||
}
|
||||
}
|
||||
|
||||
// 34. Otherwise:
|
||||
// 36. Otherwise:
|
||||
else {
|
||||
// 1. Assert: el's result is not "uninitialized".
|
||||
VERIFY(!m_result.has<ResultState::Uninitialized>());
|
||||
|
|
|
@ -1379,3 +1379,93 @@ TEST_CASE(account_for_opcode_size_calculating_incoming_jump_edges)
|
|||
EXPECT_EQ(result.matches.first().view.to_byte_string(), "aa"sv);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(backreference_to_undefined_capture_groups)
{
    // Duplicate named groups spread over alternatives: the backreference refers to the group that participated (second alternative here).
    {
        Regex<ECMA262> pattern("(?:(?<x>a)|(?<x>b))\\k<x>"sv);
        auto outcome = pattern.match("bb"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "bb"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT_EQ(groups[1].view.to_byte_string(), "b"sv);
    }

    // Same duplicate named groups, wrapped in a {2} quantifier.
    {
        Regex<ECMA262> pattern("(?:(?:(?<x>a)|(?<x>b))\\k<x>){2}"sv);
        auto outcome = pattern.match("aabb"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "aabb"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT_EQ(groups[1].view.to_byte_string(), "b"sv);
    }

    // The first alternative has to work as well.
    {
        Regex<ECMA262> pattern("(?:(?<x>a)|(?<x>b))\\k<x>"sv);
        auto outcome = pattern.match("aa"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "aa"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT_EQ(groups[0].view.to_byte_string(), "a"sv);
        EXPECT(groups[1].view.is_null());
    }

    // Numbered backreference to a group that ends up undefined.
    {
        Regex<ECMA262> pattern("(.*?)a(?!(a+)b\\2c)\\2(.*)"sv);
        auto outcome = pattern.match("baaabaac"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "baaabaac"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 3u);
        EXPECT_EQ(groups[0].view.to_byte_string(), "ba"sv);
        EXPECT(groups[1].view.is_null());
        EXPECT_EQ(groups[2].view.to_byte_string(), "abaac"sv);
    }

    // The third alternative matches, so the backreference targets an undefined group.
    {
        Regex<ECMA262> pattern("^(?:(?<a>x)|(?<a>y)|z)\\k<a>$"sv);
        auto outcome = pattern.match("z"sv);

        EXPECT_EQ(outcome.success, true);
        EXPECT_EQ(outcome.matches.size(), 1u);
        EXPECT_EQ(outcome.matches.first().view.to_byte_string(), "z"sv);

        auto& groups = outcome.capture_group_matches.first();
        EXPECT_EQ(groups.size(), 2u);
        EXPECT(groups[0].view.is_null());
        EXPECT(groups[1].view.is_null());
    }

    // Quantified variant of the pattern above, checked against two inputs.
    {
        Regex<ECMA262> pattern("^(?:(?<a>x)|(?<a>y)|z){2}\\k<a>$"sv);

        auto x_then_z = pattern.match("xz"sv);
        EXPECT_EQ(x_then_z.success, true);
        EXPECT_EQ(x_then_z.matches.size(), 1u);
        EXPECT_EQ(x_then_z.matches.first().view.to_byte_string(), "xz"sv);

        auto& x_then_z_groups = x_then_z.capture_group_matches.first();
        EXPECT_EQ(x_then_z_groups.size(), 2u);
        EXPECT(x_then_z_groups[0].view.is_null());
        EXPECT(x_then_z_groups[1].view.is_null());

        auto y_then_z = pattern.match("yz"sv);
        EXPECT_EQ(y_then_z.success, true);
        EXPECT_EQ(y_then_z.matches.size(), 1u);
        EXPECT_EQ(y_then_z.matches.first().view.to_byte_string(), "yz"sv);

        auto& y_then_z_groups = y_then_z.capture_group_matches.first();
        EXPECT_EQ(y_then_z_groups.size(), 2u);
        EXPECT(y_then_z_groups[0].view.is_null());
        EXPECT(y_then_z_groups[1].view.is_null());
    }
}
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
<?xml version="1.0" encoding="us-ascii"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
||||
<head>
|
||||
<title>CSS Test: Stylesheet encodings: KOI8-R</title>
|
||||
<link rel="author" title="Ian Hickson" href="mailto:ian@hixie.ch"/>
|
||||
<link rel="alternate" href="http://www.hixie.ch/tests/adhoc/css/parsing/encoding/007.html" type="text/html"/>
|
||||
<link rel="help" href="http://www.w3.org/TR/CSS21/syndata.html#charset" />
|
||||
<link rel="match" href="../../../../../expected/wpt-import/css/CSS2/syntax/../reference/ref-green-background.xht"/>
|
||||
<meta name="flags" content="http" />
|
||||
<style type="text/css">
|
||||
p { background: red; color: yellow; }
|
||||
</style>
|
||||
<link rel="stylesheet" href="support/at-charset-077.css"/>
|
||||
</head>
|
||||
<body>
|
||||
<p class="tИst">This should have a green background.</p>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,2 @@
|
|||
@charset "koi8-r";
|
||||
.tést { color: white; background: green; }
|
|
@ -0,0 +1,6 @@
|
|||
Harness status: OK
|
||||
|
||||
Found 1 tests
|
||||
|
||||
1 Pass
|
||||
Pass Module scripts with no imports always execute asynchronously
|
|
@ -0,0 +1,6 @@
|
|||
Harness status: OK
|
||||
|
||||
Found 1 tests
|
||||
|
||||
1 Pass
|
||||
Pass The character encoding of the page can be set by a meta element with charset attribute.
|
|
@ -0,0 +1,27 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Module scripts with no imports always execute asynchronously</title>
|
||||
<script src="../../../../../resources/testharness.js"></script>
|
||||
<script src="../../../../../resources/testharnessreport.js"></script>
|
||||
<link rel="help" href="https://github.com/whatwg/html/issues/3746">
|
||||
</head>
|
||||
<body>
|
||||
<script>
|
||||
async_test(t => {
|
||||
window.results = [];
|
||||
window.logExecution = msg => window.results.push(msg);
|
||||
|
||||
const script = document.createElement('script');
|
||||
script.type = 'module';
|
||||
script.textContent = "window.logExecution('module')";
|
||||
document.body.append(script);
|
||||
window.logExecution('classic');
|
||||
|
||||
window.onload = t.step_func_done(e => {
|
||||
assert_array_equals(window.results, ['classic', 'module']);
|
||||
});
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,4 @@
|
|||
@charset "utf-8";
|
||||
.test div.ÜÀÚ {
|
||||
width: 100px;
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en" >
|
||||
<head>
|
||||
<meta charset="iso-8859-15"> <title>meta charset attribute</title>
|
||||
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
|
||||
<link rel='help' href='https://html.spec.whatwg.org/multipage/#the-input-byte-stream'>
|
||||
<script src="../../../resources/testharness.js"></script>
|
||||
<script src="../../../resources/testharnessreport.js"></script>
|
||||
<meta name='flags' content='http'>
|
||||
<style type='text/css'>
|
||||
.test div { width: 50px; }</style>
|
||||
<link rel="stylesheet" type="text/css" href="support/encodingtests-15.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
|
||||
|
||||
<div class='test'><div id='box' class='ýäè'> </div></div>
|
||||
|
||||
|
||||
<!--Notes:
|
||||
|
||||
The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.
|
||||
|
||||
The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.ÜÀÚ</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.
|
||||
|
||||
-->
|
||||
<script>
|
||||
test(function() {
|
||||
assert_equals(document.getElementById('box').offsetWidth, 100);
|
||||
}, "The character encoding of the page can be set by a meta element with charset attribute.");
|
||||
</script>
|
||||
|
||||
<div id='log'></div>
|
||||
|
||||
</body>
|
||||
</html>
|
Loading…
Add table
Add a link
Reference in a new issue