mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-19 15:43:20 +00:00
LibRegex: Fix backreferences to undefined capture groups
Fixes handling of backreferences when the referenced capture group is undefined or hasn't participated in the match. CharacterCompareType::NamedReference is added to distinguish numbered (\1) from named (\k<name>) backreferences. Numbered backreferences use exact group lookup. Named backreferences search for participating groups among duplicates.
This commit is contained in:
parent
9b8f6b8108
commit
c4eef822de
Notes:
github-actions[bot]
2025-10-16 14:39:20 +00:00
Author: https://github.com/aplefull
Commit: c4eef822de
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5590
Reviewed-by: https://github.com/alimpfard ✅
Reviewed-by: https://github.com/gmta
5 changed files with 197 additions and 9 deletions
|
@ -609,12 +609,21 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
}
|
||||
case CharacterCompareType::Reference: {
|
||||
auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
|
||||
if (input.match_index >= state.capture_group_matches_size())
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
if (input.match_index >= state.capture_group_matches_size()) {
|
||||
had_zero_length_match = true;
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto groups = state.capture_group_matches(input.match_index);
|
||||
if (groups.size() <= reference_number)
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
|
||||
if (groups.size() <= reference_number) {
|
||||
had_zero_length_match = true;
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto str = groups.at(reference_number).view;
|
||||
|
||||
|
@ -628,6 +637,59 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
}
|
||||
break;
|
||||
}
|
||||
case CharacterCompareType::NamedReference: {
|
||||
auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
|
||||
|
||||
if (input.match_index >= state.capture_group_matches_size()) {
|
||||
had_zero_length_match = true;
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
auto groups = state.capture_group_matches(input.match_index);
|
||||
|
||||
if (groups.size() <= reference_number) {
|
||||
had_zero_length_match = true;
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
RegexStringView str {};
|
||||
|
||||
auto reference_name_index = m_bytecode->get_group_name_index(reference_number);
|
||||
|
||||
if (reference_name_index.has_value()) {
|
||||
auto target_name_string = m_bytecode->get_string(reference_name_index.value());
|
||||
|
||||
for (size_t i = 0; i < groups.size(); ++i) {
|
||||
if (groups[i].view.is_null())
|
||||
continue;
|
||||
|
||||
auto group_name_index = m_bytecode->get_group_name_index(i);
|
||||
|
||||
if (group_name_index.has_value()) {
|
||||
auto group_name_string = m_bytecode->get_string(group_name_index.value());
|
||||
|
||||
if (group_name_string == target_name_string) {
|
||||
str = groups[i].view;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (input.view.length() < state.string_position + str.length()) {
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
}
|
||||
|
||||
if (compare_string(input, state, str, had_zero_length_match)) {
|
||||
if (current_inversion_state())
|
||||
inverse_matched = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CharacterCompareType::Property: {
|
||||
auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
|
||||
compare_property(input, state, property, current_inversion_state(), inverse_matched);
|
||||
|
@ -946,6 +1008,9 @@ Vector<CompareTypeAndValuePair> OpCode_Compare::flat_compares() const
|
|||
} else if (compare_type == CharacterCompareType::Reference) {
|
||||
auto ref = m_bytecode->at(offset++);
|
||||
result.append({ compare_type, ref });
|
||||
} else if (compare_type == CharacterCompareType::NamedReference) {
|
||||
auto ref = m_bytecode->at(offset++);
|
||||
result.append({ compare_type, ref });
|
||||
} else if (compare_type == CharacterCompareType::String) {
|
||||
auto& length = m_bytecode->at(offset++);
|
||||
for (size_t k = 0; k < length; ++k)
|
||||
|
@ -1028,6 +1093,24 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
|
|||
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
|
||||
}
|
||||
}
|
||||
} else if (compare_type == CharacterCompareType::NamedReference) {
|
||||
auto ref = m_bytecode->at(offset++);
|
||||
result.empend(ByteString::formatted(" named_number={}", ref));
|
||||
if (input.has_value()) {
|
||||
if (state().capture_group_matches_size() > input->match_index) {
|
||||
auto match = state().capture_group_matches(input->match_index);
|
||||
if (match.size() > ref) {
|
||||
auto& group = match[ref];
|
||||
result.empend(ByteString::formatted(" left={}", group.left_column));
|
||||
result.empend(ByteString::formatted(" right={}", group.left_column + group.view.length_in_code_units()));
|
||||
result.empend(ByteString::formatted(" contents='{}'", group.view));
|
||||
} else {
|
||||
result.empend(ByteString::formatted(" (invalid ref {}, max={})", ref, match.size() - 1));
|
||||
}
|
||||
} else {
|
||||
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
|
||||
}
|
||||
}
|
||||
} else if (compare_type == CharacterCompareType::String) {
|
||||
auto& length = m_bytecode->at(offset++);
|
||||
StringBuilder str_builder;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue