| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2021-04-22 16:53:07 -07:00
										 |  |  |  * Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org> | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2021-04-22 01:24:48 -07:00
										 |  |  |  * SPDX-License-Identifier: BSD-2-Clause | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  | #include <AK/Function.h>
 | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  | #include <LibJS/Runtime/GlobalObject.h>
 | 
					
						
							|  |  |  | #include <LibJS/Runtime/PrimitiveString.h>
 | 
					
						
							|  |  |  | #include <LibJS/Runtime/RegExpObject.h>
 | 
					
						
							| 
									
										
										
										
											2021-07-22 08:04:31 -04:00
										 |  |  | #include <LibJS/Runtime/StringPrototype.h>
 | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  | #include <LibJS/Runtime/Value.h>
 | 
					
						
							| 
									
										
										
										
											2021-10-05 18:33:28 +01:00
										 |  |  | #include <LibJS/Token.h>
 | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | namespace JS { | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  | Result<regex::RegexOptions<ECMAScriptFlags>, String> regex_flags_from_string(StringView flags) | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2022-07-16 10:14:03 +04:30
										 |  |  |     bool d = false, g = false, i = false, m = false, s = false, u = false, y = false, v = false; | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |     auto options = RegExpObject::default_flags; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  | 
 | 
					
						
							|  |  |  |     for (auto ch : flags) { | 
					
						
							|  |  |  |         switch (ch) { | 
					
						
							| 
									
										
										
										
											2021-07-09 16:10:17 -04:00
										 |  |  |         case 'd': | 
					
						
							|  |  |  |             if (d) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |                 return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch); | 
					
						
							| 
									
										
										
										
											2021-07-09 16:10:17 -04:00
										 |  |  |             d = true; | 
					
						
							|  |  |  |             break; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |         case 'g': | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             if (g) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |                 return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch); | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             g = true; | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             options |= regex::ECMAScriptFlags::Global; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |             break; | 
					
						
							|  |  |  |         case 'i': | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             if (i) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |                 return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch); | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             i = true; | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             options |= regex::ECMAScriptFlags::Insensitive; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |             break; | 
					
						
							|  |  |  |         case 'm': | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             if (m) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |                 return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch); | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             m = true; | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             options |= regex::ECMAScriptFlags::Multiline; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |             break; | 
					
						
							|  |  |  |         case 's': | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             if (s) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |                 return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch); | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             s = true; | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             options |= regex::ECMAScriptFlags::SingleLine; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |             break; | 
					
						
							|  |  |  |         case 'u': | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             if (u) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |                 return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch); | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             u = true; | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             options |= regex::ECMAScriptFlags::Unicode; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |             break; | 
					
						
							|  |  |  |         case 'y': | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             if (y) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |                 return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch); | 
					
						
							| 
									
										
										
										
											2021-05-11 22:47:14 +01:00
										 |  |  |             y = true; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |             // Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
 | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             options.reset_flag(regex::ECMAScriptFlags::Global); | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |             // "What's the difference between sticky and global, then", that's simple.
 | 
					
						
							|  |  |  |             // all the other flags imply 'global', and the "global" flag implies 'stateful';
 | 
					
						
							|  |  |  |             // however, the "sticky" flag does *not* imply 'global', only 'stateful'.
 | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             options |= (regex::ECMAScriptFlags)regex::AllFlags::Internal_Stateful; | 
					
						
							|  |  |  |             options |= regex::ECMAScriptFlags::Sticky; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |             break; | 
					
						
							| 
									
										
										
										
											2022-07-16 10:14:03 +04:30
										 |  |  |         case 'v': | 
					
						
							|  |  |  |             if (v) | 
					
						
							|  |  |  |                 return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch); | 
					
						
							|  |  |  |             v = true; | 
					
						
							|  |  |  |             options |= regex::ECMAScriptFlags::UnicodeSets; | 
					
						
							|  |  |  |             break; | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |         default: | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             return String::formatted(ErrorType::RegExpObjectBadFlag.message(), ch); | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return options; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-07-16 10:14:03 +04:30
										 |  |  | ErrorOr<String, ParseRegexPatternError> parse_regex_pattern(StringView pattern, bool unicode, bool unicode_sets) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2022-07-16 10:14:03 +04:30
										 |  |  |     if (unicode && unicode_sets) | 
					
						
							|  |  |  |         return ParseRegexPatternError { String::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |     auto utf16_pattern = AK::utf8_to_utf16(pattern); | 
					
						
							|  |  |  |     Utf16View utf16_pattern_view { utf16_pattern }; | 
					
						
							|  |  |  |     StringBuilder builder; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
 | 
					
						
							|  |  |  |     // code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
 | 
					
						
							|  |  |  |     for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) { | 
					
						
							| 
									
										
										
										
											2022-07-16 10:14:03 +04:30
										 |  |  |         if (unicode || unicode_sets) { | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |             auto code_point = code_point_at(utf16_pattern_view, i); | 
					
						
							|  |  |  |             builder.append_code_point(code_point.code_point); | 
					
						
							|  |  |  |             i += code_point.code_unit_count; | 
					
						
							|  |  |  |             continue; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         u16 code_unit = utf16_pattern_view.code_unit_at(i); | 
					
						
							|  |  |  |         ++i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (code_unit > 0x7f) | 
					
						
							|  |  |  |             builder.appendff("\\u{:04x}", code_unit); | 
					
						
							|  |  |  |         else | 
					
						
							|  |  |  |             builder.append_code_point(code_unit); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return builder.build(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-07-16 10:14:03 +04:30
										 |  |  | ThrowCompletionOr<String> parse_regex_pattern(StringView pattern, VM& vm, GlobalObject& global_object, bool unicode, bool unicode_sets) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     auto result = parse_regex_pattern(pattern, unicode, unicode_sets); | 
					
						
							|  |  |  |     if (result.is_error()) | 
					
						
							|  |  |  |         return vm.throw_completion<JS::SyntaxError>(global_object, result.release_error().error); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return result.release_value(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-20 09:14:27 -04:00
										 |  |  | RegExpObject* RegExpObject::create(GlobalObject& global_object) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return global_object.heap().allocate<RegExpObject>(global_object, *global_object.regexp_prototype()); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  | RegExpObject* RegExpObject::create(GlobalObject& global_object, Regex<ECMA262> regex, String pattern, String flags) | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |     return global_object.heap().allocate<RegExpObject>(global_object, move(regex), move(pattern), move(flags), *global_object.regexp_prototype()); | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-20 09:14:27 -04:00
										 |  |  | RegExpObject::RegExpObject(Object& prototype) | 
					
						
							|  |  |  |     : Object(prototype) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  | RegExpObject::RegExpObject(Regex<ECMA262> regex, String pattern, String flags, Object& prototype) | 
					
						
							| 
									
										
										
										
											2020-06-23 17:21:53 +02:00
										 |  |  |     : Object(prototype) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |     , m_pattern(move(pattern)) | 
					
						
							| 
									
										
										
										
											2021-07-22 08:04:31 -04:00
										 |  |  |     , m_flags(move(flags)) | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |     , m_regex(move(regex)) | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-08-20 09:14:27 -04:00
										 |  |  |     VERIFY(m_regex->parser_result.error == regex::Error::NoError); | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-07 19:15:52 +03:00
										 |  |  | void RegExpObject::initialize(GlobalObject& global_object) | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-07-07 19:15:52 +03:00
										 |  |  |     auto& vm = this->vm(); | 
					
						
							|  |  |  |     Object::initialize(global_object); | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |     define_direct_property(vm.names.lastIndex, Value(0), Attribute::Writable); | 
					
						
							| 
									
										
										
										
											2020-11-19 01:50:00 +03:30
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-20 09:14:27 -04:00
										 |  |  | // 22.2.3.2.2 RegExpInitialize ( obj, pattern, flags ), https://tc39.es/ecma262/#sec-regexpinitialize
 | 
					
						
							| 
									
										
										
										
											2021-10-23 03:49:29 +03:00
										 |  |  | ThrowCompletionOr<RegExpObject*> RegExpObject::regexp_initialize(GlobalObject& global_object, Value pattern, Value flags) | 
					
						
							| 
									
										
										
										
											2021-03-14 11:03:11 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-07-07 19:15:52 +03:00
										 |  |  |     auto& vm = global_object.vm(); | 
					
						
							| 
									
										
										
										
											2021-07-22 08:04:31 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-14 11:03:11 +01:00
										 |  |  |     String f; | 
					
						
							| 
									
										
										
										
											2021-03-14 12:02:53 +01:00
										 |  |  |     if (flags.is_undefined()) { | 
					
						
							| 
									
										
										
										
											2021-03-14 11:03:11 +01:00
										 |  |  |         f = String::empty(); | 
					
						
							|  |  |  |     } else { | 
					
						
							| 
									
										
										
										
											2021-10-23 03:49:29 +03:00
										 |  |  |         f = TRY(flags.to_string(global_object)); | 
					
						
							| 
									
										
										
										
											2021-03-14 11:03:11 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-07-22 08:04:31 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     String original_pattern; | 
					
						
							|  |  |  |     String parsed_pattern; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (pattern.is_undefined()) { | 
					
						
							|  |  |  |         original_pattern = String::empty(); | 
					
						
							|  |  |  |         parsed_pattern = String::empty(); | 
					
						
							|  |  |  |     } else { | 
					
						
							| 
									
										
										
										
											2021-10-23 03:49:29 +03:00
										 |  |  |         original_pattern = TRY(pattern.to_string(global_object)); | 
					
						
							| 
									
										
										
										
											2021-07-22 08:04:31 -04:00
										 |  |  |         bool unicode = f.find('u').has_value(); | 
					
						
							| 
									
										
										
										
											2022-07-16 10:14:03 +04:30
										 |  |  |         bool unicode_sets = f.find('v').has_value(); | 
					
						
							|  |  |  |         parsed_pattern = TRY(parse_regex_pattern(original_pattern, vm, global_object, unicode, unicode_sets)); | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-07-22 08:04:31 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  |     auto parsed_flags_or_error = regex_flags_from_string(f); | 
					
						
							| 
									
										
										
										
											2021-10-23 03:49:29 +03:00
										 |  |  |     if (parsed_flags_or_error.is_error()) | 
					
						
							|  |  |  |         return vm.throw_completion<SyntaxError>(global_object, parsed_flags_or_error.release_error()); | 
					
						
							| 
									
										
										
										
											2021-07-29 10:34:37 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Regex<ECMA262> regex(move(parsed_pattern), parsed_flags_or_error.release_value()); | 
					
						
							| 
									
										
										
										
											2021-10-23 03:49:29 +03:00
										 |  |  |     if (regex.parser_result.error != regex::Error::NoError) | 
					
						
							|  |  |  |         return vm.throw_completion<SyntaxError>(global_object, ErrorType::RegExpCompileError, regex.error_string()); | 
					
						
							| 
									
										
										
										
											2021-07-22 08:04:31 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-20 09:14:27 -04:00
										 |  |  |     m_pattern = move(original_pattern); | 
					
						
							|  |  |  |     m_flags = move(f); | 
					
						
							|  |  |  |     m_regex = move(regex); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-23 03:49:29 +03:00
										 |  |  |     TRY(set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes)); | 
					
						
							| 
									
										
										
										
											2021-08-20 09:14:27 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return this; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-05 18:33:28 +01:00
										 |  |  | // 22.2.3.2.5 EscapeRegExpPattern ( P, F ), https://tc39.es/ecma262/#sec-escaperegexppattern
 | 
					
						
							|  |  |  | String RegExpObject::escape_regexp_pattern() const | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (m_pattern.is_empty()) | 
					
						
							|  |  |  |         return "(?:)"; | 
					
						
							| 
									
										
										
										
											2022-07-16 10:14:03 +04:30
										 |  |  |     // FIXME: Check the 'u' and 'v' flags and escape accordingly
 | 
					
						
							| 
									
										
										
										
											2022-07-11 17:32:29 +00:00
										 |  |  |     return m_pattern.replace("\n"sv, "\\n"sv, ReplaceMode::All).replace("\r"sv, "\\r"sv, ReplaceMode::All).replace(LINE_SEPARATOR_STRING, "\\u2028"sv, ReplaceMode::All).replace(PARAGRAPH_SEPARATOR_STRING, "\\u2029"sv, ReplaceMode::All).replace("/"sv, "\\/"sv, ReplaceMode::All); | 
					
						
							| 
									
										
										
										
											2021-10-05 18:33:28 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-20 09:14:27 -04:00
										 |  |  | // 22.2.3.2.4 RegExpCreate ( P, F ), https://tc39.es/ecma262/#sec-regexpcreate
 | 
					
						
							| 
									
										
										
										
											2021-10-23 03:52:23 +03:00
										 |  |  | ThrowCompletionOr<RegExpObject*> regexp_create(GlobalObject& global_object, Value pattern, Value flags) | 
					
						
							| 
									
										
										
										
											2021-08-20 09:14:27 -04:00
										 |  |  | { | 
					
						
							|  |  |  |     auto* regexp_object = RegExpObject::create(global_object); | 
					
						
							| 
									
										
										
										
											2021-10-23 03:52:23 +03:00
										 |  |  |     return TRY(regexp_object->regexp_initialize(global_object, pattern, flags)); | 
					
						
							| 
									
										
										
										
											2021-03-14 11:03:11 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-03 16:05:49 -07:00
										 |  |  | } |