ladybird/Libraries/LibRegex/ECMAScriptRegex.h
Andreas Kling b81269e78b Libraries: Clean up UTF-16 source text paths
Store parser errors, source range filenames, source code filenames,
module source, and Rust parser errors as UTF-16 where they flow back
into JavaScript-visible strings. Keep byte-oriented source buffers
byte-backed.

Remove temporary PrimitiveString, ByteString, and UTF-8 detours from
JSON, RegExp, module debug logging, print formatting, and tests.
2026-06-22 19:51:25 +02:00

93 lines
2.4 KiB
C++

/*
* Copyright (c) 2026-present, the Ladybird developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Error.h>
#include <AK/Noncopyable.h>
#include <AK/OwnPtr.h>
#include <AK/String.h>
#include <AK/Utf16FlyString.h>
#include <AK/Utf16View.h>
#include <AK/Vector.h>
#include <LibRegex/Export.h>
namespace regex {
/// Outcome reported by a single matching attempt.
enum class MatchResult : i8 {
    /// The pattern matched the input.
    Match,
    /// The pattern did not match the input.
    NoMatch,
    /// NOTE(review): presumably the matcher gave up after hitting an internal
    /// execution limit; confirm the exact limit in the implementation.
    LimitExceeded,
};
/// Flags handed to the regex compiler alongside the pattern.
///
/// Every flag is off unless set explicitly. The member names follow the
/// ECMAScript RegExp flag properties; the flag letters noted below are the
/// conventional ones (presumably what callers map from; verify at the call site).
struct ECMAScriptCompileFlags {
    bool global { false };       // `g`
    bool ignore_case { false };  // `i`
    bool multiline { false };    // `m`
    bool dot_all { false };      // `s`
    bool unicode { false };      // `u`
    bool unicode_sets { false }; // `v`
    bool sticky { false };       // `y`
    bool has_indices { false };  // `d`
};
/// Associates a capture group name with an index.
struct ECMAScriptNamedCaptureGroup {
    /// Name of the group.
    Utf16FlyString name;
    /// Index paired with the name (presumably the numbered capture group it
    /// refers to; verify against the compiler). Zero-initialized so that a
    /// default-constructed entry never carries an indeterminate value.
    unsigned int index { 0 };
};
/// A compiled ECMAScript regular expression.
///
/// Instances are move-only (copying is disabled through AK_MAKE_NONCOPYABLE)
/// and keep their state behind an owned, opaque Impl pointer. New instances
/// are obtained through compile().
class REGEX_API ECMAScriptRegex {
    AK_MAKE_NONCOPYABLE(ECMAScriptRegex);

public:
    /// Compiles `pattern` with the supplied flags.
    /// On failure the error value is a String (presumably a description of
    /// what went wrong; verify against the implementation).
    static ErrorOr<ECMAScriptRegex, String> compile(Utf16View pattern, ECMAScriptCompileFlags);

    ECMAScriptRegex(ECMAScriptRegex&&);
    ECMAScriptRegex& operator=(ECMAScriptRegex&&);
    ~ECMAScriptRegex();

    /// Runs the regex against `input` beginning at `start_pos` and records
    /// the captures in the internal capture buffer.
    /// When the call succeeds, fetch the results with capture_slot().
    /// NOTE(review): `start_pos` is presumably a UTF-16 code unit offset; confirm.
    [[nodiscard]] MatchResult exec(Utf16View input, size_t start_pos) const;

    /// Returns one slot of the internal capture buffer filled in by exec().
    /// Even-numbered slots hold start positions and odd-numbered slots hold
    /// end positions. A capture that did not match yields -1.
    int capture_slot(unsigned int slot) const;

    /// Checks whether the regex matches, without filling the capture buffer.
    [[nodiscard]] MatchResult test(Utf16View input, size_t start_pos) const;

    /// Count of numbered capture groups, not counting group 0.
    unsigned int capture_count() const;

    /// Count of all capture groups, group 0 included.
    unsigned int total_groups() const;

    /// NOTE(review): presumably reports whether the pattern is a single literal
    /// code point outside the Basic Multilingual Plane; verify in the implementation.
    bool is_single_non_bmp_literal() const;

    /// The named capture groups together with their indices.
    Vector<ECMAScriptNamedCaptureGroup> const& named_groups() const;

    /// Start/end pair describing one match produced by find_all().
    struct MatchPair {
        int start;
        int end;
    };

    /// Finds every non-overlapping match and returns how many were found.
    /// Retrieve the individual matches afterwards with find_all_match(i).
    int find_all(Utf16View input, size_t start_pos) const;

    /// Returns the match at position `index` in the results of find_all().
    MatchPair find_all_match(int index) const;

private:
    // Opaque implementation type; it is only declared here.
    struct Impl;

    // Takes ownership of an already-built implementation object.
    ECMAScriptRegex(OwnPtr<Impl>);

    OwnPtr<Impl> m_impl;
};
} // namespace regex