mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-12-08 06:09:58 +00:00
AK+LibJS+LibWeb: Recognize that our UTF-16 string is actually WTF-16
For the web, we allow a wobbly UTF-16 encoding (i.e. lonely surrogates are permitted). Only in a few exceptional cases do we strictly require valid UTF-16. As such, our `validate(AllowLonelySurrogates::Yes)` calls will always succeed, so performing such a check is wasted effort. This patch eliminates such invocations. The validation methods will now only check for strict UTF-16, and are only invoked when needed.
This commit is contained in:
parent
36c7302178
commit
8472e469f4
Notes:
github-actions[bot]
2025-08-13 13:57:41 +00:00
Author: https://github.com/trflynn89
Commit: 8472e469f4
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5762
20 changed files with 61 additions and 158 deletions
|
|
@ -237,82 +237,52 @@ TEST_CASE(validate_invalid_utf16)
|
|||
{
|
||||
// Lonely high surrogate.
|
||||
invalid = u"\xd800"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
|
||||
invalid = u"\xdbff"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
}
|
||||
{
|
||||
// Lonely low surrogate.
|
||||
invalid = u"\xdc00"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
|
||||
invalid = u"\xdfff"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
}
|
||||
{
|
||||
// High surrogate followed by non-surrogate.
|
||||
invalid = u"\xd800\x0000"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
invalid = u"\xd800\xe000"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
}
|
||||
{
|
||||
// High surrogate followed by high surrogate.
|
||||
invalid = u"\xd800\xd800"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
invalid = u"\xd800\xdbff"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
}
|
||||
{
|
||||
// Valid UTF-16 followed by invalid code units.
|
||||
invalid = u"\x0041\x0041\xd800"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 3uz);
|
||||
|
||||
invalid = u"\x0041\x0041\xd800"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 3uz);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue