ladybird/Tests/LibUnicode/TestEmoji.cpp
Timothy Flynn f8a0365002 LibUnicode: Detect ZWJ sequences when filtering by emoji presentation
This was preventing some unqualified emoji sequences from rendering
properly, such as the custom SerenityOS flag. We rendered the flag
correctly when given the fully qualified sequence:

    U+1F3F3 U+FE0F U+200D U+1F41E

But were not detecting the unqualified sequence as an emoji when also
filtering for emoji-presentation sequences:

    U+1F3F3 U+200D U+1F41E
2023-03-05 20:21:57 +01:00

72 lines
4.8 KiB
C++
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Array.h>
#include <AK/CharacterTypes.h>
#include <AK/String.h>
#include <AK/Utf8View.h>
#include <LibTest/TestCase.h>
#include <LibUnicode/Emoji.h>
// These emojis are the first subgroup in each Unicode-defined group of emojis, plus some interesting
// hand-picked test cases (such as keycap emoji, which begin with ASCII symbols, and country flags).
static constexpr auto s_smileys_emotion = Array { "๐Ÿ˜€"sv, "๐Ÿ˜ƒ"sv, "๐Ÿ˜„"sv, "๐Ÿ˜"sv, "๐Ÿ˜†"sv, "๐Ÿ˜…"sv, "๐Ÿคฃ"sv, "๐Ÿ˜‚"sv, "๐Ÿ™‚"sv, "๐Ÿ™ƒ"sv, "๐Ÿซ "sv, "๐Ÿ˜‰"sv, "๐Ÿ˜Š"sv, "๐Ÿ˜‡"sv };
static constexpr auto s_people_body = Array { "๐Ÿ‘‹"sv, "๐Ÿคš"sv, "๐Ÿ–๏ธ"sv, "๐Ÿ–"sv, "โœ‹"sv, "๐Ÿซฑ"sv, "๐Ÿซฒ"sv, "๐Ÿซณ"sv, "๐Ÿซด"sv, "๐Ÿซท"sv, "๐Ÿซธ"sv };
static constexpr auto s_animals_nature = Array { "๐Ÿถ"sv, "๐Ÿ•"sv, "๐Ÿ•โ€๐Ÿฆบ"sv, "๐Ÿฉ"sv, "๐ŸฆŠ"sv, "๐Ÿฆ"sv, "๐Ÿฑ"sv, "๐Ÿˆ"sv, "๐Ÿˆโ€โฌ›"sv, "๐Ÿฆ"sv, "๐Ÿฏ"sv, "๐Ÿด"sv, "๐ŸซŽ"sv, "๐Ÿซ"sv, "๐ŸŽ"sv, "๐Ÿฆ„"sv, "๐Ÿฆ“"sv, "๐ŸฆŒ"sv, "๐Ÿฆฌ"sv, "๐Ÿฎ"sv, "๐Ÿท"sv, "๐Ÿ–"sv, "๐Ÿ—"sv, "๐Ÿฝ"sv, "๐Ÿ‘"sv, "๐Ÿฆ™"sv, "๐Ÿฆ’"sv, "๐Ÿ˜"sv, "๐Ÿญ"sv, "๐Ÿ"sv, "๐Ÿ€"sv, "๐Ÿฐ"sv, "๐Ÿ‡"sv, "๐Ÿฟ๏ธ"sv, "๐Ÿฟ"sv, "๐Ÿฆ”"sv, "๐Ÿฆ‡"sv, "๐Ÿป"sv, "๐Ÿปโ€โ„๏ธ"sv, "๐Ÿปโ€โ„"sv, "๐Ÿจ"sv, "๐Ÿผ"sv, "๐Ÿฆฅ"sv, "๐Ÿฆ˜"sv, "๐Ÿฆก"sv, "๐Ÿพ"sv };
static constexpr auto s_food_drink = Array { "๐Ÿ‡"sv, "๐Ÿˆ"sv, "๐Ÿ‰"sv, "๐ŸŠ"sv, "๐Ÿ‹"sv, "๐ŸŒ"sv, "๐Ÿ"sv, "๐Ÿฅญ"sv, "๐ŸŽ"sv, "๐Ÿ"sv, "๐Ÿ"sv, "๐Ÿ‘"sv, "๐Ÿ’"sv, "๐Ÿ“"sv, "๐Ÿซ"sv, "๐Ÿฅ"sv, "๐Ÿ…"sv, "๐Ÿซ’"sv, "๐Ÿฅฅ"sv };
static constexpr auto s_travel_places = Array { "๐ŸŒ"sv, "๐ŸŒŽ"sv, "๐ŸŒ"sv, "๐ŸŒ"sv, "๐Ÿ—บ๏ธ"sv, "๐Ÿ—บ"sv, "๐Ÿ—พ"sv, "๐Ÿงญ"sv };
static constexpr auto s_activities = Array { "๐ŸŽƒ"sv, "๐ŸŽ„"sv, "๐ŸŽ†"sv, "๐ŸŽ‡"sv, "๐Ÿงจ"sv, "โœจ"sv, "๐ŸŽˆ"sv, "๐ŸŽ‰"sv, "๐ŸŽŠ"sv, "๐ŸŽ‹"sv, "๐ŸŽ"sv, "๐ŸŽ"sv, "๐ŸŽ‘"sv, "๐ŸŽ€"sv, "๐ŸŽ"sv, "๐ŸŽ—๏ธ"sv, "๐ŸŽ—"sv, "๐ŸŽŸ๏ธ"sv, "๐ŸŽŸ"sv, "๐ŸŽซ"sv };
static constexpr auto s_objects = Array { "๐Ÿ‘“"sv, "๐Ÿ•ถ๏ธ"sv, "๐Ÿ•ถ"sv, "๐Ÿฆบ"sv, "๐Ÿ‘”"sv, "๐Ÿ‘–"sv, "๐Ÿงฆ"sv, "๐Ÿ‘—"sv, "๐Ÿฅป"sv, "๐Ÿฉฑ"sv, "๐Ÿฉฒ"sv, "๐Ÿฉณ"sv, "๐Ÿ‘™"sv, "๐Ÿชญ"sv, "๐Ÿ‘›"sv, "๐Ÿ‘œ"sv, "๐Ÿ›๏ธ"sv, "๐Ÿ›"sv, "๐Ÿฉด"sv, "๐Ÿ‘ก"sv, "๐Ÿ‘ข"sv, "๐Ÿชฎ"sv, "๐Ÿ‘‘"sv, "๐ŸŽฉ"sv, "๐ŸŽ“"sv, "๐Ÿช–"sv, "โ›‘๏ธ"sv, "โ›‘"sv, "๐Ÿ’„"sv, "๐Ÿ’"sv, "๐Ÿ’Ž"sv };
static constexpr auto s_symbols = Array { "๐Ÿšฎ"sv, "๐Ÿšฐ"sv, "โ™ฟ"sv, "๐Ÿšน"sv, "๐Ÿšบ"sv, "๐Ÿšพ"sv, "๐Ÿ›‚"sv, "๐Ÿ›ƒ"sv, "๐Ÿ›„"sv, "๐Ÿ›…"sv, "#๏ธโƒฃ"sv, "#โƒฃ"sv, "*๏ธโƒฃ"sv, "*โƒฃ"sv, "0๏ธโƒฃ"sv, "0โƒฃ"sv, "1๏ธโƒฃ"sv, "1โƒฃ"sv, "2๏ธโƒฃ"sv, "2โƒฃ"sv, "3๏ธโƒฃ"sv, "3โƒฃ"sv, "4๏ธโƒฃ"sv, "4โƒฃ"sv, "5๏ธโƒฃ"sv, "5โƒฃ"sv, "6๏ธโƒฃ"sv, "6โƒฃ"sv, "7๏ธโƒฃ"sv, "7โƒฃ"sv, "8๏ธโƒฃ"sv, "8โƒฃ"sv, "9๏ธโƒฃ"sv, "9โƒฃ"sv, "๐Ÿ”Ÿ"sv };
static constexpr auto s_flags = Array { "๐Ÿ"sv, "๐Ÿšฉ"sv, "๐ŸŽŒ"sv, "๐Ÿด"sv, "๐Ÿณ๏ธ"sv, "๐Ÿณ"sv, "๐Ÿณ๏ธโ€๐ŸŒˆ"sv, "๐Ÿณโ€๐ŸŒˆ"sv, "๐Ÿณ๏ธโ€โšง๏ธ"sv, "๐Ÿณโ€โšง๏ธ"sv, "๐Ÿณ๏ธโ€โšง"sv, "๐Ÿณโ€โšง"sv, "๐Ÿดโ€โ˜ ๏ธ"sv, "๐Ÿดโ€โ˜ "sv, "๐Ÿ‡ฆ๐Ÿ‡จ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฉ"sv, "๐Ÿ‡ฆ๐Ÿ‡ช"sv, "๐Ÿ‡ฆ๐Ÿ‡ซ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฌ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฎ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฑ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฒ"sv, "๐Ÿ‡ฆ๐Ÿ‡ด"sv, "๐Ÿ‡ฆ๐Ÿ‡ถ"sv, "๐Ÿ‡ฆ๐Ÿ‡ท"sv, "๐Ÿ‡ฆ๐Ÿ‡ธ"sv, "๐Ÿ‡ฆ๐Ÿ‡น"sv, "๐Ÿ‡ฆ๐Ÿ‡บ"sv, "๐Ÿ‡ฆ๐Ÿ‡ผ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฝ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฟ"sv, "๐Ÿ‡ง๐Ÿ‡ฆ"sv, "๐Ÿ‡ง๐Ÿ‡ง"sv, "๐Ÿ‡ง๐Ÿ‡ฉ"sv, "๐Ÿ‡ง๐Ÿ‡ช"sv, "๐Ÿ‡ง๐Ÿ‡ซ"sv, "๐Ÿ‡ง๐Ÿ‡ฌ"sv, "๐Ÿ‡ง๐Ÿ‡ญ"sv, "๐Ÿ‡ง๐Ÿ‡ฎ"sv, "๐Ÿ‡ง๐Ÿ‡ฏ"sv, "๐Ÿ‡ง๐Ÿ‡ฑ"sv, "๐Ÿ‡ง๐Ÿ‡ฒ"sv, "๐Ÿ‡ง๐Ÿ‡ณ"sv, "๐Ÿ‡ง๐Ÿ‡ด"sv, "๐Ÿ‡ง๐Ÿ‡ถ"sv, "๐Ÿ‡ง๐Ÿ‡ท"sv, "๐Ÿ‡ง๐Ÿ‡ธ"sv };
TEST_CASE(emoji)
{
    // Every string in `group` must be accepted as the potential start of an emoji sequence.
    auto expect_group_starts_emoji = [](auto const& group) {
        for (auto sequence : group) {
            Utf8View view { sequence };
            EXPECT(Unicode::could_be_start_of_emoji_sequence(view.begin()));
        }
    };

    // Applies the per-group check to each table in turn; the comma fold visits them left to right.
    auto expect_groups_start_emoji = [&](auto const&... groups) {
        (expect_group_starts_emoji(groups), ...);
    };

    expect_groups_start_emoji(
        s_smileys_emotion,
        s_people_body,
        s_animals_nature,
        s_food_drink,
        s_travel_places,
        s_activities,
        s_objects,
        s_symbols,
        s_flags);
}
TEST_CASE(emoji_presentation_only)
{
    // Asks whether `emoji` could start an emoji sequence when the lookup is restricted to
    // Unicode::SequenceType::EmojiPresentation, and compares the answer to `expected_result`.
    auto test_emoji = [](auto emoji, auto expected_result) {
        Utf8View view { emoji };
        auto is_start_of_emoji_sequence = Unicode::could_be_start_of_emoji_sequence(view.begin(), Unicode::SequenceType::EmojiPresentation);
        EXPECT_EQ(is_start_of_emoji_sequence, expected_result);
    };

    // The code points are spelled as escapes so that the exact sequence under test (in particular
    // the invisible U+FE0F VARIATION SELECTOR-16) is explicit and cannot be corrupted by an
    // encoding round-trip of this file.
    test_emoji("\u00A9\uFE0F"sv, true); // COPYRIGHT SIGN followed by U+FE0F
    test_emoji("\u00A9"sv, false);      // COPYRIGHT SIGN on its own
    test_emoji("\u00AE\uFE0F"sv, true); // REGISTERED SIGN followed by U+FE0F
    test_emoji("\u00AE"sv, false);      // REGISTERED SIGN on its own

    test_emoji("\U0001F3F3\u200D\U0001F41E"sv, true);       // SerenityOS flag, without U+FE0F
    test_emoji("\U0001F3F3\uFE0F\u200D\U0001F41E"sv, true); // SerenityOS flag, with U+FE0F
}
TEST_CASE(ascii_is_not_emoji)
{
    // Walk the code points upward from zero for as long as is_ascii() accepts them. A string that
    // holds only that one code point must not be reported as a possible start of an emoji sequence.
    u32 code_point = 0u;

    while (is_ascii(code_point)) {
        auto single_code_point = String::from_code_point(code_point);
        Utf8View view { single_code_point };

        EXPECT(!Unicode::could_be_start_of_emoji_sequence(view.begin()));
        ++code_point;
    }
}