From ebb96e2303c95e63458ad68fa350954fb2cf80c7 Mon Sep 17 00:00:00 2001 From: Haoyu Qiu Date: Mon, 21 Jul 2025 17:14:37 +0800 Subject: [PATCH] Move plural rules logic into a separate class - Extracts plural rules logic in `TranslationPO` into a new `PluralRules` class. - Changes caching the last used plural index in `TranslationPO` into an LRU cache in `PluralRules`. - Adds tests for `PluralRules`. --- core/string/plural_rules.cpp | 167 +++++++++++++++++++++++++++ core/string/plural_rules.h | 72 ++++++++++++ core/string/translation_po.cpp | 140 +++------------------- core/string/translation_po.h | 33 +----- tests/core/string/test_translation.h | 49 ++++++++ 5 files changed, 311 insertions(+), 150 deletions(-) create mode 100644 core/string/plural_rules.cpp create mode 100644 core/string/plural_rules.h diff --git a/core/string/plural_rules.cpp b/core/string/plural_rules.cpp new file mode 100644 index 00000000000..ccdb5cd1e61 --- /dev/null +++ b/core/string/plural_rules.cpp @@ -0,0 +1,167 @@ +/**************************************************************************/ +/* plural_rules.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "plural_rules.h" + +#include "core/math/expression.h" + +int PluralRules::_eq_test(const Array &p_input_val, const Ref &p_node, const Variant &p_result) const { + if (p_node.is_null()) { + return p_result; + } + + static const Vector input_name = { "n" }; + + Error err = expr->parse(p_node->regex, input_name); + ERR_FAIL_COND_V_MSG(err != OK, 0, vformat("Cannot parse expression \"%s\". 
Error: %s", p_node->regex, expr->get_error_text())); + + Variant result = expr->execute(p_input_val); + ERR_FAIL_COND_V_MSG(expr->has_execute_failed(), 0, vformat("Cannot evaluate expression \"%s\".", p_node->regex)); + + if (bool(result)) { + return _eq_test(p_input_val, p_node->left, result); + } else { + return _eq_test(p_input_val, p_node->right, result); + } +} + +int PluralRules::_find_unquoted(const String &p_src, char32_t p_chr) const { + const int len = p_src.length(); + if (len == 0) { + return -1; + } + + const char32_t *src = p_src.get_data(); + bool in_quote = false; + for (int i = 0; i < len; i++) { + if (in_quote) { + if (src[i] == ')') { + in_quote = false; + } + } else { + if (src[i] == '(') { + in_quote = true; + } else if (src[i] == p_chr) { + return i; + } + } + } + + return -1; +} + +void PluralRules::_cache_plural_tests(const String &p_plural_rule, Ref &p_node) { + // Some examples of p_plural_rule passed in can have the form: + // "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5" (Arabic) + // "n >= 2" (French) // When evaluating the last, especially careful with this one. 
+ // "n != 1" (English) + + String rule = p_plural_rule; + if (rule.begins_with("(") && rule.ends_with(")")) { + int bcount = 0; + for (int i = 1; i < rule.length() - 1 && bcount >= 0; i++) { + if (rule[i] == '(') { + bcount++; + } else if (rule[i] == ')') { + bcount--; + } + } + if (bcount == 0) { + rule = rule.substr(1, rule.length() - 2); + } + } + + int first_ques_mark = _find_unquoted(rule, '?'); + int first_colon = _find_unquoted(rule, ':'); + + if (first_ques_mark == -1 || first_colon < first_ques_mark) { // No well-formed "cond ? a : b" here. Keep the text as a leaf: with a missing ':' the right branch below would be `rule.substr(0)`, i.e. this same rule again. + p_node->regex = rule.strip_edges(); + return; + } + + p_node->regex = rule.substr(0, first_ques_mark).strip_edges(); + + p_node->left.instantiate(); + _cache_plural_tests(rule.substr(first_ques_mark + 1, first_colon - first_ques_mark - 1).strip_edges(), p_node->left); + p_node->right.instantiate(); + _cache_plural_tests(rule.substr(first_colon + 1).strip_edges(), p_node->right); +} + +int PluralRules::evaluate(int p_n) const { + const int *cached = cache.getptr(p_n); + if (cached) { + return *cached; + } + + const Array &input_val = { p_n }; + int index = _eq_test(input_val, equi_tests, 0); + cache.insert(p_n, index); + return index; +} + +PluralRules::PluralRules(int p_nplurals, const String &p_plural) : + nplurals(p_nplurals), + plural(p_plural) { + equi_tests.instantiate(); + _cache_plural_tests(plural, equi_tests); + + expr.instantiate(); +} + +PluralRules *PluralRules::parse(const String &p_rules) { + // `p_rules` should be in the format "nplurals=<N>; plural=<expression>;". + + const int nplurals_eq = p_rules.find_char('='); + ERR_FAIL_COND_V_MSG(nplurals_eq == -1, nullptr, "Invalid plural rules format. Missing equal sign for `nplurals`."); + + const int nplurals_semi_col = p_rules.find_char(';', nplurals_eq); + ERR_FAIL_COND_V_MSG(nplurals_semi_col == -1, nullptr, "Invalid plural rules format. 
Missing semicolon for `nplurals`."); + + const String nplurals_str = p_rules.substr(nplurals_eq + 1, nplurals_semi_col - (nplurals_eq + 1)).strip_edges(); + ERR_FAIL_COND_V_MSG(!nplurals_str.is_valid_int(), nullptr, "Invalid plural rules format. `nplurals` should be an integer."); + + const int nplurals = nplurals_str.to_int(); + ERR_FAIL_COND_V_MSG(nplurals < 1, nullptr, "Invalid plural rules format. `nplurals` should be at least 1."); + + const int expression_eq = p_rules.find_char('=', nplurals_semi_col + 1); + ERR_FAIL_COND_V_MSG(expression_eq == -1, nullptr, "Invalid plural rules format. Missing equal sign for `plural`."); + + int expression_end = p_rules.rfind_char(';'); + if (expression_end < expression_eq) { // The last ';' found may be the one closing `nplurals`; only a ';' after the '=' of `plural` terminates the expression. + WARN_PRINT("Invalid plural rules format. Missing semicolon at the end of `plural` expression. Assuming ends at the end of the string."); + expression_end = p_rules.length(); + } + + const int expression_start = expression_eq + 1; + ERR_FAIL_COND_V_MSG(expression_end <= expression_start, nullptr, "Invalid plural rules format. `plural` expression is empty."); + + const String &plural = p_rules.substr(expression_start, expression_end - expression_start).strip_edges(); + return memnew(PluralRules(nplurals, plural)); +} diff --git a/core/string/plural_rules.h b/core/string/plural_rules.h new file mode 100644 index 00000000000..f549cc3c762 --- /dev/null +++ b/core/string/plural_rules.h @@ -0,0 +1,72 @@ +/**************************************************************************/ +/* plural_rules.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "core/object/ref_counted.h" +#include "core/templates/lru.h" + +class Expression; + +class PluralRules : public Object { + GDSOFTCLASS(PluralRules, Object); + + mutable LRUCache cache; + + // These two fields are initialized in the constructor. + const int nplurals; + const String plural; + + // Cache temporary variables related to `evaluate()` to make it faster. 
+ class EQNode : public RefCounted { + GDSOFTCLASS(EQNode, RefCounted); + + public: + String regex; + Ref left; + Ref right; + }; + Ref equi_tests; + Ref expr; + + int _find_unquoted(const String &p_src, char32_t p_chr) const; + int _eq_test(const Array &p_input_val, const Ref &p_node, const Variant &p_result) const; + void _cache_plural_tests(const String &p_plural_rule, Ref &p_node); + + PluralRules(int p_nplurals, const String &p_plural); + +public: + int evaluate(int p_n) const; + + int get_nplurals() const { return nplurals; } + String get_plural() const { return plural; } + + static PluralRules *parse(const String &p_rules); +}; diff --git a/core/string/translation_po.cpp b/core/string/translation_po.cpp index 8ff9006a181..569efe594cc 100644 --- a/core/string/translation_po.cpp +++ b/core/string/translation_po.cpp @@ -30,6 +30,8 @@ #include "translation_po.h" +#include "core/string/plural_rules.h" + #ifdef DEBUG_TRANSLATION_PO #include "core/io/file_access.h" @@ -129,112 +131,11 @@ Vector TranslationPO::_get_message_list() const { return v; } -int TranslationPO::_get_plural_index(int p_n) const { - // Get a number between [0;number of plural forms). - - input_val.clear(); - input_val.push_back(p_n); - - return _eq_test(equi_tests, 0); -} - -int TranslationPO::_eq_test(const Ref &p_node, const Variant &p_result) const { - if (p_node.is_valid()) { - Error err = expr->parse(p_node->regex, input_name); - ERR_FAIL_COND_V_MSG(err != OK, 0, vformat("Cannot parse expression \"%s\". 
Error: %s", p_node->regex, expr->get_error_text())); - - Variant result = expr->execute(input_val); - ERR_FAIL_COND_V_MSG(expr->has_execute_failed(), 0, vformat("Cannot evaluate expression \"%s\".", p_node->regex)); - - if (bool(result)) { - return _eq_test(p_node->left, result); - } else { - return _eq_test(p_node->right, result); - } - } else { - return p_result; - } -} - -int TranslationPO::_find_unquoted(const String &p_src, char32_t p_chr) const { - const int len = p_src.length(); - if (len == 0) { - return -1; - } - - const char32_t *src = p_src.get_data(); - bool in_quote = false; - for (int i = 0; i < len; i++) { - if (in_quote) { - if (src[i] == ')') { - in_quote = false; - } - } else { - if (src[i] == '(') { - in_quote = true; - } else if (src[i] == p_chr) { - return i; - } - } - } - - return -1; -} - -void TranslationPO::_cache_plural_tests(const String &p_plural_rule, Ref &p_node) { - // Some examples of p_plural_rule passed in can have the form: - // "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5" (Arabic) - // "n >= 2" (French) // When evaluating the last, especially careful with this one. 
- // "n != 1" (English) - - String rule = p_plural_rule; - if (rule.begins_with("(") && rule.ends_with(")")) { - int bcount = 0; - for (int i = 1; i < rule.length() - 1 && bcount >= 0; i++) { - if (rule[i] == '(') { - bcount++; - } else if (rule[i] == ')') { - bcount--; - } - } - if (bcount == 0) { - rule = rule.substr(1, rule.length() - 2); - } - } - - int first_ques_mark = _find_unquoted(rule, '?'); - int first_colon = _find_unquoted(rule, ':'); - - if (first_ques_mark == -1) { - p_node->regex = rule.strip_edges(); - return; - } - - p_node->regex = rule.substr(0, first_ques_mark).strip_edges(); - - p_node->left.instantiate(); - _cache_plural_tests(rule.substr(first_ques_mark + 1, first_colon - first_ques_mark - 1).strip_edges(), p_node->left); - p_node->right.instantiate(); - _cache_plural_tests(rule.substr(first_colon + 1).strip_edges(), p_node->right); -} - void TranslationPO::set_plural_rule(const String &p_plural_rule) { - // Set plural_forms and plural_rule. - // p_plural_rule passed in has the form "Plural-Forms: nplurals=2; plural=(n >= 2);". - - int first_semi_col = p_plural_rule.find_char(';'); - plural_forms = p_plural_rule.substr(p_plural_rule.find_char('=') + 1, first_semi_col - (p_plural_rule.find_char('=') + 1)).to_int(); - - int expression_start = p_plural_rule.find_char('=', first_semi_col) + 1; - int second_semi_col = p_plural_rule.rfind_char(';'); - plural_rule = p_plural_rule.substr(expression_start, second_semi_col - expression_start).strip_edges(); - - // Setup the cache to make evaluating plural rule faster later on. 
- equi_tests.instantiate(); - _cache_plural_tests(plural_rule, equi_tests); - - expr.instantiate(); - input_name.push_back("n"); + if (plural_rules) { + memdelete(plural_rules); + } + plural_rules = PluralRules::parse(p_plural_rule); } void TranslationPO::add_message(const StringName &p_src_text, const StringName &p_xlated_text, const StringName &p_context) { @@ -249,7 +150,8 @@ void TranslationPO::add_message(const StringName &p_src_text, const StringName & } void TranslationPO::add_plural_message(const StringName &p_src_text, const Vector &p_plural_xlated_texts, const StringName &p_context) { - ERR_FAIL_COND_MSG(p_plural_xlated_texts.size() != plural_forms, vformat("Trying to add plural texts that don't match the required number of plural forms for locale \"%s\".", get_locale())); + ERR_FAIL_NULL_MSG(plural_rules, "Plural rules are not set. Please call set_plural_rule() before calling add_plural_message()."); + ERR_FAIL_COND_MSG(p_plural_xlated_texts.size() != plural_rules->get_nplurals(), vformat("Trying to add plural texts that don't match the required number of plural forms for locale \"%s\".", get_locale())); HashMap> &map_id_str = translation_map[p_context]; @@ -264,11 +166,11 @@ void TranslationPO::add_plural_message(const StringName &p_src_text, const Vecto } int TranslationPO::get_plural_forms() const { - return plural_forms; + return plural_rules ? plural_rules->get_nplurals() : 0; } String TranslationPO::get_plural_rule() const { - return plural_rule; + return plural_rules ? 
plural_rules->get_plural() : String(); } StringName TranslationPO::get_message(const StringName &p_src_text, const StringName &p_context) const { @@ -282,27 +184,16 @@ StringName TranslationPO::get_message(const StringName &p_src_text, const String StringName TranslationPO::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const { ERR_FAIL_COND_V_MSG(p_n < 0, StringName(), "N passed into translation to get a plural message should not be negative. For negative numbers, use singular translation please. Search \"gettext PO Plural Forms\" online for the documentation on translating negative numbers."); - - // If the query is the same as last time, return the cached result. - if (p_n == last_plural_n && p_context == last_plural_context && p_src_text == last_plural_key) { - return translation_map[p_context][p_src_text][last_plural_mapped_index]; - } + ERR_FAIL_NULL_V_MSG(plural_rules, StringName(), "Plural rules are not set. Please call set_plural_rule() before calling get_plural_message()."); if (!translation_map.has(p_context) || !translation_map[p_context].has(p_src_text)) { return StringName(); } ERR_FAIL_COND_V_MSG(translation_map[p_context][p_src_text].is_empty(), StringName(), vformat("Source text \"%s\" is registered but doesn't have a translation. Please report this bug.", String(p_src_text))); - int plural_index = _get_plural_index(p_n); + int plural_index = plural_rules->evaluate(p_n); ERR_FAIL_COND_V_MSG(plural_index < 0 || translation_map[p_context][p_src_text].size() < plural_index + 1, StringName(), "Plural index returned or number of plural translations is not valid. Please report this bug."); - // Cache result so that if the next entry is the same, we can return directly. 
- // _get_plural_index(p_n) can get very costly, especially when evaluating long plural-rule (Arabic) - last_plural_key = p_src_text; - last_plural_context = p_context; - last_plural_n = p_n; - last_plural_mapped_index = plural_index; - return translation_map[p_context][p_src_text][plural_index]; } @@ -343,3 +234,10 @@ void TranslationPO::_bind_methods() { ClassDB::bind_method(D_METHOD("get_plural_forms"), &TranslationPO::get_plural_forms); ClassDB::bind_method(D_METHOD("get_plural_rule"), &TranslationPO::get_plural_rule); } + +TranslationPO::~TranslationPO() { + if (plural_rules) { + memdelete(plural_rules); + plural_rules = nullptr; + } +} diff --git a/core/string/translation_po.h b/core/string/translation_po.h index 3f92264a007..ab7d0e7b8f1 100644 --- a/core/string/translation_po.h +++ b/core/string/translation_po.h @@ -32,9 +32,10 @@ //#define DEBUG_TRANSLATION_PO -#include "core/math/expression.h" #include "core/string/translation.h" +class PluralRules; + class TranslationPO : public Translation { GDCLASS(TranslationPO, Translation); @@ -45,33 +46,7 @@ class TranslationPO : public Translation { // Strings without context have "" as first key. HashMap>> translation_map; - int plural_forms = 0; // 0 means no "Plural-Forms" is given in the PO header file. The min for all languages is 1. - String plural_rule; - - // Cache temporary variables related to _get_plural_index() to make it faster - class EQNode : public RefCounted { - GDSOFTCLASS(EQNode, RefCounted); - - public: - String regex; - Ref left; - Ref right; - }; - Ref equi_tests; - - int _find_unquoted(const String &p_src, char32_t p_chr) const; - int _eq_test(const Ref &p_node, const Variant &p_result) const; - - Vector input_name; - mutable Ref expr; - mutable Array input_val; - mutable StringName last_plural_key; - mutable StringName last_plural_context; - mutable int last_plural_n = -1; // Set it to an impossible value at the beginning. 
- mutable int last_plural_mapped_index = 0; - - void _cache_plural_tests(const String &p_plural_rule, Ref &p_node); - int _get_plural_index(int p_n) const; + PluralRules *plural_rules = nullptr; Vector _get_message_list() const override; Dictionary _get_messages() const override; @@ -98,5 +73,5 @@ public: void print_translation_map(); #endif - TranslationPO() {} + ~TranslationPO(); }; diff --git a/tests/core/string/test_translation.h b/tests/core/string/test_translation.h index d01137122d7..cf0a496634b 100644 --- a/tests/core/string/test_translation.h +++ b/tests/core/string/test_translation.h @@ -31,6 +31,7 @@ #pragma once #include "core/string/optimized_translation.h" +#include "core/string/plural_rules.h" #include "core/string/translation.h" #include "core/string/translation_po.h" #include "core/string/translation_server.h" @@ -129,6 +130,54 @@ TEST_CASE("[TranslationPO] Plural messages") { CHECK(vformat(translation->get_plural_message("There are %d apples", "", 2), 2) == "Il y a 2 pommes"); } +TEST_CASE("[TranslationPO] Plural rules parsing") { + ERR_PRINT_OFF; + { + CHECK(PluralRules::parse("") == nullptr); + + CHECK(PluralRules::parse("plurals=(n != 1);") == nullptr); + CHECK(PluralRules::parse("nplurals; plurals=(n != 1);") == nullptr); + CHECK(PluralRules::parse("nplurals=; plurals=(n != 1);") == nullptr); + CHECK(PluralRules::parse("nplurals=0; plurals=(n != 1);") == nullptr); + CHECK(PluralRules::parse("nplurals=-1; plurals=(n != 1);") == nullptr); + + CHECK(PluralRules::parse("nplurals=2;") == nullptr); + CHECK(PluralRules::parse("nplurals=2; plurals;") == nullptr); + CHECK(PluralRules::parse("nplurals=2; plurals=;") == nullptr); + } + ERR_PRINT_ON; + + { + PluralRules *pr = PluralRules::parse("nplurals=3; plural=(n==0 ? 0 : n==1 ? 1 : 2);"); + REQUIRE(pr != nullptr); + + CHECK(pr->get_nplurals() == 3); + CHECK(pr->get_plural() == "(n==0 ? 0 : n==1 ? 
1 : 2)"); + + CHECK(pr->evaluate(0) == 0); + CHECK(pr->evaluate(1) == 1); + CHECK(pr->evaluate(2) == 2); + CHECK(pr->evaluate(3) == 2); + + memdelete(pr); + } + + { + PluralRules *pr = PluralRules::parse("nplurals=1; plural=0;"); + REQUIRE(pr != nullptr); + + CHECK(pr->get_nplurals() == 1); + CHECK(pr->get_plural() == "0"); + + CHECK(pr->evaluate(0) == 0); + CHECK(pr->evaluate(1) == 0); + CHECK(pr->evaluate(2) == 0); + CHECK(pr->evaluate(3) == 0); + + memdelete(pr); + } +} + #ifdef TOOLS_ENABLED TEST_CASE("[OptimizedTranslation] Generate from Translation and read messages") { Ref translation = memnew(Translation);