Move plural rules logic into a separate class

- Extracts plural rules logic in `TranslationPO` into a new `PluralRules` class.
- Replaces the cache of the last used plural index in `TranslationPO` with an LRU cache in `PluralRules`.
- Adds tests for `PluralRules`.
This commit is contained in:
Haoyu Qiu 2025-07-21 17:14:37 +08:00
parent 1f7630f1bf
commit ebb96e2303
5 changed files with 311 additions and 150 deletions

View file

@ -0,0 +1,167 @@
/**************************************************************************/
/* plural_rules.cpp */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#include "plural_rules.h"
#include "core/math/expression.h"
int PluralRules::_eq_test(const Array &p_input_val, const Ref<EQNode> &p_node, const Variant &p_result) const {
if (p_node.is_null()) {
return p_result;
}
static const Vector<String> input_name = { "n" };
Error err = expr->parse(p_node->regex, input_name);
ERR_FAIL_COND_V_MSG(err != OK, 0, vformat("Cannot parse expression \"%s\". Error: %s", p_node->regex, expr->get_error_text()));
Variant result = expr->execute(p_input_val);
ERR_FAIL_COND_V_MSG(expr->has_execute_failed(), 0, vformat("Cannot evaluate expression \"%s\".", p_node->regex));
if (bool(result)) {
return _eq_test(p_input_val, p_node->left, result);
} else {
return _eq_test(p_input_val, p_node->right, result);
}
}
// Returns the index of the first occurrence of `p_chr` in `p_src` that is not enclosed
// in parentheses, or -1 when there is no such occurrence (including when `p_src` is empty).
//
// Parentheses are tracked with a nesting depth instead of a single flag, so a nested
// group such as the leading part of "((a) ? b : c) ? d : e" is skipped as a whole and
// the search only resumes after the parenthesis that closes the outermost group.
// An opening parenthesis at depth 0 always starts a group, hence '(' itself is never
// reported as a match.
int PluralRules::_find_unquoted(const String &p_src, char32_t p_chr) const {
	const int len = p_src.length();
	if (len == 0) {
		return -1;
	}

	const char32_t *src = p_src.get_data();
	int depth = 0; // Number of currently open parentheses.
	for (int i = 0; i < len; i++) {
		const char32_t c = src[i];
		if (depth > 0) {
			// Inside a group: only parentheses matter.
			if (c == '(') {
				depth++;
			} else if (c == ')') {
				depth--;
			}
		} else if (c == '(') {
			depth = 1;
		} else if (c == p_chr) {
			return i;
		}
	}

	return -1;
}
void PluralRules::_cache_plural_tests(const String &p_plural_rule, Ref<EQNode> &p_node) {
// Some examples of p_plural_rule passed in can have the form:
// "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5" (Arabic)
// "n >= 2" (French) // When evaluating the last, especially careful with this one.
// "n != 1" (English)
String rule = p_plural_rule;
if (rule.begins_with("(") && rule.ends_with(")")) {
int bcount = 0;
for (int i = 1; i < rule.length() - 1 && bcount >= 0; i++) {
if (rule[i] == '(') {
bcount++;
} else if (rule[i] == ')') {
bcount--;
}
}
if (bcount == 0) {
rule = rule.substr(1, rule.length() - 2);
}
}
int first_ques_mark = _find_unquoted(rule, '?');
int first_colon = _find_unquoted(rule, ':');
if (first_ques_mark == -1) {
p_node->regex = rule.strip_edges();
return;
}
p_node->regex = rule.substr(0, first_ques_mark).strip_edges();
p_node->left.instantiate();
_cache_plural_tests(rule.substr(first_ques_mark + 1, first_colon - first_ques_mark - 1).strip_edges(), p_node->left);
p_node->right.instantiate();
_cache_plural_tests(rule.substr(first_colon + 1).strip_edges(), p_node->right);
}
int PluralRules::evaluate(int p_n) const {
const int *cached = cache.getptr(p_n);
if (cached) {
return *cached;
}
const Array &input_val = { p_n };
int index = _eq_test(input_val, equi_tests, 0);
cache.insert(p_n, index);
return index;
}
// Stores the number of plural forms and the plural expression, creates the expression
// evaluator, and builds the condition tree for the expression up front.
PluralRules::PluralRules(int p_nplurals, const String &p_plural) :
		nplurals(p_nplurals),
		plural(p_plural) {
	expr.instantiate();

	equi_tests.instantiate();
	_cache_plural_tests(plural, equi_tests);
}
// Parses a "Plural-Forms" value and returns a newly allocated `PluralRules`, or nullptr
// (after reporting an error) when the string is malformed.
//
// `p_rules` should be in the format "nplurals=<N>; plural=<Expression>;".
// The key names are not validated: the text between the first '=' and the following ';'
// is taken as <N>, and the text after the next '=' is taken as <Expression>.
// A missing semicolon after <Expression> is tolerated with a warning.
// The returned object is allocated with `memnew()`.
PluralRules *PluralRules::parse(const String &p_rules) {
	const int nplurals_eq = p_rules.find_char('=');
	ERR_FAIL_COND_V_MSG(nplurals_eq == -1, nullptr, "Invalid plural rules format. Missing equal sign for `nplurals`.");

	const int nplurals_semi_col = p_rules.find_char(';', nplurals_eq);
	ERR_FAIL_COND_V_MSG(nplurals_semi_col == -1, nullptr, "Invalid plural rules format. Missing semicolon for `nplurals`.");

	const String nplurals_str = p_rules.substr(nplurals_eq + 1, nplurals_semi_col - (nplurals_eq + 1)).strip_edges();
	ERR_FAIL_COND_V_MSG(!nplurals_str.is_valid_int(), nullptr, "Invalid plural rules format. `nplurals` should be an integer.");

	const int nplurals = nplurals_str.to_int();
	ERR_FAIL_COND_V_MSG(nplurals < 1, nullptr, "Invalid plural rules format. `nplurals` should be at least 1.");

	const int expression_eq = p_rules.find_char('=', nplurals_semi_col + 1);
	ERR_FAIL_COND_V_MSG(expression_eq == -1, nullptr, "Invalid plural rules format. Missing equal sign for `plural`.");
	const int expression_start = expression_eq + 1;

	// The last ';' of the whole string is at least the one that terminates `nplurals`,
	// so a plain "not found" check can never detect a missing terminator here. The
	// expression is unterminated when the last ';' lies before the expression itself.
	int expression_end = p_rules.rfind_char(';');
	if (expression_end < expression_start) {
		WARN_PRINT("Invalid plural rules format. Missing semicolon at the end of `plural` expression. Assuming ends at the end of the string.");
		expression_end = p_rules.length();
	}
	ERR_FAIL_COND_V_MSG(expression_end <= expression_start, nullptr, "Invalid plural rules format. `plural` expression is empty.");

	const String plural = p_rules.substr(expression_start, expression_end - expression_start).strip_edges();
	// An expression made only of whitespace is empty as well.
	ERR_FAIL_COND_V_MSG(plural.is_empty(), nullptr, "Invalid plural rules format. `plural` expression is empty.");

	return memnew(PluralRules(nplurals, plural));
}

View file

@ -0,0 +1,72 @@
/**************************************************************************/
/* plural_rules.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#include "core/object/ref_counted.h"
#include "core/templates/lru.h"
class Expression;
// Holds a plural rule of the form "nplurals=<N>; plural=<Expression>;" and evaluates
// the expression for a given number to obtain a plural form index.
// Instances are created through the static `parse()`; the constructor is private.
class PluralRules : public Object {
GDSOFTCLASS(PluralRules, Object);
// Memoizes the results of `evaluate()`, keyed by the input number.
// Declared `mutable` so the const `evaluate()` can update it.
mutable LRUCache<int, int> cache;
// These two fields are initialized in the constructor.
// `nplurals` is the declared number of plural forms, `plural` the expression text.
const int nplurals;
const String plural;
// Cache temporary variables related to `evaluate()` to make it faster.
// One node of the condition tree built from `plural`. `regex` holds the text of an
// expression (it is handed to `Expression::parse()`, it is not a regular expression).
// `left` is followed when that expression evaluates to true, `right` otherwise.
class EQNode : public RefCounted {
GDSOFTCLASS(EQNode, RefCounted);
public:
String regex;
Ref<EQNode> left;
Ref<EQNode> right;
};
// Root of the condition tree, built once in the constructor.
Ref<EQNode> equi_tests;
// Evaluator used to parse and execute the expression of each visited tree node.
Ref<Expression> expr;
// Returns the index of the first `p_chr` in `p_src` found outside parentheses, or -1.
int _find_unquoted(const String &p_src, char32_t p_chr) const;
// Evaluates the tree rooted at `p_node` for the input in `p_input_val`;
// `p_result` is returned when `p_node` is null.
int _eq_test(const Array &p_input_val, const Ref<EQNode> &p_node, const Variant &p_result) const;
// Splits `p_plural_rule` at its first '?' / ':' and fills `p_node` and its children.
void _cache_plural_tests(const String &p_plural_rule, Ref<EQNode> &p_node);
PluralRules(int p_nplurals, const String &p_plural);
public:
// Returns the plural form index for `p_n`.
int evaluate(int p_n) const;
int get_nplurals() const { return nplurals; }
String get_plural() const { return plural; }
// Parses `p_rules` and returns a new instance allocated with `memnew()`,
// or nullptr when the string is malformed.
static PluralRules *parse(const String &p_rules);
};

View file

@ -30,6 +30,8 @@
#include "translation_po.h" #include "translation_po.h"
#include "core/string/plural_rules.h"
#ifdef DEBUG_TRANSLATION_PO #ifdef DEBUG_TRANSLATION_PO
#include "core/io/file_access.h" #include "core/io/file_access.h"
@ -129,112 +131,11 @@ Vector<String> TranslationPO::_get_message_list() const {
return v; return v;
} }
// Returns the plural form index for `p_n` by evaluating the cached condition tree.
// The member array `input_val` is reused as the argument list: it is emptied and then
// filled with `p_n` before the tree rooted at `equi_tests` is evaluated.
int TranslationPO::_get_plural_index(int p_n) const {
// Get a number between [0;number of plural forms).
input_val.clear();
input_val.push_back(p_n);
// 0 is the value returned if `equi_tests` is null.
return _eq_test(equi_tests, 0);
}
// Recursively evaluates the condition tree rooted at `p_node` against the member
// `input_val` and returns the resulting plural index.
// For a valid node its expression is parsed and executed; a truthy result recurses into
// `left`, a falsy one into `right`, passing the result along. A null node ends the
// recursion and returns `p_result` converted to `int`.
// A parse or execution failure reports an error and returns 0.
int TranslationPO::_eq_test(const Ref<EQNode> &p_node, const Variant &p_result) const {
if (p_node.is_valid()) {
Error err = expr->parse(p_node->regex, input_name);
ERR_FAIL_COND_V_MSG(err != OK, 0, vformat("Cannot parse expression \"%s\". Error: %s", p_node->regex, expr->get_error_text()));
Variant result = expr->execute(input_val);
ERR_FAIL_COND_V_MSG(expr->has_execute_failed(), 0, vformat("Cannot evaluate expression \"%s\".", p_node->regex));
if (bool(result)) {
return _eq_test(p_node->left, result);
} else {
return _eq_test(p_node->right, result);
}
} else {
// Reached past a leaf: the value computed by the parent is the answer.
return p_result;
}
}
// Returns the index of the first `p_chr` in `p_src` that is found while not inside a
// parenthesized section, or -1 if there is none (or `p_src` is empty).
// NOTE(review): `in_quote` is a flag, not a nesting depth, so with nested parentheses
// the section is considered closed at the first ')'.
int TranslationPO::_find_unquoted(const String &p_src, char32_t p_chr) const {
const int len = p_src.length();
if (len == 0) {
return -1;
}
const char32_t *src = p_src.get_data();
bool in_quote = false;
for (int i = 0; i < len; i++) {
if (in_quote) {
// Inside parentheses: only look for the closing one.
if (src[i] == ')') {
in_quote = false;
}
} else {
// '(' is checked first, so it always opens a section and is never reported as a match.
if (src[i] == '(') {
in_quote = true;
} else if (src[i] == p_chr) {
return i;
}
}
}
return -1;
}
// Recursively splits `p_plural_rule` into a tree of conditions rooted at `p_node`.
// For "<cond> ? <a> : <b>" the node keeps <cond> in `regex` and <a> / <b> are parsed
// into newly instantiated `left` / `right` children. A rule without a '?' outside
// parentheses becomes a leaf holding the whole (trimmed) rule.
void TranslationPO::_cache_plural_tests(const String &p_plural_rule, Ref<EQNode> &p_node) {
// Some examples of p_plural_rule passed in can have the form:
// "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5" (Arabic)
// "n >= 2" (French) // When evaluating the last, especially careful with this one.
// "n != 1" (English)
String rule = p_plural_rule;
// Remove one pair of enclosing parentheses when the leading '(' pairs with the trailing ')'.
// `bcount` going negative stops the scan: the leading '(' was closed before the end.
if (rule.begins_with("(") && rule.ends_with(")")) {
int bcount = 0;
for (int i = 1; i < rule.length() - 1 && bcount >= 0; i++) {
if (rule[i] == '(') {
bcount++;
} else if (rule[i] == ')') {
bcount--;
}
}
if (bcount == 0) {
rule = rule.substr(1, rule.length() - 2);
}
}
// Both searches start from the beginning of the rule and are independent of each other.
// NOTE(review): when a '?' exists but no ':' does, `first_colon` is -1 and the second
// recursive call below receives `rule.substr(0)` - presumably the same rule again; verify.
int first_ques_mark = _find_unquoted(rule, '?');
int first_colon = _find_unquoted(rule, ':');
if (first_ques_mark == -1) {
p_node->regex = rule.strip_edges();
return;
}
p_node->regex = rule.substr(0, first_ques_mark).strip_edges();
p_node->left.instantiate();
_cache_plural_tests(rule.substr(first_ques_mark + 1, first_colon - first_ques_mark - 1).strip_edges(), p_node->left);
p_node->right.instantiate();
_cache_plural_tests(rule.substr(first_colon + 1).strip_edges(), p_node->right);
}
void TranslationPO::set_plural_rule(const String &p_plural_rule) { void TranslationPO::set_plural_rule(const String &p_plural_rule) {
// Set plural_forms and plural_rule. if (plural_rules) {
// p_plural_rule passed in has the form "Plural-Forms: nplurals=2; plural=(n >= 2);". memdelete(plural_rules);
}
int first_semi_col = p_plural_rule.find_char(';'); plural_rules = PluralRules::parse(p_plural_rule);
plural_forms = p_plural_rule.substr(p_plural_rule.find_char('=') + 1, first_semi_col - (p_plural_rule.find_char('=') + 1)).to_int();
int expression_start = p_plural_rule.find_char('=', first_semi_col) + 1;
int second_semi_col = p_plural_rule.rfind_char(';');
plural_rule = p_plural_rule.substr(expression_start, second_semi_col - expression_start).strip_edges();
// Setup the cache to make evaluating plural rule faster later on.
equi_tests.instantiate();
_cache_plural_tests(plural_rule, equi_tests);
expr.instantiate();
input_name.push_back("n");
} }
void TranslationPO::add_message(const StringName &p_src_text, const StringName &p_xlated_text, const StringName &p_context) { void TranslationPO::add_message(const StringName &p_src_text, const StringName &p_xlated_text, const StringName &p_context) {
@ -249,7 +150,8 @@ void TranslationPO::add_message(const StringName &p_src_text, const StringName &
} }
void TranslationPO::add_plural_message(const StringName &p_src_text, const Vector<String> &p_plural_xlated_texts, const StringName &p_context) { void TranslationPO::add_plural_message(const StringName &p_src_text, const Vector<String> &p_plural_xlated_texts, const StringName &p_context) {
ERR_FAIL_COND_MSG(p_plural_xlated_texts.size() != plural_forms, vformat("Trying to add plural texts that don't match the required number of plural forms for locale \"%s\".", get_locale())); ERR_FAIL_NULL_MSG(plural_rules, "Plural rules are not set. Please call set_plural_rule() before calling add_plural_message().");
ERR_FAIL_COND_MSG(p_plural_xlated_texts.size() != plural_rules->get_nplurals(), vformat("Trying to add plural texts that don't match the required number of plural forms for locale \"%s\".", get_locale()));
HashMap<StringName, Vector<StringName>> &map_id_str = translation_map[p_context]; HashMap<StringName, Vector<StringName>> &map_id_str = translation_map[p_context];
@ -264,11 +166,11 @@ void TranslationPO::add_plural_message(const StringName &p_src_text, const Vecto
} }
int TranslationPO::get_plural_forms() const { int TranslationPO::get_plural_forms() const {
return plural_forms; return plural_rules ? plural_rules->get_nplurals() : 0;
} }
String TranslationPO::get_plural_rule() const { String TranslationPO::get_plural_rule() const {
return plural_rule; return plural_rules ? plural_rules->get_plural() : String();
} }
StringName TranslationPO::get_message(const StringName &p_src_text, const StringName &p_context) const { StringName TranslationPO::get_message(const StringName &p_src_text, const StringName &p_context) const {
@ -282,27 +184,16 @@ StringName TranslationPO::get_message(const StringName &p_src_text, const String
StringName TranslationPO::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const { StringName TranslationPO::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const {
ERR_FAIL_COND_V_MSG(p_n < 0, StringName(), "N passed into translation to get a plural message should not be negative. For negative numbers, use singular translation please. Search \"gettext PO Plural Forms\" online for the documentation on translating negative numbers."); ERR_FAIL_COND_V_MSG(p_n < 0, StringName(), "N passed into translation to get a plural message should not be negative. For negative numbers, use singular translation please. Search \"gettext PO Plural Forms\" online for the documentation on translating negative numbers.");
ERR_FAIL_NULL_V_MSG(plural_rules, StringName(), "Plural rules are not set. Please call set_plural_rule() before calling get_plural_message().");
// If the query is the same as last time, return the cached result.
if (p_n == last_plural_n && p_context == last_plural_context && p_src_text == last_plural_key) {
return translation_map[p_context][p_src_text][last_plural_mapped_index];
}
if (!translation_map.has(p_context) || !translation_map[p_context].has(p_src_text)) { if (!translation_map.has(p_context) || !translation_map[p_context].has(p_src_text)) {
return StringName(); return StringName();
} }
ERR_FAIL_COND_V_MSG(translation_map[p_context][p_src_text].is_empty(), StringName(), vformat("Source text \"%s\" is registered but doesn't have a translation. Please report this bug.", String(p_src_text))); ERR_FAIL_COND_V_MSG(translation_map[p_context][p_src_text].is_empty(), StringName(), vformat("Source text \"%s\" is registered but doesn't have a translation. Please report this bug.", String(p_src_text)));
int plural_index = _get_plural_index(p_n); int plural_index = plural_rules->evaluate(p_n);
ERR_FAIL_COND_V_MSG(plural_index < 0 || translation_map[p_context][p_src_text].size() < plural_index + 1, StringName(), "Plural index returned or number of plural translations is not valid. Please report this bug."); ERR_FAIL_COND_V_MSG(plural_index < 0 || translation_map[p_context][p_src_text].size() < plural_index + 1, StringName(), "Plural index returned or number of plural translations is not valid. Please report this bug.");
// Cache result so that if the next entry is the same, we can return directly.
// _get_plural_index(p_n) can get very costly, especially when evaluating long plural-rule (Arabic)
last_plural_key = p_src_text;
last_plural_context = p_context;
last_plural_n = p_n;
last_plural_mapped_index = plural_index;
return translation_map[p_context][p_src_text][plural_index]; return translation_map[p_context][p_src_text][plural_index];
} }
@ -343,3 +234,10 @@ void TranslationPO::_bind_methods() {
ClassDB::bind_method(D_METHOD("get_plural_forms"), &TranslationPO::get_plural_forms); ClassDB::bind_method(D_METHOD("get_plural_forms"), &TranslationPO::get_plural_forms);
ClassDB::bind_method(D_METHOD("get_plural_rule"), &TranslationPO::get_plural_rule); ClassDB::bind_method(D_METHOD("get_plural_rule"), &TranslationPO::get_plural_rule);
} }
// Releases the plural rules object held by this translation, if there is one.
TranslationPO::~TranslationPO() {
	if (plural_rules == nullptr) {
		return;
	}

	memdelete(plural_rules);
	plural_rules = nullptr;
}

View file

@ -32,9 +32,10 @@
//#define DEBUG_TRANSLATION_PO //#define DEBUG_TRANSLATION_PO
#include "core/math/expression.h"
#include "core/string/translation.h" #include "core/string/translation.h"
class PluralRules;
class TranslationPO : public Translation { class TranslationPO : public Translation {
GDCLASS(TranslationPO, Translation); GDCLASS(TranslationPO, Translation);
@ -45,33 +46,7 @@ class TranslationPO : public Translation {
// Strings without context have "" as first key. // Strings without context have "" as first key.
HashMap<StringName, HashMap<StringName, Vector<StringName>>> translation_map; HashMap<StringName, HashMap<StringName, Vector<StringName>>> translation_map;
int plural_forms = 0; // 0 means no "Plural-Forms" is given in the PO header file. The min for all languages is 1. PluralRules *plural_rules = nullptr;
String plural_rule;
// Cache temporary variables related to _get_plural_index() to make it faster
class EQNode : public RefCounted {
GDSOFTCLASS(EQNode, RefCounted);
public:
String regex;
Ref<EQNode> left;
Ref<EQNode> right;
};
Ref<EQNode> equi_tests;
int _find_unquoted(const String &p_src, char32_t p_chr) const;
int _eq_test(const Ref<EQNode> &p_node, const Variant &p_result) const;
Vector<String> input_name;
mutable Ref<Expression> expr;
mutable Array input_val;
mutable StringName last_plural_key;
mutable StringName last_plural_context;
mutable int last_plural_n = -1; // Set it to an impossible value at the beginning.
mutable int last_plural_mapped_index = 0;
void _cache_plural_tests(const String &p_plural_rule, Ref<EQNode> &p_node);
int _get_plural_index(int p_n) const;
Vector<String> _get_message_list() const override; Vector<String> _get_message_list() const override;
Dictionary _get_messages() const override; Dictionary _get_messages() const override;
@ -98,5 +73,5 @@ public:
void print_translation_map(); void print_translation_map();
#endif #endif
TranslationPO() {} ~TranslationPO();
}; };

View file

@ -31,6 +31,7 @@
#pragma once #pragma once
#include "core/string/optimized_translation.h" #include "core/string/optimized_translation.h"
#include "core/string/plural_rules.h"
#include "core/string/translation.h" #include "core/string/translation.h"
#include "core/string/translation_po.h" #include "core/string/translation_po.h"
#include "core/string/translation_server.h" #include "core/string/translation_server.h"
@ -129,6 +130,54 @@ TEST_CASE("[TranslationPO] Plural messages") {
CHECK(vformat(translation->get_plural_message("There are %d apples", "", 2), 2) == "Il y a 2 pommes"); CHECK(vformat(translation->get_plural_message("There are %d apples", "", 2), 2) == "Il y a 2 pommes");
} }
TEST_CASE("[TranslationPO] Plural rules parsing") {
	// Malformed rule strings must be rejected. Every input uses the documented key
	// names (`nplurals`, `plural`) so that each case fails only because of the single
	// defect it is meant to exercise. Error printing is silenced while the expected
	// failures are triggered.
	ERR_PRINT_OFF;
	{
		// Empty string.
		CHECK(PluralRules::parse("") == nullptr);
		// Missing `nplurals` part.
		CHECK(PluralRules::parse("plural=(n != 1);") == nullptr);
		// `nplurals` without an equal sign.
		CHECK(PluralRules::parse("nplurals; plural=(n != 1);") == nullptr);
		// `nplurals` without a value.
		CHECK(PluralRules::parse("nplurals=; plural=(n != 1);") == nullptr);
		// `nplurals` below the minimum of 1.
		CHECK(PluralRules::parse("nplurals=0; plural=(n != 1);") == nullptr);
		CHECK(PluralRules::parse("nplurals=-1; plural=(n != 1);") == nullptr);
		// Missing `plural` part.
		CHECK(PluralRules::parse("nplurals=2;") == nullptr);
		// `plural` without an equal sign.
		CHECK(PluralRules::parse("nplurals=2; plural;") == nullptr);
		// `plural` without an expression.
		CHECK(PluralRules::parse("nplurals=2; plural=;") == nullptr);
	}
	ERR_PRINT_ON;

	// Chained conditional expression with three plural forms.
	{
		PluralRules *pr = PluralRules::parse("nplurals=3; plural=(n==0 ? 0 : n==1 ? 1 : 2);");
		REQUIRE(pr != nullptr);

		CHECK(pr->get_nplurals() == 3);
		CHECK(pr->get_plural() == "(n==0 ? 0 : n==1 ? 1 : 2)");

		CHECK(pr->evaluate(0) == 0);
		CHECK(pr->evaluate(1) == 1);
		CHECK(pr->evaluate(2) == 2);
		CHECK(pr->evaluate(3) == 2);

		memdelete(pr);
	}

	// Constant expression: a single plural form for every number.
	{
		PluralRules *pr = PluralRules::parse("nplurals=1; plural=0;");
		REQUIRE(pr != nullptr);

		CHECK(pr->get_nplurals() == 1);
		CHECK(pr->get_plural() == "0");

		CHECK(pr->evaluate(0) == 0);
		CHECK(pr->evaluate(1) == 0);
		CHECK(pr->evaluate(2) == 0);
		CHECK(pr->evaluate(3) == 0);

		memdelete(pr);
	}
}
#ifdef TOOLS_ENABLED #ifdef TOOLS_ENABLED
TEST_CASE("[OptimizedTranslation] Generate from Translation and read messages") { TEST_CASE("[OptimizedTranslation] Generate from Translation and read messages") {
Ref<Translation> translation = memnew(Translation); Ref<Translation> translation = memnew(Translation);