Merge pull request #76735 from AThousandShips/natural_cmp

Add `naturalcasecmp_to` function to `String`
This commit is contained in:
Rémi Verschelde 2023-05-09 17:44:37 +02:00
commit 5ade250c7d
No known key found for this signature in database
GPG key ID: C3336907360768E1
5 changed files with 143 additions and 59 deletions

View file

@ -812,15 +812,15 @@ signed char String::nocasecmp_to(const String &p_str) const {
const char32_t *this_str = get_data();
while (true) {
if (*that_str == 0 && *this_str == 0) {
return 0; //we're equal
} else if (*this_str == 0) {
return -1; //if this is empty, and the other one is not, then we're less.. I think?
} else if (*that_str == 0) {
return 1; //otherwise the other one is smaller..
} else if (_find_upper(*this_str) < _find_upper(*that_str)) { //more than
if (*that_str == 0 && *this_str == 0) { // If both strings are at the end, they are equal.
return 0;
} else if (*this_str == 0) { // If at the end of this, and not of other, we are less.
return -1;
} else if (_find_upper(*this_str) > _find_upper(*that_str)) { //less than
} else if (*that_str == 0) { // If at end of other, and not of this, we are greater.
return 1;
} else if (_find_upper(*this_str) < _find_upper(*that_str)) { // If current character in this is less, we are less.
return -1;
} else if (_find_upper(*this_str) > _find_upper(*that_str)) { // If current character in this is greater, we are greater.
return 1;
}
@ -844,15 +844,15 @@ signed char String::casecmp_to(const String &p_str) const {
const char32_t *this_str = get_data();
while (true) {
if (*that_str == 0 && *this_str == 0) {
return 0; //we're equal
} else if (*this_str == 0) {
return -1; //if this is empty, and the other one is not, then we're less.. I think?
} else if (*that_str == 0) {
return 1; //otherwise the other one is smaller..
} else if (*this_str < *that_str) { //more than
if (*that_str == 0 && *this_str == 0) { // If both strings are at the end, they are equal.
return 0;
} else if (*this_str == 0) { // If at the end of this, and not of other, we are less.
return -1;
} else if (*this_str > *that_str) { //less than
} else if (*that_str == 0) { // If at end of other, and not of this, we are greater.
return 1;
} else if (*this_str < *that_str) { // If current character in this is less, we are less.
return -1;
} else if (*this_str > *that_str) { // If current character in this is greater, we are greater.
return 1;
}
@ -861,6 +861,100 @@ signed char String::casecmp_to(const String &p_str) const {
}
}
// Compares the digit runs that start at r_this_str and r_that_str.
//
// Both cursors are passed by reference and are always advanced past their
// respective digit run, whatever the outcome. Leading '0' characters are
// ignored. Returns -1 if this run orders before the other, 1 if it orders
// after, and 0 if they match once leading zeros are dropped.
static _FORCE_INLINE_ signed char natural_cmp_common(const char32_t *&r_this_str, const char32_t *&r_that_str) {
	// Remember where each run begins before the cursors move.
	const char32_t *lhs_digits = r_this_str;
	const char32_t *rhs_digits = r_that_str;

	// Move the caller's cursors to the first non-digit character.
	for (; is_digit(*r_this_str); r_this_str++) {
	}
	for (; is_digit(*r_that_str); r_that_str++) {
	}

	// Drop leading zeros so that only significant digits are measured.
	for (; *lhs_digits == '0'; lhs_digits++) {
	}
	for (; *rhs_digits == '0'; rhs_digits++) {
	}

	// With leading zeros removed, the longer run is the larger one.
	const int lhs_len = r_this_str - lhs_digits;
	const int rhs_len = r_that_str - rhs_digits;
	if (lhs_len != rhs_len) {
		return lhs_len < rhs_len ? -1 : 1;
	}

	// Same number of significant digits: the first differing digit decides.
	for (; lhs_digits != r_this_str && rhs_digits != r_that_str; lhs_digits++, rhs_digits++) {
		if (*lhs_digits != *rhs_digits) {
			return *lhs_digits < *rhs_digits ? -1 : 1;
		}
	}

	return 0;
}
// Case-sensitive natural-order comparison of this string against p_str.
//
// Returns -1 if this string orders before p_str, 1 if it orders after, and 0
// if they compare equal. Rules, in the order they are applied:
//  - Leading '.' characters are handled first: a string that starts with a
//    dot orders before one that does not, and while both keep supplying dots
//    the one that ends first right after a dot orders first.
//  - Digit runs are compared through natural_cmp_common(), which ignores
//    leading zeros.
//  - A digit orders before a non-digit character.
//  - Any other pair of characters is compared by raw code point value, with
//    no case folding.
//  - If one string is a prefix of the other, the shorter one orders first.
signed char String::naturalcasecmp_to(const String &p_str) const {
	const char32_t *this_str = get_data();
	const char32_t *that_str = p_str.get_data();

	if (this_str && that_str) {
		// Leading dots get special treatment so dot-prefixed names sort first.
		while (*this_str == '.' || *that_str == '.') {
			if (*this_str++ != '.') {
				return 1;
			}
			if (*that_str++ != '.') {
				return -1;
			}
			if (!*that_str) {
				// The other string ended right after its dot. If this one ended
				// too, the strings are identical (e.g. "." vs ".") and must
				// compare equal; previously this returned 1 unconditionally.
				return *this_str ? 1 : 0;
			}
			if (!*this_str) {
				return -1;
			}
		}

		while (*this_str) {
			if (!*that_str) {
				// The other string is a strict prefix of this one.
				return 1;
			} else if (is_digit(*this_str)) {
				if (!is_digit(*that_str)) {
					// Digits order before non-digits.
					return -1;
				}

				// Both sides are at a digit run; the helper advances both cursors past it.
				signed char ret = natural_cmp_common(this_str, that_str);
				if (ret) {
					return ret;
				}
			} else if (is_digit(*that_str)) {
				return 1;
			} else {
				if (*this_str < *that_str) { // If current character in this is less, we are less.
					return -1;
				} else if (*this_str > *that_str) { // If current character in this is greater, we are greater.
					return 1;
				}

				this_str++;
				that_str++;
			}
		}

		// This string is exhausted; any remainder in the other makes it greater.
		if (*that_str) {
			return -1;
		}
	}

	return 0;
}
signed char String::naturalnocasecmp_to(const String &p_str) const {
const char32_t *this_str = get_data();
const char32_t *that_str = p_str.get_data();
@ -889,48 +983,16 @@ signed char String::naturalnocasecmp_to(const String &p_str) const {
return -1;
}
// Keep ptrs to start of numerical sequences
const char32_t *this_substr = this_str;
const char32_t *that_substr = that_str;
// Compare lengths of both numerical sequences, ignoring leading zeros
while (is_digit(*this_str)) {
this_str++;
}
while (is_digit(*that_str)) {
that_str++;
}
while (*this_substr == '0') {
this_substr++;
}
while (*that_substr == '0') {
that_substr++;
}
int this_len = this_str - this_substr;
int that_len = that_str - that_substr;
if (this_len < that_len) {
return -1;
} else if (this_len > that_len) {
return 1;
}
// If lengths equal, compare lexicographically
while (this_substr != this_str && that_substr != that_str) {
if (*this_substr < *that_substr) {
return -1;
} else if (*this_substr > *that_substr) {
return 1;
}
this_substr++;
that_substr++;
signed char ret = natural_cmp_common(this_str, that_str);
if (ret) {
return ret;
}
} else if (is_digit(*that_str)) {
return 1;
} else {
if (_find_upper(*this_str) < _find_upper(*that_str)) { //more than
if (_find_upper(*this_str) < _find_upper(*that_str)) { // If current character in this is less, we are less.
return -1;
} else if (_find_upper(*this_str) > _find_upper(*that_str)) { //less than
} else if (_find_upper(*this_str) > _find_upper(*that_str)) { // If current character in this is greater, we are greater.
return 1;
}

View file

@ -262,6 +262,7 @@ public:
signed char casecmp_to(const String &p_str) const;
signed char nocasecmp_to(const String &p_str) const;
signed char naturalcasecmp_to(const String &p_str) const;
signed char naturalnocasecmp_to(const String &p_str) const;
const char32_t *get_data() const;