ICU4C: Update to version 77.1

This commit is contained in:
Pāvels Nadtočajevs 2025-03-13 23:09:36 +02:00
parent 7e4f6bdb59
commit a25696fd90
No known key found for this signature in database
GPG key ID: 8413210218EF35D2
50 changed files with 2086 additions and 1668 deletions

View file

@ -420,13 +420,13 @@ Files extracted from upstream source:
## icu4c ## icu4c
- Upstream: https://github.com/unicode-org/icu - Upstream: https://github.com/unicode-org/icu
- Version: 76.1 (8eca245c7484ac6cc179e3e5f7c1ea7680810f39, 2024) - Version: 77.1 (457157a92aa053e632cc7fcfd0e12f8a943b2d11, 2025)
- License: Unicode - License: Unicode
Files extracted from upstream source: Files extracted from upstream source:
- The `common` folder - The `common` folder
- `scriptset.*`, `ucln_in.*`, `uspoof.cpp"` and `uspoof_impl.cpp` from the `i18n` folder - `scriptset.*`, `ucln_in.*`, `uspoof.cpp` and `uspoof_impl.*` from the `i18n` folder
- `uspoof.h` from the `i18n/unicode` folder - `uspoof.h` from the `i18n/unicode` folder
- `LICENSE` - `LICENSE`

View file

@ -2,7 +2,7 @@ UNICODE LICENSE V3
COPYRIGHT AND PERMISSION NOTICE COPYRIGHT AND PERMISSION NOTICE
Copyright © 2016-2024 Unicode, Inc. Copyright © 2016-2025 Unicode, Inc.
NOTICE TO USER: Carefully read the following legal agreement. BY NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR

View file

@ -59,7 +59,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
{ {
char fnbuff[256]; char fnbuff[256];
char ext[4]={'\0'}; char ext[4]={'\0'};
CharString actualLocale; CharString actual;
int32_t size; int32_t size;
const char16_t* brkfname = nullptr; const char16_t* brkfname = nullptr;
UResourceBundle brkRulesStack; UResourceBundle brkRulesStack;
@ -94,7 +94,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
// Use the string if we found it // Use the string if we found it
if (U_SUCCESS(status) && brkfname) { if (U_SUCCESS(status) && brkfname) {
actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); actual.append(ures_getLocaleInternal(brkName, &status), -1, status);
char16_t* extStart=u_strchr(brkfname, 0x002e); char16_t* extStart=u_strchr(brkfname, 0x002e);
int len = 0; int len = 0;
@ -123,10 +123,9 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
if (U_SUCCESS(status) && result != nullptr) { if (U_SUCCESS(status) && result != nullptr) {
U_LOCALE_BASED(locBased, *(BreakIterator*)result); U_LOCALE_BASED(locBased, *(BreakIterator*)result);
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actualLocale.data()); actual.data(), status);
uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY); LocaleBased::setLocaleID(loc.getName(), result->requestLocale, status);
result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
} }
ures_close(b); ures_close(b);
@ -206,26 +205,32 @@ BreakIterator::getAvailableLocales(int32_t& count)
BreakIterator::BreakIterator() BreakIterator::BreakIterator()
{ {
*validLocale = *actualLocale = *requestLocale = 0;
} }
BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) { BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); UErrorCode status = U_ZERO_ERROR;
uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); U_LOCALE_BASED(locBased, *this);
uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale)); locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
U_ASSERT(U_SUCCESS(status));
} }
BreakIterator &BreakIterator::operator =(const BreakIterator &other) { BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
if (this != &other) { if (this != &other) {
uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); UErrorCode status = U_ZERO_ERROR;
uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); U_LOCALE_BASED(locBased, *this);
uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale)); locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
U_ASSERT(U_SUCCESS(status));
} }
return *this; return *this;
} }
BreakIterator::~BreakIterator() BreakIterator::~BreakIterator()
{ {
delete validLocale;
delete actualLocale;
delete requestLocale;
} }
// ------------------------------------------ // ------------------------------------------
@ -394,7 +399,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
// revisit this in ICU 3.0 and clean it up/fix it/remove it. // revisit this in ICU 3.0 and clean it up/fix it/remove it.
if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) { if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) {
U_LOCALE_BASED(locBased, *result); U_LOCALE_BASED(locBased, *result);
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName(), status);
} }
return result; return result;
} }
@ -488,6 +493,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
} }
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
delete result;
return nullptr; return nullptr;
} }
@ -496,20 +502,25 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
Locale Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
if (type == ULOC_REQUESTED_LOCALE) { if (U_FAILURE(status)) {
return {requestLocale}; return Locale::getRoot();
} }
U_LOCALE_BASED(locBased, *this); if (type == ULOC_REQUESTED_LOCALE) {
return locBased.getLocale(type, status); return requestLocale == nullptr ?
Locale::getRoot() : Locale(requestLocale->data());
}
return LocaleBased::getLocale(validLocale, actualLocale, type, status);
} }
const char * const char *
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
if (type == ULOC_REQUESTED_LOCALE) { if (U_FAILURE(status)) {
return requestLocale; return nullptr;
} }
U_LOCALE_BASED(locBased, *this); if (type == ULOC_REQUESTED_LOCALE) {
return locBased.getLocaleID(type, status); return requestLocale == nullptr ? "" : requestLocale->data();
}
return LocaleBased::getLocaleID(validLocale, actualLocale, type, status);
} }
@ -536,8 +547,10 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE
} }
BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
UErrorCode status = U_ZERO_ERROR;
U_LOCALE_BASED(locBased, (*this)); U_LOCALE_BASED(locBased, (*this));
locBased.setLocaleIDs(valid, actual); locBased.setLocaleIDs(valid.getName(), actual.getName(), status);
U_ASSERT(U_SUCCESS(status));
} }
U_NAMESPACE_END U_NAMESPACE_END

View file

@ -70,6 +70,15 @@ CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
return *this; return *this;
} }
/**
 * Replaces the contents of this string with a copy of the bytes in `s`.
 *
 * @param s          The bytes to copy.
 * @param errorCode  In/out ICU error code. If it already indicates a failure
 *                   on entry, this string is returned unchanged; otherwise it
 *                   is passed on to append().
 * @return *this
 */
CharString &CharString::copyFrom(StringPiece s, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return *this;
    }
    // Reset to a properly terminated empty string before appending. Resetting
    // only `len` would leave the previous first byte in place, so data() could
    // still expose stale content whenever append() writes nothing (for
    // example when `s` is empty).
    // NOTE(review): `s` presumably must not point into this string's own
    // buffer -- confirm against append()'s aliasing rules.
    len = 0;
    buffer[0] = 0;
    append(s, errorCode);
    return *this;
}
int32_t CharString::lastIndexOf(char c) const { int32_t CharString::lastIndexOf(char c) const {
for(int32_t i=len; i>0;) { for(int32_t i=len; i>0;) {
if(buffer[--i]==c) { if(buffer[--i]==c) {
@ -143,7 +152,7 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
return *this; return *this;
} }
CharString &CharString::appendNumber(int32_t number, UErrorCode &status) { CharString &CharString::appendNumber(int64_t number, UErrorCode &status) {
if (number < 0) { if (number < 0) {
this->append('-', status); this->append('-', status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {

View file

@ -74,6 +74,7 @@ public:
* use a UErrorCode where memory allocations might be needed. * use a UErrorCode where memory allocations might be needed.
*/ */
CharString &copyFrom(const CharString &other, UErrorCode &errorCode); CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
CharString &copyFrom(StringPiece s, UErrorCode &errorCode);
UBool isEmpty() const { return len==0; } UBool isEmpty() const { return len==0; }
int32_t length() const { return len; } int32_t length() const { return len; }
@ -135,7 +136,7 @@ public:
} }
CharString &append(const char *s, int32_t sLength, UErrorCode &status); CharString &append(const char *s, int32_t sLength, UErrorCode &status);
CharString &appendNumber(int32_t number, UErrorCode &status); CharString &appendNumber(int64_t number, UErrorCode &status);
/** /**
* Returns a writable buffer for appending and writes the buffer's capacity to * Returns a writable buffer for appending and writes the buffer's capacity to

File diff suppressed because it is too large Load diff

View file

@ -12,44 +12,84 @@
*/ */
#include "locbased.h" #include "locbased.h"
#include "cstring.h" #include "cstring.h"
#include "charstr.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const { Locale LocaleBased::getLocale(const CharString* valid, const CharString* actual,
const char* id = getLocaleID(type, status); ULocDataLocaleType type, UErrorCode& status) {
const char* id = getLocaleID(valid, actual, type, status);
return Locale(id != nullptr ? id : ""); return Locale(id != nullptr ? id : "");
} }
const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { const char* LocaleBased::getLocaleID(const CharString* valid, const CharString* actual,
ULocDataLocaleType type, UErrorCode& status) {
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return nullptr; return nullptr;
} }
switch(type) { switch(type) {
case ULOC_VALID_LOCALE: case ULOC_VALID_LOCALE:
return valid; return valid == nullptr ? "" : valid->data();
case ULOC_ACTUAL_LOCALE: case ULOC_ACTUAL_LOCALE:
return actual; return actual == nullptr ? "" : actual->data();
default: default:
status = U_ILLEGAL_ARGUMENT_ERROR; status = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr; return nullptr;
} }
} }
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) { void LocaleBased::setLocaleIDs(const CharString* validID, const CharString* actualID, UErrorCode& status) {
if (validID != nullptr) { setValidLocaleID(validID, status);
uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY); setActualLocaleID(actualID,status);
valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate }
} void LocaleBased::setLocaleIDs(const char* validID, const char* actualID, UErrorCode& status) {
if (actualID != nullptr) { setValidLocaleID(validID, status);
uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY); setActualLocaleID(actualID,status);
actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate }
void LocaleBased::setLocaleID(const char* id, CharString*& dest, UErrorCode& status) {
if (U_FAILURE(status)) { return; }
if (id == nullptr || *id == 0) {
delete dest;
dest = nullptr;
} else {
if (dest == nullptr) {
dest = new CharString(id, status);
if (dest == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
} else {
dest->copyFrom(id, status);
}
} }
} }
void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) { void LocaleBased::setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status) {
uprv_strcpy(valid, validID.getName()); if (U_FAILURE(status)) { return; }
uprv_strcpy(actual, actualID.getName()); if (id == nullptr || id->isEmpty()) {
delete dest;
dest = nullptr;
} else {
if (dest == nullptr) {
dest = new CharString(*id, status);
if (dest == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
} else {
dest->copyFrom(*id, status);
}
}
}
bool LocaleBased::equalIDs(const CharString* left, const CharString* right) {
// true if both are nullptr
if (left == nullptr && right == nullptr) return true;
// false if only one is nullptr
if (left == nullptr || right == nullptr) return false;
return *left == *right;
} }
U_NAMESPACE_END U_NAMESPACE_END

View file

@ -19,13 +19,14 @@
/** /**
* Macro to declare a locale LocaleBased wrapper object for the given * Macro to declare a locale LocaleBased wrapper object for the given
* object, which must have two members named `validLocale' and * object, which must have two members named `validLocale' and
* `actualLocale' of size ULOC_FULLNAME_CAPACITY * `actualLocale', both of which are pointers to an internal icu::CharString.
*/ */
#define U_LOCALE_BASED(varname, objname) \ #define U_LOCALE_BASED(varname, objname) \
LocaleBased varname((objname).validLocale, (objname).actualLocale) LocaleBased varname((objname).validLocale, (objname).actualLocale)
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
class CharString;
/** /**
* A utility class that unifies the implementation of getLocale() by * A utility class that unifies the implementation of getLocale() by
* various ICU services. This class is likely to be removed in the * various ICU services. This class is likely to be removed in the
@ -41,33 +42,35 @@ class U_COMMON_API LocaleBased : public UMemory {
* Construct a LocaleBased wrapper around the two pointers. These * Construct a LocaleBased wrapper around the two pointers. These
* will be aliased for the lifetime of this object. * will be aliased for the lifetime of this object.
*/ */
inline LocaleBased(char* validAlias, char* actualAlias); inline LocaleBased(CharString*& validAlias, CharString*& actualAlias);
/**
* Construct a LocaleBased wrapper around the two const pointers.
* These will be aliased for the lifetime of this object.
*/
inline LocaleBased(const char* validAlias, const char* actualAlias);
/** /**
* Return locale meta-data for the service object wrapped by this * Return locale meta-data for the service object wrapped by this
* object. Either the valid or the actual locale may be * object. Either the valid or the actual locale may be
* retrieved. * retrieved.
* @param valid The valid locale.
* @param actual The actual locale.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code * @param status input-output error code
* @return the indicated locale * @return the indicated locale
*/ */
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; static Locale getLocale(
const CharString* valid, const CharString* actual,
ULocDataLocaleType type, UErrorCode& status);
/** /**
* Return the locale ID for the service object wrapped by this * Return the locale ID for the service object wrapped by this
* object. Either the valid or the actual locale may be * object. Either the valid or the actual locale may be
* retrieved. * retrieved.
* @param valid The valid locale.
* @param actual The actual locale.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code * @param status input-output error code
* @return the indicated locale ID * @return the indicated locale ID
*/ */
const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; static const char* getLocaleID(
const CharString* valid, const CharString* actual,
ULocDataLocaleType type, UErrorCode& status);
/** /**
* Set the locale meta-data for the service object wrapped by this * Set the locale meta-data for the service object wrapped by this
@ -75,31 +78,40 @@ class U_COMMON_API LocaleBased : public UMemory {
* @param valid the ID of the valid locale * @param valid the ID of the valid locale
* @param actual the ID of the actual locale * @param actual the ID of the actual locale
*/ */
void setLocaleIDs(const char* valid, const char* actual); void setLocaleIDs(const char* valid, const char* actual, UErrorCode& status);
void setLocaleIDs(const CharString* valid, const CharString* actual, UErrorCode& status);
/** static void setLocaleID(const char* id, CharString*& dest, UErrorCode& status);
* Set the locale meta-data for the service object wrapped by this static void setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status);
* object.
* @param valid the ID of the valid locale static bool equalIDs(const CharString* left, const CharString* right);
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const Locale& valid, const Locale& actual);
private: private:
char* valid; void setValidLocaleID(const CharString* id, UErrorCode& status);
void setActualLocaleID(const CharString* id, UErrorCode& status);
char* actual; void setValidLocaleID(const char* id, UErrorCode& status);
void setActualLocaleID(const char* id, UErrorCode& status);
CharString*& valid;
CharString*& actual;
}; };
inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) : inline LocaleBased::LocaleBased(CharString*& validAlias, CharString*& actualAlias) :
valid(validAlias), actual(actualAlias) { valid(validAlias), actual(actualAlias) {
} }
inline LocaleBased::LocaleBased(const char* validAlias, inline void LocaleBased::setValidLocaleID(const CharString* id, UErrorCode& status) {
const char* actualAlias) : setLocaleID(id, valid, status);
// ugh: cast away const }
valid(const_cast<char*>(validAlias)), actual(const_cast<char*>(actualAlias)) { inline void LocaleBased::setActualLocaleID(const CharString* id, UErrorCode& status) {
setLocaleID(id, actual, status);
}
inline void LocaleBased::setValidLocaleID(const char* id, UErrorCode& status) {
setLocaleID(id, valid, status);
}
inline void LocaleBased::setActualLocaleID(const char* id, UErrorCode& status) {
setLocaleID(id, actual, status);
} }
U_NAMESPACE_END U_NAMESPACE_END

View file

@ -19,6 +19,8 @@
* that then do not depend on resource bundle code and display name data. * that then do not depend on resource bundle code and display name data.
*/ */
#include <string_view>
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/locid.h" #include "unicode/locid.h"
@ -359,7 +361,7 @@ _getStringOrCopyKey(const char *path, const char *locale,
return u_terminateUChars(dest, destCapacity, length, &errorCode); return u_terminateUChars(dest, destCapacity, length, &errorCode);
} }
using UDisplayNameGetter = icu::CharString(const char*, UErrorCode&); using UDisplayNameGetter = icu::CharString(std::string_view, UErrorCode&);
int32_t int32_t
_getDisplayNameForComponent(const char *locale, _getDisplayNameForComponent(const char *locale,
@ -377,6 +379,10 @@ _getDisplayNameForComponent(const char *locale,
return 0; return 0;
} }
if (locale == nullptr) {
locale = uloc_getDefault();
}
localStatus = U_ZERO_ERROR; localStatus = U_ZERO_ERROR;
icu::CharString localeBuffer = (*getter)(locale, localStatus); icu::CharString localeBuffer = (*getter)(locale, localStatus);
if (U_FAILURE(localStatus)) { if (U_FAILURE(localStatus)) {

View file

@ -1828,8 +1828,13 @@ ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
/*This function initializes a Locale from a C locale ID*/
Locale& Locale::init(const char* localeID, UBool canonicalize) Locale& Locale::init(const char* localeID, UBool canonicalize)
{
return localeID == nullptr ? *this = getDefault() : init(StringPiece{localeID}, canonicalize);
}
/*This function initializes a Locale from a C locale ID*/
Locale& Locale::init(StringPiece localeID, UBool canonicalize)
{ {
fIsBogus = false; fIsBogus = false;
/* Free our current storage */ /* Free our current storage */
@ -1854,19 +1859,28 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
int32_t length; int32_t length;
UErrorCode err; UErrorCode err;
if(localeID == nullptr) {
// not an error, just set the default locale
return *this = getDefault();
}
/* preset all fields to empty */ /* preset all fields to empty */
language[0] = script[0] = country[0] = 0; language[0] = script[0] = country[0] = 0;
const auto parse = [canonicalize](std::string_view localeID,
char* name,
int32_t nameCapacity,
UErrorCode& status) {
return ByteSinkUtil::viaByteSinkToTerminatedChars(
name, nameCapacity,
[&](ByteSink& sink, UErrorCode& status) {
if (canonicalize) {
ulocimp_canonicalize(localeID, sink, status);
} else {
ulocimp_getName(localeID, sink, status);
}
},
status);
};
// "canonicalize" the locale ID to ICU/Java format // "canonicalize" the locale ID to ICU/Java format
err = U_ZERO_ERROR; err = U_ZERO_ERROR;
length = canonicalize ? length = parse(localeID, fullName, sizeof fullNameBuffer, err);
uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) { if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
U_ASSERT(baseName == nullptr); U_ASSERT(baseName == nullptr);
@ -1877,9 +1891,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
} }
fullName = newFullName; fullName = newFullName;
err = U_ZERO_ERROR; err = U_ZERO_ERROR;
length = canonicalize ? length = parse(localeID, fullName, length + 1, err);
uloc_canonicalize(localeID, fullName, length+1, &err) :
uloc_getName(localeID, fullName, length+1, &err);
} }
if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) { if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
/* should never occur */ /* should never occur */
@ -2200,6 +2212,13 @@ Locale::createFromName (const char *name)
} }
} }
// Builds a Locale from `name` by initializing it with canonicalization
// switched off.
Locale U_EXPORT2
Locale::createFromName(StringPiece name) {
    Locale result("");
    result.init(name, /*canonicalize=*/false);
    return result;
}
Locale U_EXPORT2 Locale U_EXPORT2
Locale::createCanonical(const char* name) { Locale::createCanonical(const char* name) {
Locale loc(""); Locale loc("");

View file

@ -300,6 +300,9 @@ ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink, icu::ByteSink& sink,
UErrorCode& status) { UErrorCode& status) {
if (U_FAILURE(status)) { return; } if (U_FAILURE(status)) { return; }
if (localeID == nullptr) {
localeID = uloc_getDefault();
}
icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status); icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status);
_uloc_addLikelySubtags(localeBuffer.data(), sink, status); _uloc_addLikelySubtags(localeBuffer.data(), sink, status);
} }
@ -334,6 +337,9 @@ ulocimp_minimizeSubtags(const char* localeID,
bool favorScript, bool favorScript,
UErrorCode& status) { UErrorCode& status) {
if (U_FAILURE(status)) { return; } if (U_FAILURE(status)) { return; }
if (localeID == nullptr) {
localeID = uloc_getDefault();
}
icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status); icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status);
_uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status); _uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status);
} }
@ -349,7 +355,9 @@ uloc_isRightToLeft(const char *locale) {
UErrorCode errorCode = U_ZERO_ERROR; UErrorCode errorCode = U_ZERO_ERROR;
icu::CharString lang; icu::CharString lang;
icu::CharString script; icu::CharString script;
ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, errorCode); ulocimp_getSubtags(
locale == nullptr ? uloc_getDefault() : locale,
&lang, &script, nullptr, nullptr, nullptr, errorCode);
if (U_FAILURE(errorCode) || script.isEmpty()) { if (U_FAILURE(errorCode) || script.isEmpty()) {
// Fastpath: We know the likely scripts and their writing direction // Fastpath: We know the likely scripts and their writing direction
// for some common languages. // for some common languages.
@ -369,7 +377,7 @@ uloc_isRightToLeft(const char *locale) {
if (U_FAILURE(errorCode)) { if (U_FAILURE(errorCode)) {
return false; return false;
} }
ulocimp_getSubtags(likely.data(), nullptr, &script, nullptr, nullptr, nullptr, errorCode); ulocimp_getSubtags(likely.toStringPiece(), nullptr, &script, nullptr, nullptr, nullptr, errorCode);
if (U_FAILURE(errorCode) || script.isEmpty()) { if (U_FAILURE(errorCode) || script.isEmpty()) {
return false; return false;
} }
@ -430,7 +438,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
icu::CharString rgBuf = GetRegionFromKey(localeID, "rg", status); icu::CharString rgBuf = GetRegionFromKey(localeID, "rg", status);
if (U_SUCCESS(status) && rgBuf.isEmpty()) { if (U_SUCCESS(status) && rgBuf.isEmpty()) {
// No valid rg keyword value, try for unicode_region_subtag // No valid rg keyword value, try for unicode_region_subtag
rgBuf = ulocimp_getRegion(localeID, status); rgBuf = ulocimp_getRegion(localeID == nullptr ? uloc_getDefault() : localeID, status);
if (U_SUCCESS(status) && rgBuf.isEmpty() && inferRegion) { if (U_SUCCESS(status) && rgBuf.isEmpty() && inferRegion) {
// Second check for sd keyword value // Second check for sd keyword value
rgBuf = GetRegionFromKey(localeID, "sd", status); rgBuf = GetRegionFromKey(localeID, "sd", status);
@ -439,7 +447,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
UErrorCode rgStatus = U_ZERO_ERROR; UErrorCode rgStatus = U_ZERO_ERROR;
icu::CharString locBuf = ulocimp_addLikelySubtags(localeID, rgStatus); icu::CharString locBuf = ulocimp_addLikelySubtags(localeID, rgStatus);
if (U_SUCCESS(rgStatus)) { if (U_SUCCESS(rgStatus)) {
rgBuf = ulocimp_getRegion(locBuf.data(), status); rgBuf = ulocimp_getRegion(locBuf.toStringPiece(), status);
} }
} }
} }

View file

@ -527,7 +527,7 @@ LSR LikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
return {}; return {};
} }
const char *name = locale.getName(); const char *name = locale.getName();
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=") if (!returnInputIfUnmatch && uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
// Private use language tag x-subtag-subtag... which CLDR changes to // Private use language tag x-subtag-subtag... which CLDR changes to
// und-x-subtag-subtag... // und-x-subtag-subtag...
return LSR(name, "", "", LSR::EXPLICIT_LSR); return LSR(name, "", "", LSR::EXPLICIT_LSR);

View file

@ -161,6 +161,9 @@ _uloc_getOrientationHelper(const char* localeId,
if (U_FAILURE(status)) { return result; } if (U_FAILURE(status)) { return result; }
if (localeId == nullptr) {
localeId = uloc_getDefault();
}
icu::CharString localeBuffer = ulocimp_canonicalize(localeId, status); icu::CharString localeBuffer = ulocimp_canonicalize(localeId, status);
if (U_FAILURE(status)) { return result; } if (U_FAILURE(status)) { return result; }

View file

@ -193,7 +193,7 @@ u_strToPunycode(const char16_t *src, int32_t srcLength,
return 0; return 0;
} }
if(src==nullptr || srcLength<-1 || (dest==nullptr && destCapacity!=0)) { if(src==nullptr || srcLength<-1 || destCapacity<0 || (dest==nullptr && destCapacity!=0)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
} }

View file

@ -76,7 +76,7 @@
#include <float.h> #include <float.h>
#ifndef U_COMMON_IMPLEMENTATION #ifndef U_COMMON_IMPLEMENTATION
#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/icu/howtouseicu.html
#endif #endif

View file

@ -47,7 +47,10 @@ static int gLastSerial = 0;
// Constructor. Just set the fields to reasonable default values. // Constructor. Just set the fields to reasonable default values.
// //
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
RBBINode::RBBINode(NodeType t) : UMemory() { RBBINode::RBBINode(NodeType t, UErrorCode& status) : UMemory() {
if (U_FAILURE(status)) {
return;
}
#ifdef RBBI_DEBUG #ifdef RBBI_DEBUG
fSerialNum = ++gLastSerial; fSerialNum = ++gLastSerial;
#endif #endif
@ -65,10 +68,13 @@ RBBINode::RBBINode(NodeType t) : UMemory() {
fVal = 0; fVal = 0;
fPrecedence = precZero; fPrecedence = precZero;
UErrorCode status = U_ZERO_ERROR; fFirstPosSet = new UVector(status);
fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
fLastPosSet = new UVector(status); fLastPosSet = new UVector(status);
fFollowPos = new UVector(status); fFollowPos = new UVector(status);
if (U_SUCCESS(status) &&
(fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
if (t==opCat) {fPrecedence = precOpCat;} if (t==opCat) {fPrecedence = precOpCat;}
else if (t==opOr) {fPrecedence = precOpOr;} else if (t==opOr) {fPrecedence = precOpOr;}
else if (t==opStart) {fPrecedence = precStart;} else if (t==opStart) {fPrecedence = precStart;}
@ -77,7 +83,10 @@ RBBINode::RBBINode(NodeType t) : UMemory() {
} }
RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { RBBINode::RBBINode(const RBBINode &other, UErrorCode& status) : UMemory(other) {
if (U_FAILURE(status)) {
return;
}
#ifdef RBBI_DEBUG #ifdef RBBI_DEBUG
fSerialNum = ++gLastSerial; fSerialNum = ++gLastSerial;
#endif #endif
@ -94,10 +103,13 @@ RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
fVal = other.fVal; fVal = other.fVal;
fRuleRoot = false; fRuleRoot = false;
fChainIn = other.fChainIn; fChainIn = other.fChainIn;
UErrorCode status = U_ZERO_ERROR;
fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
fLastPosSet = new UVector(status); fLastPosSet = new UVector(status);
fFollowPos = new UVector(status); fFollowPos = new UVector(status);
if (U_SUCCESS(status) &&
(fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
} }
@ -193,27 +205,54 @@ void RBBINode::NRDeleteNode(RBBINode *node) {
// references in preparation for generating the DFA tables. // references in preparation for generating the DFA tables.
// //
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
RBBINode *RBBINode::cloneTree() { constexpr int kRecursiveDepthLimit = 3500;
RBBINode *RBBINode::cloneTree(UErrorCode &status, int depth) {
if (U_FAILURE(status)) {
return nullptr;
}
// If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
// to avoid stack overflow crash.
if (depth > kRecursiveDepthLimit) {
status = U_INPUT_TOO_LONG_ERROR;
return nullptr;
}
RBBINode *n; RBBINode *n;
if (fType == RBBINode::varRef) { if (fType == RBBINode::varRef) {
// If the current node is a variable reference, skip over it // If the current node is a variable reference, skip over it
// and clone the definition of the variable instead. // and clone the definition of the variable instead.
n = fLeftChild->cloneTree(); n = fLeftChild->cloneTree(status, depth+1);
if (U_FAILURE(status)) {
return nullptr;
}
} else if (fType == RBBINode::uset) { } else if (fType == RBBINode::uset) {
n = this; n = this;
} else { } else {
n = new RBBINode(*this); n = new RBBINode(*this, status);
if (U_FAILURE(status)) {
delete n;
return nullptr;
}
// Check for null pointer. // Check for null pointer.
if (n != nullptr) { if (n == nullptr) {
if (fLeftChild != nullptr) { status = U_MEMORY_ALLOCATION_ERROR;
n->fLeftChild = fLeftChild->cloneTree(); return nullptr;
n->fLeftChild->fParent = n; }
if (fLeftChild != nullptr) {
n->fLeftChild = fLeftChild->cloneTree(status, depth+1);
if (U_FAILURE(status)) {
delete n;
return nullptr;
} }
if (fRightChild != nullptr) { n->fLeftChild->fParent = n;
n->fRightChild = fRightChild->cloneTree(); }
n->fRightChild->fParent = n; if (fRightChild != nullptr) {
n->fRightChild = fRightChild->cloneTree(status, depth+1);
if (U_FAILURE(status)) {
delete n;
return nullptr;
} }
n->fRightChild->fParent = n;
} }
} }
return n; return n;
@ -239,7 +278,6 @@ RBBINode *RBBINode::cloneTree() {
// nested references are handled by cloneTree(), not here. // nested references are handled by cloneTree(), not here.
// //
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
constexpr int kRecursiveDepthLimit = 3500;
RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) { RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return this; return this;
@ -251,21 +289,34 @@ RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
return this; return this;
} }
if (fType == varRef) { if (fType == varRef) {
RBBINode *retNode = fLeftChild->cloneTree(); RBBINode *retNode = fLeftChild->cloneTree(status, depth+1);
if (retNode != nullptr) { if (U_FAILURE(status)) {
retNode->fRuleRoot = this->fRuleRoot; return this;
retNode->fChainIn = this->fChainIn;
} }
retNode->fRuleRoot = this->fRuleRoot;
retNode->fChainIn = this->fChainIn;
delete this; // TODO: undefined behavior. Fix. delete this; // TODO: undefined behavior. Fix.
return retNode; return retNode;
} }
if (fLeftChild != nullptr) { if (fLeftChild != nullptr) {
fLeftChild = fLeftChild->flattenVariables(status, depth+1); fLeftChild = fLeftChild->flattenVariables(status, depth+1);
if (fLeftChild == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(status)) {
return this;
}
fLeftChild->fParent = this; fLeftChild->fParent = this;
} }
if (fRightChild != nullptr) { if (fRightChild != nullptr) {
fRightChild = fRightChild->flattenVariables(status, depth+1); fRightChild = fRightChild->flattenVariables(status, depth+1);
if (fRightChild == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(status)) {
return this;
}
fRightChild->fParent = this; fRightChild->fParent = this;
} }
return this; return this;
@ -280,7 +331,16 @@ RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
// the left child of the uset node. // the left child of the uset node.
// //
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
void RBBINode::flattenSets() { void RBBINode::flattenSets(UErrorCode &status, int depth) {
if (U_FAILURE(status)) {
return;
}
// If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
// to avoid stack overflow crash.
if (depth > kRecursiveDepthLimit) {
status = U_INPUT_TOO_LONG_ERROR;
return;
}
U_ASSERT(fType != setRef); U_ASSERT(fType != setRef);
if (fLeftChild != nullptr) { if (fLeftChild != nullptr) {
@ -288,11 +348,15 @@ void RBBINode::flattenSets() {
RBBINode *setRefNode = fLeftChild; RBBINode *setRefNode = fLeftChild;
RBBINode *usetNode = setRefNode->fLeftChild; RBBINode *usetNode = setRefNode->fLeftChild;
RBBINode *replTree = usetNode->fLeftChild; RBBINode *replTree = usetNode->fLeftChild;
fLeftChild = replTree->cloneTree(); fLeftChild = replTree->cloneTree(status, depth+1);
if (U_FAILURE(status)) {
delete setRefNode;
return;
}
fLeftChild->fParent = this; fLeftChild->fParent = this;
delete setRefNode; delete setRefNode;
} else { } else {
fLeftChild->flattenSets(); fLeftChild->flattenSets(status, depth+1);
} }
} }
@ -301,11 +365,15 @@ void RBBINode::flattenSets() {
RBBINode *setRefNode = fRightChild; RBBINode *setRefNode = fRightChild;
RBBINode *usetNode = setRefNode->fLeftChild; RBBINode *usetNode = setRefNode->fLeftChild;
RBBINode *replTree = usetNode->fLeftChild; RBBINode *replTree = usetNode->fLeftChild;
fRightChild = replTree->cloneTree(); fRightChild = replTree->cloneTree(status, depth+1);
if (U_FAILURE(status)) {
delete setRefNode;
return;
}
fRightChild->fParent = this; fRightChild->fParent = this;
delete setRefNode; delete setRefNode;
} else { } else {
fRightChild->flattenSets(); fRightChild->flattenSets(status, depth+1);
} }
} }
} }

View file

@ -91,14 +91,14 @@ class RBBINode : public UMemory {
UVector *fFollowPos; UVector *fFollowPos;
RBBINode(NodeType t); RBBINode(NodeType t, UErrorCode& status);
RBBINode(const RBBINode &other); RBBINode(const RBBINode &other, UErrorCode& status);
~RBBINode(); ~RBBINode();
static void NRDeleteNode(RBBINode *node); static void NRDeleteNode(RBBINode *node);
RBBINode *cloneTree(); RBBINode *cloneTree(UErrorCode &status, int depth=0);
RBBINode *flattenVariables(UErrorCode &status, int depth=0); RBBINode *flattenVariables(UErrorCode &status, int depth=0);
void flattenSets(); void flattenSets(UErrorCode &status, int depth=0);
void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status); void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
#ifdef RBBI_DEBUG #ifdef RBBI_DEBUG

View file

@ -767,15 +767,24 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
c = s.char32At(0); c = s.char32At(0);
setToAdopt = new UnicodeSet(c, c); setToAdopt = new UnicodeSet(c, c);
} }
if (setToAdopt == nullptr) {
error(U_MEMORY_ALLOCATION_ERROR);
return;
}
} }
// //
// Make a new uset node to refer to this UnicodeSet // Make a new uset node to refer to this UnicodeSet
// This new uset node becomes the child of the caller's setReference node. // This new uset node becomes the child of the caller's setReference node.
// //
RBBINode *usetNode = new RBBINode(RBBINode::uset); UErrorCode localStatus = U_ZERO_ERROR;
RBBINode *usetNode = new RBBINode(RBBINode::uset, localStatus);
if (usetNode == nullptr) { if (usetNode == nullptr) {
error(U_MEMORY_ALLOCATION_ERROR); localStatus = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(localStatus)) {
delete usetNode;
error(localStatus);
delete setToAdopt; delete setToAdopt;
return; return;
} }
@ -1191,7 +1200,7 @@ RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) {
return nullptr; return nullptr;
} }
fNodeStackPtr++; fNodeStackPtr++;
fNodeStack[fNodeStackPtr] = new RBBINode(t); fNodeStack[fNodeStackPtr] = new RBBINode(t, *fRB->fStatus);
if (fNodeStack[fNodeStackPtr] == nullptr) { if (fNodeStack[fNodeStackPtr] == nullptr) {
*fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR;
} }

View file

@ -375,7 +375,11 @@ void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
} }
void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) { void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
RBBINode *leafNode = new RBBINode(RBBINode::leafChar); RBBINode *leafNode = new RBBINode(RBBINode::leafChar, *fStatus);
if (U_FAILURE(*fStatus)) {
delete leafNode;
return;
}
if (leafNode == nullptr) { if (leafNode == nullptr) {
*fStatus = U_MEMORY_ALLOCATION_ERROR; *fStatus = U_MEMORY_ALLOCATION_ERROR;
return; return;
@ -388,9 +392,13 @@ void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
// There are already input symbols present for this set. // There are already input symbols present for this set.
// Set up an OR node, with the previous stuff as the left child // Set up an OR node, with the previous stuff as the left child
// and the new value as the right child. // and the new value as the right child.
RBBINode *orNode = new RBBINode(RBBINode::opOr); RBBINode *orNode = new RBBINode(RBBINode::opOr, *fStatus);
if (orNode == nullptr) { if (orNode == nullptr) {
*fStatus = U_MEMORY_ALLOCATION_ERROR; *fStatus = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(*fStatus)) {
delete orNode;
delete leafNode;
return; return;
} }
orNode->fLeftChild = usetNode->fLeftChild; orNode->fLeftChild = usetNode->fLeftChild;

View file

@ -99,13 +99,22 @@ void RBBITableBuilder::buildForwardTable() {
// {bof} fake character. // {bof} fake character.
// //
if (fRB->fSetBuilder->sawBOF()) { if (fRB->fSetBuilder->sawBOF()) {
RBBINode *bofTop = new RBBINode(RBBINode::opCat); RBBINode *bofTop = new RBBINode(RBBINode::opCat, *fStatus);
RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); if (bofTop == nullptr) {
// Delete and exit if memory allocation failed.
if (bofTop == nullptr || bofLeaf == nullptr) {
*fStatus = U_MEMORY_ALLOCATION_ERROR; *fStatus = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(*fStatus)) {
delete bofTop; delete bofTop;
return;
}
RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar, *fStatus);
// Delete and exit if memory allocation failed.
if (bofLeaf == nullptr) {
*fStatus = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(*fStatus)) {
delete bofLeaf; delete bofLeaf;
delete bofTop;
return; return;
} }
bofTop->fLeftChild = bofLeaf; bofTop->fLeftChild = bofLeaf;
@ -120,18 +129,23 @@ void RBBITableBuilder::buildForwardTable() {
// Appears as a cat-node, left child being the original tree, // Appears as a cat-node, left child being the original tree,
// right child being the end marker. // right child being the end marker.
// //
RBBINode *cn = new RBBINode(RBBINode::opCat); RBBINode *cn = new RBBINode(RBBINode::opCat, *fStatus);
// Exit if memory allocation failed. // Exit if memory allocation failed.
if (cn == nullptr) { if (cn == nullptr) {
*fStatus = U_MEMORY_ALLOCATION_ERROR; *fStatus = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(*fStatus)) {
delete cn;
return; return;
} }
cn->fLeftChild = fTree; cn->fLeftChild = fTree;
fTree->fParent = cn; fTree->fParent = cn;
RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark); RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark, *fStatus);
// Delete and exit if memory allocation failed. // Delete and exit if memory allocation failed.
if (cn->fRightChild == nullptr) { if (cn->fRightChild == nullptr) {
*fStatus = U_MEMORY_ALLOCATION_ERROR; *fStatus = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(*fStatus)) {
delete cn; delete cn;
return; return;
} }
@ -142,7 +156,7 @@ void RBBITableBuilder::buildForwardTable() {
// Replace all references to UnicodeSets with the tree for the equivalent // Replace all references to UnicodeSets with the tree for the equivalent
// expression. // expression.
// //
fTree->flattenSets(); fTree->flattenSets(*fStatus, 0);
#ifdef RBBI_DEBUG #ifdef RBBI_DEBUG
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
RBBIDebugPuts("\nParse tree after flattening Unicode Set references."); RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");

View file

@ -388,7 +388,7 @@ const Locale &ResourceBundle::getLocale() const {
return ncThis->fLocale != nullptr ? *ncThis->fLocale : Locale::getDefault(); return ncThis->fLocale != nullptr ? *ncThis->fLocale : Locale::getDefault();
} }
const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
{ {
return ures_getLocaleByType(fResource, type, &status); return ures_getLocaleByType(fResource, type, &status);
} }

View file

@ -3146,11 +3146,8 @@ ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
if(c<0) { if(c<0) {
if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) { if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
/* incomplete character byte sequence */ /* incomplete character byte sequence */
uint8_t *bytes=cnv->toUBytes;
cnv->toULength = static_cast<int8_t>(source - lastSource); cnv->toULength = static_cast<int8_t>(source - lastSource);
do { uprv_memcpy(cnv->toUBytes, lastSource, cnv->toULength);
*bytes++=*lastSource++;
} while(lastSource<source);
*pErrorCode=U_TRUNCATED_CHAR_FOUND; *pErrorCode=U_TRUNCATED_CHAR_FOUND;
} else if(U_FAILURE(*pErrorCode)) { } else if(U_FAILURE(*pErrorCode)) {
/* callback(illegal) */ /* callback(illegal) */

View file

@ -372,12 +372,8 @@ struct CReg : public icu::UMemory {
CReg(const char16_t* _iso, const char* _id) CReg(const char16_t* _iso, const char* _id)
: next(nullptr) : next(nullptr)
{ {
int32_t len = static_cast<int32_t>(uprv_strlen(_id)); uprv_strncpy(id, _id, sizeof(id)-1);
if (len > static_cast<int32_t>(sizeof(id) - 1)) { id[sizeof(id)-1] = 0;
len = (sizeof(id)-1);
}
uprv_strncpy(id, _id, len);
id[len] = 0;
u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH); u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH);
iso[ISO_CURRENCY_CODE_LENGTH] = 0; iso[ISO_CURRENCY_CODE_LENGTH] = 0;
} }
@ -682,6 +678,9 @@ ucurr_getName(const char16_t* currency,
// this function. // this function.
UErrorCode ec2 = U_ZERO_ERROR; UErrorCode ec2 = U_ZERO_ERROR;
if (locale == nullptr) {
locale = uloc_getDefault();
}
CharString loc = ulocimp_getName(locale, ec2); CharString loc = ulocimp_getName(locale, ec2);
if (U_FAILURE(ec2)) { if (U_FAILURE(ec2)) {
*ec = U_ILLEGAL_ARGUMENT_ERROR; *ec = U_ILLEGAL_ARGUMENT_ERROR;
@ -780,6 +779,9 @@ ucurr_getPluralName(const char16_t* currency,
// this function. // this function.
UErrorCode ec2 = U_ZERO_ERROR; UErrorCode ec2 = U_ZERO_ERROR;
if (locale == nullptr) {
locale = uloc_getDefault();
}
CharString loc = ulocimp_getName(locale, ec2); CharString loc = ulocimp_getName(locale, ec2);
if (U_FAILURE(ec2)) { if (U_FAILURE(ec2)) {
*ec = U_ILLEGAL_ARGUMENT_ERROR; *ec = U_ILLEGAL_ARGUMENT_ERROR;
@ -973,6 +975,9 @@ collectCurrencyNames(const char* locale,
// Look up the Currencies resource for the given locale. // Look up the Currencies resource for the given locale.
UErrorCode ec2 = U_ZERO_ERROR; UErrorCode ec2 = U_ZERO_ERROR;
if (locale == nullptr) {
locale = uloc_getDefault();
}
CharString loc = ulocimp_getName(locale, ec2); CharString loc = ulocimp_getName(locale, ec2);
if (U_FAILURE(ec2)) { if (U_FAILURE(ec2)) {
ec = U_ILLEGAL_ARGUMENT_ERROR; ec = U_ILLEGAL_ARGUMENT_ERROR;

File diff suppressed because it is too large Load diff

View file

@ -1043,7 +1043,7 @@ _initializeULanguageTag(ULanguageTag* langtag) {
} }
void void
_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { _appendLanguageToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
UErrorCode tmpStatus = U_ZERO_ERROR; UErrorCode tmpStatus = U_ZERO_ERROR;
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
@ -1088,7 +1088,7 @@ _appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool str
} }
void void
_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { _appendScriptToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
UErrorCode tmpStatus = U_ZERO_ERROR; UErrorCode tmpStatus = U_ZERO_ERROR;
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
@ -1118,7 +1118,7 @@ _appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool stric
} }
void void
_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { _appendRegionToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) {
UErrorCode tmpStatus = U_ZERO_ERROR; UErrorCode tmpStatus = U_ZERO_ERROR;
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
@ -1169,7 +1169,7 @@ void _sortVariants(VariantListEntry* first) {
} }
void void
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) { _appendVariantsToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) {
if (U_FAILURE(status)) { return; } if (U_FAILURE(status)) { return; }
UErrorCode tmpStatus = U_ZERO_ERROR; UErrorCode tmpStatus = U_ZERO_ERROR;
@ -1872,7 +1872,7 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode& status)
} }
void void
_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) { _appendPrivateuseToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) {
if (U_FAILURE(status)) { return; } if (U_FAILURE(status)) { return; }
UErrorCode tmpStatus = U_ZERO_ERROR; UErrorCode tmpStatus = U_ZERO_ERROR;
@ -2596,6 +2596,9 @@ ulocimp_toLanguageTag(const char* localeID,
bool hadPosix = false; bool hadPosix = false;
const char* pKeywordStart; const char* pKeywordStart;
if (localeID == nullptr) {
localeID = uloc_getDefault();
}
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
icu::CharString canonical = ulocimp_canonicalize(localeID, tmpStatus); icu::CharString canonical = ulocimp_canonicalize(localeID, tmpStatus);
if (U_FAILURE(tmpStatus)) { if (U_FAILURE(tmpStatus)) {
@ -2604,7 +2607,7 @@ ulocimp_toLanguageTag(const char* localeID,
} }
/* For handling special case - private use only tag */ /* For handling special case - private use only tag */
pKeywordStart = locale_getKeywordsStart(canonical.data()); pKeywordStart = locale_getKeywordsStart(canonical.toStringPiece());
if (pKeywordStart == canonical.data()) { if (pKeywordStart == canonical.data()) {
int kwdCnt = 0; int kwdCnt = 0;
bool done = false; bool done = false;
@ -2642,12 +2645,12 @@ ulocimp_toLanguageTag(const char* localeID,
} }
} }
_appendLanguageToLanguageTag(canonical.data(), sink, strict, status); _appendLanguageToLanguageTag(canonical.toStringPiece(), sink, strict, status);
_appendScriptToLanguageTag(canonical.data(), sink, strict, status); _appendScriptToLanguageTag(canonical.toStringPiece(), sink, strict, status);
_appendRegionToLanguageTag(canonical.data(), sink, strict, status); _appendRegionToLanguageTag(canonical.toStringPiece(), sink, strict, status);
_appendVariantsToLanguageTag(canonical.data(), sink, strict, hadPosix, status); _appendVariantsToLanguageTag(canonical.toStringPiece(), sink, strict, hadPosix, status);
_appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status); _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
_appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status); _appendPrivateuseToLanguageTag(canonical.toStringPiece(), sink, strict, hadPosix, status);
} }

View file

@ -10,7 +10,6 @@
#include "unicode/locid.h" #include "unicode/locid.h"
#include "bytesinkutil.h" #include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h" #include "cmemory.h"
U_NAMESPACE_USE U_NAMESPACE_USE
@ -24,9 +23,7 @@ ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err) {
if (length < 0) { if (length < 0) {
return EXTERNAL(icu::Locale::createFromName(localeID).clone()); return EXTERNAL(icu::Locale::createFromName(localeID).clone());
} }
CharString str(localeID, length, *err); // Make a NUL terminated copy. return EXTERNAL(icu::Locale::createFromName(StringPiece{localeID, length}).clone());
if (U_FAILURE(*err)) { return nullptr; }
return EXTERNAL(icu::Locale::createFromName(str.data()).clone());
} }
ULocale* ULocale*

View file

@ -68,42 +68,42 @@ U_EXPORT std::optional<std::string_view>
ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value); ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value);
U_EXPORT icu::CharString U_EXPORT icu::CharString
ulocimp_getKeywords(const char* localeID, ulocimp_getKeywords(std::string_view localeID,
char prev, char prev,
bool valuesToo, bool valuesToo,
UErrorCode& status); UErrorCode& status);
U_EXPORT void U_EXPORT void
ulocimp_getKeywords(const char* localeID, ulocimp_getKeywords(std::string_view localeID,
char prev, char prev,
icu::ByteSink& sink, icu::ByteSink& sink,
bool valuesToo, bool valuesToo,
UErrorCode& status); UErrorCode& status);
U_EXPORT icu::CharString U_EXPORT icu::CharString
ulocimp_getName(const char* localeID, ulocimp_getName(std::string_view localeID,
UErrorCode& err); UErrorCode& err);
U_EXPORT void U_EXPORT void
ulocimp_getName(const char* localeID, ulocimp_getName(std::string_view localeID,
icu::ByteSink& sink, icu::ByteSink& sink,
UErrorCode& err); UErrorCode& err);
U_EXPORT icu::CharString U_EXPORT icu::CharString
ulocimp_getBaseName(const char* localeID, ulocimp_getBaseName(std::string_view localeID,
UErrorCode& err); UErrorCode& err);
U_EXPORT void U_EXPORT void
ulocimp_getBaseName(const char* localeID, ulocimp_getBaseName(std::string_view localeID,
icu::ByteSink& sink, icu::ByteSink& sink,
UErrorCode& err); UErrorCode& err);
U_EXPORT icu::CharString U_EXPORT icu::CharString
ulocimp_canonicalize(const char* localeID, ulocimp_canonicalize(std::string_view localeID,
UErrorCode& err); UErrorCode& err);
U_EXPORT void U_EXPORT void
ulocimp_canonicalize(const char* localeID, ulocimp_canonicalize(std::string_view localeID,
icu::ByteSink& sink, icu::ByteSink& sink,
UErrorCode& err); UErrorCode& err);
@ -119,16 +119,16 @@ ulocimp_getKeywordValue(const char* localeID,
UErrorCode& status); UErrorCode& status);
U_EXPORT icu::CharString U_EXPORT icu::CharString
ulocimp_getLanguage(const char* localeID, UErrorCode& status); ulocimp_getLanguage(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString U_EXPORT icu::CharString
ulocimp_getScript(const char* localeID, UErrorCode& status); ulocimp_getScript(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString U_EXPORT icu::CharString
ulocimp_getRegion(const char* localeID, UErrorCode& status); ulocimp_getRegion(std::string_view localeID, UErrorCode& status);
U_EXPORT icu::CharString U_EXPORT icu::CharString
ulocimp_getVariant(const char* localeID, UErrorCode& status); ulocimp_getVariant(std::string_view localeID, UErrorCode& status);
U_EXPORT void U_EXPORT void
ulocimp_setKeywordValue(std::string_view keywordName, ulocimp_setKeywordValue(std::string_view keywordName,
@ -145,7 +145,7 @@ ulocimp_setKeywordValue(std::string_view keywords,
U_EXPORT void U_EXPORT void
ulocimp_getSubtags( ulocimp_getSubtags(
const char* localeID, std::string_view localeID,
icu::CharString* language, icu::CharString* language,
icu::CharString* script, icu::CharString* script,
icu::CharString* region, icu::CharString* region,
@ -155,7 +155,7 @@ ulocimp_getSubtags(
U_EXPORT void U_EXPORT void
ulocimp_getSubtags( ulocimp_getSubtags(
const char* localeID, std::string_view localeID,
icu::ByteSink* language, icu::ByteSink* language,
icu::ByteSink* script, icu::ByteSink* script,
icu::ByteSink* region, icu::ByteSink* region,
@ -165,7 +165,7 @@ ulocimp_getSubtags(
inline void inline void
ulocimp_getSubtags( ulocimp_getSubtags(
const char* localeID, std::string_view localeID,
std::nullptr_t, std::nullptr_t,
std::nullptr_t, std::nullptr_t,
std::nullptr_t, std::nullptr_t,
@ -364,7 +364,7 @@ ulocimp_minimizeSubtags(const char* localeID,
UErrorCode& err); UErrorCode& err);
U_CAPI const char * U_EXPORT2 U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID); locale_getKeywordsStart(std::string_view localeID);
bool bool
ultag_isExtensionSubtags(const char* s, int32_t len); ultag_isExtensionSubtags(const char* s, int32_t len);

View file

@ -237,8 +237,13 @@ typedef HANDLE MemoryMap;
pData->map = (char *)data + length; pData->map = (char *)data + length;
pData->pHeader=(const DataHeader *)data; pData->pHeader=(const DataHeader *)data;
pData->mapAddr = data; pData->mapAddr = data;
#if U_PLATFORM == U_PF_IPHONE #if U_PLATFORM == U_PF_IPHONE || U_PLATFORM == U_PF_ANDROID
// Apparently supported from Android 23 and higher:
// https://github.com/ggml-org/llama.cpp/pull/3631
// Checking for the flag itself is safer than checking for __ANDROID_API__.
# ifdef POSIX_MADV_RANDOM
posix_madvise(data, length, POSIX_MADV_RANDOM); posix_madvise(data, length, POSIX_MADV_RANDOM);
# endif
#endif #endif
return true; return true;
} }

View file

@ -58,6 +58,8 @@ U_NAMESPACE_END
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
class CharString;
/** /**
* The BreakIterator class implements methods for finding the location * The BreakIterator class implements methods for finding the location
* of boundaries in text. BreakIterator is an abstract base class. * of boundaries in text. BreakIterator is an abstract base class.
@ -646,9 +648,9 @@ protected:
private: private:
/** @internal (private) */ /** @internal (private) */
char actualLocale[ULOC_FULLNAME_CAPACITY]; CharString* actualLocale = nullptr;
char validLocale[ULOC_FULLNAME_CAPACITY]; CharString* validLocale = nullptr;
char requestLocale[ULOC_FULLNAME_CAPACITY]; CharString* requestLocale = nullptr;
}; };
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API

View file

@ -9,10 +9,13 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#include <cstddef> #include <cstddef>
#include <string_view> #include <string_view>
#include <type_traits>
#endif
/** /**
* \file * \file
@ -21,8 +24,6 @@
* Also conversion functions from char16_t * to UChar * and OldUChar *. * Also conversion functions from char16_t * to UChar * and OldUChar *.
*/ */
U_NAMESPACE_BEGIN
/** /**
* \def U_ALIASING_BARRIER * \def U_ALIASING_BARRIER
* Barrier for pointer anti-aliasing optimizations even across function boundaries. * Barrier for pointer anti-aliasing optimizations even across function boundaries.
@ -36,6 +37,11 @@ U_NAMESPACE_BEGIN
# define U_ALIASING_BARRIER(ptr) # define U_ALIASING_BARRIER(ptr)
#endif #endif
// ICU DLL-exported
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/** /**
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
* @stable ICU 59 * @stable ICU 59
@ -251,6 +257,60 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif #endif
/// \endcond /// \endcond
U_NAMESPACE_END
#endif // U_SHOW_CPLUSPLUS_API
// Usable in header-only definitions
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
namespace U_ICU_NAMESPACE_OR_INTERNAL {
#ifndef U_FORCE_HIDE_INTERNAL_API
/** @internal */
template<typename T, typename = std::enable_if_t<std::is_same_v<T, UChar>>>
inline const char16_t *uprv_char16PtrFromUChar(const T *p) {
if constexpr (std::is_same_v<UChar, char16_t>) {
return p;
} else {
#if U_SHOW_CPLUSPLUS_API
return ConstChar16Ptr(p).get();
#else
#ifdef U_ALIASING_BARRIER
U_ALIASING_BARRIER(p);
#endif
return reinterpret_cast<const char16_t *>(p);
#endif
}
}
#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
/** @internal */
inline const char16_t *uprv_char16PtrFromUint16(const uint16_t *p) {
#if U_SHOW_CPLUSPLUS_API
return ConstChar16Ptr(p).get();
#else
#ifdef U_ALIASING_BARRIER
U_ALIASING_BARRIER(p);
#endif
return reinterpret_cast<const char16_t *>(p);
#endif
}
#endif
#if U_SIZEOF_WCHAR_T==2
/** @internal */
inline const char16_t *uprv_char16PtrFromWchar(const wchar_t *p) {
#if U_SHOW_CPLUSPLUS_API
return ConstChar16Ptr(p).get();
#else
#ifdef U_ALIASING_BARRIER
U_ALIASING_BARRIER(p);
#endif
return reinterpret_cast<const char16_t *>(p);
#endif
}
#endif
#endif
/** /**
* Converts from const char16_t * to const UChar *. * Converts from const char16_t * to const UChar *.
* Includes an aliasing barrier if available. * Includes an aliasing barrier if available.
@ -307,6 +367,15 @@ inline OldUChar *toOldUCharPtr(char16_t *p) {
return reinterpret_cast<OldUChar *>(p); return reinterpret_cast<OldUChar *>(p);
} }
} // U_ICU_NAMESPACE_OR_INTERNAL
#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
// ICU DLL-exported
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
#ifndef U_FORCE_HIDE_INTERNAL_API #ifndef U_FORCE_HIDE_INTERNAL_API
/** /**
* Is T convertible to a std::u16string_view or some other 16-bit string view? * Is T convertible to a std::u16string_view or some other 16-bit string view?
@ -379,6 +448,6 @@ inline std::u16string_view toU16StringViewNullable(const T& text) {
U_NAMESPACE_END U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */ #endif // U_SHOW_CPLUSPLUS_API
#endif // __CHAR16PTR_H__ #endif // __CHAR16PTR_H__

View file

@ -449,6 +449,11 @@ public:
*/ */
static Locale U_EXPORT2 createFromName(const char *name); static Locale U_EXPORT2 createFromName(const char *name);
#ifndef U_HIDE_INTERNAL_API
/** @internal */
static Locale U_EXPORT2 createFromName(StringPiece name);
#endif /* U_HIDE_INTERNAL_API */
/** /**
* Creates a locale from the given string after canonicalizing * Creates a locale from the given string after canonicalizing
* the string according to CLDR by calling uloc_canonicalize(). * the string according to CLDR by calling uloc_canonicalize().
@ -1133,7 +1138,9 @@ private:
* @param cLocaleID The new locale name. * @param cLocaleID The new locale name.
* @param canonicalize whether to call uloc_canonicalize on cLocaleID * @param canonicalize whether to call uloc_canonicalize on cLocaleID
*/ */
Locale& init(const char* cLocaleID, UBool canonicalize); Locale& init(const char* localeID, UBool canonicalize);
/** @internal */
Locale& init(StringPiece localeID, UBool canonicalize);
/* /*
* Internal constructor to allow construction of a locale object with * Internal constructor to allow construction of a locale object with

View file

@ -450,7 +450,7 @@ public:
* @return a Locale object * @return a Locale object
* @stable ICU 2.8 * @stable ICU 2.8
*/ */
const Locale Locale
getLocale(ULocDataLocaleType type, UErrorCode &status) const; getLocale(ULocDataLocaleType type, UErrorCode &status) const;
#ifndef U_HIDE_INTERNAL_API #ifndef U_HIDE_INTERNAL_API
/** /**

View file

@ -675,14 +675,14 @@ typedef enum UProperty {
* @stable ICU 63 * @stable ICU 63
*/ */
UCHAR_VERTICAL_ORIENTATION=0x1018, UCHAR_VERTICAL_ORIENTATION=0x1018,
#ifndef U_HIDE_DRAFT_API
/** /**
* Enumerated property Identifier_Status. * Enumerated property Identifier_Status.
* Used for UTS #39 General Security Profile for Identifiers * Used for UTS #39 General Security Profile for Identifiers
* (https://www.unicode.org/reports/tr39/#General_Security_Profile). * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
* @draft ICU 75 * @stable ICU 75
*/ */
UCHAR_IDENTIFIER_STATUS=0x1019, UCHAR_IDENTIFIER_STATUS=0x1019,
#ifndef U_HIDE_DRAFT_API
/** /**
* Enumerated property Indic_Conjunct_Break. * Enumerated property Indic_Conjunct_Break.
* Used in the grapheme cluster break algorithm in UAX #29. * Used in the grapheme cluster break algorithm in UAX #29.
@ -796,7 +796,6 @@ typedef enum UProperty {
UCHAR_SCRIPT_EXTENSIONS=0x7000, UCHAR_SCRIPT_EXTENSIONS=0x7000,
/** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
#ifndef U_HIDE_DRAFT_API
/** /**
* Miscellaneous property Identifier_Type. * Miscellaneous property Identifier_Type.
* Used for UTS #39 General Security Profile for Identifiers * Used for UTS #39 General Security Profile for Identifiers
@ -808,10 +807,9 @@ typedef enum UProperty {
* *
* @see u_hasIDType * @see u_hasIDType
* @see u_getIDTypes * @see u_getIDTypes
* @draft ICU 75 * @stable ICU 75
*/ */
UCHAR_IDENTIFIER_TYPE=0x7001, UCHAR_IDENTIFIER_TYPE=0x7001,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the last constant for Unicode properties with unusual value types. * One more than the last constant for Unicode properties with unusual value types.
@ -2791,13 +2789,12 @@ typedef enum UVerticalOrientation {
U_VO_UPRIGHT, U_VO_UPRIGHT,
} UVerticalOrientation; } UVerticalOrientation;
#ifndef U_HIDE_DRAFT_API
/** /**
* Identifier Status constants. * Identifier Status constants.
* See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type. * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
* *
* @see UCHAR_IDENTIFIER_STATUS * @see UCHAR_IDENTIFIER_STATUS
* @draft ICU 75 * @stable ICU 75
*/ */
typedef enum UIdentifierStatus { typedef enum UIdentifierStatus {
/* /*
@ -2806,9 +2803,9 @@ typedef enum UIdentifierStatus {
* U_ID_STATUS_<Unicode Identifier_Status value name> * U_ID_STATUS_<Unicode Identifier_Status value name>
*/ */
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_STATUS_RESTRICTED, U_ID_STATUS_RESTRICTED,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_STATUS_ALLOWED, U_ID_STATUS_ALLOWED,
} UIdentifierStatus; } UIdentifierStatus;
@ -2817,7 +2814,7 @@ typedef enum UIdentifierStatus {
* See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type. * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
* *
* @see UCHAR_IDENTIFIER_TYPE * @see UCHAR_IDENTIFIER_TYPE
* @draft ICU 75 * @stable ICU 75
*/ */
typedef enum UIdentifierType { typedef enum UIdentifierType {
/* /*
@ -2826,32 +2823,31 @@ typedef enum UIdentifierType {
* U_ID_TYPE_<Unicode Identifier_Type value name> * U_ID_TYPE_<Unicode Identifier_Type value name>
*/ */
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_NOT_CHARACTER, U_ID_TYPE_NOT_CHARACTER,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_DEPRECATED, U_ID_TYPE_DEPRECATED,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_DEFAULT_IGNORABLE, U_ID_TYPE_DEFAULT_IGNORABLE,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_NOT_NFKC, U_ID_TYPE_NOT_NFKC,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_NOT_XID, U_ID_TYPE_NOT_XID,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_EXCLUSION, U_ID_TYPE_EXCLUSION,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_OBSOLETE, U_ID_TYPE_OBSOLETE,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_TECHNICAL, U_ID_TYPE_TECHNICAL,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_UNCOMMON_USE, U_ID_TYPE_UNCOMMON_USE,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_LIMITED_USE, U_ID_TYPE_LIMITED_USE,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_INCLUSION, U_ID_TYPE_INCLUSION,
/** @draft ICU 75 */ /** @stable ICU 75 */
U_ID_TYPE_RECOMMENDED, U_ID_TYPE_RECOMMENDED,
} UIdentifierType; } UIdentifierType;
#endif // U_HIDE_DRAFT_API
/** /**
* Check a binary Unicode property for a code point. * Check a binary Unicode property for a code point.
@ -4057,7 +4053,6 @@ u_isIDStart(UChar32 c);
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
u_isIDPart(UChar32 c); u_isIDPart(UChar32 c);
#ifndef U_HIDE_DRAFT_API
/** /**
* Does the set of Identifier_Type values code point c contain the given type? * Does the set of Identifier_Type values code point c contain the given type?
* *
@ -4069,7 +4064,7 @@ u_isIDPart(UChar32 c);
* @param c code point * @param c code point
* @param type Identifier_Type to check * @param type Identifier_Type to check
* @return true if type is in Identifier_Type(c) * @return true if type is in Identifier_Type(c)
* @draft ICU 75 * @stable ICU 75
*/ */
U_CAPI bool U_EXPORT2 U_CAPI bool U_EXPORT2
u_hasIDType(UChar32 c, UIdentifierType type); u_hasIDType(UChar32 c, UIdentifierType type);
@ -4104,11 +4099,10 @@ u_hasIDType(UChar32 c, UIdentifierType type);
* function chaining. (See User Guide for details.) * function chaining. (See User Guide for details.)
* @return number of values in c's Identifier_Type, * @return number of values in c's Identifier_Type,
* written to types unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity * written to types unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
* @draft ICU 75 * @stable ICU 75
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode); u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode);
#endif // U_HIDE_DRAFT_API
/** /**
* Determines if the specified character should be regarded * Determines if the specified character should be regarded

View file

@ -1173,10 +1173,12 @@ public:
inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const { inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet()); return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
} }
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/** /**
* Returns a C++ iterator for iterating over all of the elements of this set. * Returns a C++ iterator for iterating over all of the elements of this set.
* Convenient all-in one iteration, but creates a UnicodeString for each * Convenient all-in one iteration, but creates a std::u16string for each
* code point or string. * code point or string.
* (Similar to how Java UnicodeSet *is an* Iterable&lt;String&gt;.) * (Similar to how Java UnicodeSet *is an* Iterable&lt;String&gt;.)
* *
@ -1185,13 +1187,14 @@ public:
* \code * \code
* UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode); * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode);
* for (auto el : set) { * for (auto el : set) {
* UnicodeString us(el);
* std::string u8; * std::string u8;
* printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str()); * printf("set.element length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str());
* } * }
* \endcode * \endcode
* *
* @return an all-elements iterator. * @return an all-elements iterator.
* @draft ICU 76 * @draft ICU 77
* @see end * @see end
* @see codePoints * @see codePoints
* @see ranges * @see ranges
@ -1203,7 +1206,7 @@ public:
/** /**
* @return an exclusive-end sentinel for iterating over all of the elements of this set. * @return an exclusive-end sentinel for iterating over all of the elements of this set.
* @draft ICU 76 * @draft ICU 77
* @see begin * @see begin
* @see codePoints * @see codePoints
* @see ranges * @see ranges

View file

@ -32,12 +32,13 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/uchar.h" #include "unicode/uchar.h"
#if U_SHOW_CPLUSPLUS_API #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#include <string>
#include <string_view> #include <string_view>
#include "unicode/char16ptr.h" #include "unicode/char16ptr.h"
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#include "unicode/unistr.h" #include "unicode/utf16.h"
#endif // U_SHOW_CPLUSPLUS_API #endif
#ifndef USET_DEFINED #ifndef USET_DEFINED
@ -1392,8 +1393,8 @@ public:
private: private:
friend class USetCodePoints; friend class USetCodePoints;
USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount) USetCodePointIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
: uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount), : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount),
c(U_SENTINEL), end(U_SENTINEL) { c(U_SENTINEL), end(U_SENTINEL) {
// Fetch the first range. // Fetch the first range.
operator++(); operator++();
@ -1429,7 +1430,7 @@ public:
* Constructs a C++ "range" object over the code points of the USet. * Constructs a C++ "range" object over the code points of the USet.
* @draft ICU 76 * @draft ICU 76
*/ */
USetCodePoints(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {} USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
/** @draft ICU 76 */ /** @draft ICU 76 */
USetCodePoints(const USetCodePoints &other) = default; USetCodePoints(const USetCodePoints &other) = default;
@ -1460,7 +1461,7 @@ struct CodePointRange {
/** @draft ICU 76 */ /** @draft ICU 76 */
struct iterator { struct iterator {
/** @draft ICU 76 */ /** @draft ICU 76 */
iterator(UChar32 c) : c(c) {} iterator(UChar32 aC) : c(aC) {}
/** @draft ICU 76 */ /** @draft ICU 76 */
bool operator==(const iterator &other) const { return c == other.c; } bool operator==(const iterator &other) const { return c == other.c; }
@ -1573,8 +1574,8 @@ public:
private: private:
friend class USetRanges; friend class USetRanges;
USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount) USetRangeIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
: uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {} : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount) {}
const USet *uset; const USet *uset;
int32_t rangeIndex; int32_t rangeIndex;
@ -1610,7 +1611,7 @@ public:
* Constructs a C++ "range" object over the code point ranges of the USet. * Constructs a C++ "range" object over the code point ranges of the USet.
* @draft ICU 76 * @draft ICU 76
*/ */
USetRanges(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {} USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
/** @draft ICU 76 */ /** @draft ICU 76 */
USetRanges(const USetRanges &other) = default; USetRanges(const USetRanges &other) = default;
@ -1657,7 +1658,7 @@ public:
int32_t length; int32_t length;
const UChar *uchars = uset_getString(uset, index, &length); const UChar *uchars = uset_getString(uset, index, &length);
// assert uchars != nullptr; // assert uchars != nullptr;
return {ConstChar16Ptr(uchars), static_cast<uint32_t>(length)}; return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
} }
return {}; return {};
} }
@ -1684,8 +1685,8 @@ public:
private: private:
friend class USetStrings; friend class USetStrings;
USetStringIterator(const USet *uset, int32_t index, int32_t count) USetStringIterator(const USet *pUset, int32_t nIndex, int32_t nCount)
: uset(uset), index(index), count(count) {} : uset(pUset), index(nIndex), count(nCount) {}
const USet *uset; const USet *uset;
int32_t index; int32_t index;
@ -1699,9 +1700,11 @@ private:
* using U_HEADER_NESTED_NAMESPACE::USetStrings; * using U_HEADER_NESTED_NAMESPACE::USetStrings;
* LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode)); * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
* for (auto s : USetStrings(uset.getAlias())) { * for (auto s : USetStrings(uset.getAlias())) {
* UnicodeString us(s); * int32_t len32 = s.length();
* std::string u8; * char utf8[200];
* printf("uset.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str()); * u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr,
* s.data(), len32, 0xFFFD, nullptr, errorCode);
* printf("uset.string length %ld \"%s\"\n", long{len32}, utf8);
* } * }
* \endcode * \endcode
* *
@ -1718,7 +1721,7 @@ public:
* Constructs a C++ "range" object over the strings of the USet. * Constructs a C++ "range" object over the strings of the USet.
* @draft ICU 76 * @draft ICU 76
*/ */
USetStrings(const USet *uset) : uset(uset), count(uset_getStringCount(uset)) {} USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {}
/** @draft ICU 76 */ /** @draft ICU 76 */
USetStrings(const USetStrings &other) = default; USetStrings(const USetStrings &other) = default;
@ -1737,17 +1740,19 @@ private:
const USet *uset; const USet *uset;
int32_t count; int32_t count;
}; };
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/** /**
* Iterator returned by USetElements. * Iterator returned by USetElements.
* @draft ICU 76 * @draft ICU 77
*/ */
class USetElementIterator { class USetElementIterator {
public: public:
/** @draft ICU 76 */ /** @draft ICU 77 */
USetElementIterator(const USetElementIterator &other) = default; USetElementIterator(const USetElementIterator &other) = default;
/** @draft ICU 76 */ /** @draft ICU 77 */
bool operator==(const USetElementIterator &other) const { bool operator==(const USetElementIterator &other) const {
// No need to compare rangeCount & end given private constructor // No need to compare rangeCount & end given private constructor
// and assuming we don't compare iterators across the set being modified. // and assuming we don't compare iterators across the set being modified.
@ -1756,26 +1761,28 @@ public:
return uset == other.uset && c == other.c && index == other.index; return uset == other.uset && c == other.c && index == other.index;
} }
/** @draft ICU 76 */ /** @draft ICU 77 */
bool operator!=(const USetElementIterator &other) const { return !operator==(other); } bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
/** @draft ICU 76 */ /** @draft ICU 77 */
UnicodeString operator*() const { std::u16string operator*() const {
if (c >= 0) { if (c >= 0) {
return UnicodeString(c); return c <= 0xffff ?
std::u16string({static_cast<char16_t>(c)}) :
std::u16string({U16_LEAD(c), U16_TRAIL(c)});
} else if (index < totalCount) { } else if (index < totalCount) {
int32_t length; int32_t length;
const UChar *uchars = uset_getString(uset, index - rangeCount, &length); const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
// assert uchars != nullptr; // assert uchars != nullptr;
return UnicodeString(uchars, length); return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
} else { } else {
return UnicodeString(); return {};
} }
} }
/** /**
* Pre-increment. * Pre-increment.
* @draft ICU 76 * @draft ICU 77
*/ */
USetElementIterator &operator++() { USetElementIterator &operator++() {
if (c < end) { if (c < end) {
@ -1800,7 +1807,7 @@ public:
/** /**
* Post-increment. * Post-increment.
* @draft ICU 76 * @draft ICU 77
*/ */
USetElementIterator operator++(int) { USetElementIterator operator++(int) {
USetElementIterator result(*this); USetElementIterator result(*this);
@ -1811,8 +1818,8 @@ public:
private: private:
friend class USetElements; friend class USetElements;
USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount) USetElementIterator(const USet *pUset, int32_t nIndex, int32_t nRangeCount, int32_t nTotalCount)
: uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount), : uset(pUset), index(nIndex), rangeCount(nRangeCount), totalCount(nTotalCount),
c(U_SENTINEL), end(U_SENTINEL) { c(U_SENTINEL), end(U_SENTINEL) {
if (index < rangeCount) { if (index < rangeCount) {
// Fetch the first range. // Fetch the first range.
@ -1840,7 +1847,7 @@ private:
/** /**
* A C++ "range" for iterating over all of the elements of a USet. * A C++ "range" for iterating over all of the elements of a USet.
* Convenient all-in one iteration, but creates a UnicodeString for each * Convenient all-in one iteration, but creates a std::u16string for each
* code point or string. * code point or string.
* *
* Code points are returned first, then empty and multi-character strings. * Code points are returned first, then empty and multi-character strings.
@ -1849,15 +1856,18 @@ private:
* using U_HEADER_NESTED_NAMESPACE::USetElements; * using U_HEADER_NESTED_NAMESPACE::USetElements;
* LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode)); * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode));
* for (auto el : USetElements(uset.getAlias())) { * for (auto el : USetElements(uset.getAlias())) {
* std::string u8; * int32_t len32 = el.length();
* printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str()); * char utf8[200];
* u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr,
* el.data(), len32, 0xFFFD, nullptr, errorCode);
* printf("uset.element length %ld \"%s\"\n", long{len32}, utf8);
* } * }
* \endcode * \endcode
* *
* C++ UnicodeSet has member functions for iteration, including begin() and end(). * C++ UnicodeSet has member functions for iteration, including begin() and end().
* *
* @return an all-elements iterator. * @return an all-elements iterator.
* @draft ICU 76 * @draft ICU 77
* @see USetCodePoints * @see USetCodePoints
* @see USetRanges * @see USetRanges
* @see USetStrings * @see USetStrings
@ -1866,21 +1876,21 @@ class USetElements {
public: public:
/** /**
* Constructs a C++ "range" object over all of the elements of the USet. * Constructs a C++ "range" object over all of the elements of the USet.
* @draft ICU 76 * @draft ICU 77
*/ */
USetElements(const USet *uset) USetElements(const USet *pUset)
: uset(uset), rangeCount(uset_getRangeCount(uset)), : uset(pUset), rangeCount(uset_getRangeCount(pUset)),
stringCount(uset_getStringCount(uset)) {} stringCount(uset_getStringCount(pUset)) {}
/** @draft ICU 76 */ /** @draft ICU 77 */
USetElements(const USetElements &other) = default; USetElements(const USetElements &other) = default;
/** @draft ICU 76 */ /** @draft ICU 77 */
USetElementIterator begin() const { USetElementIterator begin() const {
return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount); return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
} }
/** @draft ICU 76 */ /** @draft ICU 77 */
USetElementIterator end() const { USetElementIterator end() const {
return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount); return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
} }

View file

@ -124,7 +124,7 @@
* @internal * @internal
*/ */
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict);
/** /**
* Function for handling "append code point" with error-checking. * Function for handling "append code point" with error-checking.
@ -148,7 +148,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool
* @internal * @internal
*/ */
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict);
/** /**
* Function for handling "skip backward one code point" with error-checking. * Function for handling "skip backward one code point" with error-checking.

View file

@ -598,12 +598,13 @@ typedef enum UErrorCode {
U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */ U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */
U_MF_BAD_OPTION, /**< An option value provided to a function does not have the required form for that option. @internal ICU 77 technology preview @deprecated This API is for technology preview only. */
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the highest normal formatting API error code. * One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/ */
U_FMT_PARSE_ERROR_LIMIT = 0x10120, U_FMT_PARSE_ERROR_LIMIT = 0x10121,
#endif // U_HIDE_DEPRECATED_API #endif // U_HIDE_DEPRECATED_API
/* /*

View file

@ -53,7 +53,7 @@
* This value will change in the subsequent releases of ICU * This value will change in the subsequent releases of ICU
* @stable ICU 2.4 * @stable ICU 2.4
*/ */
#define U_ICU_VERSION_MAJOR_NUM 76 #define U_ICU_VERSION_MAJOR_NUM 77
/** The current ICU minor version as an integer. /** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU * This value will change in the subsequent releases of ICU
@ -79,7 +79,7 @@
* This value will change in the subsequent releases of ICU * This value will change in the subsequent releases of ICU
* @stable ICU 2.6 * @stable ICU 2.6
*/ */
#define U_ICU_VERSION_SUFFIX _76 #define U_ICU_VERSION_SUFFIX _77
/** /**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME * \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -132,7 +132,7 @@
* This value will change in the subsequent releases of ICU * This value will change in the subsequent releases of ICU
* @stable ICU 2.4 * @stable ICU 2.4
*/ */
#define U_ICU_VERSION "76.1" #define U_ICU_VERSION "77.1"
/** /**
* The current ICU library major version number as a string, for library name suffixes. * The current ICU library major version number as a string, for library name suffixes.
@ -145,13 +145,13 @@
* *
* @stable ICU 2.6 * @stable ICU 2.6
*/ */
#define U_ICU_VERSION_SHORT "76" #define U_ICU_VERSION_SHORT "77"
#ifndef U_HIDE_INTERNAL_API #ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C. /** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only * @internal ICU 4.4 Internal Use Only
**/ **/
#define U_ICU_DATA_VERSION "76.1" #define U_ICU_DATA_VERSION "77.1"
#endif /* U_HIDE_INTERNAL_API */ #endif /* U_HIDE_INTERNAL_API */
/*=========================================================================== /*===========================================================================

View file

@ -125,7 +125,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
U_NAMESPACE_USE U_NAMESPACE_USE
# endif # endif
#ifndef U_HIDE_DRAFT_API #ifndef U_FORCE_HIDE_DRAFT_API
/** /**
* \def U_HEADER_NESTED_NAMESPACE * \def U_HEADER_NESTED_NAMESPACE
* Nested namespace used inside U_ICU_NAMESPACE for header-only APIs. * Nested namespace used inside U_ICU_NAMESPACE for header-only APIs.
@ -150,22 +150,37 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* @draft ICU 76 * @draft ICU 76
*/ */
/**
* \def U_ICU_NAMESPACE_OR_INTERNAL
* Namespace used for header-only APIs that used to be regular C++ APIs.
* Different when used inside ICU to prevent public use of internal instantiations.
* Similar to U_HEADER_ONLY_NAMESPACE, but the public definition is the same as U_ICU_NAMESPACE.
* "U_ICU_NAMESPACE" or "U_ICU_NAMESPACE::internal".
*
* @draft ICU 77
*/
// The first test is the same as for defining U_EXPORT for Windows. // The first test is the same as for defining U_EXPORT for Windows.
#if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \ #if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__)) UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
# define U_HEADER_NESTED_NAMESPACE header # define U_HEADER_NESTED_NAMESPACE header
# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE
#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ #elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
# define U_HEADER_NESTED_NAMESPACE internal # define U_HEADER_NESTED_NAMESPACE internal
# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE::internal
namespace U_ICU_NAMESPACE_OR_INTERNAL {}
using namespace U_ICU_NAMESPACE_OR_INTERNAL;
#else #else
# define U_HEADER_NESTED_NAMESPACE header # define U_HEADER_NESTED_NAMESPACE header
# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE
#endif #endif
#define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE #define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE
namespace U_HEADER_ONLY_NAMESPACE {} namespace U_HEADER_ONLY_NAMESPACE {}
#endif // U_HIDE_DRAFT_API #endif // U_FORCE_HIDE_DRAFT_API
#endif /* __cplusplus */ #endif /* __cplusplus */

View file

@ -1945,6 +1945,13 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
growCapacity = newCapacity; growCapacity = newCapacity;
} else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
growCapacity = US_STACKBUF_SIZE; growCapacity = US_STACKBUF_SIZE;
} else if(newCapacity > growCapacity) {
setToBogus();
return false; // bad inputs
}
if(growCapacity > kMaxCapacity) {
setToBogus();
return false;
} }
// save old values // save old values

View file

@ -2716,6 +2716,9 @@ ures_openWithType(UResourceBundle *r, const char* path, const char* localeID,
UResourceDataEntry *entry; UResourceDataEntry *entry;
if(openType != URES_OPEN_DIRECT) { if(openType != URES_OPEN_DIRECT) {
if (localeID == nullptr) {
localeID = uloc_getDefault();
}
/* first "canonicalize" the locale ID */ /* first "canonicalize" the locale ID */
CharString canonLocaleID = ulocimp_getBaseName(localeID, *status); CharString canonLocaleID = ulocimp_getBaseName(localeID, *status);
if(U_FAILURE(*status)) { if(U_FAILURE(*status)) {
@ -3080,6 +3083,9 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
kwVal.clear(); kwVal.clear();
} }
} }
if (locid == nullptr) {
locid = uloc_getDefault();
}
CharString base = ulocimp_getBaseName(locid, subStatus); CharString base = ulocimp_getBaseName(locid, subStatus);
#if defined(URES_TREE_DEBUG) #if defined(URES_TREE_DEBUG)
fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n", fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n",
@ -3244,7 +3250,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
const char *validLoc = ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus); const char *validLoc = ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus);
if (U_SUCCESS(subStatus) && validLoc != nullptr && validLoc[0] != 0 && uprv_strcmp(validLoc, "root") != 0) { if (U_SUCCESS(subStatus) && validLoc != nullptr && validLoc[0] != 0 && uprv_strcmp(validLoc, "root") != 0) {
CharString validLang = ulocimp_getLanguage(validLoc, subStatus); CharString validLang = ulocimp_getLanguage(validLoc, subStatus);
CharString parentLang = ulocimp_getLanguage(parent.data(), subStatus); CharString parentLang = ulocimp_getLanguage(parent.toStringPiece(), subStatus);
if (U_SUCCESS(subStatus) && validLang != parentLang) { if (U_SUCCESS(subStatus) && validLang != parentLang) {
// validLoc is not root and has a different language than parent, use it instead // validLoc is not root and has a different language than parent, use it instead
found.clear().append(validLoc, subStatus); found.clear().append(validLoc, subStatus);

View file

@ -59,6 +59,9 @@ getCodesFromLocale(const char *locale,
if (U_FAILURE(*err)) { return 0; } if (U_FAILURE(*err)) { return 0; }
icu::CharString lang; icu::CharString lang;
icu::CharString script; icu::CharString script;
if (locale == nullptr) {
locale = uloc_getDefault();
}
ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, *err); ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, *err);
if (U_FAILURE(*err)) { return 0; } if (U_FAILURE(*err)) { return 0; }
// Multi-script languages, equivalent to the LocaleScript data // Multi-script languages, equivalent to the LocaleScript data

View file

@ -28,6 +28,7 @@
#include "ubidi_props.h" #include "ubidi_props.h"
#include "uassert.h" #include "uassert.h"
#include <limits>
/* /*
* This implementation is designed for 16-bit Unicode strings. * This implementation is designed for 16-bit Unicode strings.
* The main assumption is that the Arabic characters and their * The main assumption is that the Arabic characters and their
@ -747,6 +748,10 @@ handleGeneratedSpaces(char16_t *dest, int32_t sourceLength,
} }
} }
if (static_cast<size_t>(sourceLength) + 1 > std::numeric_limits<size_t>::max() / U_SIZEOF_UCHAR) {
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
tempbuffer = static_cast<char16_t*>(uprv_malloc((sourceLength + 1) * U_SIZEOF_UCHAR)); tempbuffer = static_cast<char16_t*>(uprv_malloc((sourceLength + 1) * U_SIZEOF_UCHAR));
/* Test for nullptr */ /* Test for nullptr */
if(tempbuffer == nullptr) { if(tempbuffer == nullptr) {

View file

@ -126,7 +126,7 @@ compareEntries(const UHashTok p1, const UHashTok p2) {
name2.pointer = b2->name; name2.pointer = b2->name;
path1.pointer = b1->path; path1.pointer = b1->path;
path2.pointer = b2->path; path2.pointer = b2->path;
return uhash_compareChars(name1, name2) & uhash_compareChars(path1, path2); return uhash_compareChars(name1, name2) && uhash_compareChars(path1, path2);
} }
static void static void

View file

@ -124,11 +124,9 @@ errorValue(int32_t count, int8_t strict) {
* >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., true): * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., true):
* Same as the obsolete "safe" behavior, but non-characters are also treated * Same as the obsolete "safe" behavior, but non-characters are also treated
* like illegal sequences. * like illegal sequences.
*
* Note that a UBool is the same as an int8_t.
*/ */
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) { utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict) {
// *pi is one after byte c. // *pi is one after byte c.
int32_t i=*pi; int32_t i=*pi;
// length can be negative for NUL-terminated strings: Read and validate one byte at a time. // length can be negative for NUL-terminated strings: Read and validate one byte at a time.
@ -233,7 +231,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool
} }
U_CAPI UChar32 U_EXPORT2 U_CAPI UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) { utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict) {
// *pi is the index of byte c. // *pi is the index of byte c.
int32_t i=*pi; int32_t i=*pi;
if(U8_IS_TRAIL(c) && i>start) { if(U8_IS_TRAIL(c) && i>start) {

View file

@ -140,7 +140,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_MF_MISSING_SELECTOR_ANNOTATION_ERROR", "U_MF_MISSING_SELECTOR_ANNOTATION_ERROR",
"U_MF_DUPLICATE_DECLARATION_ERROR", "U_MF_DUPLICATE_DECLARATION_ERROR",
"U_MF_OPERAND_MISMATCH_ERROR", "U_MF_OPERAND_MISMATCH_ERROR",
"U_MF_DUPLICATE_VARIANT_ERROR" "U_MF_DUPLICATE_VARIANT_ERROR",
"U_MF_BAD_OPTION"
}; };
static const char * const static const char * const

View file

@ -285,19 +285,19 @@ uhash_equalsScriptSet(const UElement key1, const UElement key2) {
return (*s1 == *s2); return (*s1 == *s2);
} }
U_CAPI int8_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_compareScriptSet(UElement key0, UElement key1) { uhash_compareScriptSet(UElement key0, UElement key1) {
icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
int32_t diff = s0->countMembers() - s1->countMembers(); int32_t diff = s0->countMembers() - s1->countMembers();
if (diff != 0) return static_cast<UBool>(diff); if (diff != 0) return diff;
int32_t i0 = s0->nextSetBit(0); int32_t i0 = s0->nextSetBit(0);
int32_t i1 = s1->nextSetBit(0); int32_t i1 = s1->nextSetBit(0);
while ((diff = i0-i1) == 0 && i0 > 0) { while ((diff = i0-i1) == 0 && i0 > 0) {
i0 = s0->nextSetBit(i0+1); i0 = s0->nextSetBit(i0+1);
i1 = s1->nextSetBit(i1+1); i1 = s1->nextSetBit(i1+1);
} }
return (int8_t)diff; return diff;
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2

View file

@ -74,7 +74,7 @@ class U_I18N_API ScriptSet: public UMemory {
U_NAMESPACE_END U_NAMESPACE_END
U_CAPI UBool U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_compareScriptSet(const UElement key1, const UElement key2); uhash_compareScriptSet(const UElement key1, const UElement key2);
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2

View file

@ -39,6 +39,7 @@ typedef enum ECleanupI18NType {
UCLN_I18N_HEBREW_CALENDAR, UCLN_I18N_HEBREW_CALENDAR,
UCLN_I18N_ASTRO_CALENDAR, UCLN_I18N_ASTRO_CALENDAR,
UCLN_I18N_DANGI_CALENDAR, UCLN_I18N_DANGI_CALENDAR,
UCLN_I18N_PERSIAN_CALENDAR,
UCLN_I18N_CALENDAR, UCLN_I18N_CALENDAR,
UCLN_I18N_TIMEZONEFORMAT, UCLN_I18N_TIMEZONEFORMAT,
UCLN_I18N_TZDBTIMEZONENAMES, UCLN_I18N_TZDBTIMEZONENAMES,
@ -62,6 +63,7 @@ typedef enum ECleanupI18NType {
UCLN_I18N_REGION, UCLN_I18N_REGION,
UCLN_I18N_LIST_FORMATTER, UCLN_I18N_LIST_FORMATTER,
UCLN_I18N_NUMSYS, UCLN_I18N_NUMSYS,
UCLN_I18N_MF2_UNISETS,
UCLN_I18N_COUNT /* This must be last */ UCLN_I18N_COUNT /* This must be last */
} ECleanupI18NType; } ECleanupI18NType;

Binary file not shown.