ICU4C: Update to version 74.1

This commit is contained in:
bruvzg 2023-11-01 08:56:12 +02:00
parent 6afd320984
commit 5e55c6c611
No known key found for this signature in database
GPG key ID: 7960FCF39844EC38
65 changed files with 8399 additions and 6799 deletions

View file

@ -21,6 +21,7 @@
#include "unicode/uscript.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
#include "unicode/rbbi.h"
#include "brkeng.h"
#include "cmemory.h"
@ -70,19 +71,21 @@ UnhandledEngine::~UnhandledEngine() {
}
UBool
UnhandledEngine::handles(UChar32 c) const {
UnhandledEngine::handles(UChar32 c, const char* locale) const {
(void)locale; // Unused
return fHandled && fHandled->contains(c);
}
int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t startPos,
int32_t endPos,
UVector32 &/*foundBreaks*/,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
UChar32 c = utext_current32(text);
utext_setNativeIndex(text, startPos);
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
@ -120,41 +123,39 @@ ICULanguageBreakFactory::~ICULanguageBreakFactory() {
}
}
U_NAMESPACE_END
U_CDECL_BEGIN
static void U_CALLCONV _deleteEngine(void *obj) {
delete (const icu::LanguageBreakEngine *) obj;
void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
if (fEngines == nullptr) {
LocalPointer<UStack> engines(new UStack(uprv_deleteUObject, nullptr, status), status);
if (U_SUCCESS(status)) {
fEngines = engines.orphan();
}
}
}
U_CDECL_END
U_NAMESPACE_BEGIN
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
const LanguageBreakEngine *lbe = nullptr;
UErrorCode status = U_ZERO_ERROR;
ensureEngines(status);
if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
return nullptr;
}
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
if (fEngines == nullptr) {
LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
return nullptr;
}
fEngines = engines.orphan();
} else {
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != nullptr && lbe->handles(c)) {
return lbe;
}
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != nullptr && lbe->handles(c, locale)) {
return lbe;
}
}
// We didn't find an engine. Create one.
lbe = loadEngineFor(c);
lbe = loadEngineFor(c, locale);
if (lbe != nullptr) {
fEngines->push((void *)lbe, status);
}
@ -162,7 +163,7 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
}
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
@ -299,6 +300,70 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
return nullptr;
}
void ICULanguageBreakFactory::addExternalEngine(
ExternalBreakEngine* external, UErrorCode& status) {
LocalPointer<ExternalBreakEngine> engine(external, status);
ensureEngines(status);
LocalPointer<BreakEngineWrapper> wrapper(
new BreakEngineWrapper(engine.orphan(), status), status);
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
fEngines->push(wrapper.getAlias(), status);
wrapper.orphan();
}
BreakEngineWrapper::BreakEngineWrapper(
ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) {
}
BreakEngineWrapper::~BreakEngineWrapper() {
}
UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const {
return delegate->isFor(c, locale);
}
int32_t BreakEngineWrapper::findBreaks(
UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
int32_t result = 0;
// Find the span of characters included in the set.
// The span to break begins at the current position in the text, and
// extends towards the start or end of the text, depending on 'reverse'.
utext_setNativeIndex(text, startPos);
int32_t start = (int32_t)utext_getNativeIndex(text);
int32_t current;
int32_t rangeStart;
int32_t rangeEnd;
UChar32 c = utext_current32(text);
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
rangeStart = start;
rangeEnd = current;
int32_t beforeSize = foundBreaks.size();
int32_t additionalCapacity = rangeEnd - rangeStart + 1;
// enlarge to contains (rangeEnd-rangeStart+1) more items
foundBreaks.ensureCapacity(beforeSize+additionalCapacity, status);
if (U_FAILURE(status)) return 0;
foundBreaks.setSize(beforeSize + beforeSize+additionalCapacity);
result = delegate->fillBreaks(text, rangeStart, rangeEnd, foundBreaks.getBuffer()+beforeSize,
additionalCapacity, status);
if (U_FAILURE(status)) return 0;
foundBreaks.setSize(beforeSize + result);
utext_setNativeIndex(text, current);
return result;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */