mirror of
https://github.com/godotengine/godot.git
synced 2025-12-08 06:09:55 +00:00
Update HarfBuzz, ICU and FreeType.
HarfBuzz: Update to version 4.2.1 FreeType: Update to version 2.12.1 ICU: Update to version 71.1
This commit is contained in:
parent
7ea8cde983
commit
93fba7ead3
497 changed files with 21141 additions and 3961 deletions
125
thirdparty/icu4c/LICENSE
vendored
125
thirdparty/icu4c/LICENSE
vendored
|
|
@ -1,6 +1,19 @@
|
|||
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
|
||||
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
|
||||
|
||||
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
|
||||
See Terms of Use <https://www.unicode.org/copyright.html>
|
||||
for definitions of Unicode Inc.’s Data Files and Software.
|
||||
|
||||
NOTICE TO USER: Carefully read the following legal agreement.
|
||||
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
||||
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
||||
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
||||
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
||||
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
||||
THE DATA FILES OR SOFTWARE.
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2022 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
|
@ -32,7 +45,7 @@ shall not be used in advertising or otherwise to promote the sale,
|
|||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
||||
---------------------
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Third-Party Software Licenses
|
||||
|
||||
|
|
@ -40,7 +53,9 @@ This section contains third-party software notices and/or additional
|
|||
terms for licensed third-party software components included within ICU
|
||||
libraries.
|
||||
|
||||
1. ICU License - ICU 1.8.1 to ICU 57.1
|
||||
----------------------------------------------------------------------
|
||||
|
||||
ICU License - ICU 1.8.1 to ICU 57.1
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
|
|
@ -75,7 +90,9 @@ of the copyright holder.
|
|||
All trademarks and registered trademarks mentioned herein are the
|
||||
property of their respective owners.
|
||||
|
||||
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
|
||||
|
||||
# The Google Chrome software developed by Google is licensed under
|
||||
# the BSD license. Other software included in this distribution is
|
||||
|
|
@ -279,7 +296,9 @@ property of their respective owners.
|
|||
#
|
||||
# ---------------COPYING.ipadic-----END----------------------------------
|
||||
|
||||
3. Lao Word Break Dictionary Data (laodict.txt)
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Lao Word Break Dictionary Data (laodict.txt)
|
||||
|
||||
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
|
@ -319,7 +338,9 @@ property of their respective owners.
|
|||
# OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
4. Burmese Word Break Dictionary Data (burmesedict.txt)
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Burmese Word Break Dictionary Data (burmesedict.txt)
|
||||
|
||||
# Copyright (c) 2014 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
|
|
@ -359,7 +380,9 @@ property of their respective owners.
|
|||
# SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
5. Time Zone Database
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Time Zone Database
|
||||
|
||||
ICU uses the public domain data and code derived from Time Zone
|
||||
Database for its time zone support. The ownership of the TZ database
|
||||
|
|
@ -382,7 +405,9 @@ Database section 7.
|
|||
# making a contribution to the database or code waives all rights to
|
||||
# future claims in that contribution or in the TZ Database.
|
||||
|
||||
6. Google double-conversion
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Google double-conversion
|
||||
|
||||
Copyright 2006-2011, the V8 project authors. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
|
@ -410,3 +435,85 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
File: aclocal.m4 (only for ICU4C)
|
||||
Section: pkg.m4 - Macros to locate and utilise pkg-config.
|
||||
|
||||
|
||||
Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
|
||||
Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||
02111-1307, USA.
|
||||
|
||||
As a special exception to the GNU General Public License, if you
|
||||
distribute this file as part of a program that contains a
|
||||
configuration script generated by Autoconf, you may include it under
|
||||
the same distribution terms that you use for the rest of that
|
||||
program.
|
||||
|
||||
|
||||
(The condition for the exception is fulfilled because
|
||||
ICU4C includes a configuration script generated by Autoconf,
|
||||
namely the `configure` script.)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
File: config.guess (only for ICU4C)
|
||||
|
||||
|
||||
This file is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
As a special exception to the GNU General Public License, if you
|
||||
distribute this file as part of a program that contains a
|
||||
configuration script generated by Autoconf, you may include it under
|
||||
the same distribution terms that you use for the rest of that
|
||||
program. This Exception is an additional permission under section 7
|
||||
of the GNU General Public License, version 3 ("GPLv3").
|
||||
|
||||
|
||||
(The condition for the exception is fulfilled because
|
||||
ICU4C includes a configuration script generated by Autoconf,
|
||||
namely the `configure` script.)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
File: install-sh (only for ICU4C)
|
||||
|
||||
|
||||
Copyright 1991 by the Massachusetts Institute of Technology
|
||||
|
||||
Permission to use, copy, modify, distribute, and sell this software and its
|
||||
documentation for any purpose is hereby granted without fee, provided that
|
||||
the above copyright notice appear in all copies and that both that
|
||||
copyright notice and this permission notice appear in supporting
|
||||
documentation, and that the name of M.I.T. not be used in advertising or
|
||||
publicity pertaining to distribution of the software without specific,
|
||||
written prior permission. M.I.T. makes no representations about the
|
||||
suitability of this software for any purpose. It is provided "as is"
|
||||
without express or implied warranty.
|
||||
|
|
|
|||
1
thirdparty/icu4c/common/brkeng.cpp
vendored
1
thirdparty/icu4c/common/brkeng.cpp
vendored
|
|
@ -79,6 +79,7 @@ UnhandledEngine::findBreaks( UText *text,
|
|||
int32_t /* startPos */,
|
||||
int32_t endPos,
|
||||
UVector32 &/*foundBreaks*/,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
UChar32 c = utext_current32(text);
|
||||
|
|
|
|||
2
thirdparty/icu4c/common/brkeng.h
vendored
2
thirdparty/icu4c/common/brkeng.h
vendored
|
|
@ -75,6 +75,7 @@ class LanguageBreakEngine : public UMemory {
|
|||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode &status) const = 0;
|
||||
|
||||
};
|
||||
|
|
@ -194,6 +195,7 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode &status) const override;
|
||||
|
||||
/**
|
||||
|
|
|
|||
31
thirdparty/icu4c/common/brkiter.cpp
vendored
31
thirdparty/icu4c/common/brkiter.cpp
vendored
|
|
@ -30,6 +30,7 @@
|
|||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/filteredbrk.h"
|
||||
#include "bytesinkutil.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "cstring.h"
|
||||
#include "umutex.h"
|
||||
|
|
@ -115,7 +116,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
|
|||
}
|
||||
|
||||
// Create a RuleBasedBreakIterator
|
||||
result = new RuleBasedBreakIterator(file, status);
|
||||
result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != NULL, status);
|
||||
|
||||
// If there is a result, set the valid locale and actual locale, and the kind
|
||||
if (U_SUCCESS(status) && result != NULL) {
|
||||
|
|
@ -408,7 +409,6 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
char lbType[kKeyValueLenMax];
|
||||
|
||||
BreakIterator *result = NULL;
|
||||
switch (kind) {
|
||||
|
|
@ -428,18 +428,29 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||
break;
|
||||
case UBRK_LINE:
|
||||
{
|
||||
char lb_lw[kKeyValueLenMax];
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
|
||||
uprv_strcpy(lbType, "line");
|
||||
char lbKeyValue[kKeyValueLenMax] = {0};
|
||||
uprv_strcpy(lb_lw, "line");
|
||||
UErrorCode kvStatus = U_ZERO_ERROR;
|
||||
int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
|
||||
if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
|
||||
uprv_strcat(lbType, "_");
|
||||
uprv_strcat(lbType, lbKeyValue);
|
||||
CharString value;
|
||||
CharStringByteSink valueSink(&value);
|
||||
loc.getKeywordValue("lb", valueSink, kvStatus);
|
||||
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
|
||||
uprv_strcat(lb_lw, "_");
|
||||
uprv_strcat(lb_lw, value.data());
|
||||
}
|
||||
result = BreakIterator::buildInstance(loc, lbType, status);
|
||||
// lw=phrase is only supported in Japanese.
|
||||
if (uprv_strcmp(loc.getLanguage(), "ja") == 0) {
|
||||
value.clear();
|
||||
loc.getKeywordValue("lw", valueSink, kvStatus);
|
||||
if (U_SUCCESS(kvStatus) && value == "phrase") {
|
||||
uprv_strcat(lb_lw, "_");
|
||||
uprv_strcat(lb_lw, value.data());
|
||||
}
|
||||
}
|
||||
result = BreakIterator::buildInstance(loc, lb_lw, status);
|
||||
|
||||
UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
|
||||
UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
|
||||
UTRACE_EXIT_STATUS(status);
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
165
thirdparty/icu4c/common/dictbe.cpp
vendored
165
thirdparty/icu4c/common/dictbe.cpp
vendored
|
|
@ -17,7 +17,10 @@
|
|||
#include "dictbe.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/resbund.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/usetiter.h"
|
||||
#include "ubrkimpl.h"
|
||||
#include "utracimp.h"
|
||||
#include "uvectr32.h"
|
||||
#include "uvector.h"
|
||||
|
|
@ -48,6 +51,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
|
|||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
(void)startPos; // TODO: remove this param?
|
||||
|
|
@ -68,7 +72,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
|
|||
}
|
||||
rangeStart = start;
|
||||
rangeEnd = current;
|
||||
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, status);
|
||||
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, isPhraseBreaking, status);
|
||||
utext_setNativeIndex(text, current);
|
||||
|
||||
return result;
|
||||
|
|
@ -199,13 +203,13 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
|
|||
{
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
|
||||
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
|
||||
UnicodeSet thaiWordSet(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fThaiWordSet);
|
||||
setCharacters(thaiWordSet);
|
||||
}
|
||||
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.applyPattern(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
fEndWordSet = fThaiWordSet;
|
||||
fEndWordSet = thaiWordSet;
|
||||
fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
|
||||
fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
|
||||
fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
|
||||
|
|
@ -230,6 +234,7 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
utext_setNativeIndex(text, rangeStart);
|
||||
|
|
@ -441,13 +446,13 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
|
|||
{
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
|
||||
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
|
||||
UnicodeSet laoWordSet(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fLaoWordSet);
|
||||
setCharacters(laoWordSet);
|
||||
}
|
||||
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.applyPattern(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
fEndWordSet = fLaoWordSet;
|
||||
fEndWordSet = laoWordSet;
|
||||
fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels
|
||||
fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters)
|
||||
fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent)
|
||||
|
|
@ -469,6 +474,7 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
|
||||
|
|
@ -637,14 +643,13 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
|
|||
{
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
|
||||
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fBurmeseWordSet);
|
||||
}
|
||||
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
fEndWordSet = fBurmeseWordSet;
|
||||
fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels
|
||||
fEndWordSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]"), status);
|
||||
fMarkSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fEndWordSet);
|
||||
}
|
||||
|
||||
// Compact for caching.
|
||||
fMarkSet.compact();
|
||||
|
|
@ -662,6 +667,7 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status ) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
|
||||
|
|
@ -830,13 +836,13 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
|
|||
{
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
|
||||
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
|
||||
UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fKhmerWordSet);
|
||||
setCharacters(khmerWordSet);
|
||||
}
|
||||
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
|
||||
fMarkSet.add(0x0020);
|
||||
fEndWordSet = fKhmerWordSet;
|
||||
fEndWordSet = khmerWordSet;
|
||||
fBeginWordSet.add(0x1780, 0x17B3);
|
||||
//fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels
|
||||
//fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word
|
||||
|
|
@ -867,6 +873,7 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status ) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
|
||||
|
|
@ -1050,25 +1057,27 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType
|
|||
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
|
||||
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
|
||||
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
|
||||
// Korean dictionary only includes Hangul syllables
|
||||
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
|
||||
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
|
||||
fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
|
||||
fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
|
||||
nfkcNorm2 = Normalizer2::getNFKCInstance(status);
|
||||
// Korean dictionary only includes Hangul syllables
|
||||
fHangulWordSet.applyPattern(UnicodeString(u"[\\uac00-\\ud7a3]"), status);
|
||||
fHangulWordSet.compact();
|
||||
// Digits, open punctuation and Alphabetic characters.
|
||||
fDigitOrOpenPunctuationOrAlphabetSet.applyPattern(
|
||||
UnicodeString(u"[[:Nd:][:Pi:][:Ps:][:Alphabetic:]]"), status);
|
||||
fDigitOrOpenPunctuationOrAlphabetSet.compact();
|
||||
fClosePunctuationSet.applyPattern(UnicodeString(u"[[:Pc:][:Pd:][:Pe:][:Pf:][:Po:]]"), status);
|
||||
fClosePunctuationSet.compact();
|
||||
|
||||
if (U_SUCCESS(status)) {
|
||||
// handle Korean and Japanese/Chinese using different dictionaries
|
||||
if (type == kKorean) {
|
||||
// handle Korean and Japanese/Chinese using different dictionaries
|
||||
if (type == kKorean) {
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(fHangulWordSet);
|
||||
} else { //Chinese and Japanese
|
||||
UnicodeSet cjSet;
|
||||
cjSet.addAll(fHanWordSet);
|
||||
cjSet.addAll(fKatakanaWordSet);
|
||||
cjSet.addAll(fHiraganaWordSet);
|
||||
cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
}
|
||||
} else { //Chinese and Japanese
|
||||
UnicodeSet cjSet(UnicodeString(u"[[:Han:][:Hiragana:][:Katakana:]\\u30fc\\uff70\\uff9e\\uff9f]"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
setCharacters(cjSet);
|
||||
initJapanesePhraseParameter(status);
|
||||
}
|
||||
}
|
||||
UTRACE_EXIT_STATUS(status);
|
||||
|
|
@ -1096,14 +1105,12 @@ static inline bool isKatakana(UChar32 value) {
|
|||
(value >= 0xFF66 && value <= 0xFF9f);
|
||||
}
|
||||
|
||||
|
||||
// Function for accessing internal utext flags.
|
||||
// Replicates an internal UText function.
|
||||
|
||||
// Returns the int32_t value produced by shifting 1 left by `bitIndex`
// positions, i.e. a mask with that single bit set.
static inline int32_t utext_i32_flag(int32_t bitIndex) {
|
||||
return (int32_t)1 << bitIndex;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* @param text A UText representing the text
|
||||
|
|
@ -1117,6 +1124,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
if (rangeStart >= rangeEnd) {
|
||||
|
|
@ -1347,6 +1355,31 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
|||
if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
|
||||
t_boundary.addElement(numCodePts, status);
|
||||
numBreaks++;
|
||||
} else if (isPhraseBreaking) {
|
||||
t_boundary.addElement(numCodePts, status);
|
||||
if(U_SUCCESS(status)) {
|
||||
numBreaks++;
|
||||
int32_t prevIdx = numCodePts;
|
||||
|
||||
int32_t codeUnitIdx = -1;
|
||||
int32_t prevCodeUnitIdx = -1;
|
||||
int32_t length = -1;
|
||||
for (int32_t i = prev.elementAti(numCodePts); i > 0; i = prev.elementAti(i)) {
|
||||
codeUnitIdx = inString.moveIndex32(0, i);
|
||||
prevCodeUnitIdx = inString.moveIndex32(0, prevIdx);
|
||||
// Calculate the length by using the code unit.
|
||||
length = prevCodeUnitIdx - codeUnitIdx;
|
||||
prevIdx = i;
|
||||
// Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana
|
||||
// characters don't occur.
|
||||
if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))
|
||||
&& (!isKatakana(inString.char32At(inString.moveIndex32(codeUnitIdx, -1)))
|
||||
|| !isKatakana(inString.char32At(codeUnitIdx)))) {
|
||||
t_boundary.addElement(i, status);
|
||||
numBreaks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
|
||||
t_boundary.addElement(i, status);
|
||||
|
|
@ -1367,7 +1400,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
|||
// while reversing t_boundary and pushing values to foundBreaks.
|
||||
int32_t prevCPPos = -1;
|
||||
int32_t prevUTextPos = -1;
|
||||
for (int32_t i = numBreaks-1; i >= 0; i--) {
|
||||
int32_t correctedNumBreaks = 0;
|
||||
for (int32_t i = numBreaks - 1; i >= 0; i--) {
|
||||
int32_t cpPos = t_boundary.elementAti(i);
|
||||
U_ASSERT(cpPos > prevCPPos);
|
||||
int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
|
||||
|
|
@ -1375,7 +1409,15 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
|||
if (utextPos > prevUTextPos) {
|
||||
// Boundaries are added to foundBreaks output in ascending order.
|
||||
U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
|
||||
foundBreaks.push(utextPos, status);
|
||||
// In phrase breaking, there has to be a breakpoint between Cj character and close
|
||||
// punctuation.
|
||||
// E.g.[携帯電話]正しい選択 -> [携帯▁電話]▁正しい▁選択 -> breakpoint between ] and 正
|
||||
if (utextPos != rangeStart
|
||||
|| (isPhraseBreaking && utextPos > 0
|
||||
&& fClosePunctuationSet.contains(utext_char32At(inText, utextPos - 1)))) {
|
||||
foundBreaks.push(utextPos, status);
|
||||
correctedNumBreaks++;
|
||||
}
|
||||
} else {
|
||||
// Normalization expanded the input text, the dictionary found a boundary
|
||||
// within the expansion, giving two boundaries with the same index in the
|
||||
|
|
@ -1387,9 +1429,52 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
|||
}
|
||||
(void)prevCPPos; // suppress compiler warnings about unused variable
|
||||
|
||||
UChar32 nextChar = utext_char32At(inText, rangeEnd);
|
||||
if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) {
|
||||
// In phrase breaking, there has to be a breakpoint between Cj character and
|
||||
// the number/open punctuation.
|
||||
// E.g. る文字「そうだ、京都」->る▁文字▁「そうだ、▁京都」-> breakpoint between 字 and「
|
||||
// E.g. 乗車率90%程度だろうか -> 乗車▁率▁90%▁程度だろうか -> breakpoint between 率 and 9
|
||||
// E.g. しかもロゴがUnicode! -> しかも▁ロゴが▁Unicode!-> breakpoint between が and U
|
||||
if (isPhraseBreaking) {
|
||||
if (!fDigitOrOpenPunctuationOrAlphabetSet.contains(nextChar)) {
|
||||
foundBreaks.popi();
|
||||
correctedNumBreaks--;
|
||||
}
|
||||
} else {
|
||||
foundBreaks.popi();
|
||||
correctedNumBreaks--;
|
||||
}
|
||||
}
|
||||
|
||||
// inString goes out of scope
|
||||
// inputMap goes out of scope
|
||||
return numBreaks;
|
||||
return correctedNumBreaks;
|
||||
}
|
||||
|
||||
// Populates fSkipSet for Japanese phrase breaking by running the two loaders
// in order: loadJapaneseExtensions() and then loadHiragana().
// The same `error` is passed to both; there is no check between the calls, so
// loadHiragana() is invoked even if the first loader left `error` in a
// failure state.
void CjkBreakEngine::initJapanesePhraseParameter(UErrorCode& error) {
|
||||
loadJapaneseExtensions(error);
|
||||
loadHiragana(error);
|
||||
}
|
||||
|
||||
// Adds every string of the "extensions" resource in the "ja" break-iterator
// bundle (U_ICUDATA_BRKITR) to fSkipSet, each stored with the value 1.
// `error` is both input and output: nothing is read if opening the bundle
// fails, and the loop stops as soon as `error` becomes a failure.
void CjkBreakEngine::loadJapaneseExtensions(UErrorCode& error) {
|
||||
const char* tag = "extensions";
|
||||
ResourceBundle ja(U_ICUDATA_BRKITR, "ja", error);
|
||||
if (U_SUCCESS(error)) {
|
||||
ResourceBundle bundle = ja.get(tag, error);
|
||||
// Stop on the first failure reported by get()/getNextString()/puti().
while (U_SUCCESS(error) && bundle.hasNext()) {
|
||||
fSkipSet.puti(bundle.getNextString(error), 1, error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the UnicodeSet for the pattern [:Hiragana:] and adds one fSkipSet
// entry (value 1) per element the iterator yields, keyed by a UnicodeString
// constructed from that element's code point.
// NOTE(review): the loop condition does not test `error`, so iteration
// continues even after the set construction or a puti() call reports a
// failure — confirm this is intended.
void CjkBreakEngine::loadHiragana(UErrorCode& error) {
|
||||
UnicodeSet hiraganaWordSet(UnicodeString(u"[:Hiragana:]"), error);
|
||||
hiraganaWordSet.compact();
|
||||
UnicodeSetIterator iterator(hiraganaWordSet);
|
||||
while (iterator.next()) {
|
||||
fSkipSet.puti(UnicodeString(iterator.getCodepoint()), 1, error);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
|||
27
thirdparty/icu4c/common/dictbe.h
vendored
27
thirdparty/icu4c/common/dictbe.h
vendored
|
|
@ -15,6 +15,7 @@
|
|||
#include "unicode/utext.h"
|
||||
|
||||
#include "brkeng.h"
|
||||
#include "hash.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
|
@ -80,6 +81,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status ) const override;
|
||||
|
||||
protected:
|
||||
|
|
@ -105,6 +107,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const = 0;
|
||||
|
||||
};
|
||||
|
|
@ -127,7 +130,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
|
|||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fThaiWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fSuffixSet;
|
||||
|
|
@ -164,6 +166,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
|
@ -186,7 +189,6 @@ class LaoBreakEngine : public DictionaryBreakEngine {
|
|||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fLaoWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
|
|
@ -222,6 +224,7 @@ class LaoBreakEngine : public DictionaryBreakEngine {
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
|
@ -244,7 +247,6 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
|
|||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fBurmeseWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
|
|
@ -280,6 +282,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
|
@ -302,7 +305,6 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
|
|||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fKhmerWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fMarkSet;
|
||||
|
|
@ -338,6 +340,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
|
@ -366,13 +369,22 @@ class CjkBreakEngine : public DictionaryBreakEngine {
|
|||
* @internal
|
||||
*/
|
||||
UnicodeSet fHangulWordSet;
|
||||
UnicodeSet fHanWordSet;
|
||||
UnicodeSet fKatakanaWordSet;
|
||||
UnicodeSet fHiraganaWordSet;
|
||||
UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet;
|
||||
UnicodeSet fClosePunctuationSet;
|
||||
|
||||
DictionaryMatcher *fDictionary;
|
||||
const Normalizer2 *nfkcNorm2;
|
||||
|
||||
private:
|
||||
// Load Japanese extensions.
|
||||
void loadJapaneseExtensions(UErrorCode& error);
|
||||
// Load Japanese Hiragana.
|
||||
void loadHiragana(UErrorCode& error);
|
||||
// Initialize fSkipSet by loading Japanese Hiragana and extensions.
|
||||
void initJapanesePhraseParameter(UErrorCode& error);
|
||||
|
||||
Hashtable fSkipSet;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
|
|
@ -404,6 +416,7 @@ class CjkBreakEngine : public DictionaryBreakEngine {
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
|
||||
};
|
||||
|
|
|
|||
42
thirdparty/icu4c/common/localematcher.cpp
vendored
42
thirdparty/icu4c/common/localematcher.cpp
vendored
|
|
@ -168,12 +168,9 @@ void LocaleMatcher::Builder::clearSupportedLocales() {
|
|||
bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
|
||||
if (U_FAILURE(errorCode_)) { return false; }
|
||||
if (supportedLocales_ != nullptr) { return true; }
|
||||
supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
|
||||
LocalPointer<UVector> lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
|
||||
if (U_FAILURE(errorCode_)) { return false; }
|
||||
if (supportedLocales_ == nullptr) {
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return false;
|
||||
}
|
||||
supportedLocales_ = lpSupportedLocales.orphan();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -187,9 +184,8 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
|
|||
for (int32_t i = 0; i < length; ++i) {
|
||||
Locale *locale = list.orphanLocaleAt(i);
|
||||
if (locale == nullptr) { continue; }
|
||||
supportedLocales_->addElementX(locale, errorCode_);
|
||||
supportedLocales_->adoptElement(locale, errorCode_);
|
||||
if (U_FAILURE(errorCode_)) {
|
||||
delete locale;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -197,35 +193,21 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
|
|||
}
|
||||
|
||||
LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
|
||||
if (U_FAILURE(errorCode_)) { return *this; }
|
||||
clearSupportedLocales();
|
||||
if (!ensureSupportedLocaleVector()) { return *this; }
|
||||
while (locales.hasNext()) {
|
||||
const Locale &locale = locales.next();
|
||||
Locale *clone = locale.clone();
|
||||
if (clone == nullptr) {
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
supportedLocales_->addElementX(clone, errorCode_);
|
||||
if (U_FAILURE(errorCode_)) {
|
||||
delete clone;
|
||||
break;
|
||||
if (ensureSupportedLocaleVector()) {
|
||||
clearSupportedLocales();
|
||||
while (locales.hasNext() && U_SUCCESS(errorCode_)) {
|
||||
const Locale &locale = locales.next();
|
||||
LocalPointer<Locale> clone (locale.clone(), errorCode_);
|
||||
supportedLocales_->adoptElement(clone.orphan(), errorCode_);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
|
||||
if (!ensureSupportedLocaleVector()) { return *this; }
|
||||
Locale *clone = locale.clone();
|
||||
if (clone == nullptr) {
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return *this;
|
||||
}
|
||||
supportedLocales_->addElementX(clone, errorCode_);
|
||||
if (U_FAILURE(errorCode_)) {
|
||||
delete clone;
|
||||
if (ensureSupportedLocaleVector()) {
|
||||
LocalPointer<Locale> clone(locale.clone(), errorCode_);
|
||||
supportedLocales_->adoptElement(clone.orphan(), errorCode_);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
|
|
|||
18
thirdparty/icu4c/common/locid.cpp
vendored
18
thirdparty/icu4c/common/locid.cpp
vendored
|
|
@ -1204,14 +1204,11 @@ AliasReplacer::parseLanguageReplacement(
|
|||
// We have multiple field so we have to allocate and parse
|
||||
CharString* str = new CharString(
|
||||
replacement, (int32_t)uprv_strlen(replacement), status);
|
||||
LocalPointer<CharString> lpStr(str, status);
|
||||
toBeFreed.adoptElement(lpStr.orphan(), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (str == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
toBeFreed.addElementX(str, status);
|
||||
char* data = str->data();
|
||||
replacedLanguage = (const char*) data;
|
||||
char* endOfField = uprv_strchr(data, '_');
|
||||
|
|
@ -1420,12 +1417,9 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
|
|||
(int32_t)(firstSpace - replacement), status), status);
|
||||
}
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
if (item.isNull()) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return false;
|
||||
}
|
||||
replacedRegion = item->data();
|
||||
toBeFreed.addElementX(item.orphan(), status);
|
||||
toBeFreed.adoptElement(item.orphan(), status);
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
}
|
||||
U_ASSERT(!same(region, replacedRegion));
|
||||
region = replacedRegion;
|
||||
|
|
@ -1659,10 +1653,10 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
|||
while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
|
||||
U_SUCCESS(status)) {
|
||||
*end = NULL_CHAR; // null terminate inside variantsBuff
|
||||
variants.addElementX(start, status);
|
||||
variants.addElement(start, status);
|
||||
start = end + 1;
|
||||
}
|
||||
variants.addElementX(start, status);
|
||||
variants.addElement(start, status);
|
||||
}
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
|
||||
|
|
|
|||
3
thirdparty/icu4c/common/lstmbe.cpp
vendored
3
thirdparty/icu4c/common/lstmbe.cpp
vendored
|
|
@ -1,8 +1,8 @@
|
|||
// © 2021 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include <complex>
|
||||
#include <utility>
|
||||
#include <ctgmath>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
|
@ -639,6 +639,7 @@ LSTMBreakEngine::divideUpDictionaryRange( UText *text,
|
|||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
int32_t beginFoundBreakSize = foundBreaks.size();
|
||||
|
|
|
|||
1
thirdparty/icu4c/common/lstmbe.h
vendored
1
thirdparty/icu4c/common/lstmbe.h
vendored
|
|
@ -62,6 +62,7 @@ protected:
|
|||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const override;
|
||||
private:
|
||||
const LSTMData* fData;
|
||||
|
|
|
|||
11
thirdparty/icu4c/common/normalizer2impl.cpp
vendored
11
thirdparty/icu4c/common/normalizer2impl.cpp
vendored
|
|
@ -2496,15 +2496,18 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
|
|||
// origin is not the first character, or it is U+0000.
|
||||
UnicodeSet *set;
|
||||
if((canonValue&CANON_HAS_SET)==0) {
|
||||
set=new UnicodeSet;
|
||||
if(set==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
LocalPointer<UnicodeSet> lpSet(new UnicodeSet, errorCode);
|
||||
set=lpSet.getAlias();
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
|
||||
canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
|
||||
umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
|
||||
canonStartSets.addElementX(set, errorCode);
|
||||
canonStartSets.adoptElement(lpSet.orphan(), errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if(firstOrigin!=0) {
|
||||
set->add(firstOrigin);
|
||||
}
|
||||
|
|
|
|||
14
thirdparty/icu4c/common/rbbi.cpp
vendored
14
thirdparty/icu4c/common/rbbi.cpp
vendored
|
|
@ -82,6 +82,19 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
|
|||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// Constructor from a UDataMemory handle to precompiled break rules
|
||||
// stored in an ICU data file. This constructor is private API,
|
||||
// only for internal use.
|
||||
//
|
||||
//-------------------------------------------------------------------------------
|
||||
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UBool isPhraseBreaking,
|
||||
UErrorCode &status) : RuleBasedBreakIterator(udm, status)
|
||||
{
|
||||
fIsPhraseBreaking = isPhraseBreaking;
|
||||
}
|
||||
|
||||
//
|
||||
// Construct from precompiled binary rules (tables). This constructor is public API,
|
||||
// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
|
||||
|
|
@ -322,6 +335,7 @@ void RuleBasedBreakIterator::init(UErrorCode &status) {
|
|||
fBreakCache = nullptr;
|
||||
fDictionaryCache = nullptr;
|
||||
fLookAheadMatches = nullptr;
|
||||
fIsPhraseBreaking = false;
|
||||
|
||||
// Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
|
||||
// fText = UTEXT_INITIALIZER;
|
||||
|
|
|
|||
2
thirdparty/icu4c/common/rbbi_cache.cpp
vendored
2
thirdparty/icu4c/common/rbbi_cache.cpp
vendored
|
|
@ -163,7 +163,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
|
|||
// Ask the language object if there are any breaks. It will add them to the cache and
|
||||
// leave the text pointer on the other side of its range, ready to search for the next one.
|
||||
if (lbe != NULL) {
|
||||
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, status);
|
||||
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status);
|
||||
}
|
||||
|
||||
// Reload the loop variables for the next go-round
|
||||
|
|
|
|||
5
thirdparty/icu4c/common/serv.cpp
vendored
5
thirdparty/icu4c/common/serv.cpp
vendored
|
|
@ -625,10 +625,7 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC
|
|||
}
|
||||
}
|
||||
|
||||
LocalPointer<UnicodeString> idClone(new UnicodeString(*id), status);
|
||||
if (U_SUCCESS(status) && idClone->isBogus()) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
LocalPointer<UnicodeString> idClone(id->clone(), status);
|
||||
result.adoptElement(idClone.orphan(), status);
|
||||
}
|
||||
delete fallbackKey;
|
||||
|
|
|
|||
3
thirdparty/icu4c/common/servls.cpp
vendored
3
thirdparty/icu4c/common/servls.cpp
vendored
|
|
@ -179,7 +179,8 @@ private:
|
|||
|
||||
length = other._ids.size();
|
||||
for(i = 0; i < length; ++i) {
|
||||
_ids.addElementX(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
|
||||
LocalPointer<UnicodeString> clonedId(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
|
||||
_ids.adoptElement(clonedId.orphan(), status);
|
||||
}
|
||||
|
||||
if(U_SUCCESS(status)) {
|
||||
|
|
|
|||
18
thirdparty/icu4c/common/servnotf.cpp
vendored
18
thirdparty/icu4c/common/servnotf.cpp
vendored
|
|
@ -49,7 +49,11 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
|
|||
if (acceptsListener(*l)) {
|
||||
Mutex lmx(&notifyLock);
|
||||
if (listeners == NULL) {
|
||||
listeners = new UVector(5, status);
|
||||
LocalPointer<UVector> lpListeners(new UVector(5, status), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
listeners = lpListeners.orphan();
|
||||
} else {
|
||||
for (int i = 0, e = listeners->size(); i < e; ++i) {
|
||||
const EventListener* el = (const EventListener*)(listeners->elementAt(i));
|
||||
|
|
@ -59,7 +63,7 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
|
|||
}
|
||||
}
|
||||
|
||||
listeners->addElementX((void*)l, status); // cast away const
|
||||
listeners->addElement((void*)l, status); // cast away const
|
||||
}
|
||||
#ifdef NOTIFIER_DEBUG
|
||||
else {
|
||||
|
|
@ -102,13 +106,11 @@ ICUNotifier::removeListener(const EventListener *l, UErrorCode& status)
|
|||
void
|
||||
ICUNotifier::notifyChanged(void)
|
||||
{
|
||||
Mutex lmx(&notifyLock);
|
||||
if (listeners != NULL) {
|
||||
Mutex lmx(&notifyLock);
|
||||
if (listeners != NULL) {
|
||||
for (int i = 0, e = listeners->size(); i < e; ++i) {
|
||||
EventListener* el = (EventListener*)listeners->elementAt(i);
|
||||
notifyListener(*el);
|
||||
}
|
||||
for (int i = 0, e = listeners->size(); i < e; ++i) {
|
||||
EventListener* el = (EventListener*)listeners->elementAt(i);
|
||||
notifyListener(*el);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
12
thirdparty/icu4c/common/ubrk.cpp
vendored
12
thirdparty/icu4c/common/ubrk.cpp
vendored
|
|
@ -168,7 +168,7 @@ ubrk_safeClone(
|
|||
BreakIterator *newBI = ((BreakIterator *)bi)->clone();
|
||||
if (newBI == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
} else if (pBufferSize != NULL) {
|
||||
*status = U_SAFECLONE_ALLOCATED_WARNING;
|
||||
}
|
||||
return (UBreakIterator *)newBI;
|
||||
|
|
@ -176,15 +176,7 @@ ubrk_safeClone(
|
|||
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
|
||||
if (U_FAILURE(*status)) {
|
||||
return nullptr;
|
||||
}
|
||||
BreakIterator *newBI = ((BreakIterator *)bi)->clone();
|
||||
if (newBI == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
return (UBreakIterator *)newBI;
|
||||
return ubrk_safeClone(bi, nullptr, nullptr, status);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
40
thirdparty/icu4c/common/ucase.cpp
vendored
40
thirdparty/icu4c/common/ucase.cpp
vendored
|
|
@ -22,27 +22,14 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/udata.h" /* UDataInfo */
|
||||
#include "unicode/utf16.h"
|
||||
#include "ucmndata.h" /* DataHeader */
|
||||
#include "udatamem.h"
|
||||
#include "umutex.h"
|
||||
#include "uassert.h"
|
||||
#include "cmemory.h"
|
||||
#include "utrie2.h"
|
||||
#include "uassert.h"
|
||||
#include "ucase.h"
|
||||
#include "umutex.h"
|
||||
#include "utrie2.h"
|
||||
|
||||
struct UCaseProps {
|
||||
UDataMemory *mem;
|
||||
const int32_t *indexes;
|
||||
const uint16_t *exceptions;
|
||||
const uint16_t *unfold;
|
||||
|
||||
UTrie2 trie;
|
||||
uint8_t formatVersion[4];
|
||||
};
|
||||
|
||||
/* ucase_props_data.h is machine-generated by gencase --csource */
|
||||
/* ucase_props_data.h is machine-generated by genprops/casepropsbuilder.cpp */
|
||||
#define INCLUDED_FROM_UCASE_CPP
|
||||
#include "ucase_props_data.h"
|
||||
|
||||
|
|
@ -77,6 +64,13 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
|
||||
/* data access primitives --------------------------------------------------- */
|
||||
|
||||
U_CAPI const struct UCaseProps * U_EXPORT2
|
||||
ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength) {
|
||||
*pExceptionsLength = UPRV_LENGTHOF(ucase_props_exceptions);
|
||||
*pUnfoldLength = UPRV_LENGTHOF(ucase_props_unfold);
|
||||
return &ucase_props_singleton;
|
||||
}
|
||||
|
||||
U_CFUNC const UTrie2 * U_EXPORT2
|
||||
ucase_getTrie() {
|
||||
return &ucase_props_singleton.trie;
|
||||
|
|
@ -690,7 +684,7 @@ ucase_isCaseSensitive(UChar32 c) {
|
|||
* - The general category of C is
|
||||
* Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
|
||||
* Letter Modifier (Lm), or Symbol Modifier (Sk)
|
||||
* - C is one of the following characters
|
||||
* - C is one of the following characters
|
||||
* U+0027 APOSTROPHE
|
||||
* U+00AD SOFT HYPHEN (SHY)
|
||||
* U+2019 RIGHT SINGLE QUOTATION MARK
|
||||
|
|
@ -1064,6 +1058,8 @@ ucase_toFullLower(UChar32 c,
|
|||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
// Reset the output pointer in case it was uninitialized.
|
||||
*pString=nullptr;
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_IS_UPPER_OR_TITLE(props)) {
|
||||
|
|
@ -1148,7 +1144,6 @@ ucase_toFullLower(UChar32 c,
|
|||
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
|
||||
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
|
||||
*/
|
||||
*pString=nullptr;
|
||||
return 0; /* remove the dot (continue without output) */
|
||||
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
|
||||
/*
|
||||
|
|
@ -1215,6 +1210,8 @@ toUpperOrTitle(UChar32 c,
|
|||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
// Reset the output pointer in case it was uninitialized.
|
||||
*pString=nullptr;
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
|
||||
|
|
@ -1252,7 +1249,6 @@ toUpperOrTitle(UChar32 c,
|
|||
|
||||
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
|
||||
*/
|
||||
*pString=nullptr;
|
||||
return 0; /* remove the dot (continue without output) */
|
||||
} else if(c==0x0587) {
|
||||
// See ICU-13416:
|
||||
|
|
@ -1449,6 +1445,8 @@ ucase_toFullFolding(UChar32 c,
|
|||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
// Reset the output pointer in case it was uninitialized.
|
||||
*pString=nullptr;
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_IS_UPPER_OR_TITLE(props)) {
|
||||
|
|
@ -1542,7 +1540,7 @@ U_CAPI UChar32 U_EXPORT2
|
|||
u_tolower(UChar32 c) {
|
||||
return ucase_tolower(c);
|
||||
}
|
||||
|
||||
|
||||
/* Transforms the Unicode character to its upper case equivalent.*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
u_toupper(UChar32 c) {
|
||||
|
|
|
|||
15
thirdparty/icu4c/common/ucase.h
vendored
15
thirdparty/icu4c/common/ucase.h
vendored
|
|
@ -312,6 +312,21 @@ UCaseMapFull(UChar32 c,
|
|||
|
||||
U_CDECL_END
|
||||
|
||||
/* for icuexportdata -------------------------------------------------------- */
|
||||
|
||||
struct UCaseProps {
|
||||
void *mem; // TODO: was unused, and type UDataMemory -- remove
|
||||
const int32_t *indexes;
|
||||
const uint16_t *exceptions;
|
||||
const uint16_t *unfold;
|
||||
|
||||
UTrie2 trie;
|
||||
uint8_t formatVersion[4];
|
||||
};
|
||||
|
||||
U_CAPI const struct UCaseProps * U_EXPORT2
|
||||
ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength);
|
||||
|
||||
/* file definitions --------------------------------------------------------- */
|
||||
|
||||
#define UCASE_DATA_NAME "ucase"
|
||||
|
|
|
|||
115
thirdparty/icu4c/common/ucasemap.cpp
vendored
115
thirdparty/icu4c/common/ucasemap.cpp
vendored
|
|
@ -112,8 +112,7 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
|
|||
if(length==sizeof(csm->locale)) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
csm->caseLocale=UCASE_LOC_UNKNOWN;
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
csm->caseLocale = ucase_getCaseLocale(csm->locale);
|
||||
} else {
|
||||
csm->locale[0]=0;
|
||||
|
|
@ -420,6 +419,97 @@ void toUpper(int32_t caseLocale, uint32_t options,
|
|||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr uint8_t ACUTE_BYTE0 = u8"\u0301"[0];
|
||||
|
||||
constexpr uint8_t ACUTE_BYTE1 = u8"\u0301"[1];
|
||||
|
||||
/**
|
||||
* Input: c is a letter I with or without acute accent.
|
||||
* start is the index in src after c, and is less than segmentLimit.
|
||||
* If a plain i/I is followed by a plain j/J,
|
||||
* or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
|
||||
* then we output accordingly.
|
||||
*
|
||||
* @return the src index after the titlecased sequence, or the start index if no Dutch IJ
|
||||
*/
|
||||
int32_t maybeTitleDutchIJ(const uint8_t *src, UChar32 c, int32_t start, int32_t segmentLimit,
|
||||
ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
|
||||
U_ASSERT(start < segmentLimit);
|
||||
|
||||
int32_t index = start;
|
||||
bool withAcute = false;
|
||||
|
||||
// If the conditions are met, then the following variables tell us what to output.
|
||||
int32_t unchanged1 = 0; // code units before the j, or the whole sequence (0..3)
|
||||
bool doTitleJ = false; // true if the j needs to be titlecased
|
||||
int32_t unchanged2 = 0; // after the j (0 or 1)
|
||||
|
||||
// next character after the first letter
|
||||
UChar32 c2;
|
||||
c2 = src[index++];
|
||||
|
||||
// Is the first letter an i/I with accent?
|
||||
if (c == u'I') {
|
||||
if (c2 == ACUTE_BYTE0 && index < segmentLimit && src[index++] == ACUTE_BYTE1) {
|
||||
withAcute = true;
|
||||
unchanged1 = 2; // ACUTE is 2 code units in UTF-8
|
||||
if (index == segmentLimit) { return start; }
|
||||
c2 = src[index++];
|
||||
}
|
||||
} else { // Í
|
||||
withAcute = true;
|
||||
}
|
||||
|
||||
// Is the next character a j/J?
|
||||
if (c2 == u'j') {
|
||||
doTitleJ = true;
|
||||
} else if (c2 == u'J') {
|
||||
++unchanged1;
|
||||
} else {
|
||||
return start;
|
||||
}
|
||||
|
||||
// A plain i/I must be followed by a plain j/J.
|
||||
// An i/I with acute must be followed by a j/J with acute.
|
||||
if (withAcute) {
|
||||
if ((index + 1) >= segmentLimit || src[index++] != ACUTE_BYTE0 || src[index++] != ACUTE_BYTE1) {
|
||||
return start;
|
||||
}
|
||||
if (doTitleJ) {
|
||||
unchanged2 = 2; // ACUTE is 2 code units in UTF-8
|
||||
} else {
|
||||
unchanged1 = unchanged1 + 2; // ACUTE is 2 code units in UTF-8
|
||||
}
|
||||
}
|
||||
|
||||
// There must not be another combining mark.
|
||||
if (index < segmentLimit) {
|
||||
int32_t cp;
|
||||
int32_t i = index;
|
||||
U8_NEXT(src, i, segmentLimit, cp);
|
||||
uint32_t typeMask = U_GET_GC_MASK(cp);
|
||||
if ((typeMask & U_GC_M_MASK) != 0) {
|
||||
return start;
|
||||
}
|
||||
}
|
||||
|
||||
// Output the rest of the Dutch IJ.
|
||||
ByteSinkUtil::appendUnchanged(src + start, unchanged1, sink, options, edits, errorCode);
|
||||
start += unchanged1;
|
||||
if (doTitleJ) {
|
||||
ByteSinkUtil::appendCodePoint(1, u'J', sink, edits);
|
||||
++start;
|
||||
}
|
||||
ByteSinkUtil::appendUnchanged(src + start, unchanged2, sink, options, edits, errorCode);
|
||||
|
||||
U_ASSERT(start + unchanged2 == index);
|
||||
return index;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_CFUNC void U_CALLCONV
|
||||
ucasemap_internalUTF8ToTitle(
|
||||
int32_t caseLocale, uint32_t options, BreakIterator *iter,
|
||||
|
|
@ -504,19 +594,14 @@ ucasemap_internalUTF8ToTitle(
|
|||
}
|
||||
|
||||
/* Special case Dutch IJ titlecasing */
|
||||
if (titleStart+1 < index &&
|
||||
caseLocale == UCASE_LOC_DUTCH &&
|
||||
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
|
||||
if (src[titleStart+1] == 0x006A) {
|
||||
ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
|
||||
titleLimit++;
|
||||
} else if (src[titleStart+1] == 0x004A) {
|
||||
// Keep the capital J from getting lowercased.
|
||||
if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
|
||||
sink, options, edits, errorCode)) {
|
||||
return;
|
||||
}
|
||||
titleLimit++;
|
||||
if (titleLimit < index &&
|
||||
caseLocale == UCASE_LOC_DUTCH) {
|
||||
if (c < 0) {
|
||||
c = ~c;
|
||||
}
|
||||
|
||||
if (c == u'I' || c == u'Í') {
|
||||
titleLimit = maybeTitleDutchIJ(src, c, titleLimit, index, sink, options, edits, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
11
thirdparty/icu4c/common/ucnv.cpp
vendored
11
thirdparty/icu4c/common/ucnv.cpp
vendored
|
|
@ -252,7 +252,10 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
|
|||
UTRACE_EXIT_STATUS(*status);
|
||||
return NULL;
|
||||
}
|
||||
*status = U_SAFECLONE_ALLOCATED_WARNING;
|
||||
// If pBufferSize was NULL as the input, pBufferSize is set to &stackBufferSize in this function.
|
||||
if (pBufferSize != &stackBufferSize) {
|
||||
*status = U_SAFECLONE_ALLOCATED_WARNING;
|
||||
}
|
||||
|
||||
/* record the fact that memory was allocated */
|
||||
*pBufferSize = bufferSizeNeeded;
|
||||
|
|
@ -317,7 +320,11 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
|
|||
return localConverter;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI UConverter* U_EXPORT2
|
||||
ucnv_clone(const UConverter* cnv, UErrorCode *status)
|
||||
{
|
||||
return ucnv_safeClone(cnv, nullptr, nullptr, status);
|
||||
}
|
||||
|
||||
/*Decreases the reference counter in the shared immutable section of the object
|
||||
*and frees the mutable part*/
|
||||
|
|
|
|||
2
thirdparty/icu4c/common/ucurr.cpp
vendored
2
thirdparty/icu4c/common/ucurr.cpp
vendored
|
|
@ -254,7 +254,7 @@ currSymbolsEquiv_cleanup(void)
|
|||
}
|
||||
|
||||
/**
|
||||
* Deleter for OlsonToMetaMappingEntry
|
||||
* Deleter for IsoCodeEntry
|
||||
*/
|
||||
static void U_CALLCONV
|
||||
deleteIsoCodeEntry(void *obj) {
|
||||
|
|
|
|||
6
thirdparty/icu4c/common/uloc.cpp
vendored
6
thirdparty/icu4c/common/uloc.cpp
vendored
|
|
@ -186,10 +186,10 @@ NULL
|
|||
};
|
||||
|
||||
static const char* const DEPRECATED_LANGUAGES[]={
|
||||
"in", "iw", "ji", "jw", NULL, NULL
|
||||
"in", "iw", "ji", "jw", "mo", NULL, NULL
|
||||
};
|
||||
static const char* const REPLACEMENT_LANGUAGES[]={
|
||||
"id", "he", "yi", "jv", NULL, NULL
|
||||
"id", "he", "yi", "jv", "ro", NULL, NULL
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -444,7 +444,7 @@ static const char * const COUNTRIES_3[] = {
|
|||
/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
|
||||
"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
|
||||
/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
|
||||
"WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
|
||||
"WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
|
||||
NULL,
|
||||
/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
|
||||
"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
|
||||
|
|
|
|||
|
|
@ -461,13 +461,13 @@ public:
|
|||
* Option for whether to include or ignore one-way (fallback) match data.
|
||||
* By default, they are included.
|
||||
*
|
||||
* @param direction the match direction to set.
|
||||
* @param matchDirection the match direction to set.
|
||||
* @return this Builder object
|
||||
* @stable ICU 67
|
||||
*/
|
||||
Builder &setDirection(ULocMatchDirection direction) {
|
||||
Builder &setDirection(ULocMatchDirection matchDirection) {
|
||||
if (U_SUCCESS(errorCode_)) {
|
||||
direction_ = direction;
|
||||
direction_ = matchDirection;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
|
|
|||
20
thirdparty/icu4c/common/unicode/rbbi.h
vendored
20
thirdparty/icu4c/common/unicode/rbbi.h
vendored
|
|
@ -147,6 +147,11 @@ private:
|
|||
*/
|
||||
int32_t *fLookAheadMatches;
|
||||
|
||||
/**
|
||||
* A flag to indicate if phrase based breaking is enabled.
|
||||
*/
|
||||
UBool fIsPhraseBreaking;
|
||||
|
||||
//=======================================================================
|
||||
// constructors
|
||||
//=======================================================================
|
||||
|
|
@ -163,6 +168,21 @@ private:
|
|||
*/
|
||||
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* This constructor uses the udata interface to create a BreakIterator
|
||||
* whose internal tables live in a memory-mapped file. "image" is an
|
||||
* ICU UDataMemory handle for the pre-compiled break iterator tables.
|
||||
* @param image handle to the memory image for the break iterator data.
|
||||
* Ownership of the UDataMemory handle passes to the Break Iterator,
|
||||
* which will be responsible for closing it when it is no longer needed.
|
||||
* @param status Information on any errors encountered.
|
||||
* @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
|
||||
* @see udata_open
|
||||
* @see #getBinaryRules
|
||||
* @internal (private)
|
||||
*/
|
||||
RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
|
||||
|
||||
/** @internal */
|
||||
friend class RBBIRuleBuilder;
|
||||
/** @internal */
|
||||
|
|
|
|||
11
thirdparty/icu4c/common/unicode/ubrk.h
vendored
11
thirdparty/icu4c/common/unicode/ubrk.h
vendored
|
|
@ -312,11 +312,12 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
|||
* If *pBufferSize is not enough for a stack-based safe clone,
|
||||
* new memory will be allocated.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING, is used
|
||||
* if pBufferSize != NULL and any allocations were necessary
|
||||
* @return pointer to the new clone
|
||||
* @deprecated ICU 69 Use ubrk_clone() instead.
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
U_DEPRECATED UBreakIterator * U_EXPORT2
|
||||
ubrk_safeClone(
|
||||
const UBreakIterator *bi,
|
||||
void *stackBuffer,
|
||||
|
|
@ -325,21 +326,17 @@ ubrk_safeClone(
|
|||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation.
|
||||
* @param bi iterator to be cloned
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* @return pointer to the new clone
|
||||
* @draft ICU 69
|
||||
* @stable ICU 69
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_clone(const UBreakIterator *bi,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
|
|
|
|||
26
thirdparty/icu4c/common/unicode/ucnv.h
vendored
26
thirdparty/icu4c/common/unicode/ucnv.h
vendored
|
|
@ -477,7 +477,7 @@ ucnv_openCCSID(int32_t codepage,
|
|||
*
|
||||
* <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
|
||||
* stored in the converter cache or the alias table. The only way to open further converters
|
||||
* is call this function multiple times, or use the ucnv_safeClone() function to clone a
|
||||
* is call this function multiple times, or use the ucnv_clone() function to clone a
|
||||
* 'primary' converter.</p>
|
||||
*
|
||||
* <p>A future version of ICU may add alias table lookups and/or caching
|
||||
|
|
@ -493,13 +493,27 @@ ucnv_openCCSID(int32_t codepage,
|
|||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see udata_open
|
||||
* @see ucnv_open
|
||||
* @see ucnv_safeClone
|
||||
* @see ucnv_clone
|
||||
* @see ucnv_close
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
U_CAPI UConverter* U_EXPORT2
|
||||
ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Thread safe converter cloning operation.
|
||||
*
|
||||
* You must ucnv_close() the clone.
|
||||
*
|
||||
* @param cnv converter to be cloned
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* @return pointer to the new clone
|
||||
* @stable ICU 71
|
||||
*/
|
||||
U_CAPI UConverter* U_EXPORT2 ucnv_clone(const UConverter *cnv, UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Thread safe converter cloning operation.
|
||||
* For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
|
||||
|
|
@ -532,21 +546,19 @@ ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode
|
|||
* pointer to size of allocated space.
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
|
||||
* is used if any allocations were necessary.
|
||||
* is used if pBufferSize != NULL and any allocations were necessary
|
||||
* However, it is better to check if *pBufferSize grew for checking for
|
||||
* allocations because warning codes can be overridden by subsequent
|
||||
* function calls.
|
||||
* @return pointer to the new clone
|
||||
* @stable ICU 2.0
|
||||
* @deprecated ICU 71 Use ucnv_clone() instead.
|
||||
*/
|
||||
U_CAPI UConverter * U_EXPORT2
|
||||
U_DEPRECATED UConverter * U_EXPORT2
|
||||
ucnv_safeClone(const UConverter *cnv,
|
||||
void *stackBuffer,
|
||||
int32_t *pBufferSize,
|
||||
UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* \def U_CNV_SAFECLONE_BUFFERSIZE
|
||||
* Definition of a buffer size that is designed to be large enough for
|
||||
|
|
|
|||
4
thirdparty/icu4c/common/unicode/uniset.h
vendored
4
thirdparty/icu4c/common/unicode/uniset.h
vendored
|
|
@ -1229,7 +1229,6 @@ public:
|
|||
*/
|
||||
UnicodeSet& retain(UChar32 c);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Retains only the specified string from this set if it is present.
|
||||
* Upon return this set will be empty if it did not contain s, or
|
||||
|
|
@ -1238,10 +1237,9 @@ public:
|
|||
*
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @draft ICU 69
|
||||
* @stable ICU 69
|
||||
*/
|
||||
UnicodeSet& retain(const UnicodeString &s);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Removes the specified range from this set if it is present.
|
||||
|
|
|
|||
5
thirdparty/icu4c/common/unicode/urename.h
vendored
5
thirdparty/icu4c/common/unicode/urename.h
vendored
|
|
@ -567,6 +567,7 @@
|
|||
#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
|
||||
#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
|
||||
#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
|
||||
#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton)
|
||||
#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie)
|
||||
#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
|
||||
#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
|
||||
|
|
@ -630,6 +631,7 @@
|
|||
#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars)
|
||||
#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub)
|
||||
#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars)
|
||||
#define ucnv_clone U_ICU_ENTRY_POINT_RENAME(ucnv_clone)
|
||||
#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close)
|
||||
#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames)
|
||||
#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert)
|
||||
|
|
@ -725,6 +727,7 @@
|
|||
#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString)
|
||||
#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8)
|
||||
#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
|
||||
#define ucol_clone U_ICU_ENTRY_POINT_RENAME(ucol_clone)
|
||||
#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
|
||||
#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close)
|
||||
#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements)
|
||||
|
|
@ -904,6 +907,7 @@
|
|||
#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern)
|
||||
#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
|
||||
#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
|
||||
#define udatpg_getDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormatForStyle)
|
||||
#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
|
||||
#define udatpg_getDefaultHourCycle U_ICU_ENTRY_POINT_RENAME(udatpg_getDefaultHourCycle)
|
||||
#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName)
|
||||
|
|
@ -918,6 +922,7 @@
|
|||
#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat)
|
||||
#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName)
|
||||
#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat)
|
||||
#define udatpg_setDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormatForStyle)
|
||||
#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal)
|
||||
#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap)
|
||||
#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close)
|
||||
|
|
|
|||
18
thirdparty/icu4c/common/unicode/uset.h
vendored
18
thirdparty/icu4c/common/unicode/uset.h
vendored
|
|
@ -628,7 +628,6 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
|
|||
U_CAPI void U_EXPORT2
|
||||
uset_removeString(USet* set, const UChar* str, int32_t strLen);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* A frozen set will not be modified.
|
||||
|
|
@ -636,11 +635,10 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen);
|
|||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
* @stable ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Removes from this set all of its elements that are contained in the
|
||||
|
|
@ -671,7 +669,6 @@ uset_removeAll(USet* set, const USet* removeSet);
|
|||
U_CAPI void U_EXPORT2
|
||||
uset_retain(USet* set, UChar32 start, UChar32 end);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Retains only the specified string from this set if it is present.
|
||||
* Upon return this set will be empty if it did not contain s, or
|
||||
|
|
@ -681,7 +678,7 @@ uset_retain(USet* set, UChar32 start, UChar32 end);
|
|||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
* @stable ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retainString(USet *set, const UChar *str, int32_t length);
|
||||
|
|
@ -693,11 +690,10 @@ uset_retainString(USet *set, const UChar *str, int32_t length);
|
|||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
* @stable ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Retains only the elements in this set that are contained in the
|
||||
|
|
@ -741,7 +737,6 @@ uset_compact(USet* set);
|
|||
U_CAPI void U_EXPORT2
|
||||
uset_complement(USet* set);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Complements the specified range in this set. Any character in
|
||||
* the range will be removed if it is in this set, or will be
|
||||
|
|
@ -753,7 +748,7 @@ uset_complement(USet* set);
|
|||
* @param set the object to be modified
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @draft ICU 69
|
||||
* @stable ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementRange(USet *set, UChar32 start, UChar32 end);
|
||||
|
|
@ -766,7 +761,7 @@ uset_complementRange(USet *set, UChar32 start, UChar32 end);
|
|||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
* @stable ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementString(USet *set, const UChar *str, int32_t length);
|
||||
|
|
@ -778,11 +773,10 @@ uset_complementString(USet *set, const UChar *str, int32_t length);
|
|||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
* @stable ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Complements in this set all elements contained in the specified
|
||||
|
|
|
|||
10
thirdparty/icu4c/common/unicode/uvernum.h
vendored
10
thirdparty/icu4c/common/unicode/uvernum.h
vendored
|
|
@ -60,7 +60,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 70
|
||||
#define U_ICU_VERSION_MAJOR_NUM 71
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
|
|
@ -86,7 +86,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _70
|
||||
#define U_ICU_VERSION_SUFFIX _71
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
|
|
@ -139,7 +139,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "70.1"
|
||||
#define U_ICU_VERSION "71.1"
|
||||
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
|
|
@ -152,13 +152,13 @@
|
|||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "70"
|
||||
#define U_ICU_VERSION_SHORT "71"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "70.1"
|
||||
#define U_ICU_DATA_VERSION "71.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
|
|
|
|||
8
thirdparty/icu4c/common/unistr.cpp
vendored
8
thirdparty/icu4c/common/unistr.cpp
vendored
|
|
@ -334,7 +334,8 @@ Replaceable::clone() const {
|
|||
// UnicodeString overrides clone() with a real implementation
|
||||
UnicodeString *
|
||||
UnicodeString::clone() const {
|
||||
return new UnicodeString(*this);
|
||||
LocalPointer<UnicodeString> clonedString(new UnicodeString(*this));
|
||||
return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr;
|
||||
}
|
||||
|
||||
//========================================
|
||||
|
|
@ -1976,7 +1977,12 @@ The vector deleting destructor is already a part of UObject,
|
|||
but defining it here makes sure that it is included with this object file.
|
||||
This makes sure that static library dependencies are kept to a minimum.
|
||||
*/
|
||||
#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
// Dummy function that exists only for its array new/delete expression on
// UnicodeString: per the file comment above, this makes sure the vector
// deleting destructor is included with this object file.
// It is a file-local (static) function; the surrounding pragmas suppress
// -Wunused-function for it, so it is presumably never called — confirm before
// relying on that.
static void uprv_UnicodeStringDummy(void) {
|
||||
delete [] (new UnicodeString[2]);
|
||||
}
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
|||
130
thirdparty/icu4c/common/ustrcase.cpp
vendored
130
thirdparty/icu4c/common/ustrcase.cpp
vendored
|
|
@ -36,6 +36,12 @@
|
|||
#include "ustr_imp.h"
|
||||
#include "uassert.h"
|
||||
|
||||
/**
|
||||
* Code point for COMBINING ACUTE ACCENT
|
||||
* @internal
|
||||
*/
|
||||
#define ACUTE u'\u0301'
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
|
@ -396,6 +402,94 @@ U_NAMESPACE_USE
|
|||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
* Input: c is a letter I with or without acute accent.
|
||||
* start is the index in src after c, and is less than segmentLimit.
|
||||
* If a plain i/I is followed by a plain j/J,
|
||||
* or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
|
||||
* then we output accordingly.
|
||||
*
|
||||
* Only the part of the sequence after c is written here. Nothing is written,
|
||||
* and destIndex is left untouched, on every path that returns start.
|
||||
*
|
||||
* @param src the source text; read at indexes from start up to (not including) segmentLimit
|
||||
* @param c the first letter; a plain u'I' is handled as "no accent yet", any other value
|
||||
*          is treated as an I that already carries the acute
|
||||
* @param start index in src of the code unit right after c
|
||||
* @param segmentLimit exclusive end of the segment that may be examined
|
||||
* @param dest output buffer, passed through to appendUnchanged()/appendUChar()
|
||||
* @param destIndex in/out write position in dest; replaced by the append helpers' results
|
||||
* @param destCapacity capacity of dest, passed through to the append helpers
|
||||
* @param options passed through to appendUnchanged()
|
||||
* @param edits optional edit recorder (may be nullptr); receives addReplace(1, 1) for a
|
||||
*          titlecased j and is passed through to appendUnchanged()
|
||||
* @return the src index after the titlecased sequence, or the start index if no Dutch IJ
|
||||
|
||||
*/
|
||||
int32_t maybeTitleDutchIJ(const UChar *src, UChar32 c, int32_t start, int32_t segmentLimit,
|
||||
UChar *dest, int32_t &destIndex, int32_t destCapacity, uint32_t options,
|
||||
icu::Edits *edits) {
|
||||
U_ASSERT(start < segmentLimit);
|
||||
|
||||
int32_t index = start;
|
||||
bool withAcute = false;
|
||||
|
||||
// If the conditions are met, then the following variables tell us what to output.
|
||||
int32_t unchanged1 = 0; // code units before the j, or the whole sequence (0..3)
|
||||
bool doTitleJ = false; // true if the j needs to be titlecased
|
||||
int32_t unchanged2 = 0; // after the j (0 or 1)
|
||||
|
||||
// next character after the first letter
|
||||
// Reading src[index] here relies on the asserted precondition start < segmentLimit.
|
||||
UChar c2 = src[index++];
|
||||
|
||||
// Is the first letter an i/I with accent?
|
||||
if (c == u'I') {
|
||||
if (c2 == ACUTE) {
|
||||
// Decomposed form: plain I followed by a combining acute.
// The acute is counted as one code unit to copy unchanged.
withAcute = true;
|
||||
unchanged1 = 1;
|
||||
// Nothing is left in the segment after the acute, so no j/J can follow.
if (index == segmentLimit) { return start; }
|
||||
c2 = src[index++];
|
||||
}
|
||||
} else { // Í
|
||||
withAcute = true;
|
||||
}
|
||||
|
||||
// Is the next character a j/J?
|
||||
if (c2 == u'j') {
|
||||
// Lowercase j: it is replaced by u'J' in the output step below.
doTitleJ = true;
|
||||
} else if (c2 == u'J') {
|
||||
// Already uppercase: the J joins the leading run that is copied unchanged.
++unchanged1;
|
||||
} else {
|
||||
return start;
|
||||
}
|
||||
|
||||
// A plain i/I must be followed by a plain j/J.
|
||||
// An i/I with acute must be followed by a j/J with acute.
|
||||
if (withAcute) {
|
||||
if (index == segmentLimit || src[index++] != ACUTE) { return start; }
|
||||
// The acute after the j/J is copied unchanged: after the replaced J
// (unchanged2), or as part of the leading unchanged run when the J itself
// is not replaced (unchanged1).
if (doTitleJ) {
|
||||
unchanged2 = 1;
|
||||
} else {
|
||||
++unchanged1;
|
||||
}
|
||||
}
|
||||
|
||||
// There must not be another combining mark.
|
||||
if (index < segmentLimit) {
|
||||
int32_t cp;
|
||||
// Look at the following code point through a copy of index,
// so that index itself is not advanced by the check.
int32_t i = index;
|
||||
U16_NEXT(src, i, segmentLimit, cp);
|
||||
uint32_t typeMask = U_GET_GC_MASK(cp);
|
||||
if ((typeMask & U_GC_M_MASK) != 0) {
|
||||
return start;
|
||||
}
|
||||
}
|
||||
|
||||
// Output the rest of the Dutch IJ.
|
||||
// NOTE(review): the results of appendUnchanged()/appendUChar() are stored in
// destIndex without a negative-value check here, whereas the inline code this
// helper replaced tested destIndex<0 after each append. Confirm that the
// caller handles a failed append.
destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged1, options, edits);
|
||||
start += unchanged1;
|
||||
if (doTitleJ) {
|
||||
destIndex = appendUChar(dest, destIndex, destCapacity, u'J');
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(1, 1);
|
||||
}
|
||||
// Step over the j in src so that start keeps matching what has been written.
++start;
|
||||
}
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged2, options, edits);
|
||||
|
||||
// Everything examined up to index must have been accounted for in the output.
U_ASSERT(start + unchanged2 == index);
|
||||
return index;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
|
|
@ -412,14 +506,14 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
|
|||
csc.limit=srcLength;
|
||||
int32_t destIndex=0;
|
||||
int32_t prev=0;
|
||||
UBool isFirstIndex=TRUE;
|
||||
bool isFirstIndex=true;
|
||||
|
||||
/* titlecasing loop */
|
||||
while(prev<srcLength) {
|
||||
/* find next index where to titlecase */
|
||||
int32_t index;
|
||||
if(isFirstIndex) {
|
||||
isFirstIndex=FALSE;
|
||||
isFirstIndex=false;
|
||||
index=iter->first();
|
||||
} else {
|
||||
index=iter->next();
|
||||
|
|
@ -446,7 +540,7 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
|
|||
// Stop with titleStart<titleLimit<=index
|
||||
// if there is a character to be titlecased,
|
||||
// or else stop with titleStart==titleLimit==index.
|
||||
UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
|
||||
bool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
|
||||
while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
|
||||
titleStart=titleLimit;
|
||||
if(titleLimit==index) {
|
||||
|
|
@ -479,27 +573,15 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
|
|||
|
||||
/* Special case Dutch IJ titlecasing */
|
||||
if (titleStart+1 < index &&
|
||||
caseLocale == UCASE_LOC_DUTCH &&
|
||||
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
|
||||
if (src[titleStart+1] == 0x006A) {
|
||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
|
||||
if(destIndex<0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if(edits!=NULL) {
|
||||
edits->addReplace(1, 1);
|
||||
}
|
||||
titleLimit++;
|
||||
} else if (src[titleStart+1] == 0x004A) {
|
||||
// Keep the capital J from getting lowercased.
|
||||
destIndex=appendUnchanged(dest, destIndex, destCapacity,
|
||||
src+titleStart+1, 1, options, edits);
|
||||
if(destIndex<0) {
|
||||
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
titleLimit++;
|
||||
caseLocale == UCASE_LOC_DUTCH) {
|
||||
if (c < 0) {
|
||||
c = ~c;
|
||||
}
|
||||
|
||||
if (c == u'I' || c == u'Í') {
|
||||
titleLimit = maybeTitleDutchIJ(src, c, titleStart + 1, index,
|
||||
dest, destIndex, destCapacity, options,
|
||||
edits);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
43
thirdparty/icu4c/common/uvector.cpp
vendored
43
thirdparty/icu4c/common/uvector.cpp
vendored
|
|
@ -99,14 +99,6 @@ bool UVector::operator==(const UVector& other) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
// TODO: delete this function once all call sites have been migrated to the
|
||||
// new addElement().
|
||||
// Appends obj at the end of the vector.
// Capacity for one more element is requested through ensureCapacityX(); the
// pointer is stored and count is incremented only if that call succeeds.
// On failure the vector is left as it was and status carries the error set by
// ensureCapacityX().
void UVector::addElementX(void* obj, UErrorCode &status) {
|
||||
if (ensureCapacityX(count + 1, status)) {
|
||||
elements[count++].pointer = obj;
|
||||
}
|
||||
}
|
||||
|
||||
void UVector::addElement(void* obj, UErrorCode &status) {
|
||||
U_ASSERT(deleter == nullptr);
|
||||
if (ensureCapacity(count + 1, status)) {
|
||||
|
|
@ -331,38 +323,6 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const {
|
|||
return -1;
|
||||
}
|
||||
|
||||
// Makes sure the element array can hold at least minimumCapacity elements,
// reallocating it if the current capacity is too small.
// Unlike a typical UErrorCode-taking function, this one does not look at the
// incoming value of status; it only writes to it on failure.
//
// @param minimumCapacity required number of elements; must not be negative
// @param status set to U_ILLEGAL_ARGUMENT_ERROR for a negative or too large
//               request, or to U_MEMORY_ALLOCATION_ERROR if reallocation fails
// @return TRUE if capacity is (now) at least minimumCapacity, FALSE on error;
//         on error, elements and capacity are left unchanged
UBool UVector::ensureCapacityX(int32_t minimumCapacity, UErrorCode &status) {
|
||||
if (minimumCapacity < 0) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
if (capacity < minimumCapacity) {
|
||||
// Doubling capacity below must not overflow int32_t.
if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
// Grow to twice the current capacity, or to the requested minimum if that is larger.
int32_t newCap = capacity * 2;
|
||||
if (newCap < minimumCapacity) {
|
||||
newCap = minimumCapacity;
|
||||
}
|
||||
// The byte size sizeof(UElement)*newCap passed to uprv_realloc() must stay within INT32_MAX.
if (newCap > (int32_t)(INT32_MAX / sizeof(UElement))) { // integer overflow check
|
||||
// We keep the original memory contents on bad minimumCapacity.
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
UElement* newElems = (UElement *)uprv_realloc(elements, sizeof(UElement)*newCap);
|
||||
if (newElems == nullptr) {
|
||||
// We keep the original contents on the memory failure on realloc or bad minimumCapacity.
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
// Commit the new buffer only after the reallocation has succeeded.
elements = newElems;
|
||||
capacity = newCap;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
|
|
@ -370,7 +330,7 @@ UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
|
|||
if (minimumCapacity < 0) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (capacity < minimumCapacity) {
|
||||
if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
|
@ -396,6 +356,7 @@ UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the size of this vector as follows: If newSize is smaller,
|
||||
* then truncate the array, possibly deleting held elements for i >=
|
||||
|
|
|
|||
12
thirdparty/icu4c/common/uvector.h
vendored
12
thirdparty/icu4c/common/uvector.h
vendored
|
|
@ -123,12 +123,6 @@ public:
|
|||
// java.util.Vector API
|
||||
//------------------------------------------------------------
|
||||
|
||||
/*
|
||||
* Old version of addElement, with non-standard error handling.
|
||||
* Will be removed once all uses have been switched to the new addElement().
|
||||
*/
|
||||
void addElementX(void* obj, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Add an element at the end of the vector.
|
||||
* For use only with vectors that do not adopt their elements, which is to say,
|
||||
|
|
@ -197,12 +191,6 @@ public:
|
|||
|
||||
inline UBool isEmpty(void) const {return count == 0;}
|
||||
|
||||
/*
|
||||
* Old version of ensureCapacity, with non-standard error handling.
|
||||
* Will be removed once all uses have been switched to the new ensureCapacity().
|
||||
*/
|
||||
UBool ensureCapacityX(int32_t minimumCapacity, UErrorCode &status);
|
||||
|
||||
UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
|
|
|
|||
2
thirdparty/icu4c/common/uvectr32.cpp
vendored
2
thirdparty/icu4c/common/uvectr32.cpp
vendored
|
|
@ -83,7 +83,7 @@ void UVector32::assign(const UVector32& other, UErrorCode &ec) {
|
|||
}
|
||||
|
||||
|
||||
bool UVector32::operator==(const UVector32& other) {
|
||||
bool UVector32::operator==(const UVector32& other) const {
|
||||
int32_t i;
|
||||
if (count != other.count) return false;
|
||||
for (i=0; i<count; ++i) {
|
||||
|
|
|
|||
6
thirdparty/icu4c/common/uvectr32.h
vendored
6
thirdparty/icu4c/common/uvectr32.h
vendored
|
|
@ -86,12 +86,12 @@ public:
|
|||
* equal if they are of the same size and all elements are equal,
|
||||
* as compared using this object's comparer.
|
||||
*/
|
||||
bool operator==(const UVector32& other);
|
||||
bool operator==(const UVector32& other) const;
|
||||
|
||||
/**
|
||||
* Equivalent to !operator==()
|
||||
*/
|
||||
inline bool operator!=(const UVector32& other);
|
||||
inline bool operator!=(const UVector32& other) const;
|
||||
|
||||
//------------------------------------------------------------
|
||||
// java.util.Vector API
|
||||
|
|
@ -268,7 +268,7 @@ inline int32_t UVector32::lastElementi(void) const {
|
|||
return elementAti(count-1);
|
||||
}
|
||||
|
||||
inline bool UVector32::operator!=(const UVector32& other) {
|
||||
inline bool UVector32::operator!=(const UVector32& other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue