mirror of
https://github.com/python/cpython.git
synced 2025-12-31 04:23:37 +00:00
gh-139353: Rename formatter_unicode.c to unicode_formatter.c (#139723)
* Move Python/formatter_unicode.c to Objects/unicode_formatter.c. * Move Objects/stringlib/localeutil.h content into unicode_formatter.c. Remove localeutil.h. * Move _PyUnicode_InsertThousandsGrouping() to unicode_formatter.c and mark the function as static. * Rename unicode_fill() to _PyUnicode_Fill() and export it in pycore_unicodeobject.h. * Move MAX_UNICODE to pycore_unicodeobject.h as _Py_MAX_UNICODE.
This commit is contained in:
parent
5cea843594
commit
3d3f126e86
10 changed files with 293 additions and 305 deletions
|
|
@ -11,6 +11,44 @@ extern "C" {
|
|||
#include "pycore_fileutils.h" // _Py_error_handler
|
||||
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
|
||||
|
||||
// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
|
||||
#define _Py_MAX_UNICODE 0x10ffff
|
||||
|
||||
|
||||
static inline void
|
||||
_PyUnicode_Fill(int kind, void *data, Py_UCS4 value,
|
||||
Py_ssize_t start, Py_ssize_t length)
|
||||
{
|
||||
assert(0 <= start);
|
||||
switch (kind) {
|
||||
case PyUnicode_1BYTE_KIND: {
|
||||
assert(value <= 0xff);
|
||||
Py_UCS1 ch = (unsigned char)value;
|
||||
Py_UCS1 *to = (Py_UCS1 *)data + start;
|
||||
memset(to, ch, length);
|
||||
break;
|
||||
}
|
||||
case PyUnicode_2BYTE_KIND: {
|
||||
assert(value <= 0xffff);
|
||||
Py_UCS2 ch = (Py_UCS2)value;
|
||||
Py_UCS2 *to = (Py_UCS2 *)data + start;
|
||||
const Py_UCS2 *end = to + length;
|
||||
for (; to < end; ++to) *to = ch;
|
||||
break;
|
||||
}
|
||||
case PyUnicode_4BYTE_KIND: {
|
||||
assert(value <= _Py_MAX_UNICODE);
|
||||
Py_UCS4 ch = value;
|
||||
Py_UCS4 * to = (Py_UCS4 *)data + start;
|
||||
const Py_UCS4 *end = to + length;
|
||||
for (; to < end; ++to) *to = ch;
|
||||
break;
|
||||
}
|
||||
default: Py_UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* --- Characters Type APIs ----------------------------------------------- */
|
||||
|
||||
extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
|
||||
|
|
@ -240,21 +278,6 @@ extern PyObject* _PyUnicode_XStrip(
|
|||
);
|
||||
|
||||
|
||||
/* Using explicit passed-in values, insert the thousands grouping
|
||||
into the string pointed to by buffer. For the argument descriptions,
|
||||
see Objects/stringlib/localeutil.h */
|
||||
extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
|
||||
_PyUnicodeWriter *writer,
|
||||
Py_ssize_t n_buffer,
|
||||
PyObject *digits,
|
||||
Py_ssize_t d_pos,
|
||||
Py_ssize_t n_digits,
|
||||
Py_ssize_t min_width,
|
||||
const char *grouping,
|
||||
PyObject *thousands_sep,
|
||||
Py_UCS4 *maxchar,
|
||||
int forward);
|
||||
|
||||
/* Dedent a string.
|
||||
Behaviour is expected to be an exact match of `textwrap.dedent`.
|
||||
Return a new reference on success, NULL with exception set on error.
|
||||
|
|
|
|||
|
|
@ -501,7 +501,6 @@ PYTHON_OBJS= \
|
|||
Python/pystrtod.o \
|
||||
Python/pystrhex.o \
|
||||
Python/dtoa.o \
|
||||
Python/formatter_unicode.o \
|
||||
Python/fileutils.o \
|
||||
Python/suggestions.o \
|
||||
Python/perf_trampoline.o \
|
||||
|
|
@ -558,6 +557,7 @@ OBJECT_OBJS= \
|
|||
Objects/tupleobject.o \
|
||||
Objects/typeobject.o \
|
||||
Objects/typevarobject.o \
|
||||
Objects/unicode_formatter.o \
|
||||
Objects/unicodeobject.o \
|
||||
Objects/unicodectype.o \
|
||||
Objects/unionobject.o \
|
||||
|
|
@ -2091,7 +2091,6 @@ UNICODE_DEPS = \
|
|||
$(srcdir)/Objects/stringlib/fastsearch.h \
|
||||
$(srcdir)/Objects/stringlib/find.h \
|
||||
$(srcdir)/Objects/stringlib/find_max_char.h \
|
||||
$(srcdir)/Objects/stringlib/localeutil.h \
|
||||
$(srcdir)/Objects/stringlib/partition.h \
|
||||
$(srcdir)/Objects/stringlib/replace.h \
|
||||
$(srcdir)/Objects/stringlib/repr.h \
|
||||
|
|
|
|||
|
|
@ -1,97 +0,0 @@
|
|||
/* _PyUnicode_InsertThousandsGrouping() helper functions */
|
||||
|
||||
typedef struct {
|
||||
const char *grouping;
|
||||
char previous;
|
||||
Py_ssize_t i; /* Where we're currently pointing in grouping. */
|
||||
} GroupGenerator;
|
||||
|
||||
|
||||
static void
|
||||
GroupGenerator_init(GroupGenerator *self, const char *grouping)
|
||||
{
|
||||
self->grouping = grouping;
|
||||
self->i = 0;
|
||||
self->previous = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Returns the next grouping, or 0 to signify end. */
|
||||
static Py_ssize_t
|
||||
GroupGenerator_next(GroupGenerator *self)
|
||||
{
|
||||
/* Note that we don't really do much error checking here. If a
|
||||
grouping string contains just CHAR_MAX, for example, then just
|
||||
terminate the generator. That shouldn't happen, but at least we
|
||||
fail gracefully. */
|
||||
switch (self->grouping[self->i]) {
|
||||
case 0:
|
||||
return self->previous;
|
||||
case CHAR_MAX:
|
||||
/* Stop the generator. */
|
||||
return 0;
|
||||
default: {
|
||||
char ch = self->grouping[self->i];
|
||||
self->previous = ch;
|
||||
self->i++;
|
||||
return (Py_ssize_t)ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Fill in some digits, leading zeros, and thousands separator. All
|
||||
are optional, depending on when we're called. */
|
||||
static void
|
||||
InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
|
||||
PyObject *digits, Py_ssize_t *digits_pos,
|
||||
Py_ssize_t n_chars, Py_ssize_t n_zeros,
|
||||
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
|
||||
Py_UCS4 *maxchar, int forward)
|
||||
{
|
||||
if (!writer) {
|
||||
/* if maxchar > 127, maxchar is already set */
|
||||
if (*maxchar == 127 && thousands_sep) {
|
||||
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
|
||||
*maxchar = Py_MAX(*maxchar, maxchar2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (thousands_sep) {
|
||||
if (!forward) {
|
||||
*buffer_pos -= thousands_sep_len;
|
||||
}
|
||||
/* Copy the thousands_sep chars into the buffer. */
|
||||
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
|
||||
thousands_sep, 0,
|
||||
thousands_sep_len);
|
||||
if (forward) {
|
||||
*buffer_pos += thousands_sep_len;
|
||||
}
|
||||
}
|
||||
|
||||
if (!forward) {
|
||||
*buffer_pos -= n_chars;
|
||||
*digits_pos -= n_chars;
|
||||
}
|
||||
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
|
||||
digits, *digits_pos,
|
||||
n_chars);
|
||||
if (forward) {
|
||||
*buffer_pos += n_chars;
|
||||
*digits_pos += n_chars;
|
||||
}
|
||||
|
||||
if (n_zeros) {
|
||||
if (!forward) {
|
||||
*buffer_pos -= n_zeros;
|
||||
}
|
||||
int kind = PyUnicode_KIND(writer->buffer);
|
||||
void *data = PyUnicode_DATA(writer->buffer);
|
||||
unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
|
||||
if (forward) {
|
||||
*buffer_pos += n_zeros;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -8,6 +8,241 @@
|
|||
#include "pycore_unicodeobject.h" // PyUnicode_MAX_CHAR_VALUE()
|
||||
#include <locale.h>
|
||||
|
||||
|
||||
/* _PyUnicode_InsertThousandsGrouping() helper functions */
|
||||
|
||||
typedef struct {
|
||||
const char *grouping;
|
||||
char previous;
|
||||
Py_ssize_t i; /* Where we're currently pointing in grouping. */
|
||||
} GroupGenerator;
|
||||
|
||||
|
||||
static void
|
||||
GroupGenerator_init(GroupGenerator *self, const char *grouping)
|
||||
{
|
||||
self->grouping = grouping;
|
||||
self->i = 0;
|
||||
self->previous = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Returns the next grouping, or 0 to signify end. */
|
||||
static Py_ssize_t
|
||||
GroupGenerator_next(GroupGenerator *self)
|
||||
{
|
||||
/* Note that we don't really do much error checking here. If a
|
||||
grouping string contains just CHAR_MAX, for example, then just
|
||||
terminate the generator. That shouldn't happen, but at least we
|
||||
fail gracefully. */
|
||||
switch (self->grouping[self->i]) {
|
||||
case 0:
|
||||
return self->previous;
|
||||
case CHAR_MAX:
|
||||
/* Stop the generator. */
|
||||
return 0;
|
||||
default: {
|
||||
char ch = self->grouping[self->i];
|
||||
self->previous = ch;
|
||||
self->i++;
|
||||
return (Py_ssize_t)ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Fill in some digits, leading zeros, and thousands separator. All
|
||||
are optional, depending on when we're called. */
|
||||
static void
|
||||
InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
|
||||
PyObject *digits, Py_ssize_t *digits_pos,
|
||||
Py_ssize_t n_chars, Py_ssize_t n_zeros,
|
||||
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
|
||||
Py_UCS4 *maxchar, int forward)
|
||||
{
|
||||
if (!writer) {
|
||||
/* if maxchar > 127, maxchar is already set */
|
||||
if (*maxchar == 127 && thousands_sep) {
|
||||
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
|
||||
*maxchar = Py_MAX(*maxchar, maxchar2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (thousands_sep) {
|
||||
if (!forward) {
|
||||
*buffer_pos -= thousands_sep_len;
|
||||
}
|
||||
/* Copy the thousands_sep chars into the buffer. */
|
||||
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
|
||||
thousands_sep, 0,
|
||||
thousands_sep_len);
|
||||
if (forward) {
|
||||
*buffer_pos += thousands_sep_len;
|
||||
}
|
||||
}
|
||||
|
||||
if (!forward) {
|
||||
*buffer_pos -= n_chars;
|
||||
*digits_pos -= n_chars;
|
||||
}
|
||||
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
|
||||
digits, *digits_pos,
|
||||
n_chars);
|
||||
if (forward) {
|
||||
*buffer_pos += n_chars;
|
||||
*digits_pos += n_chars;
|
||||
}
|
||||
|
||||
if (n_zeros) {
|
||||
if (!forward) {
|
||||
*buffer_pos -= n_zeros;
|
||||
}
|
||||
int kind = PyUnicode_KIND(writer->buffer);
|
||||
void *data = PyUnicode_DATA(writer->buffer);
|
||||
_PyUnicode_Fill(kind, data, '0', *buffer_pos, n_zeros);
|
||||
if (forward) {
|
||||
*buffer_pos += n_zeros;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* InsertThousandsGrouping:
|
||||
* @writer: Unicode writer.
|
||||
* @n_buffer: Number of characters in @buffer.
|
||||
* @digits: Digits we're reading from. If count is non-NULL, this is unused.
|
||||
* @d_pos: Start of digits string.
|
||||
* @n_digits: The number of digits in the string, in which we want
|
||||
* to put the grouping chars.
|
||||
* @min_width: The minimum width of the digits in the output string.
|
||||
* Output will be zero-padded on the left to fill.
|
||||
* @grouping: see definition in localeconv().
|
||||
* @thousands_sep: see definition in localeconv().
|
||||
*
|
||||
* There are 2 modes: counting and filling. If @writer is NULL,
|
||||
* we are in counting mode, else filling mode.
|
||||
* If counting, the required buffer size is returned.
|
||||
* If filling, we know the buffer will be large enough, so we don't
|
||||
* need to pass in the buffer size.
|
||||
* Inserts thousand grouping characters (as defined by grouping and
|
||||
* thousands_sep) into @writer.
|
||||
*
|
||||
* Return value: -1 on error, number of characters otherwise.
|
||||
**/
|
||||
static Py_ssize_t
|
||||
_PyUnicode_InsertThousandsGrouping(
|
||||
_PyUnicodeWriter *writer,
|
||||
Py_ssize_t n_buffer,
|
||||
PyObject *digits,
|
||||
Py_ssize_t d_pos,
|
||||
Py_ssize_t n_digits,
|
||||
Py_ssize_t min_width,
|
||||
const char *grouping,
|
||||
PyObject *thousands_sep,
|
||||
Py_UCS4 *maxchar,
|
||||
int forward)
|
||||
{
|
||||
min_width = Py_MAX(0, min_width);
|
||||
if (writer) {
|
||||
assert(digits != NULL);
|
||||
assert(maxchar == NULL);
|
||||
}
|
||||
else {
|
||||
assert(digits == NULL);
|
||||
assert(maxchar != NULL);
|
||||
}
|
||||
assert(0 <= d_pos);
|
||||
assert(0 <= n_digits);
|
||||
assert(grouping != NULL);
|
||||
|
||||
Py_ssize_t count = 0;
|
||||
Py_ssize_t n_zeros;
|
||||
int loop_broken = 0;
|
||||
int use_separator = 0; /* First time through, don't append the
|
||||
separator. They only go between
|
||||
groups. */
|
||||
Py_ssize_t buffer_pos;
|
||||
Py_ssize_t digits_pos;
|
||||
Py_ssize_t len;
|
||||
Py_ssize_t n_chars;
|
||||
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
|
||||
be looked at */
|
||||
/* A generator that returns all of the grouping widths, until it
|
||||
returns 0. */
|
||||
GroupGenerator groupgen;
|
||||
GroupGenerator_init(&groupgen, grouping);
|
||||
const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
|
||||
|
||||
/* if digits are not grouped, thousands separator
|
||||
should be an empty string */
|
||||
assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));
|
||||
|
||||
digits_pos = d_pos + (forward ? 0 : n_digits);
|
||||
if (writer) {
|
||||
buffer_pos = writer->pos + (forward ? 0 : n_buffer);
|
||||
assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
|
||||
assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
|
||||
}
|
||||
else {
|
||||
buffer_pos = forward ? 0 : n_buffer;
|
||||
}
|
||||
|
||||
if (!writer) {
|
||||
*maxchar = 127;
|
||||
}
|
||||
|
||||
while ((len = GroupGenerator_next(&groupgen)) > 0) {
|
||||
len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1));
|
||||
n_zeros = Py_MAX(0, len - remaining);
|
||||
n_chars = Py_MAX(0, Py_MIN(remaining, len));
|
||||
|
||||
/* Use n_zero zero's and n_chars chars */
|
||||
|
||||
/* Count only, don't do anything. */
|
||||
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
||||
|
||||
/* Copy into the writer. */
|
||||
InsertThousandsGrouping_fill(writer, &buffer_pos,
|
||||
digits, &digits_pos,
|
||||
n_chars, n_zeros,
|
||||
use_separator ? thousands_sep : NULL,
|
||||
thousands_sep_len, maxchar, forward);
|
||||
|
||||
/* Use a separator next time. */
|
||||
use_separator = 1;
|
||||
|
||||
remaining -= n_chars;
|
||||
min_width -= len;
|
||||
|
||||
if (remaining <= 0 && min_width <= 0) {
|
||||
loop_broken = 1;
|
||||
break;
|
||||
}
|
||||
min_width -= thousands_sep_len;
|
||||
}
|
||||
if (!loop_broken) {
|
||||
/* We left the loop without using a break statement. */
|
||||
|
||||
len = Py_MAX(Py_MAX(remaining, min_width), 1);
|
||||
n_zeros = Py_MAX(0, len - remaining);
|
||||
n_chars = Py_MAX(0, Py_MIN(remaining, len));
|
||||
|
||||
/* Use n_zero zero's and n_chars chars */
|
||||
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
||||
|
||||
/* Copy into the writer. */
|
||||
InsertThousandsGrouping_fill(writer, &buffer_pos,
|
||||
digits, &digits_pos,
|
||||
n_chars, n_zeros,
|
||||
use_separator ? thousands_sep : NULL,
|
||||
thousands_sep_len, maxchar, forward);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/* Raises an exception about an unknown presentation type for this
|
||||
* type. */
|
||||
|
||||
|
|
@ -104,9 +104,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently
|
|||
|
||||
*/
|
||||
|
||||
// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
|
||||
// The value must be the same in fileutils.c.
|
||||
#define MAX_UNICODE 0x10ffff
|
||||
#define MAX_UNICODE _Py_MAX_UNICODE
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
# define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0)
|
||||
|
|
@ -420,39 +418,6 @@ static void clear_global_interned_strings(void)
|
|||
return unicode_get_empty(); \
|
||||
} while (0)
|
||||
|
||||
static inline void
|
||||
unicode_fill(int kind, void *data, Py_UCS4 value,
|
||||
Py_ssize_t start, Py_ssize_t length)
|
||||
{
|
||||
assert(0 <= start);
|
||||
switch (kind) {
|
||||
case PyUnicode_1BYTE_KIND: {
|
||||
assert(value <= 0xff);
|
||||
Py_UCS1 ch = (unsigned char)value;
|
||||
Py_UCS1 *to = (Py_UCS1 *)data + start;
|
||||
memset(to, ch, length);
|
||||
break;
|
||||
}
|
||||
case PyUnicode_2BYTE_KIND: {
|
||||
assert(value <= 0xffff);
|
||||
Py_UCS2 ch = (Py_UCS2)value;
|
||||
Py_UCS2 *to = (Py_UCS2 *)data + start;
|
||||
const Py_UCS2 *end = to + length;
|
||||
for (; to < end; ++to) *to = ch;
|
||||
break;
|
||||
}
|
||||
case PyUnicode_4BYTE_KIND: {
|
||||
assert(value <= MAX_UNICODE);
|
||||
Py_UCS4 ch = value;
|
||||
Py_UCS4 * to = (Py_UCS4 *)data + start;
|
||||
const Py_UCS4 *end = to + length;
|
||||
for (; to < end; ++to) *to = ch;
|
||||
break;
|
||||
}
|
||||
default: Py_UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Fast detection of the most frequent whitespace characters */
|
||||
const unsigned char _Py_ascii_whitespace[] = {
|
||||
|
|
@ -9735,142 +9700,6 @@ any_find_slice(PyObject* s1, PyObject* s2,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* _PyUnicode_InsertThousandsGrouping() helper functions */
|
||||
#include "stringlib/localeutil.h"
|
||||
|
||||
/**
|
||||
* InsertThousandsGrouping:
|
||||
* @writer: Unicode writer.
|
||||
* @n_buffer: Number of characters in @buffer.
|
||||
* @digits: Digits we're reading from. If count is non-NULL, this is unused.
|
||||
* @d_pos: Start of digits string.
|
||||
* @n_digits: The number of digits in the string, in which we want
|
||||
* to put the grouping chars.
|
||||
* @min_width: The minimum width of the digits in the output string.
|
||||
* Output will be zero-padded on the left to fill.
|
||||
* @grouping: see definition in localeconv().
|
||||
* @thousands_sep: see definition in localeconv().
|
||||
*
|
||||
* There are 2 modes: counting and filling. If @writer is NULL,
|
||||
* we are in counting mode, else filling mode.
|
||||
* If counting, the required buffer size is returned.
|
||||
* If filling, we know the buffer will be large enough, so we don't
|
||||
* need to pass in the buffer size.
|
||||
* Inserts thousand grouping characters (as defined by grouping and
|
||||
* thousands_sep) into @writer.
|
||||
*
|
||||
* Return value: -1 on error, number of characters otherwise.
|
||||
**/
|
||||
Py_ssize_t
|
||||
_PyUnicode_InsertThousandsGrouping(
|
||||
_PyUnicodeWriter *writer,
|
||||
Py_ssize_t n_buffer,
|
||||
PyObject *digits,
|
||||
Py_ssize_t d_pos,
|
||||
Py_ssize_t n_digits,
|
||||
Py_ssize_t min_width,
|
||||
const char *grouping,
|
||||
PyObject *thousands_sep,
|
||||
Py_UCS4 *maxchar,
|
||||
int forward)
|
||||
{
|
||||
min_width = Py_MAX(0, min_width);
|
||||
if (writer) {
|
||||
assert(digits != NULL);
|
||||
assert(maxchar == NULL);
|
||||
}
|
||||
else {
|
||||
assert(digits == NULL);
|
||||
assert(maxchar != NULL);
|
||||
}
|
||||
assert(0 <= d_pos);
|
||||
assert(0 <= n_digits);
|
||||
assert(grouping != NULL);
|
||||
|
||||
Py_ssize_t count = 0;
|
||||
Py_ssize_t n_zeros;
|
||||
int loop_broken = 0;
|
||||
int use_separator = 0; /* First time through, don't append the
|
||||
separator. They only go between
|
||||
groups. */
|
||||
Py_ssize_t buffer_pos;
|
||||
Py_ssize_t digits_pos;
|
||||
Py_ssize_t len;
|
||||
Py_ssize_t n_chars;
|
||||
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
|
||||
be looked at */
|
||||
/* A generator that returns all of the grouping widths, until it
|
||||
returns 0. */
|
||||
GroupGenerator groupgen;
|
||||
GroupGenerator_init(&groupgen, grouping);
|
||||
const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
|
||||
|
||||
/* if digits are not grouped, thousands separator
|
||||
should be an empty string */
|
||||
assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));
|
||||
|
||||
digits_pos = d_pos + (forward ? 0 : n_digits);
|
||||
if (writer) {
|
||||
buffer_pos = writer->pos + (forward ? 0 : n_buffer);
|
||||
assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
|
||||
assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
|
||||
}
|
||||
else {
|
||||
buffer_pos = forward ? 0 : n_buffer;
|
||||
}
|
||||
|
||||
if (!writer) {
|
||||
*maxchar = 127;
|
||||
}
|
||||
|
||||
while ((len = GroupGenerator_next(&groupgen)) > 0) {
|
||||
len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1));
|
||||
n_zeros = Py_MAX(0, len - remaining);
|
||||
n_chars = Py_MAX(0, Py_MIN(remaining, len));
|
||||
|
||||
/* Use n_zero zero's and n_chars chars */
|
||||
|
||||
/* Count only, don't do anything. */
|
||||
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
||||
|
||||
/* Copy into the writer. */
|
||||
InsertThousandsGrouping_fill(writer, &buffer_pos,
|
||||
digits, &digits_pos,
|
||||
n_chars, n_zeros,
|
||||
use_separator ? thousands_sep : NULL,
|
||||
thousands_sep_len, maxchar, forward);
|
||||
|
||||
/* Use a separator next time. */
|
||||
use_separator = 1;
|
||||
|
||||
remaining -= n_chars;
|
||||
min_width -= len;
|
||||
|
||||
if (remaining <= 0 && min_width <= 0) {
|
||||
loop_broken = 1;
|
||||
break;
|
||||
}
|
||||
min_width -= thousands_sep_len;
|
||||
}
|
||||
if (!loop_broken) {
|
||||
/* We left the loop without using a break statement. */
|
||||
|
||||
len = Py_MAX(Py_MAX(remaining, min_width), 1);
|
||||
n_zeros = Py_MAX(0, len - remaining);
|
||||
n_chars = Py_MAX(0, Py_MIN(remaining, len));
|
||||
|
||||
/* Use n_zero zero's and n_chars chars */
|
||||
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
||||
|
||||
/* Copy into the writer. */
|
||||
InsertThousandsGrouping_fill(writer, &buffer_pos,
|
||||
digits, &digits_pos,
|
||||
n_chars, n_zeros,
|
||||
use_separator ? thousands_sep : NULL,
|
||||
thousands_sep_len, maxchar, forward);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
Py_ssize_t
|
||||
PyUnicode_Count(PyObject *str,
|
||||
|
|
@ -10427,7 +10256,7 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
|
|||
assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
|
||||
assert(start >= 0);
|
||||
assert(start + length <= PyUnicode_GET_LENGTH(unicode));
|
||||
unicode_fill(kind, data, fill_char, start, length);
|
||||
_PyUnicode_Fill(kind, data, fill_char, start, length);
|
||||
}
|
||||
|
||||
Py_ssize_t
|
||||
|
|
@ -10496,9 +10325,10 @@ pad(PyObject *self,
|
|||
kind = PyUnicode_KIND(u);
|
||||
data = PyUnicode_DATA(u);
|
||||
if (left)
|
||||
unicode_fill(kind, data, fill, 0, left);
|
||||
_PyUnicode_Fill(kind, data, fill, 0, left);
|
||||
if (right)
|
||||
unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
|
||||
_PyUnicode_Fill(kind, data, fill,
|
||||
left + _PyUnicode_LENGTH(self), right);
|
||||
_PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
|
||||
assert(_PyUnicode_CheckConsistency(u, 1));
|
||||
return u;
|
||||
|
|
@ -11910,7 +11740,7 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
|
|||
if (tabsize > 0) {
|
||||
incr = tabsize - (line_pos % tabsize);
|
||||
line_pos += incr;
|
||||
unicode_fill(kind, dest_data, ' ', j, incr);
|
||||
_PyUnicode_Fill(kind, dest_data, ' ', j, incr);
|
||||
j += incr;
|
||||
}
|
||||
}
|
||||
|
|
@ -15405,7 +15235,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
|
|||
/* Pad left with the fill character if needed */
|
||||
if (arg->width > len && !(arg->flags & F_LJUST)) {
|
||||
sublen = arg->width - len;
|
||||
unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen);
|
||||
_PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
|
||||
writer->pos += sublen;
|
||||
arg->width = len;
|
||||
}
|
||||
|
|
@ -15437,7 +15267,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
|
|||
/* Pad right with the fill character if needed */
|
||||
if (arg->width > len) {
|
||||
sublen = arg->width - len;
|
||||
unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen);
|
||||
_PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
|
||||
writer->pos += sublen;
|
||||
}
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -165,6 +165,7 @@
|
|||
<ClCompile Include="..\Objects\typeobject.c" />
|
||||
<ClCompile Include="..\Objects\typevarobject.c" />
|
||||
<ClCompile Include="..\Objects\unicodectype.c" />
|
||||
<ClCompile Include="..\Objects\unicode_formatter.c" />
|
||||
<ClCompile Include="..\Objects\unicodeobject.c" />
|
||||
<ClCompile Include="..\Objects\unionobject.c" />
|
||||
<ClCompile Include="..\Objects\weakrefobject.c" />
|
||||
|
|
@ -209,7 +210,6 @@
|
|||
<ClCompile Include="..\Python\errors.c" />
|
||||
<ClCompile Include="..\Python\fileutils.c" />
|
||||
<ClCompile Include="..\Python\flowgraph.c" />
|
||||
<ClCompile Include="..\Python\formatter_unicode.c" />
|
||||
<ClCompile Include="..\Python\frame.c" />
|
||||
<ClCompile Include="..\Python\future.c" />
|
||||
<ClCompile Include="..\Python\gc.c" />
|
||||
|
|
|
|||
|
|
@ -160,9 +160,6 @@
|
|||
<ClCompile Include="..\Python\flowgraph.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\formatter_unicode.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\frame.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
@ -487,6 +484,9 @@
|
|||
<ClCompile Include="..\Objects\unicodectype.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Objects\unicode_formatter.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Objects\unicodeobject.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
|||
|
|
@ -559,6 +559,7 @@
|
|||
<ClCompile Include="..\Objects\typeobject.c" />
|
||||
<ClCompile Include="..\Objects\typevarobject.c" />
|
||||
<ClCompile Include="..\Objects\unicodectype.c" />
|
||||
<ClCompile Include="..\Objects\unicode_formatter.c" />
|
||||
<ClCompile Include="..\Objects\unicodeobject.c" />
|
||||
<ClCompile Include="..\Objects\unionobject.c" />
|
||||
<ClCompile Include="..\Objects\weakrefobject.c" />
|
||||
|
|
@ -605,7 +606,6 @@
|
|||
<ClCompile Include="..\Python\errors.c" />
|
||||
<ClCompile Include="..\Python\fileutils.c" />
|
||||
<ClCompile Include="..\Python\flowgraph.c" />
|
||||
<ClCompile Include="..\Python\formatter_unicode.c" />
|
||||
<ClCompile Include="..\Python\frame.c" />
|
||||
<ClCompile Include="..\Python\frozen.c">
|
||||
<AdditionalIncludeDirectories>$(GeneratedFrozenModulesDir)Python;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
|
|
|
|||
|
|
@ -1274,6 +1274,9 @@
|
|||
<ClCompile Include="..\Objects\unicodectype.c">
|
||||
<Filter>Objects</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Objects\unicode_formatter.c">
|
||||
<Filter>Objects</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Objects\unicodeobject.c">
|
||||
<Filter>Objects</Filter>
|
||||
</ClCompile>
|
||||
|
|
@ -1382,9 +1385,6 @@
|
|||
<ClCompile Include="..\Python\flowgraph.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\formatter_unicode.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\frozen.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "pycore_fileutils.h" // fileutils definitions
|
||||
#include "pycore_runtime.h" // _PyRuntime
|
||||
#include "pycore_pystate.h" // _Py_AssertHoldsTstate()
|
||||
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
|
||||
#include "osdefs.h" // SEP
|
||||
|
||||
#include <stdlib.h> // mbstowcs()
|
||||
|
|
@ -50,9 +51,6 @@ extern int winerror_to_errno(int);
|
|||
int _Py_open_cloexec_works = -1;
|
||||
#endif
|
||||
|
||||
// The value must be the same in unicodeobject.c.
|
||||
#define MAX_UNICODE 0x10ffff
|
||||
|
||||
// mbstowcs() and mbrtowc() errors
|
||||
static const size_t DECODE_ERROR = ((size_t)-1);
|
||||
#ifdef HAVE_MBRTOWC
|
||||
|
|
@ -123,7 +121,7 @@ is_valid_wide_char(wchar_t ch)
|
|||
{
|
||||
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
|
||||
/* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
|
||||
for non-Unicode locales, which makes values higher than MAX_UNICODE
|
||||
for non-Unicode locales, which makes values higher than _Py_MAX_UNICODE
|
||||
possibly valid. */
|
||||
return 1;
|
||||
#endif
|
||||
|
|
@ -132,7 +130,7 @@ is_valid_wide_char(wchar_t ch)
|
|||
return 0;
|
||||
}
|
||||
#if SIZEOF_WCHAR_T > 2
|
||||
if (ch > MAX_UNICODE) {
|
||||
if (ch > _Py_MAX_UNICODE) {
|
||||
// bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
|
||||
// The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
|
||||
// it creates characters outside the [U+0000; U+10ffff] range:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue