| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |    Unicode character type helpers. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |    Written by Marc-Andre Lemburg (mal@lemburg.com). | 
					
						
							|  |  |  |    Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |    Copyright (c) Corporation for National Research Initiatives. | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "Python.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 /* Bit masks for the `flags` member of _PyUnicode_TypeRecord.  The helper
										    in this file that tests each mask is named next to it.  The values
										    0x10 and 0x20 are not defined here. */
										 #define ALPHA_MASK 0x01             /* _PyUnicode_IsAlpha */
										 #define DECIMAL_MASK 0x02           /* _PyUnicode_ToDecimalDigit */
										 #define DIGIT_MASK 0x04             /* _PyUnicode_ToDigit */
										 #define LOWER_MASK 0x08             /* _PyUnicode_IsLowercase */
										 #define TITLE_MASK 0x40             /* _PyUnicode_IsTitlecase */
										 #define UPPER_MASK 0x80             /* _PyUnicode_IsUppercase */
										 #define XID_START_MASK 0x100        /* _PyUnicode_IsXidStart */
										 #define XID_CONTINUE_MASK 0x200     /* _PyUnicode_IsXidContinue */
										 #define PRINTABLE_MASK 0x400        /* _PyUnicode_IsPrintable */
										 #define NUMERIC_MASK 0x800          /* _PyUnicode_IsNumeric */
										 #define CASE_IGNORABLE_MASK 0x1000  /* _PyUnicode_IsCaseIgnorable */
										 #define CASED_MASK 0x2000           /* _PyUnicode_IsCased */
										 /* When set, upper/lower/title of the record are read as packed offsets
										    into _PyUnicode_ExtendedCase instead of deltas to the character. */
										 #define EXTENDED_CASE_MASK 0x4000
 | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  | 
 | 
					
						
							/* One entry of the _PyUnicode_TypeRecords table; gettyperecord() returns
							   a pointer to the entry that applies to a given code point. */
							typedef struct {
							    /*
							       These are either deltas to the character or offsets in
							       _PyUnicode_ExtendedCase.

							       When EXTENDED_CASE_MASK is set in flags, the helpers in this file
							       read the low 16 bits as the starting index into
							       _PyUnicode_ExtendedCase and the bits from 24 upwards as the number
							       of code points of the full mapping.  For `lower`, bits 20-22
							       additionally hold the length of the case-folded sequence, which is
							       read from the position right after the lowercase sequence.
							    */
							    const int upper;
							    const int lower;
							    const int title;
							    /* Note if more flag space is needed, decimal and digit could be unified. */
							    const unsigned char decimal;  /* returned when DECIMAL_MASK is set */
							    const unsigned char digit;    /* returned when DIGIT_MASK is set */
							    const unsigned short flags;   /* bitwise OR of the *_MASK values */
							} _PyUnicode_TypeRecord;
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "unicodetype_db.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const _PyUnicode_TypeRecord * | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | gettyperecord(Py_UCS4 code) | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     int index; | 
					
						
							| 
									
										
										
										
											2000-07-06 13:57:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-10-18 16:11:54 +00:00
										 |  |  |     if (code >= 0x110000) | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |         index = 0; | 
					
						
							| 
									
										
										
										
											2003-12-29 01:36:01 +00:00
										 |  |  |     else | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |         index = index1[(code>>SHIFT)]; | 
					
						
							|  |  |  |         index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))]; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2001-06-26 20:36:12 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     return &_PyUnicode_TypeRecords[index]; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2000-07-06 13:57:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | /* Returns the titlecase Unicode characters corresponding to ch or just
 | 
					
						
							|  |  |  |    ch if no titlecase mapping is known. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-13 20:18:52 +02:00
										 |  |  | Py_UCS4 _PyUnicode_ToTitlecase(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							| 
									
										
										
										
											2001-06-27 06:28:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-01-15 21:33:32 -05:00
										 |  |  |     if (ctype->flags & EXTENDED_CASE_MASK) | 
					
						
							|  |  |  |         return _PyUnicode_ExtendedCase[ctype->title & 0xFFFF]; | 
					
						
							| 
									
										
										
										
											2012-01-15 21:19:20 -05:00
										 |  |  |     return ch + ctype->title; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns 1 for Unicode characters having the category 'Lt', 0
 | 
					
						
							|  |  |  |    otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsTitlecase(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & TITLE_MASK) != 0; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-08-14 22:37:03 +00:00
										 |  |  | /* Returns 1 for Unicode characters having the XID_Start property, 0
 | 
					
						
							|  |  |  |    otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsXidStart(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2007-08-14 22:37:03 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & XID_START_MASK) != 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns 1 for Unicode characters having the XID_Continue property,
 | 
					
						
							|  |  |  |    0 otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsXidContinue(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2007-08-14 22:37:03 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & XID_CONTINUE_MASK) != 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | /* Returns the integer decimal (0-9) for Unicode characters having
 | 
					
						
							|  |  |  |    this property, -1 otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_ToDecimalDigit(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsDecimalDigit(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     if (_PyUnicode_ToDecimalDigit(ch) < 0) | 
					
						
							| 
									
										
										
										
											2010-08-11 17:31:17 +00:00
										 |  |  |         return 0; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  |     return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns the integer digit (0-9) for Unicode characters having
 | 
					
						
							|  |  |  |    this property, -1 otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_ToDigit(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsDigit(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     if (_PyUnicode_ToDigit(ch) < 0) | 
					
						
							| 
									
										
										
										
											2010-08-11 17:31:17 +00:00
										 |  |  |         return 0; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  |     return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns the numeric value as double for Unicode characters having
 | 
					
						
							|  |  |  |    this property, -1.0 otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsNumeric(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2009-10-06 21:03:20 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & NUMERIC_MASK) != 0; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-11 18:37:52 +00:00
										 |  |  | /* Returns 1 for Unicode characters to be hex-escaped when repr()ed,
 | 
					
						
							|  |  |  |    0 otherwise. | 
					
						
							|  |  |  |    All characters except those characters defined in the Unicode character | 
					
						
							|  |  |  |    database as following categories are considered printable. | 
					
						
							|  |  |  |       * Cc (Other, Control) | 
					
						
							|  |  |  |       * Cf (Other, Format) | 
					
						
							|  |  |  |       * Cs (Other, Surrogate) | 
					
						
							|  |  |  |       * Co (Other, Private Use) | 
					
						
							|  |  |  |       * Cn (Other, Not Assigned) | 
					
						
							|  |  |  |       * Zl Separator, Line ('\u2028', LINE SEPARATOR) | 
					
						
							|  |  |  |       * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) | 
					
						
							|  |  |  |       * Zs (Separator, Space) other than ASCII space('\x20'). | 
					
						
							|  |  |  | */ | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsPrintable(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2008-06-11 18:37:52 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-04 15:55:02 +00:00
										 |  |  |     return (ctype->flags & PRINTABLE_MASK) != 0; | 
					
						
							| 
									
										
										
										
											2008-06-11 18:37:52 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | /* Returns 1 for Unicode characters having the category 'Ll', 0
 | 
					
						
							|  |  |  |    otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsLowercase(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & LOWER_MASK) != 0; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns 1 for Unicode characters having the category 'Lu', 0
 | 
					
						
							|  |  |  |    otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsUppercase(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & UPPER_MASK) != 0; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns the uppercase Unicode characters corresponding to ch or just
 | 
					
						
							|  |  |  |    ch if no uppercase mapping is known. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (ctype->flags & EXTENDED_CASE_MASK) | 
					
						
							| 
									
										
										
										
											2012-01-14 13:23:30 -05:00
										 |  |  |         return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF]; | 
					
						
							| 
									
										
										
										
											2012-01-15 21:19:20 -05:00
										 |  |  |     return ch + ctype->upper; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns the lowercase Unicode characters corresponding to ch or just
 | 
					
						
							|  |  |  |    ch if no lowercase mapping is known. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (ctype->flags & EXTENDED_CASE_MASK) | 
					
						
							| 
									
										
										
										
											2012-01-14 13:23:30 -05:00
										 |  |  |         return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF]; | 
					
						
							| 
									
										
										
										
											2012-01-15 21:19:20 -05:00
										 |  |  |     return ch + ctype->lower; | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (ctype->flags & EXTENDED_CASE_MASK) { | 
					
						
							| 
									
										
										
										
											2012-01-14 13:23:30 -05:00
										 |  |  |         int index = ctype->lower & 0xFFFF; | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  |         int n = ctype->lower >> 24; | 
					
						
							|  |  |  |         int i; | 
					
						
							|  |  |  |         for (i = 0; i < n; i++) | 
					
						
							|  |  |  |             res[i] = _PyUnicode_ExtendedCase[index + i]; | 
					
						
							|  |  |  |         return n; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2012-01-15 21:19:20 -05:00
										 |  |  |     res[0] = ch + ctype->lower; | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  |     return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (ctype->flags & EXTENDED_CASE_MASK) { | 
					
						
							| 
									
										
										
										
											2012-01-14 13:23:30 -05:00
										 |  |  |         int index = ctype->title & 0xFFFF; | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  |         int n = ctype->title >> 24; | 
					
						
							|  |  |  |         int i; | 
					
						
							|  |  |  |         for (i = 0; i < n; i++) | 
					
						
							|  |  |  |             res[i] = _PyUnicode_ExtendedCase[index + i]; | 
					
						
							|  |  |  |         return n; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2012-01-15 21:19:20 -05:00
										 |  |  |     res[0] = ch + ctype->title; | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  |     return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (ctype->flags & EXTENDED_CASE_MASK) { | 
					
						
							| 
									
										
										
										
											2012-01-14 13:23:30 -05:00
										 |  |  |         int index = ctype->upper & 0xFFFF; | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  |         int n = ctype->upper >> 24; | 
					
						
							|  |  |  |         int i; | 
					
						
							|  |  |  |         for (i = 0; i < n; i++) | 
					
						
							|  |  |  |             res[i] = _PyUnicode_ExtendedCase[index + i]; | 
					
						
							|  |  |  |         return n; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2012-01-15 21:19:20 -05:00
										 |  |  |     res[0] = ch + ctype->upper; | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  |     return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-01-14 13:23:30 -05:00
										 |  |  | int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (ctype->flags & EXTENDED_CASE_MASK && (ctype->lower >> 20) & 7) { | 
					
						
							|  |  |  |         int index = (ctype->lower & 0xFFFF) + (ctype->lower >> 24); | 
					
						
							|  |  |  |         int n = (ctype->lower >> 20) & 7; | 
					
						
							|  |  |  |         int i; | 
					
						
							|  |  |  |         for (i = 0; i < n; i++) | 
					
						
							|  |  |  |             res[i] = _PyUnicode_ExtendedCase[index + i]; | 
					
						
							|  |  |  |         return n; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return _PyUnicode_ToLowerFull(ch, res); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-01-11 18:17:06 -05:00
										 |  |  | int _PyUnicode_IsCased(Py_UCS4 ch) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & CASED_MASK) != 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return (ctype->flags & CASE_IGNORABLE_MASK) != 0; | 
					
						
							| 
									
										
										
										
											2000-03-10 22:52:46 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-07-05 09:48:59 +00:00
										 |  |  | /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
 | 
					
						
							|  |  |  |    'Lo' or 'Lm',  0 otherwise. */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-18 20:44:58 +00:00
										 |  |  | int _PyUnicode_IsAlpha(Py_UCS4 ch) | 
					
						
							| 
									
										
										
										
											2000-07-05 09:48:59 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); | 
					
						
							| 
									
										
										
										
											2000-07-05 09:48:59 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-09-25 21:48:13 +00:00
										 |  |  |     return (ctype->flags & ALPHA_MASK) != 0; | 
					
						
							| 
									
										
										
										
											2000-07-05 09:48:59 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 |