| 
									
										
										
										
											2022-02-04 10:32:20 +02:00
										 |  |  | /**************************************************************************/ | 
					
						
							|  |  |  | /*  char_utils.h                                                          */ | 
					
						
							|  |  |  | /**************************************************************************/ | 
					
						
							|  |  |  | /*                         This file is part of:                          */ | 
					
						
							|  |  |  | /*                             GODOT ENGINE                               */ | 
					
						
							|  |  |  | /*                        https://godotengine.org                         */ | 
					
						
							|  |  |  | /**************************************************************************/ | 
					
						
							|  |  |  | /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ | 
					
						
							|  |  |  | /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */ | 
					
						
							|  |  |  | /*                                                                        */ | 
					
						
							|  |  |  | /* Permission is hereby granted, free of charge, to any person obtaining  */ | 
					
						
							|  |  |  | /* a copy of this software and associated documentation files (the        */ | 
					
						
							|  |  |  | /* "Software"), to deal in the Software without restriction, including    */ | 
					
						
							|  |  |  | /* without limitation the rights to use, copy, modify, merge, publish,    */ | 
					
						
							|  |  |  | /* distribute, sublicense, and/or sell copies of the Software, and to     */ | 
					
						
							|  |  |  | /* permit persons to whom the Software is furnished to do so, subject to  */ | 
					
						
							|  |  |  | /* the following conditions:                                              */ | 
					
						
							|  |  |  | /*                                                                        */ | 
					
						
							|  |  |  | /* The above copyright notice and this permission notice shall be         */ | 
					
						
							|  |  |  | /* included in all copies or substantial portions of the Software.        */ | 
					
						
							|  |  |  | /*                                                                        */ | 
					
						
							|  |  |  | /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */ | 
					
						
							|  |  |  | /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */ | 
					
						
							|  |  |  | /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ | 
					
						
							|  |  |  | /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */ | 
					
						
							|  |  |  | /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */ | 
					
						
							|  |  |  | /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */ | 
					
						
							|  |  |  | /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */ | 
					
						
							|  |  |  | /**************************************************************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #pragma once
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "core/typedefs.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-18 15:07:11 +03:00
										 |  |  | #include "char_range.inc"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-04 17:35:39 +01:00
										 |  |  | #include <iterator>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-20 02:02:25 +01:00
										 // Hexadecimal digit lookup tables: the entry at index N (0-15) is the character
										 // that represents the value N, in upper case and in lower case respectively.
										 // Declared `inline` so that every translation unit including this header shares
										 // one definition instead of getting its own internal-linkage copy.
										 inline constexpr char hex_char_table_upper[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
										 inline constexpr char hex_char_table_lower[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-04 17:35:39 +01:00
										 // Expands to the complete body of a `bool` function: binary-searches `m_array`
										 // for an element whose inclusive [start, end] interval contains `p_char`, and
										 // returns true on a hit or false once the search window is empty.
										 //
										 // Requirements at the expansion site:
										 //  - a variable named `p_char` is in scope;
										 //  - `m_array` is a built-in array (or anything `std::size` accepts) whose
										 //    elements expose `start` and `end` members comparable with `p_char`;
										 //  - the use is followed by `;` (the macro deliberately ends without one).
										 // The result is only meaningful when the ranges are sorted in ascending order
										 // and do not overlap; the tables themselves are defined outside this file.
										 #define BSEARCH_CHAR_RANGE(m_array)                      \
										 	int low = 0;                                         \
										 	int high = static_cast<int>(std::size(m_array)) - 1; \
										                                                          \
										 	while (low <= high) {                                \
										 		const int middle = low + (high - low) / 2;       \
										 		if (p_char < m_array[middle].start) {            \
										 			high = middle - 1;                           \
										 		} else if (p_char > m_array[middle].end) {       \
										 			low = middle + 1;                            \
										 		} else {                                         \
										 			return true;                                 \
										 		}                                                \
										 	}                                                    \
										                                                          \
										 	return false
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 |  |  | constexpr bool is_unicode_identifier_start(char32_t p_char) { | 
					
						
							| 
									
										
										
										
											2024-02-22 12:18:45 +03:00
										 |  |  | 	BSEARCH_CHAR_RANGE(xid_start); | 
					
						
							| 
									
										
										
										
											2021-10-18 15:07:11 +03:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 |  |  | constexpr bool is_unicode_identifier_continue(char32_t p_char) { | 
					
						
							| 
									
										
										
										
											2024-02-22 12:18:45 +03:00
										 |  |  | 	BSEARCH_CHAR_RANGE(xid_continue); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 |  |  | constexpr bool is_unicode_upper_case(char32_t p_char) { | 
					
						
							| 
									
										
										
										
											2024-02-22 12:18:45 +03:00
										 |  |  | 	BSEARCH_CHAR_RANGE(uppercase_letter); | 
					
						
							| 
									
										
										
										
											2021-10-18 15:07:11 +03:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 |  |  | constexpr bool is_unicode_lower_case(char32_t p_char) { | 
					
						
							| 
									
										
										
										
											2024-02-22 12:18:45 +03:00
										 |  |  | 	BSEARCH_CHAR_RANGE(lowercase_letter); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 |  |  | constexpr bool is_unicode_letter(char32_t p_char) { | 
					
						
							| 
									
										
										
										
											2024-05-02 21:32:20 +02:00
										 |  |  | 	BSEARCH_CHAR_RANGE(unicode_letter); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-22 12:18:45 +03:00
										 |  |  | #undef BSEARCH_CHAR_RANGE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is an ASCII upper-case letter ('A' through 'Z').
										 constexpr bool is_ascii_upper_case(char32_t p_char) {
										 	return p_char >= U'A' && p_char <= U'Z';
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is an ASCII lower-case letter ('a' through 'z').
										 constexpr bool is_ascii_lower_case(char32_t p_char) {
										 	return p_char >= U'a' && p_char <= U'z';
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is an ASCII decimal digit ('0' through '9').
										 constexpr bool is_digit(char32_t p_char) {
										 	return p_char >= U'0' && p_char <= U'9';
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is an ASCII hexadecimal digit: '0'-'9', 'a'-'f' or
										 // 'A'-'F'.
										 constexpr bool is_hex_digit(char32_t p_char) {
										 	const bool decimal_digit = p_char >= U'0' && p_char <= U'9';
										 	const bool lower_hex_letter = p_char >= U'a' && p_char <= U'f';
										 	const bool upper_hex_letter = p_char >= U'A' && p_char <= U'F';
										 	return decimal_digit || lower_hex_letter || upper_hex_letter;
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is '0' or '1'.
										 constexpr bool is_binary_digit(char32_t p_char) {
										 	return p_char == U'0' || p_char == U'1';
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is an ASCII letter of either case ('a'-'z' or
										 // 'A'-'Z').
										 constexpr bool is_ascii_alphabet_char(char32_t p_char) {
										 	const bool lower_case = p_char >= U'a' && p_char <= U'z';
										 	const bool upper_case = p_char >= U'A' && p_char <= U'Z';
										 	return lower_case || upper_case;
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is an ASCII letter ('a'-'z', 'A'-'Z') or an ASCII
										 // decimal digit ('0'-'9').
										 constexpr bool is_ascii_alphanumeric_char(char32_t p_char) {
										 	const bool lower_case = p_char >= U'a' && p_char <= U'z';
										 	const bool upper_case = p_char >= U'A' && p_char <= U'Z';
										 	const bool decimal_digit = p_char >= U'0' && p_char <= U'9';
										 	return lower_case || upper_case || decimal_digit;
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is an ASCII letter ('a'-'z', 'A'-'Z'), an ASCII
										 // decimal digit ('0'-'9') or an underscore.
										 constexpr bool is_ascii_identifier_char(char32_t p_char) {
										 	const bool lower_case = p_char >= U'a' && p_char <= U'z';
										 	const bool upper_case = p_char >= U'A' && p_char <= U'Z';
										 	const bool decimal_digit = p_char >= U'0' && p_char <= U'9';
										 	return lower_case || upper_case || decimal_digit || p_char == U'_';
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is a printable ASCII character that is neither a
										 // letter nor a digit ('!'-'/', ':'-'@', '['-'`', '{'-'~'), or a tab, or a space.
										 // The underscore is explicitly excluded even though it lies inside '['-'`'.
										 constexpr bool is_symbol(char32_t p_char) {
										 	if (p_char == U'_') {
										 		return false;
										 	}
										 	return (p_char >= U'!' && p_char <= U'/') ||
										 			(p_char >= U':' && p_char <= U'@') ||
										 			(p_char >= U'[' && p_char <= U'`') ||
										 			(p_char >= U'{' && p_char <= U'~') ||
										 			p_char == U'\t' ||
										 			p_char == U' ';
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is in U+0000-U+001F or in U+007F-U+009F.
										 constexpr bool is_control(char32_t p_char) {
										 	const bool low_range = p_char <= 0x001f;
										 	const bool high_range = p_char >= 0x007f && p_char <= 0x009f;
										 	return low_range || high_range;
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is one of the code points treated as whitespace here:
										 //  - U+0020 (space) and U+0009-U+000D (tab, line feed, vertical tab, form feed,
										 //    carriage return);
										 //  - U+0085, U+00A0, U+1680;
										 //  - U+2000-U+200B (note that this range includes U+200B, zero width space);
										 //  - U+2028, U+2029, U+202F, U+205F, U+3000.
										 constexpr bool is_whitespace(char32_t p_char) {
										 	// ASCII space and the U+0009-U+000D control characters.
										 	if (p_char == U' ' || (p_char >= 0x0009 && p_char <= 0x000d)) {
										 		return true;
										 	}
										 	// The contiguous U+2000-U+200B run.
										 	if (p_char >= 0x2000 && p_char <= 0x200b) {
										 		return true;
										 	}
										 	// Remaining isolated code points.
										 	return p_char == 0x0085 ||
										 			p_char == 0x00a0 ||
										 			p_char == 0x1680 ||
										 			p_char == 0x2028 ||
										 			p_char == 0x2029 ||
										 			p_char == 0x202f ||
										 			p_char == 0x205f ||
										 			p_char == 0x3000;
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is one of the code points treated as a line break
										 // here: U+000A-U+000D, U+0085, U+2028 or U+2029.
										 constexpr bool is_linebreak(char32_t p_char) {
										 	if (p_char >= 0x000a && p_char <= 0x000d) {
										 		return true;
										 	}
										 	return p_char == 0x0085 || p_char == 0x2028 || p_char == 0x2029;
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is treated as punctuation here:
										 //  - ASCII ' '-'/', ':'-'@', '['-'^', '`' and '{'-'~' (the underscore is not
										 //    included, the space is);
										 //  - U+2000-U+206F and U+3000-U+303F.
										 constexpr bool is_punct(char32_t p_char) {
										 	const bool ascii_punct = (p_char >= U' ' && p_char <= U'/') ||
										 			(p_char >= U':' && p_char <= U'@') ||
										 			(p_char >= U'[' && p_char <= U'^') ||
										 			(p_char == U'`') ||
										 			(p_char >= U'{' && p_char <= U'~');
										 	const bool unicode_punct = (p_char >= 0x2000 && p_char <= 0x206f) ||
										 			(p_char >= 0x3000 && p_char <= 0x303f);
										 	return ascii_punct || unicode_punct;
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-21 14:54:19 -05:00
										 // Returns true if `p_char` is the ASCII underscore character '_'.
										 constexpr bool is_underscore(char32_t p_char) {
										 	return p_char == U'_';
										 }
					
						
							| 
									
										
										
										
											2024-07-28 21:32:28 +02:00
										 |  |  | 
 | 
					
						
							// Returns true if `p_char` is the ASCII hyphen-minus '-', U+2010 or U+2011.
							constexpr bool is_hyphen(char32_t p_char) {
								return p_char == U'-' || p_char == 0x2010 || p_char == 0x2011;
							}