mirror of
				https://github.com/godotengine/godot.git
				synced 2025-11-03 23:21:15 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			99 lines
		
	
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			99 lines
		
	
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
#!/usr/bin/env python3
 | 
						|
 | 
						|
# Script used to dump char ranges from
 | 
						|
# the Unicode Character Database to the `unicode_ranges.inc` file.
 | 
						|
# NOTE: This script is deliberately not integrated into the build system;
 | 
						|
# you should run it manually whenever you want to update the data.
 | 
						|
 | 
						|
import os
 | 
						|
import sys
 | 
						|
from typing import Final, List, Set, Tuple
 | 
						|
from urllib.request import urlopen
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
    sys.path.insert(1, os.path.join(os.path.dirname(__file__), "../../"))
 | 
						|
 | 
						|
from methods import generate_copyright_header
 | 
						|
 | 
						|
# Location of the Unicode `Blocks.txt` data file that this script downloads.
# The Unicode version is part of the path.
URL: Final[str] = "https://www.unicode.org/Public/16.0.0/ucd/Blocks.txt"


# Filled by `parse_unicode_data()` with `(start, end, block_name)` tuples,
# where `start` and `end` are "0x"-prefixed hexadecimal strings.
ranges: List[Tuple[str, str, str]] = []

# Names of blocks that are skipped while parsing and therefore never written
# to the generated file.
# NOTE(review): presumably excluded because they hold no displayable glyphs; confirm.
exclude_blocks: Set[str] = {
    "High Surrogates",
    "High Private Use Surrogates",
    "Low Surrogates",
    "Variation Selectors",
    "Specials",
    "Egyptian Hieroglyph Format Controls",
    "Tags",
    "Variation Selectors Supplement",
}
 | 
						|
 | 
						|
 | 
						|
def parse_unicode_data() -> None:
    """Download the block list from `URL` and append its entries to `ranges`.

    Lines that start with `#` and blank lines are ignored. Every other line
    is split on `;` into a `START..END` range and a block name. Blocks whose
    name is in `exclude_blocks` are skipped; all others are appended to the
    module-level `ranges` list as `("0xSTART", "0xEND", name)`.

    Entries are appended without clearing the list first, so calling this
    function more than once duplicates them.
    """
    # Use the response as a context manager so the HTTP connection is closed
    # even when reading or decoding fails part-way through.
    with urlopen(URL) as response:
        lines: List[str] = [line.decode("utf-8") for line in response]

    for line in lines:
        if line.startswith("#") or not line.strip():
            continue

        split_line: List[str] = line.split(";")

        char_range: str = split_line[0].strip()
        block: str = split_line[1].strip()

        if block in exclude_blocks:
            continue

        range_start, range_end = char_range.split("..")

        ranges.append((f"0x{range_start}", f"0x{range_end}", block))
 | 
						|
 | 
						|
 | 
						|
def make_array(array_name: str, ranges: List[Tuple[str, str, str]]) -> str:
    """Return C++ source text defining a static `UniRange` array.

    Args:
        array_name: Identifier used for the generated array.
        ranges: `(start, end, block_name)` tuples. `start` and `end` are
            inserted verbatim; `block_name` is emitted inside a `U"..."`
            literal without any escaping.

    Returns:
        The array definition, one row per entry in `ranges`, followed by a
        final `{ 0x10FFFF, 0x10FFFF, String() }` row and two trailing newlines.
    """
    # Collect the pieces and join once instead of growing a string with `+=`.
    pieces: List[str] = [f"static UniRange {array_name}[] = {{\n"]
    pieces.extend(f'\t{{ {start}, {end}, U"{block}" }},\n' for start, end, block in ranges)
    pieces.append("\t{ 0x10FFFF, 0x10FFFF, String() }\n};\n\n")

    return "".join(pieces)
 | 
						|
 | 
						|
 | 
						|
def generate_unicode_ranges_inc() -> None:
    """Download the Unicode block list and write `unicode_ranges.inc`.

    The output path is `../../editor/import/unicode_ranges.inc`, resolved
    relative to this script's directory. The file consists of the header
    returned by `generate_copyright_header()`, an include guard, the
    `UniRange` struct definition, and the `unicode_ranges` array built by
    `make_array()` from the module-level `ranges`.
    """
    parse_unicode_data()

    source: str = generate_copyright_header("unicode_ranges.inc")

    source += f"""
// This file was generated using the `misc/scripts/unicode_ranges_fetch.py` script.

#ifndef UNICODE_RANGES_INC
#define UNICODE_RANGES_INC

// Unicode Character Blocks
// Source: {URL}

struct UniRange {{
\tint32_t start;
\tint32_t end;
\tString name;
}};\n\n"""

    source += make_array("unicode_ranges", ranges)

    source += "#endif // UNICODE_RANGES_INC\n"

    unicode_ranges_path: str = os.path.join(os.path.dirname(__file__), "../../editor/import/unicode_ranges.inc")
    # Pin the encoding so the output does not depend on the platform locale;
    # `newline="\n"` writes "\n" untranslated, keeping LF line endings everywhere.
    with open(unicode_ranges_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(source)

    print("`unicode_ranges.inc` generated successfully.")
 | 
						|
 | 
						|
 | 
						|
# Only generate the file when run as a script, not when imported.
if __name__ == "__main__":
    generate_unicode_ranges_inc()
 |