Move unicode_ranges out of dynamic_font_import_settings.cpp into its own file

2025-12-08 06:09:55 +00:00 · 2025-01-21 16:55:42 +01:00 · 2025-01-21 16:55:42 +01:00 · d21007cf9a
commit d21007cf9a
parent 978b38797b
3 changed files with 480 additions and 350 deletions
--- a/misc/scripts/unicode_ranges_fetch.py
+++ b/misc/scripts/unicode_ranges_fetch.py
@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+
+# Script used to dump char ranges from
+# the Unicode Character Database to the `unicode_ranges.inc` file.
+# NOTE: This script is deliberately not integrated into the build system;
+# you should run it manually whenever you want to update the data.
+
+import os
+import sys
+from typing import Final, List, Set, Tuple
+from urllib.request import urlopen
+
+if __name__ == "__main__":
+    sys.path.insert(1, os.path.join(os.path.dirname(__file__), "../../"))
+
+from methods import generate_copyright_header
+
+# Unicode Character Database block list; parsed below as `START..END; Block Name` lines.
+URL: Final[str] = "https://www.unicode.org/Public/16.0.0/ucd/Blocks.txt"
+
+
+# Collected `(start, end, block name)` entries, appended to by `parse_unicode_data()`.
+# `start` and `end` are "0x"-prefixed hexadecimal strings.
+ranges: List[Tuple[str, str, str]] = []
+
+# Names of the blocks that `parse_unicode_data()` skips.
+# NOTE(review): presumably these blocks hold no user-selectable glyphs — confirm.
+exclude_blocks: Set[str] = {
+    "High Surrogates",
+    "High Private Use Surrogates",
+    "Low Surrogates",
+    "Variation Selectors",
+    "Specials",
+    "Egyptian Hieroglyph Format Controls",
+    "Tags",
+    "Variation Selectors Supplement",
+}
+
+
+def parse_unicode_data() -> None:
+    lines: List[str] = [line.decode("utf-8") for line in urlopen(URL)]
+
+    for line in lines:
+        if line.startswith("#") or not line.strip():
+            continue
+
+        split_line: List[str] = line.split(";")
+
+        char_range: str = split_line[0].strip()
+        block: str = split_line[1].strip()
+
+        if block in exclude_blocks:
+            continue
+
+        range_start, range_end = char_range.split("..")
+
+        ranges.append((f"0x{range_start}", f"0x{range_end}", block))
+
+
+def make_array(array_name: str, ranges: List[Tuple[str, str, str]]) -> str:
+    result: str = f"static UniRange {array_name}[] = {{\n"
+
+    for start, end, block in ranges:
+        result += f'\t{{ {start}, {end}, U"{block}" }},\n'
+
+    result += """\t{ 0x10FFFF, 0x10FFFF, String() }
+};\n\n"""
+
+    return result
+
+
+def generate_unicode_ranges_inc() -> None:
+    parse_unicode_data()
+
+    source: str = generate_copyright_header("unicode_ranges.inc")
+
+    source += f"""
+// This file was generated using the `misc/scripts/unicode_ranges_fetch.py` script.
+
+#ifndef UNICODE_RANGES_INC
+#define UNICODE_RANGES_INC
+
+// Unicode Character Blocks
+// Source: {URL}
+
+struct UniRange {{
+\tint32_t start;
+\tint32_t end;
+\tString name;
+}};\n\n"""
+
+    source += make_array("unicode_ranges", ranges)
+
+    source += "#endif // UNICODE_RANGES_INC\n"
+
+    unicode_ranges_path: str = os.path.join(os.path.dirname(__file__), "../../editor/import/unicode_ranges.inc")
+    with open(unicode_ranges_path, "w", newline="\n") as f:
+        f.write(source)
+
+    print("`unicode_ranges.inc` generated successfully.")
+
+
+# Generate the file only when this script is executed directly, not when imported.
+if __name__ == "__main__":
+    generate_unicode_ranges_inc()