mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
add property tables
R=rsc DELTA=1087 (1001 added, 78 deleted, 8 changed) OCL=34137 CL=34147
This commit is contained in:
parent
04a77ac78c
commit
1e55e4a3e6
3 changed files with 1008 additions and 85 deletions
|
|
@ -25,7 +25,8 @@ func main() {
|
|||
flag.Parse();
|
||||
loadChars(); // always needed
|
||||
printCategories();
|
||||
printScripts();
|
||||
printScriptOrProperty(false);
|
||||
printScriptOrProperty(true);
|
||||
printCases();
|
||||
}
|
||||
|
||||
|
|
@ -39,6 +40,9 @@ var tablelist = flag.String("tables",
|
|||
// scriptlist is the -scripts flag: a comma-separated list of script table
// names to generate. The default "all" is treated by printScriptOrProperty as
// "every script parsed from Scripts.txt"; an empty value makes it return
// without generating anything.
var scriptlist = flag.String("scripts",
|
||||
"all",
|
||||
"comma-separated list of which script tables to generate");
|
||||
// proplist is the -props flag: the property-table counterpart of scriptlist.
// printScriptOrProperty(true) reads it with the same "all" / empty-string
// conventions, using PropList.txt as its input.
var proplist = flag.String("props",
|
||||
"all",
|
||||
"comma-separated list of which property tables to generate");
|
||||
// cases is the -cases boolean flag (default true) controlling generation of
// the case tables.
var cases = flag.Bool("cases",
|
||||
true,
|
||||
"generate case tables");
|
||||
|
|
@ -117,8 +121,11 @@ type Script struct {
|
|||
|
||||
// chars holds one Char entry for every index from 0 through MaxChar inclusive.
var chars = make([]Char, MaxChar+1)
|
||||
// scripts maps a script name to the Script entries parsed for it; it is the
// table printScriptOrProperty fills and dumps when called with doProps false.
var scripts = make(map[string] []Script)
|
||||
// props is the same kind of table for properties (doProps true).
var props = make(map[string] []Script) // a property looks like a script; can share the format
|
||||
|
||||
// NOTE(review): lastChar is declared twice in this listing; the two lines
// differ only in the trailing semicolon and look like the before/after sides
// of the same change. Its use is not visible in this section.
var lastChar uint32 = 0;
|
||||
var lastChar uint32 = 0
|
||||
|
||||
// scriptParseExpression is the pattern compiled into scriptRe by
// printScriptOrProperty. It captures:
//   1. a code point written in upper-case hexadecimal,
//   2. optionally ".." followed by a second hexadecimal code point,
//   3. after any number of spaces, a semicolon and one space, a name made of
//      ASCII letters and underscores.
const scriptParseExpression = `([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)`
|
||||
|
||||
// In UnicodeData.txt, some ranges are marked like this:
|
||||
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
|
||||
|
|
@ -217,7 +224,7 @@ func allCategories() []string {
|
|||
return a;
|
||||
}
|
||||
|
||||
func allScripts() []string {
|
||||
func all(scripts map[string] []Script) []string {
|
||||
a := make([]string, len(scripts));
|
||||
i := 0;
|
||||
for k := range scripts {
|
||||
|
|
@ -462,7 +469,7 @@ func verifyRange(name string, inCategory Op, table []unicode.Range) {
|
|||
}
|
||||
}
|
||||
|
||||
func parseScript(line string) {
|
||||
func parseScript(line string, scripts map[string] []Script) {
|
||||
comment := strings.Index(line, "#");
|
||||
if comment >= 0 {
|
||||
line = line[0:comment]
|
||||
|
|
@ -504,84 +511,6 @@ func parseScript(line string) {
|
|||
scripts[name] = s;
|
||||
}
|
||||
|
||||
// printScripts fetches Scripts.txt from the location given by *url, feeds each
// line to parseScript, and then either
//   - runs fullScriptTest on the selected names and returns, when *test is
//     set, or
//   - prints Go source for the selected script tables with fmt.
// It returns immediately when the -scripts flag is the empty string.
// Failures (bad regexp, HTTP error, non-200 status, read error) are reported
// through die.Log.
func printScripts() {
|
||||
if *scriptlist == "" {
|
||||
return
|
||||
}
|
||||
var err os.Error;
|
||||
// The line pattern requires at least one space before the semicolon.
scriptRe, err = regexp.Compile(`([0-9A-F]+)(\.\.[0-9A-F]+)? +; ([A-Za-z_]+)`);
|
||||
if err != nil {
|
||||
die.Log("re error:", err)
|
||||
}
|
||||
resp, _, err := http.Get(*url + "Scripts.txt");
|
||||
if err != nil {
|
||||
die.Log(err);
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
die.Log("bad GET status for Scripts.txt", resp.Status);
|
||||
}
|
||||
input := bufio.NewReader(resp.Body);
|
||||
for {
|
||||
line, err := input.ReadString('\n');
|
||||
if err != nil {
|
||||
// NOTE(review): on EOF the loop exits before parseScript is called, so a
// final line with no trailing newline is not parsed.
if err == os.EOF {
|
||||
break;
|
||||
}
|
||||
die.Log(err);
|
||||
}
|
||||
// Drop the last byte of the line (the '\n' delimiter) before parsing.
parseScript(line[0:len(line)-1]);
|
||||
}
|
||||
resp.Body.Close();
|
||||
|
||||
// Find out which scripts to dump
|
||||
list := strings.Split(*scriptlist, ",", 0);
|
||||
if *scriptlist == "all" {
|
||||
list = allScripts();
|
||||
}
|
||||
if *test {
|
||||
fullScriptTest(list);
|
||||
return;
|
||||
}
|
||||
|
||||
// Header of the generated output: records the flags used for this run.
fmt.Printf(
|
||||
"// Generated by running\n"
|
||||
"// maketables --scripts=%s --url=%s\n"
|
||||
"// DO NOT EDIT\n\n",
|
||||
*scriptlist,
|
||||
*url
|
||||
);
|
||||
// The name -> table map is emitted only when every script was requested.
if *scriptlist == "all" {
|
||||
fmt.Println("// Scripts is the set of Unicode script tables.");
|
||||
fmt.Println("var Scripts = map[string] []Range {");
|
||||
for k, _ := range scripts {
|
||||
fmt.Printf("\t%q: %s,\n", k, k);
|
||||
}
|
||||
fmt.Printf("}\n\n");
|
||||
}
|
||||
|
||||
// For each selected name, print a "_name" range table right away and queue
// a "name = _name" declaration line; the queued lines are sorted and
// printed together in one var block at the end.
decl := make(sort.StringArray, len(list));
|
||||
ndecl := 0;
|
||||
for _, name := range list {
|
||||
decl[ndecl] = fmt.Sprintf(
|
||||
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
|
||||
name, name, name, name
|
||||
);
|
||||
ndecl++;
|
||||
fmt.Printf("var _%s = []Range {\n", name);
|
||||
ranges := foldAdjacent(scripts[name]);
|
||||
for _, s := range ranges {
|
||||
fmt.Printf(format, s.Lo, s.Hi, s.Stride);
|
||||
}
|
||||
fmt.Printf("}\n\n");
|
||||
}
|
||||
decl.Sort();
|
||||
fmt.Println("var (");
|
||||
for _, d := range decl {
|
||||
fmt.Print(d);
|
||||
}
|
||||
fmt.Println(")\n");
|
||||
}
|
||||
|
||||
// The script tables have a lot of adjacent elements. Fold them together.
|
||||
func foldAdjacent(r []Script) []unicode.Range {
|
||||
s := make([]unicode.Range, 0, len(r));
|
||||
|
|
@ -598,18 +527,18 @@ func foldAdjacent(r []Script) []unicode.Range {
|
|||
return s;
|
||||
}
|
||||
|
||||
func fullScriptTest(list []string) {
|
||||
func fullScriptTest(list []string, installed map[string] []unicode.Range, scripts map[string] []Script) {
|
||||
for _, name := range list {
|
||||
if _, ok := scripts[name]; !ok {
|
||||
die.Log("unknown script", name);
|
||||
}
|
||||
r, ok := unicode.Scripts[name];
|
||||
r, ok := installed[name];
|
||||
if !ok {
|
||||
die.Log("unknown table", name);
|
||||
}
|
||||
for _, script := range scripts[name] {
|
||||
for r := script.lo; r <= script.hi; r++ {
|
||||
if !unicode.Is(unicode.Scripts[name], int(r)) {
|
||||
if !unicode.Is(installed[name], int(r)) {
|
||||
fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", r, name);
|
||||
}
|
||||
}
|
||||
|
|
@ -617,6 +546,110 @@ func fullScriptTest(list []string) {
|
|||
}
|
||||
}
|
||||
|
||||
// PropList.txt has the same format as Scripts.txt so we can share its parser.
|
||||
// printScriptOrProperty generates (or, with *test set, verifies) either the
// script tables or the property tables.
//
// doProps selects the variant:
//   - false: flag -scripts, input Scripts.txt, table scripts, checked against
//     unicode.Scripts;
//   - true:  flag -props, input PropList.txt, table props, checked against
//     unicode.Props.
//
// The input file is fetched from *url, every line is handed to parseScript
// together with the chosen table, and the selected names are then either
// passed to fullScriptTest or printed as Go source with fmt.
// It returns immediately when the relevant flag is the empty string.
// Failures (bad regexp, HTTP error, non-200 status, read error) are reported
// through die.Log.
func printScriptOrProperty(doProps bool) {
|
||||
// NOTE(review): this local shadows the imported flag package for the rest
// of the function; it is only used as the flag name in the output header.
flag := "scripts";
|
||||
flaglist := *scriptlist;
|
||||
file := "Scripts.txt";
|
||||
table := scripts;
|
||||
installed := unicode.Scripts;
|
||||
if doProps {
|
||||
flag = "props";
|
||||
flaglist = *proplist;
|
||||
file = "PropList.txt";
|
||||
table = props;
|
||||
installed = unicode.Props;
|
||||
}
|
||||
if flaglist == "" {
|
||||
return
|
||||
}
|
||||
var err os.Error;
|
||||
scriptRe, err = regexp.Compile(scriptParseExpression);
|
||||
if err != nil {
|
||||
die.Log("re error:", err)
|
||||
}
|
||||
resp, _, err := http.Get(*url + file);
|
||||
if err != nil {
|
||||
die.Log(err);
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
die.Log("bad GET status for ", file, ":", resp.Status);
|
||||
}
|
||||
input := bufio.NewReader(resp.Body);
|
||||
for {
|
||||
line, err := input.ReadString('\n');
|
||||
if err != nil {
|
||||
// NOTE(review): on EOF the loop exits before parseScript is called, so a
// final line with no trailing newline is not parsed.
if err == os.EOF {
|
||||
break;
|
||||
}
|
||||
die.Log(err);
|
||||
}
|
||||
// Drop the last byte of the line (the '\n' delimiter) before parsing.
parseScript(line[0:len(line)-1], table);
|
||||
}
|
||||
resp.Body.Close();
|
||||
|
||||
// Find out which scripts or properties to dump
|
||||
list := strings.Split(flaglist, ",", 0);
|
||||
if flaglist == "all" {
|
||||
list = all(table);
|
||||
}
|
||||
if *test {
|
||||
fullScriptTest(list, installed, table);
|
||||
return;
|
||||
}
|
||||
|
||||
// Header of the generated output: records the flags used for this run.
fmt.Printf(
|
||||
"// Generated by running\n"
|
||||
"// maketables --%s=%s --url=%s\n"
|
||||
"// DO NOT EDIT\n\n",
|
||||
flag,
|
||||
flaglist,
|
||||
*url
|
||||
);
|
||||
// The name -> table map is emitted only when every entry was requested.
if flaglist == "all" {
|
||||
if doProps {
|
||||
fmt.Println("// Props is the set of Unicode property tables.");
|
||||
fmt.Println("var Props = map[string] []Range {");
|
||||
} else {
|
||||
fmt.Println("// Scripts is the set of Unicode script tables.");
|
||||
fmt.Println("var Scripts = map[string] []Range {");
|
||||
}
|
||||
for k, _ := range table {
|
||||
fmt.Printf("\t%q: %s,\n", k, k);
|
||||
}
|
||||
fmt.Printf("}\n\n");
|
||||
}
|
||||
|
||||
// For each selected name, print a "_name" range table right away and queue
// a "name = _name" declaration line; the queued lines are sorted and
// printed together in one var block at the end. Only the wording of the
// queued comment differs between properties and scripts.
decl := make(sort.StringArray, len(list));
|
||||
ndecl := 0;
|
||||
for _, name := range list {
|
||||
if doProps {
|
||||
decl[ndecl] = fmt.Sprintf(
|
||||
"\t%s = _%s;\t// %s is the set of Unicode characters with property %s.\n",
|
||||
name, name, name, name
|
||||
);
|
||||
} else {
|
||||
decl[ndecl] = fmt.Sprintf(
|
||||
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
|
||||
name, name, name, name
|
||||
);
|
||||
}
|
||||
ndecl++;
|
||||
fmt.Printf("var _%s = []Range {\n", name);
|
||||
ranges := foldAdjacent(table[name]);
|
||||
for _, s := range ranges {
|
||||
fmt.Printf(format, s.Lo, s.Hi, s.Stride);
|
||||
}
|
||||
fmt.Printf("}\n\n");
|
||||
}
|
||||
decl.Sort();
|
||||
fmt.Println("var (");
|
||||
for _, d := range decl {
|
||||
fmt.Print(d);
|
||||
}
|
||||
fmt.Println(")\n");
|
||||
}
|
||||
|
||||
const (
|
||||
CaseUpper = 1 << iota;
|
||||
CaseLower;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue