add property tables

R=rsc
DELTA=1087  (1001 added, 78 deleted, 8 changed)
OCL=34137
CL=34147
This commit is contained in:
Rob Pike 2009-08-31 16:43:17 -07:00
parent 04a77ac78c
commit 1e55e4a3e6
3 changed files with 1008 additions and 85 deletions

View file

@ -25,7 +25,8 @@ func main() {
flag.Parse();
loadChars(); // always needed
printCategories();
printScripts();
printScriptOrProperty(false);
printScriptOrProperty(true);
printCases();
}
@ -39,6 +40,9 @@ var tablelist = flag.String("tables",
// scriptlist is the --scripts flag: a comma-separated list of script tables
// to generate. It defaults to "all"; an empty value skips script output.
var scriptlist = flag.String("scripts",
"all",
"comma-separated list of which script tables to generate");
// proplist is the --props flag: a comma-separated list of property tables
// to generate. It defaults to "all"; an empty value skips property output.
var proplist = flag.String("props",
"all",
"comma-separated list of which property tables to generate");
// cases is the --cases flag; per its usage text it selects generation of the
// case tables, and it defaults to true.
var cases = flag.Bool("cases",
true,
"generate case tables");
@ -117,8 +121,11 @@ type Script struct {
var chars = make([]Char, MaxChar+1)
var scripts = make(map[string] []Script)
var props = make(map[string] []Script) // a property looks like a script; can share the format
var lastChar uint32 = 0;
var lastChar uint32 = 0
// scriptParseExpression is the regular expression source for one data line of
// Scripts.txt or PropList.txt: a hexadecimal code point, an optional "..HEX"
// range end, zero or more spaces, a semicolon followed by one space, and then
// a name made of ASCII letters and underscores. printScriptOrProperty compiles
// it into scriptRe before any line is parsed.
const scriptParseExpression = `([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)`
// In UnicodeData.txt, some ranges are marked like this:
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
@ -217,7 +224,7 @@ func allCategories() []string {
return a;
}
func allScripts() []string {
func all(scripts map[string] []Script) []string {
a := make([]string, len(scripts));
i := 0;
for k := range scripts {
@ -462,7 +469,7 @@ func verifyRange(name string, inCategory Op, table []unicode.Range) {
}
}
func parseScript(line string) {
func parseScript(line string, scripts map[string] []Script) {
comment := strings.Index(line, "#");
if comment >= 0 {
line = line[0:comment]
@ -504,84 +511,6 @@ func parseScript(line string) {
scripts[name] = s;
}
// printScripts downloads Scripts.txt from *url, parses it with parseScript,
// and writes Go source for the requested script tables to standard output.
// It does nothing when the --scripts flag is empty. When *test is set it
// runs fullScriptTest on the selected names instead of printing tables.
func printScripts() {
if *scriptlist == "" {
return
}
var err os.Error;
// scriptRe is assigned, not declared, here; it is defined outside this function.
scriptRe, err = regexp.Compile(`([0-9A-F]+)(\.\.[0-9A-F]+)? +; ([A-Za-z_]+)`);
if err != nil {
die.Log("re error:", err)
}
resp, _, err := http.Get(*url + "Scripts.txt");
if err != nil {
die.Log(err);
}
if resp.StatusCode != 200 {
die.Log("bad GET status for Scripts.txt", resp.Status);
}
input := bufio.NewReader(resp.Body);
for {
line, err := input.ReadString('\n');
if err != nil {
// On EOF the loop exits before parseScript is called, so whatever
// ReadString returned together with EOF is not parsed.
if err == os.EOF {
break;
}
die.Log(err);
}
// Drop the final byte (the '\n' delimiter) before parsing.
parseScript(line[0:len(line)-1]);
}
resp.Body.Close();
// Find out which scripts to dump
list := strings.Split(*scriptlist, ",", 0);
if *scriptlist == "all" {
list = allScripts();
}
if *test {
fullScriptTest(list);
return;
}
// Header comment recording the flags used to produce the output.
fmt.Printf(
"// Generated by running\n"
"// maketables --scripts=%s --url=%s\n"
"// DO NOT EDIT\n\n",
*scriptlist,
*url
);
// The name-to-table map is only emitted when every script is generated;
// entries are printed in the iteration order of the scripts map.
if *scriptlist == "all" {
fmt.Println("// Scripts is the set of Unicode script tables.");
fmt.Println("var Scripts = map[string] []Range {");
for k, _ := range scripts {
fmt.Printf("\t%q: %s,\n", k, k);
}
fmt.Printf("}\n\n");
}
// For each requested name, print the underscore-prefixed table now and
// save the declaration line for the public name so the declarations can
// be sorted and printed together afterwards.
decl := make(sort.StringArray, len(list));
ndecl := 0;
for _, name := range list {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
name, name, name, name
);
ndecl++;
fmt.Printf("var _%s = []Range {\n", name);
// foldAdjacent turns the parsed entries into unicode.Range values.
ranges := foldAdjacent(scripts[name]);
for _, s := range ranges {
fmt.Printf(format, s.Lo, s.Hi, s.Stride);
}
fmt.Printf("}\n\n");
}
decl.Sort();
fmt.Println("var (");
for _, d := range decl {
fmt.Print(d);
}
fmt.Println(")\n");
}
// The script tables have a lot of adjacent elements. Fold them together.
func foldAdjacent(r []Script) []unicode.Range {
s := make([]unicode.Range, 0, len(r));
@ -598,18 +527,18 @@ func foldAdjacent(r []Script) []unicode.Range {
return s;
}
func fullScriptTest(list []string) {
func fullScriptTest(list []string, installed map[string] []unicode.Range, scripts map[string] []Script) {
for _, name := range list {
if _, ok := scripts[name]; !ok {
die.Log("unknown script", name);
}
r, ok := unicode.Scripts[name];
r, ok := installed[name];
if !ok {
die.Log("unknown table", name);
}
for _, script := range scripts[name] {
for r := script.lo; r <= script.hi; r++ {
if !unicode.Is(unicode.Scripts[name], int(r)) {
if !unicode.Is(installed[name], int(r)) {
fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", r, name);
}
}
@ -617,6 +546,110 @@ func fullScriptTest(list []string) {
}
}
// printScriptOrProperty downloads either Scripts.txt (doProps false) or
// PropList.txt (doProps true) from *url, parses it with parseScript into the
// matching table (scripts or props), and writes Go source for the requested
// tables to standard output. It does nothing when the corresponding flag
// (--scripts or --props) is empty. When *test is set it runs fullScriptTest
// against the installed unicode.Scripts or unicode.Props map instead of
// printing tables.
//
// PropList.txt has the same format as Scripts.txt so we can share its parser.
func printScriptOrProperty(doProps bool) {
// Defaults describe the script case; they are overridden below for properties.
// Note that the local variable flag shadows the flag package inside this function.
flag := "scripts";
flaglist := *scriptlist;
file := "Scripts.txt";
table := scripts;
installed := unicode.Scripts;
if doProps {
flag = "props";
flaglist = *proplist;
file = "PropList.txt";
table = props;
installed = unicode.Props;
}
if flaglist == "" {
return
}
var err os.Error;
// scriptRe is assigned, not declared, here; it is defined outside this function.
scriptRe, err = regexp.Compile(scriptParseExpression);
if err != nil {
die.Log("re error:", err)
}
resp, _, err := http.Get(*url + file);
if err != nil {
die.Log(err);
}
if resp.StatusCode != 200 {
die.Log("bad GET status for ", file, ":", resp.Status);
}
input := bufio.NewReader(resp.Body);
for {
line, err := input.ReadString('\n');
if err != nil {
// On EOF the loop exits before parseScript is called, so whatever
// ReadString returned together with EOF is not parsed.
if err == os.EOF {
break;
}
die.Log(err);
}
// Drop the final byte (the '\n' delimiter) before parsing.
parseScript(line[0:len(line)-1], table);
}
resp.Body.Close();
// Find out which scripts or properties to dump
list := strings.Split(flaglist, ",", 0);
if flaglist == "all" {
list = all(table);
}
if *test {
fullScriptTest(list, installed, table);
return;
}
// Header comment recording the flags used to produce the output.
fmt.Printf(
"// Generated by running\n"
"// maketables --%s=%s --url=%s\n"
"// DO NOT EDIT\n\n",
flag,
flaglist,
*url
);
// The name-to-table map is only emitted when every table is generated;
// entries are printed in the iteration order of the table map.
if flaglist == "all" {
if doProps {
fmt.Println("// Props is the set of Unicode property tables.");
fmt.Println("var Props = map[string] []Range {");
} else {
fmt.Println("// Scripts is the set of Unicode script tables.");
fmt.Println("var Scripts = map[string] []Range {");
}
for k, _ := range table {
fmt.Printf("\t%q: %s,\n", k, k);
}
fmt.Printf("}\n\n");
}
// For each requested name, print the underscore-prefixed table now and
// save the declaration line for the public name so the declarations can
// be sorted and printed together afterwards.
decl := make(sort.StringArray, len(list));
ndecl := 0;
for _, name := range list {
// Only the wording of the generated comment differs between the two cases.
if doProps {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters with property %s.\n",
name, name, name, name
);
} else {
decl[ndecl] = fmt.Sprintf(
"\t%s = _%s;\t// %s is the set of Unicode characters in script %s.\n",
name, name, name, name
);
}
ndecl++;
fmt.Printf("var _%s = []Range {\n", name);
// foldAdjacent turns the parsed entries into unicode.Range values.
ranges := foldAdjacent(table[name]);
for _, s := range ranges {
fmt.Printf(format, s.Lo, s.Hi, s.Stride);
}
fmt.Printf("}\n\n");
}
decl.Sort();
fmt.Println("var (");
for _, d := range decl {
fmt.Print(d);
}
fmt.Println(")\n");
}
const (
CaseUpper = 1 << iota;
CaseLower;