mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
add the Upper/Lower sequence optimization.
tables shrink 900 lines. mapping code gets a little slower R=rsc DELTA=1124 (105 added, 952 deleted, 67 changed) OCL=34079 CL=34089
This commit is contained in:
parent
be219c5e9b
commit
3c098e2789
4 changed files with 171 additions and 1018 deletions
|
|
@ -636,13 +636,13 @@ type caseState struct {
|
|||
// Is d a continuation of the state of c?
|
||||
func (c *caseState) adjacent(d *caseState) bool {
|
||||
if d.point < c.point {
|
||||
return d.adjacent(c)
|
||||
c, d = d, c
|
||||
}
|
||||
switch {
|
||||
case d.point != c.point+1:
|
||||
return false
|
||||
case d._case != c._case:
|
||||
case d.point != c.point+1: // code points not adjacent (shouldn't happen)
|
||||
return false
|
||||
case d._case != c._case: // different cases
|
||||
return c.upperLowerAdjacent(d);
|
||||
case c._case == CaseNone:
|
||||
return false
|
||||
case c._case == CaseMissing:
|
||||
|
|
@ -657,6 +657,70 @@ func (c *caseState) adjacent(d *caseState) bool {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Is d the same as c, but opposite in upper/lower case? this would make it
|
||||
// an element of an UpperLower sequence.
|
||||
func (c *caseState) upperLowerAdjacent(d *caseState) bool {
|
||||
// check they're a matched case pair. we know they have adjacent values
|
||||
switch {
|
||||
case c._case == CaseUpper && d._case != CaseLower:
|
||||
return false
|
||||
case c._case == CaseLower && d._case != CaseUpper:
|
||||
return false
|
||||
}
|
||||
// matched pair (at least in upper/lower). make the order Upper Lower
|
||||
if c._case == CaseLower {
|
||||
c, d = d, c
|
||||
}
|
||||
// for an Upper Lower sequence the deltas have to be in order
|
||||
// c: 0 1 0
|
||||
// d: -1 0 -1
|
||||
switch {
|
||||
case c.deltaToUpper != 0:
|
||||
return false
|
||||
case c.deltaToLower != 1:
|
||||
return false
|
||||
case c.deltaToTitle != 0:
|
||||
return false
|
||||
case d.deltaToUpper != -1:
|
||||
return false
|
||||
case d.deltaToLower != 0:
|
||||
return false
|
||||
case d.deltaToTitle != -1:
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Does this character start an UpperLower sequence?
|
||||
func (c *caseState) isUpperLower() bool {
|
||||
// for an Upper Lower sequence the deltas have to be in order
|
||||
// c: 0 1 0
|
||||
switch {
|
||||
case c.deltaToUpper != 0:
|
||||
return false
|
||||
case c.deltaToLower != 1:
|
||||
return false
|
||||
case c.deltaToTitle != 0:
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Does this character start a LowerUpper sequence?
|
||||
func (c *caseState) isLowerUpper() bool {
|
||||
// for an Upper Lower sequence the deltas have to be in order
|
||||
// c: -1 0 -1
|
||||
switch {
|
||||
case c.deltaToUpper != -1:
|
||||
return false
|
||||
case c.deltaToLower != 0:
|
||||
return false
|
||||
case c.deltaToTitle != -1:
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func getCaseState(i int) (c *caseState) {
|
||||
c = &caseState{ point: i, _case: CaseNone };
|
||||
ch := &chars[i];
|
||||
|
|
@ -729,9 +793,19 @@ func printCaseRange(lo, hi *caseState) {
|
|||
// character represents itself in all cases - no need to mention it
|
||||
return
|
||||
}
|
||||
fmt.Printf("\tCaseRange{0x%04X, 0x%04X, d{%d, %d, %d}},\n",
|
||||
lo.point, hi.point,
|
||||
lo.deltaToUpper, lo.deltaToLower, lo.deltaToTitle)
|
||||
switch {
|
||||
case hi.point > lo.point && lo.isUpperLower():
|
||||
fmt.Printf("\tCaseRange{0x%04X, 0x%04X, d{UpperLower, UpperLower, UpperLower}},\n",
|
||||
lo.point, hi.point)
|
||||
case hi.point > lo.point && lo.isLowerUpper():
|
||||
die.Log("LowerUpper sequence: should not happen: U+%04X\n", lo.point);
|
||||
fmt.Printf("\tCaseRange{0x%04X, 0x%04X, d{LowerUpper, LowerUpper, LowerUpper}},\n",
|
||||
lo.point, hi.point)
|
||||
default:
|
||||
fmt.Printf("\tCaseRange{0x%04X, 0x%04X, d{%d, %d, %d}},\n",
|
||||
lo.point, hi.point,
|
||||
lo.deltaToUpper, lo.deltaToLower, lo.deltaToTitle)
|
||||
}
|
||||
}
|
||||
|
||||
// If the cased value in the Char is 0, it means use the rune itself.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue