compress: reduce copies of new text for compression testing

The previous book was 387 KiB decompressed and 119 KiB compressed, the
new book is 567 KiB decompressed and 132 KiB compressed. Overall, this
change will reduce the release binary size by 196 KiB. The new book will
allow for slightly more extensive compression testing with a larger
text.

Command to run the benchmark tests used with benchstat:
`../bin/go test -run='^$' -count=4 -bench=. compress/bzip2 compress/flate`

When running the benchmarks locally, changed "Newton" to "Twain" and
filtered the tests with the -bench flag to include only those which were
relevant to these changes.

benchstat results below:

name                            old time/op    new time/op     delta
DecodeTwain-8                     19.6ms ± 2%     24.1ms ± 1%  +23.04%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e4-8         140µs ± 3%      139µs ± 5%     ~     (p=0.886 n=4+4)
Decode/Twain/Huffman/1e5-8        1.27ms ± 3%     1.26ms ± 1%     ~     (p=1.000 n=4+4)
Decode/Twain/Huffman/1e6-8        12.4ms ± 0%     13.2ms ± 1%   +6.42%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e4-8           133µs ± 1%      123µs ± 1%   -7.35%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e5-8          1.20ms ± 0%     1.02ms ± 3%  -15.32%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e6-8          12.0ms ± 2%     10.1ms ± 3%  -15.89%  (p=0.029 n=4+4)
Decode/Twain/Default/1e4-8         131µs ± 6%      108µs ± 5%  -17.84%  (p=0.029 n=4+4)
Decode/Twain/Default/1e5-8        1.06ms ± 2%     0.80ms ± 1%  -24.97%  (p=0.029 n=4+4)
Decode/Twain/Default/1e6-8        10.0ms ± 3%      8.0ms ± 3%  -20.06%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e4-8     128µs ± 4%      115µs ± 4%   -9.70%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e5-8    1.04ms ± 2%     0.83ms ± 4%  -20.37%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e6-8    10.4ms ± 4%      8.1ms ± 5%  -22.25%  (p=0.029 n=4+4)
Encode/Twain/Huffman/1e4-8        55.7µs ± 2%     55.6µs ± 1%     ~     (p=1.000 n=4+4)
Encode/Twain/Huffman/1e5-8         441µs ± 0%      435µs ± 2%     ~     (p=0.343 n=4+4)
Encode/Twain/Huffman/1e6-8        4.31ms ± 4%     4.30ms ± 4%     ~     (p=0.886 n=4+4)
Encode/Twain/Speed/1e4-8           193µs ± 1%      166µs ± 2%  -14.09%  (p=0.029 n=4+4)
Encode/Twain/Speed/1e5-8          1.54ms ± 1%     1.22ms ± 1%  -20.53%  (p=0.029 n=4+4)
Encode/Twain/Speed/1e6-8          15.3ms ± 1%     12.2ms ± 3%  -20.62%  (p=0.029 n=4+4)
Encode/Twain/Default/1e4-8         393µs ± 1%      390µs ± 1%     ~     (p=0.114 n=4+4)
Encode/Twain/Default/1e5-8        6.12ms ± 4%     6.02ms ± 5%     ~     (p=0.486 n=4+4)
Encode/Twain/Default/1e6-8        69.4ms ± 5%     59.0ms ± 4%  -15.07%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e4-8     423µs ± 2%      379µs ± 2%  -10.34%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e5-8    7.00ms ± 1%     7.88ms ± 3%  +12.49%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e6-8    76.6ms ± 5%     80.9ms ± 3%     ~     (p=0.114 n=4+4)

name                            old speed      new speed       delta
DecodeTwain-8                   19.8MB/s ± 2%   23.6MB/s ± 1%  +18.84%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e4-8      71.7MB/s ± 3%   72.1MB/s ± 6%     ~     (p=0.943 n=4+4)
Decode/Twain/Huffman/1e5-8      78.8MB/s ± 3%   79.5MB/s ± 1%     ~     (p=1.000 n=4+4)
Decode/Twain/Huffman/1e6-8      80.5MB/s ± 0%   75.6MB/s ± 1%   -6.03%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e4-8        75.2MB/s ± 1%   81.2MB/s ± 1%   +7.93%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e5-8        83.4MB/s ± 0%   98.6MB/s ± 3%  +18.16%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e6-8        83.6MB/s ± 2%   99.5MB/s ± 3%  +18.91%  (p=0.029 n=4+4)
Decode/Twain/Default/1e4-8      76.3MB/s ± 6%   92.8MB/s ± 4%  +21.62%  (p=0.029 n=4+4)
Decode/Twain/Default/1e5-8      94.4MB/s ± 3%  125.7MB/s ± 1%  +33.24%  (p=0.029 n=4+4)
Decode/Twain/Default/1e6-8       100MB/s ± 3%    125MB/s ± 3%  +25.12%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e4-8  78.4MB/s ± 4%   86.8MB/s ± 4%  +10.73%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e5-8  95.7MB/s ± 2%  120.3MB/s ± 4%  +25.65%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e6-8  96.4MB/s ± 4%  124.0MB/s ± 5%  +28.64%  (p=0.029 n=4+4)
Encode/Twain/Huffman/1e4-8       179MB/s ± 2%    180MB/s ± 1%     ~     (p=1.000 n=4+4)
Encode/Twain/Huffman/1e5-8       227MB/s ± 0%    230MB/s ± 2%     ~     (p=0.343 n=4+4)
Encode/Twain/Huffman/1e6-8       232MB/s ± 4%    233MB/s ± 4%     ~     (p=0.886 n=4+4)
Encode/Twain/Speed/1e4-8        51.8MB/s ± 1%   60.4MB/s ± 2%  +16.43%  (p=0.029 n=4+4)
Encode/Twain/Speed/1e5-8        65.1MB/s ± 1%   81.9MB/s ± 1%  +25.83%  (p=0.029 n=4+4)
Encode/Twain/Speed/1e6-8        65.2MB/s ± 1%   82.2MB/s ± 3%  +26.00%  (p=0.029 n=4+4)
Encode/Twain/Default/1e4-8      25.4MB/s ± 1%   25.6MB/s ± 1%     ~     (p=0.114 n=4+4)
Encode/Twain/Default/1e5-8      16.4MB/s ± 4%   16.6MB/s ± 5%     ~     (p=0.486 n=4+4)
Encode/Twain/Default/1e6-8      14.4MB/s ± 6%   17.0MB/s ± 4%  +17.67%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e4-8  23.6MB/s ± 2%   26.4MB/s ± 2%  +11.54%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e5-8  14.3MB/s ± 1%   12.7MB/s ± 3%  -11.08%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e6-8  13.1MB/s ± 4%   12.4MB/s ± 3%     ~     (p=0.114 n=4+4)

name                            old alloc/op   new alloc/op    delta
DecodeTwain-8                     3.63MB ± 0%     3.63MB ± 0%   +0.15%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e4-8        42.0kB ± 0%     41.3kB ± 0%   -1.62%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e5-8        43.5kB ± 0%     45.1kB ± 0%   +3.74%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e6-8        71.7kB ± 0%     80.0kB ± 0%  +11.55%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e4-8          41.2kB ± 0%     41.3kB ± 0%     ~     (p=0.286 n=4+4)
Decode/Twain/Speed/1e5-8          45.1kB ± 0%     43.9kB ± 0%   -2.80%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e6-8          72.8kB ± 0%     81.3kB ± 0%  +11.72%  (p=0.029 n=4+4)
Decode/Twain/Default/1e4-8        41.2kB ± 0%     41.2kB ± 0%   -0.22%  (p=0.029 n=4+4)
Decode/Twain/Default/1e5-8        44.4kB ± 0%     43.0kB ± 0%   -3.02%  (p=0.029 n=4+4)
Decode/Twain/Default/1e6-8        71.0kB ± 0%     61.8kB ± 0%  -13.00%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e4-8    41.3kB ± 0%     41.2kB ± 0%   -0.29%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e5-8    43.3kB ± 0%     43.0kB ± 0%   -0.72%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e6-8    69.1kB ± 0%     63.7kB ± 0%   -7.90%  (p=0.029 n=4+4)

name                            old allocs/op  new allocs/op   delta
DecodeTwain-8                       51.0 ± 0%       51.2 ± 1%     ~     (p=1.000 n=4+4)
Decode/Twain/Huffman/1e4-8          15.0 ± 0%       14.0 ± 0%   -6.67%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e5-8          20.0 ± 0%       23.0 ± 0%  +15.00%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e6-8           134 ± 0%        161 ± 0%  +20.15%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e4-8            17.0 ± 0%       18.0 ± 0%   +5.88%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e5-8            30.0 ± 0%       31.0 ± 0%   +3.33%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e6-8             193 ± 0%        228 ± 0%  +18.13%  (p=0.029 n=4+4)
Decode/Twain/Default/1e4-8          17.0 ± 0%       15.0 ± 0%  -11.76%  (p=0.029 n=4+4)
Decode/Twain/Default/1e5-8          28.0 ± 0%       32.0 ± 0%  +14.29%  (p=0.029 n=4+4)
Decode/Twain/Default/1e6-8           199 ± 0%        158 ± 0%  -20.60%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e4-8      17.0 ± 0%       15.0 ± 0%  -11.76%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e5-8      28.0 ± 0%       32.0 ± 0%  +14.29%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e6-8       196 ± 0%        150 ± 0%  -23.47%  (p=0.029 n=4+4)

Updates #27151

Change-Id: I6c439694ed16a33bb4c63fbfb8570c7de46b4f2d
Reviewed-on: https://go-review.googlesource.com/135495
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
This commit is contained in:
Katie Hockman 2018-09-14 17:07:56 -04:00
parent 5b3aafe2b5
commit fdceb2a11b
10 changed files with 9307 additions and 16949 deletions

View file

@ -177,6 +177,8 @@ go src=..
strconv strconv
testdata testdata
+ +
testdata
+
text text
template template
testdata testdata

View file

@ -214,7 +214,7 @@ func TestZeroRead(t *testing.T) {
var ( var (
digits = mustLoadFile("testdata/e.txt.bz2") digits = mustLoadFile("testdata/e.txt.bz2")
twain = mustLoadFile("testdata/Mark.Twain-Tom.Sawyer.txt.bz2") newton = mustLoadFile("testdata/Isaac.Newton-Opticks.txt.bz2")
random = mustLoadFile("testdata/random.data.bz2") random = mustLoadFile("testdata/random.data.bz2")
) )
@ -236,5 +236,5 @@ func benchmarkDecode(b *testing.B, compressed []byte) {
} }
func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) } func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) }
func BenchmarkDecodeTwain(b *testing.B) { benchmarkDecode(b, twain) } func BenchmarkDecodeNewton(b *testing.B) { benchmarkDecode(b, newton) }
func BenchmarkDecodeRand(b *testing.B) { benchmarkDecode(b, random) } func BenchmarkDecodeRand(b *testing.B) { benchmarkDecode(b, random) }

Binary file not shown.

View file

@ -371,9 +371,9 @@ var deflateInflateStringTests = []deflateInflateStringTest{
[...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790, 43683}, [...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790, 43683},
}, },
{ {
"../testdata/Mark.Twain-Tom.Sawyer.txt", "../../testdata/Isaac.Newton-Opticks.txt",
"Mark.Twain-Tom.Sawyer", "Isaac.Newton-Opticks",
[...]int{407330, 187598, 180361, 172974, 169160, 163476, 160936, 160506, 160295, 160295, 233460}, [...]int{567248, 218338, 198211, 193152, 181100, 175427, 175427, 173597, 173422, 173422, 325240},
}, },
} }
@ -654,7 +654,7 @@ func (w *failWriter) Write(b []byte) (int, error) {
func TestWriterPersistentError(t *testing.T) { func TestWriterPersistentError(t *testing.T) {
t.Parallel() t.Parallel()
d, err := ioutil.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt") d, err := ioutil.ReadFile("../../testdata/Isaac.Newton-Opticks.txt")
if err != nil { if err != nil {
t.Fatalf("ReadFile: %v", err) t.Fatalf("ReadFile: %v", err)
} }

View file

@ -27,8 +27,8 @@ var suites = []struct{ name, file string }{
// does not repeat, but there are only 10 possible digits, so it should be // does not repeat, but there are only 10 possible digits, so it should be
// reasonably compressible. // reasonably compressible.
{"Digits", "../testdata/e.txt"}, {"Digits", "../testdata/e.txt"},
// Twain is Mark Twain's classic English novel. // Newton is Isaac Newtons's educational text on Opticks.
{"Twain", "../testdata/Mark.Twain-Tom.Sawyer.txt"}, {"Newton", "../../testdata/Isaac.Newton-Opticks.txt"},
} }
func BenchmarkDecode(b *testing.B) { func BenchmarkDecode(b *testing.B) {

File diff suppressed because it is too large Load diff

View file

@ -17,9 +17,9 @@ import (
) )
const ( const (
twain = "testdata/Mark.Twain-Tom.Sawyer.txt" newton = "../testdata/Isaac.Newton-Opticks.txt"
twainLen = 387851 newtonLen = 567198
twainSHA256 = "461eb7cb2d57d293fc680c836464c9125e4382be3596f7d415093ae9db8fcb0e" newtonSHA256 = "d4a9ac22462b35e7821a4f2706c211093da678620a8f9997989ee7cf8d507bbd"
) )
func TestSendfile(t *testing.T) { func TestSendfile(t *testing.T) {
@ -43,7 +43,7 @@ func TestSendfile(t *testing.T) {
defer close(errc) defer close(errc)
defer conn.Close() defer conn.Close()
f, err := os.Open(twain) f, err := os.Open(newton)
if err != nil { if err != nil {
errc <- err errc <- err
return return
@ -58,8 +58,8 @@ func TestSendfile(t *testing.T) {
return return
} }
if sbytes != twainLen { if sbytes != newtonLen {
errc <- fmt.Errorf("sent %d bytes; expected %d", sbytes, twainLen) errc <- fmt.Errorf("sent %d bytes; expected %d", sbytes, newtonLen)
return return
} }
}() }()
@ -79,11 +79,11 @@ func TestSendfile(t *testing.T) {
t.Error(err) t.Error(err)
} }
if rbytes != twainLen { if rbytes != newtonLen {
t.Errorf("received %d bytes; expected %d", rbytes, twainLen) t.Errorf("received %d bytes; expected %d", rbytes, newtonLen)
} }
if res := hex.EncodeToString(h.Sum(nil)); res != twainSHA256 { if res := hex.EncodeToString(h.Sum(nil)); res != newtonSHA256 {
t.Error("retrieved data hash did not match") t.Error("retrieved data hash did not match")
} }
@ -113,7 +113,7 @@ func TestSendfileParts(t *testing.T) {
defer close(errc) defer close(errc)
defer conn.Close() defer conn.Close()
f, err := os.Open(twain) f, err := os.Open(newton)
if err != nil { if err != nil {
errc <- err errc <- err
return return
@ -174,7 +174,7 @@ func TestSendfileSeeked(t *testing.T) {
defer close(errc) defer close(errc)
defer conn.Close() defer conn.Close()
f, err := os.Open(twain) f, err := os.Open(newton)
if err != nil { if err != nil {
errc <- err errc <- err
return return

File diff suppressed because it is too large Load diff

9286
src/testdata/Isaac.Newton-Opticks.txt vendored Normal file

File diff suppressed because it is too large Load diff