Merge branch 'master' of git@github.com:msgpack/msgpack

2026-02-07 02:09:59 +00:00 · 2010-10-17 14:37:35 +09:00 · 2010-10-17 14:37:35 +09:00 · cf254ea240
commit cf254ea240
parent 2d05110239 0017f4fce8
9 changed files with 156 additions and 106 deletions
--- a/perl/Changes
+++ b/perl/Changes
@ -1,4 +1,13 @@

+0.33
+
+    - fix tests (gfx)
+    - optimize unpacking routines in Data::MessagePack::PP (gfx)
+
+0.32
+
+    - add tests to detect Alpha problems reported via CPAN testers (gfx)
+
 0.31

    - update Module::Install::XSUtil for ccache support (gfx)
--- a/perl/lib/Data/MessagePack.pm
+++ b/perl/lib/Data/MessagePack.pm
@ -3,7 +3,7 @@ use strict;
 use warnings;
 use 5.008001;

-our $VERSION = '0.31';
+our $VERSION = '0.33';
 our $PreferInteger = 0;

 sub true () {
--- a/perl/lib/Data/MessagePack/PP.pm
+++ b/perl/lib/Data/MessagePack/PP.pm
@ -17,6 +17,8 @@ BEGIN {
    my $unpack_uint64_slow;

    if(!eval { pack 'Q', 1 }) { # don't have quad types
+        # emulates quad types with Math::BigInt.
+        # very slow but works well.
        $unpack_int64_slow = sub {
            require Math::BigInt;
            my $high = unpack_uint32( $_[0], $_[1] );
@ -35,7 +37,7 @@ BEGIN {
        };
        $unpack_uint64_slow = sub {
            require Math::BigInt;
-            my $high = Math::BigInt->new( unpack_uint32(  $_[0], $_[1]) );
+            my $high = Math::BigInt->new( unpack_uint32( $_[0], $_[1]) );
            my $low  = Math::BigInt->new( unpack_uint32( $_[0], $_[1] + 4) );
            return +($high << 32 | $low)->bstr;
        };
@ -104,7 +106,8 @@ BEGIN {
            *unpack_uint64 = $unpack_uint64_slow || sub { unpack 'Q', substr( $_[0], $_[1], 8 ); };
        }
    }
-    else {
+    else { # 5.10.0 or later
+        # pack_int64/uint64 are used only when the perl support quad types
        *pack_uint64   = sub { return pack 'CQ>', 0xcf, $_[0]; };
        *pack_int64    = sub { return pack 'Cq>', 0xd3, $_[0]; };
        *pack_double   = sub { return pack 'Cd>', 0xcb, $_[0]; };
@ -161,7 +164,7 @@ sub _pack {

    if ( ref($value) eq 'ARRAY' ) {
        my $num = @$value;
-        my $header = 
+        my $header =
              $num < 16          ? CORE::pack( 'C',  0x90 + $num )
            : $num < 2 ** 16 - 1 ? CORE::pack( 'Cn', 0xdc,  $num )
            : $num < 2 ** 32 - 1 ? CORE::pack( 'CN', 0xdd,  $num )
@ -172,7 +175,7 @@ sub _pack {

    elsif ( ref($value) eq 'HASH' ) {
        my $num = keys %$value;
-        my $header = 
+        my $header =
              $num < 16          ? CORE::pack( 'C',  0x80 + $num )
            : $num < 2 ** 16 - 1 ? CORE::pack( 'Cn', 0xde,  $num )
            : $num < 2 ** 32 - 1 ? CORE::pack( 'CN', 0xdf,  $num )
@ -211,25 +214,28 @@ sub _pack {

        if ( $Data::MessagePack::PreferInteger ) {
            if ( $value =~ /^-?[0-9]+$/ ) { # ok?
-                my $value2 = 0 + $value;
-                if ( $value > 0xFFFFFFFF or $value < '-'.0x80000000 or # <- needless but for XS compat
-                    0 + $value != B::svref_2object( \$value2 )->int_value
-                ) {
-                    local $Data::MessagePack::PreferInteger; # avoid for PV => NV
-                    return _pack( "$value" );
+                # checks whether $value is in (u)int32
+                my $ivalue = 0 + $value;
+                if (!(
+                       $ivalue > 0xFFFFFFFF
+                    or $ivalue < '-'.0x80000000 # for XS compat
+                    or $ivalue != B::svref_2object(\$ivalue)->int_value
+                )) {
+                    return _pack( $ivalue );
                }
-                return _pack( $value + 0 );
+                # fallthrough
            }
+            # fallthrough
        }

        utf8::encode( $value ) if utf8::is_utf8( $value );

        my $num = length $value;
-        my $header = 
+        my $header =
              $num < 32          ? CORE::pack( 'C',  0xa0 + $num )
            : $num < 2 ** 16 - 1 ? CORE::pack( 'Cn', 0xda, $num )
            : $num < 2 ** 32 - 1 ? CORE::pack( 'CN', 0xdb, $num )
-            : _unexpected_number($num)
+            : _unexpected('number %d', $num)
        ;

        return $header . $value;
@ -260,14 +266,72 @@ sub unpack :method {
    return $data;
 }

+my $T_RAW             = 0x01;
+my $T_ARRAY           = 0x02;
+my $T_MAP             = 0x04;
+my $T_DIRECT          = 0x08; # direct mapping (e.g. 0xc0 <-> nil)
+
+my @typemap = ( (0x00) x 256 );
+
+$typemap[$_] |= $T_ARRAY for
+    0x90 .. 0x9f, # fix array
+    0xdc,         # array16
+    0xdd,         # array32
+;
+$typemap[$_] |= $T_MAP for
+    0x80 .. 0x8f, # fix map
+    0xde,         # map16
+    0xdf,         # map32
+;
+$typemap[$_] |= $T_RAW for
+    0xa0 .. 0xbf, # fix raw
+    0xda,         # raw16
+    0xdb,         # raw32
+;
+
+my @byte2value;
+foreach my $pair(
+    [0xc3, true],
+    [0xc2, false],
+    [0xc0, undef],
+
+    (map { [ $_, $_ ] }         0x00 .. 0x7f), # positive fixnum
+    (map { [ $_, $_ - 0x100 ] } 0xe0 .. 0xff), # negative fixnum
+) {
+    $typemap[    $pair->[0] ] |= $T_DIRECT;
+    $byte2value[ $pair->[0] ]  = $pair->[1];
+}

 sub _unpack {
    my ( $value ) = @_;
-    my $byte = CORE::unpack( 'C', substr( $value, $p++, 1 ) ); # get header
+    # get a header byte
+    my $byte = unpack "x$p C", $value; # "x$p" is faster than substr()
+    $p++;

-    die "invalid data" unless defined $byte;
+    Carp::croak("invalid data") unless defined $byte;

-    if ( ( $byte >= 0x90 and $byte <= 0x9f ) or $byte == 0xdc or $byte == 0xdd ) {
+    # +/- fixnum, nil, true, false
+    return $byte2value[$byte] if $typemap[$byte] & $T_DIRECT;
+
+    if ( $typemap[$byte] & $T_RAW ) {
+        my $num;
+        if ( $byte == 0xda ) {
+            $num = CORE::unpack 'n', substr( $value, $p, 2 );
+            $p += 2 + $num;
+        }
+        elsif ( $byte == 0xdb ) {
+            $num = CORE::unpack 'N', substr( $value, $p, 4 );
+            $p += 4 + $num;
+        }
+        else { # fix raw
+            $num = $byte & ~0xa0;
+            $p += $num;
+        }
+        my $s = substr( $value, $p - $num, $num );
+        utf8::decode($s) if $_utf8;
+        return $s;
+    }
+    elsif ( $typemap[$byte] & $T_ARRAY ) {
        my $num;
        if ( $byte == 0xdc ) { # array 16
            $num = CORE::unpack 'n', substr( $value, $p, 2 );
@ -281,11 +345,10 @@ sub _unpack {
            $num = $byte & ~0x90;
        }
        my @array;
-        push @array, _unpack( $value ) while $num-- > 0;
+        push @array, _unpack( $value ) while --$num >= 0;
        return \@array;
    }
-
-    elsif ( ( $byte >= 0x80 and $byte <= 0x8f ) or $byte == 0xde or $byte == 0xdf ) {
+    elsif ( $typemap[$byte] & $T_MAP ) {
        my $num;
        if ( $byte == 0xde ) { # map 16
            $num = CORE::unpack 'n', substr( $value, $p, 2 );
@ -299,7 +362,7 @@ sub _unpack {
            $num = $byte & ~0x80;
        }
        my %map;
-        for ( 0 .. $num - 1 ) {
+        while ( --$num >= 0 ) {
            no warnings; # for undef key case
            my $key = _unpack( $value );
            my $val = _unpack( $value );
@ -308,11 +371,8 @@ sub _unpack {
        return \%map;
    }

-    elsif ( $byte >= 0x00 and $byte <= 0x7f ) { # positive fixnum
-        return $byte;
-    }
    elsif ( $byte == 0xcc ) { # uint8
-        CORE::unpack( 'C', substr( $value, $p++, 1 ) );
+        return CORE::unpack( 'C', substr( $value, $p++, 1 ) );
    }
    elsif ( $byte == 0xcd ) { # uint16
        $p += 2;
@ -341,53 +401,17 @@ sub _unpack {
    elsif ( $byte == 0xd0 ) { # int8
        return CORE::unpack 'c',  substr( $value, $p++, 1 ); # c / C
    }
-    elsif ( $byte >= 0xe0 and $byte <= 0xff ) { # negative fixnum
-        return $byte - 256;
-    }
-
-    elsif ( ( $byte >= 0xa0 and $byte <= 0xbf ) or $byte == 0xda or $byte == 0xdb ) { # raw
-        my $num;
-        if ( $byte == 0xda ) {
-            $num = CORE::unpack 'n', substr( $value, $p, 2 );
-            $p += 2 + $num;
-        }
-        elsif ( $byte == 0xdb ) {
-            $num = CORE::unpack 'N', substr( $value, $p, 4 );
-            $p += 4 + $num;
-        }
-        else { # fix raw
-            $num = $byte & ~0xa0;
-            $p += $num;
-        }
-        my $s = substr( $value, $p - $num, $num );
-        utf8::decode($s) if $_utf8;
-        return $s;
-    }
-
-    elsif ( $byte == 0xc0 ) { # nil
-        return undef;
-    }
-    elsif ( $byte == 0xc2 ) { # boolean
-        return false;
-    }
-    elsif ( $byte == 0xc3 ) { # boolean
-        return true;
-    }
-
    elsif ( $byte == 0xcb ) { # double
        $p += 8;
        return unpack_double( $value, $p - 8 );
    }
-
    elsif ( $byte == 0xca ) { # float
        $p += 4;
        return unpack_float( $value, $p - 4 );
    }
-
    else {
        _unexpected("byte 0x%02x", $byte);
    }
-
 }


@ -450,7 +474,28 @@ sub _count {
    my ( $self, $value ) = @_;
    my $byte = unpack( 'C', substr( $value, $p++, 1 ) ); # get header

-    if ( ( $byte >= 0x90 and $byte <= 0x9f ) or $byte == 0xdc or $byte == 0xdd ) {
+    Carp::croak('invalid data') unless defined $byte;
+
+    # +/- fixnum, nil, true, false
+    return 1 if $typemap[$byte] & $T_DIRECT;
+
+    if ( $typemap[$byte] & $T_RAW ) {
+        my $num;
+        if ( $byte == 0xda ) {
+            $num = unpack 'n', substr( $value, $p, 2 );
+            $p += 2;
+        }
+        elsif ( $byte == 0xdb ) {
+            $num = unpack 'N', substr( $value, $p, 4 );
+            $p += 4;
+        }
+        else { # fix raw
+            $num = $byte & ~0xa0;
+        }
+        $p += $num;
+        return 1;
+    }
+    elsif ( $typemap[$byte] & $T_ARRAY ) {
        my $num;
        if ( $byte == 0xdc ) { # array 16
            $num = unpack 'n', substr( $value, $p, 2 );
@ -470,8 +515,7 @@ sub _count {

        return 1;
    }
-
-    elsif ( ( $byte >= 0x80 and $byte <= 0x8f ) or $byte == 0xde or $byte == 0xdf ) {
+    elsif ( $typemap[$byte] & $T_MAP ) {
        my $num;
        if ( $byte == 0xde ) { # map 16
            $num = unpack 'n', substr( $value, $p, 2 );
@ -492,20 +536,12 @@ sub _count {
        return 1;
    }

-    elsif ( $byte == 0xc0 or $byte == 0xc2 or $byte == 0xc3 ) { # nil, false, true
-        return 1;
-    }
-
-    elsif ( $byte >= 0x00 and $byte <= 0x7f ) { # positive fixnum
-        return 1;
-    }
-
    elsif ( $byte >= 0xcc and $byte <= 0xcf ) { # uint
        $p += $byte == 0xcc ? 1
            : $byte == 0xcd ? 2
            : $byte == 0xce ? 4
            : $byte == 0xcf ? 8
-            : _unexpected("byte 0x%02x", $byte);
+            : Data::MessagePack::PP::_unexpected("byte 0x%02x", $byte);
        return 1;
    }

@ -514,38 +550,15 @@ sub _count {
            : $byte == 0xd1 ? 2
            : $byte == 0xd2 ? 4
            : $byte == 0xd3 ? 8
-            : _unexpected("byte 0x%02x", $byte);
+            : Data::MessagePack::PP::_unexpected("byte 0x%02x", $byte);
        return 1;
    }
-
-    elsif ( $byte >= 0xe0 and $byte <= 0xff ) { # negative fixnum
-        return 1;
-    }
-
-    elsif ( $byte >= 0xca and $byte <= 0xcb ) { # float, double
+    elsif ( $byte == 0xca or $byte == 0xcb ) { # float, double
        $p += $byte == 0xca ? 4 : 8;
        return 1;
    }
-
-    elsif ( ( $byte >= 0xa0 and $byte <= 0xbf ) or $byte == 0xda or $byte == 0xdb ) {
-        my $num;
-        if ( $byte == 0xda ) {
-            $num = unpack 'n', substr( $value, $p, 2 );
-            $p += 2;
-        }
-        elsif ( $byte == 0xdb ) {
-            $num = unpack 'N', substr( $value, $p, 4 );
-            $p += 4;
-        }
-        else { # fix raw
-            $num = $byte & ~0xa0;
-        }
-        $p += $num;
-        return 1;
-    }
-
    else {
-        _unexpected("byte 0x%02x", $byte);
+        Data::MessagePack::PP::_unexpected("byte 0x%02x", $byte);
    }

    return 0;
@ -596,6 +609,6 @@ makamaka
 =head1 COPYRIGHT AND LICENSE

 This library is free software; you can redistribute it and/or modify
-it under the same terms as Perl itself. 
+it under the same terms as Perl itself.

 =cut
--- a/perl/t/01_pack.t
+++ b/perl/t/01_pack.t
@ -55,6 +55,8 @@ my @dat = (
    [0, -128, -1, 0, -32768, -1, 0, -2147483648, -1], '99 00 d0 80 ff 00 d1 80 00 ff 00 d2 80 00 00 00 ff',
    2147483648, 'ce 80 00 00 00',
    -2147483648, 'd2 80 00 00 00',
+    'a' x 0x0100, 'da 01 00' . (' 61' x 0x0100),
+    [(undef) x 0x0100], 'dc 01 00' . (' c0' x 0x0100),
 );
 plan tests => 1*(scalar(@dat)/2);

--- a/perl/t/02_unpack.t
+++ b/perl/t/02_unpack.t
@ -15,7 +15,7 @@ sub pis ($$) {
        or diag( explain(unpackit($_[0])) );
 }

-my @dat = do 't/data.pl';
+my @dat = do 't/data.pl' or die $@;

 plan tests => 1*(scalar(@dat)/2);

--- a/perl/t/06_stream_unpack2.t
+++ b/perl/t/06_stream_unpack2.t
@ -1,15 +1,16 @@
 use strict;
 use warnings;
 use Data::MessagePack;
-use Test::More tests => 9;
+use Test::More tests => 61;
 use t::Util;

 my $input = [
    false,true,null,0,0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,
    127,127,255,65535,4294967295,-32,-32,-128,-32768,
-    -2147483648,0.0,-0.0,1.0,-1.0,"a","a","a","","","",
+    -2147483648,0.0,-0.0, 3.0,-3.0,"a","a",("a" x 70000),"","","",
    [0],[0],[0],[],[],[],{},{},{},
-    {"a" => 97},{"a" => 97},{"a" => 97},[[]],[["a"]]
+    {"a" => 97},{"abc" => 97},{"xyz" => 97},[[]], [["foo"], ["bar"]],
+    [["foo", true, false, null, 42]],
 ];

 my $packed = Data::MessagePack->pack($input);
@ -35,9 +36,24 @@ is_deeply(Data::MessagePack->unpack($packed), $input);
        $offset = $up->execute($packed, $offset);
        ok $up->is_finished, 'finished';
        my $data = $up->data;
-        is_deeply $data, $input;
+        is_deeply $data, $input, "block $i, offset $offset";
        $up->reset();
    }
 }

+{
+    my $s    = '';
+    foreach my $datum(reverse @{$input}) {
+        $s .= Data::MessagePack->pack($datum);
+    }
+
+    my $up = Data::MessagePack::Unpacker->new();
+
+    my $offset = 0;
+    for my $datum(reverse @{$input}) {
+        $offset = $up->execute($s, $offset);
+        is_deeply $up->data, $datum, "offset $offset/" . length($s);
+        $up->reset();
+    }
+}

--- a/perl/t/12_stream_unpack4.t
+++ b/perl/t/12_stream_unpack4.t
@ -5,11 +5,14 @@ use Test::More;
 use t::Util;

 my @input = (
-    +[[]],
+    [[]],
    [[],[]],
    [{"a" => 97},{"a" => 97}],
    [{"a" => 97},{"a" => 97},{"a" => 97}],
    [ map { +{ "foo $_" => "bar $_" } } 'aa' .. 'zz' ],
+    [42, null],
+    [42, true],
+    [42, false],
 );

 plan tests => @input * 2;
--- a/perl/t/data.pl
+++ b/perl/t/data.pl
@ -22,10 +22,14 @@ no warnings; # i need this, i need this.
    '82 c2 81 c0 c0 c3 81 c0 80', {false,{undef,undef}, true,{undef,{}}},
    '96 de 00 00 de 00 01 c0 c2 de 00 02 c0 c2 c3 c2 df 00 00 00 00 df 00 00 00 01 c0 c2 df 00 00 00 02 c0 c2 c3 c2',
        [{}, {undef,false}, {true,false, undef,false}, {}, {undef,false}, {true,false, undef,false}],
+    'dc 01 00'       . (' c0' x 0x0100),     [(undef) x 0x0100],
    'ce 00 ff ff ff' => ''.0xFFFFFF,
    'aa 34 32 39 34 39 36 37 32 39 35' => ''.0xFFFFFFFF,
    'ab 36 38 37 31 39 34 37 36 37 33 35' => ''.0xFFFFFFFFF,

+    'ca 00 00 00 00' => 0.0, # float
+    'ca 40 2c cc cd' => unpack('f', pack 'f', 2.7),
+
    'd2 80 00 00 01' => '-2147483647', # int32_t
    'ce 80 00 00 01' => '2147483649',  # uint32_t

--- a/perl/xs-src/unpack.c
+++ b/perl/xs-src/unpack.c
@ -65,6 +65,9 @@ load_bool(pTHX_ const char* const name) {
    LEAVE;
    assert(sv);
    assert(sv_isobject(sv));
+    if(!SvOK(sv)) {
+        croak("Oops: Failed to load %"SVf, name);
+    }
    return sv;
 }