2021-06-05 16:06:55 -04:00
/*
* Copyright ( c ) 2021 , Hunter Salyer < thefalsehonesty @ gmail . com >
2023-06-24 03:21:32 -05:00
* Copyright ( c ) 2022 - 2023 , Gregory Bertilson < Zaggy1024 @ gmail . com >
2021-06-05 16:06:55 -04:00
*
* SPDX - License - Identifier : BSD - 2 - Clause
*/
2023-01-02 21:07:18 +01:00
# include <AK/Debug.h>
2021-06-05 16:06:55 -04:00
# include <AK/Function.h>
# include <AK/Optional.h>
2022-11-11 17:14:27 -06:00
# include <AK/Time.h>
2021-06-05 16:06:55 -04:00
# include <AK/Utf8View.h>
2021-11-23 11:32:25 +01:00
# include <LibCore/MappedFile.h>
2021-06-05 16:06:55 -04:00
2022-11-09 19:47:56 -06:00
# include "Reader.h"
namespace Video : : Matroska {
2021-06-05 16:06:55 -04:00
2022-11-09 23:38:50 -06:00
# define TRY_READ(expression) DECODER_TRY(DecoderErrorCategory::Corrupted, expression)
2021-06-05 16:06:55 -04:00
2023-06-24 03:21:32 -05:00
// RFC 8794 - Extensible Binary Meta Language
// https://datatracker.ietf.org/doc/html/rfc8794
2021-06-05 16:06:55 -04:00
// ID that parse_initial_data() requires as the very first element of the data (the EBML header).
constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3 ;
2023-06-24 03:21:32 -05:00
// Children that parse_master_element() skips itself instead of handing them to the consumer.
constexpr u32 EBML_CRC32_ELEMENT_ID = 0xBF ;
constexpr u32 EBML_VOID_ELEMENT_ID = 0xEC ;
// Matroska elements' IDs and types are listed at this URL:
// https://www.matroska.org/technical/elements.html
2021-06-05 16:06:55 -04:00
// ID that parse_initial_data() requires directly after the EBML header.
constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067 ;
// EBML header children read by parse_ebml_header().
constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282 ;
constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287 ;
2022-11-11 17:14:27 -06:00
// SeekHead and its children, read by parse_seek_head().
constexpr u32 SEEK_HEAD_ELEMENT_ID = 0x114D9B74 ;
constexpr u32 SEEK_ELEMENT_ID = 0x4DBB ;
constexpr u32 SEEK_ID_ELEMENT_ID = 0x53AB ;
constexpr u32 SEEK_POSITION_ELEMENT_ID = 0x53AC ;
2021-06-05 16:06:55 -04:00
// Elements of the Segment. Segment Information and Tracks are located through
// find_first_top_level_element_with_id().
constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966 ;
constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B ;
constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675 ;
// Segment Information children read by parse_information().
constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1 ;
constexpr u32 MUXING_APP_ID = 0x4D80 ;
constexpr u32 WRITING_APP_ID = 0x5741 ;
2022-10-29 17:01:01 -05:00
constexpr u32 DURATION_ID = 0x4489 ;
2022-10-10 05:04:28 -05:00
// Tracks
2021-06-05 16:06:55 -04:00
constexpr u32 TRACK_ENTRY_ID = 0xAE ;
constexpr u32 TRACK_NUMBER_ID = 0xD7 ;
constexpr u32 TRACK_UID_ID = 0x73C5 ;
constexpr u32 TRACK_TYPE_ID = 0x83 ;
constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C ;
constexpr u32 TRACK_CODEC_ID = 0x86 ;
2022-11-12 13:09:07 -06:00
constexpr u32 TRACK_TIMESTAMP_SCALE_ID = 0x23314F ;
constexpr u32 TRACK_OFFSET_ID = 0x537F ;
2021-06-05 16:06:55 -04:00
constexpr u32 TRACK_VIDEO_ID = 0xE0 ;
constexpr u32 TRACK_AUDIO_ID = 0xE1 ;
2022-10-10 05:04:28 -05:00
// Video
2021-06-05 16:06:55 -04:00
constexpr u32 PIXEL_WIDTH_ID = 0xB0 ;
constexpr u32 PIXEL_HEIGHT_ID = 0xBA ;
2022-10-10 05:04:28 -05:00
// Children of the video track's Colour element, read by parse_video_color_information().
constexpr u32 COLOR_ENTRY_ID = 0x55B0 ;
constexpr u32 PRIMARIES_ID = 0x55BB ;
constexpr u32 TRANSFER_CHARACTERISTICS_ID = 0x55BA ;
constexpr u32 MATRIX_COEFFICIENTS_ID = 0x55B1 ;
constexpr u32 BITS_PER_CHANNEL_ID = 0x55B2 ;
// Audio
2021-06-05 16:06:55 -04:00
constexpr u32 CHANNELS_ID = 0x9F ;
constexpr u32 BIT_DEPTH_ID = 0x6264 ;
2022-10-10 05:04:28 -05:00
// Clusters
2021-06-05 16:06:55 -04:00
constexpr u32 SIMPLE_BLOCK_ID = 0xA3 ;
constexpr u32 TIMESTAMP_ID = 0xE7 ;
2022-11-13 19:28:56 -06:00
// Cues
constexpr u32 CUES_ID = 0x1C53BB6B ;
constexpr u32 CUE_POINT_ID = 0xBB ;
constexpr u32 CUE_TIME_ID = 0xB3 ;
constexpr u32 CUE_TRACK_POSITIONS_ID = 0xB7 ;
constexpr u32 CUE_TRACK_ID = 0xF7 ;
constexpr u32 CUE_CLUSTER_POSITION_ID = 0xF1 ;
constexpr u32 CUE_RELATIVE_POSITION_ID = 0xF0 ;
constexpr u32 CUE_CODEC_STATE_ID = 0xEA ;
constexpr u32 CUE_REFERENCE_ID = 0xDB ;
2022-11-11 17:14:27 -06:00
// Maps the file at `path` and builds a Reader on top of the mapping.
// A mapping failure is reported with the IO error category.
DecoderErrorOr<Reader> Reader::from_file(StringView path)
{
    return from_mapped_file(DECODER_TRY(DecoderErrorCategory::IO, Core::MappedFile::map(path)));
}
2023-09-26 00:54:34 +02:00
// Builds a Reader over the bytes of an already mapped file. The mapping is
// handed to the returned Reader (stored in m_mapped_file) after the initial
// parse has succeeded.
DecoderErrorOr<Reader> Reader::from_mapped_file(NonnullOwnPtr<Core::MappedFile> mapped_file)
{
    auto file_contents = mapped_file->bytes();
    auto parsed_reader = TRY(from_data(file_contents));
    parsed_reader.m_mapped_file = make_ref_counted<Core::SharedMappedFile>(move(mapped_file));
    return parsed_reader;
}
2022-11-11 17:14:27 -06:00
// Builds a Reader over `data` and runs the initial parse (EBML header and
// Segment location). Any error from that parse is returned unchanged.
DecoderErrorOr<Reader> Reader::from_data(ReadonlyBytes data)
{
    Reader reader(data);
    auto initial_parse_result = reader.parse_initial_data();
    if (initial_parse_result.is_error())
        return initial_parse_result.release_error();
    return reader;
}
2023-06-24 03:21:32 -05:00
// Iterates over the children of a master element whose ID has already been read.
//
// The element's data size is read first, then child IDs are read one at a time until
// that many octets have been consumed. CRC-32 and Void children are skipped here; every
// other child ID is passed to `element_consumer`, which is expected to consume the rest
// of that child from `streamer`. The consumer may return IterationDecision::Break to stop
// iterating early. Errors returned by the consumer are re-wrapped with `element_name`
// prepended to their description.
//
// Returns the position of the first element that is read from this master element.
static DecoderErrorOr<size_t> parse_master_element(Streamer& streamer, [[maybe_unused]] StringView element_name, Function<DecoderErrorOr<IterationDecision>(u64)> element_consumer)
{
    auto element_data_size = TRY_READ(streamer.read_variable_size_integer());
    dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size);

    bool first_element = true;
    auto first_element_position = streamer.position();
    streamer.push_octets_read();
    while (streamer.octets_read() < element_data_size) {
        dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
        auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
        dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}", element_name, element_id);

        // Remember whether this child is the first one and clear the flag right away, so that
        // the skipped CRC-32 and Void children below also count as "a child was already seen".
        bool const is_first_child = first_element;
        first_element = false;

        if (element_id == EBML_CRC32_ELEMENT_ID) {
            // The CRC-32 Element contains a 32-bit Cyclic Redundancy Check value of all the
            // Element Data of the Parent Element as stored except for the CRC-32 Element itself.
            // When the CRC-32 Element is present, the CRC-32 Element MUST be the first ordered
            // EBML Element within its Parent Element for easier reading.
            if (!is_first_child)
                return DecoderError::corrupted("CRC32 element must be the first child"sv);

            // All Top-Level Elements of an EBML Document that are Master Elements SHOULD include a
            // CRC-32 Element as a Child Element. The CRC in use is the IEEE-CRC-32 algorithm as used
            // in the [ISO3309] standard and in Section 8.1.1.6.2 of [ITU.V42], with initial value of
            // 0xFFFFFFFF. The CRC value MUST be computed on a little-endian bytestream and MUST use
            // little-endian storage.

            // FIXME: Currently we skip the CRC-32 Element instead of checking it. It may be worth
            //        verifying the contents of the SeekHead, Segment Info, and Tracks Elements.
            //        Note that Cluster Elements tend to be quite large, so verifying their integrity
            //        will result in longer buffering times in streamed contexts, so it may not be
            //        worth the effort checking those. It would also prevent error correction in
            //        video codecs from taking effect.
            TRY_READ(streamer.read_unknown_element());
            continue;
        }
        if (element_id == EBML_VOID_ELEMENT_ID) {
            // Used to void data or to avoid unexpected behaviors when using damaged data.
            // The content is discarded. Also used to reserve space in a subelement for later use.
            TRY_READ(streamer.read_unknown_element());
            continue;
        }

        auto result = element_consumer(element_id);
        if (result.is_error())
            return DecoderError::format(result.error().category(), "{} -> {}", element_name, result.error().description());
        if (result.release_value() == IterationDecision::Break)
            break;

        dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", streamer.octets_read(), element_name);
    }
    streamer.pop_octets_read();

    return first_element_position;
}
2022-11-11 19:09:53 -06:00
// Reads the children of the EBML header element and collects the DocType and
// DocTypeVersion values. Every other child is skipped.
static DecoderErrorOr<EBMLHeader> parse_ebml_header(Streamer& streamer)
{
    EBMLHeader header;
    TRY(parse_master_element(streamer, "Header"sv, [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id == DOCTYPE_ELEMENT_ID) {
            header.doc_type = TRY_READ(streamer.read_string());
            dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", header.doc_type);
        } else if (child_id == DOCTYPE_VERSION_ELEMENT_ID) {
            header.doc_type_version = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", header.doc_type_version);
        } else {
            TRY_READ(streamer.read_unknown_element());
        }
        return IterationDecision::Continue;
    }));
    return header;
}
2022-11-11 17:14:27 -06:00
// Parses the start of the data: the EBML header, followed by the ID and size of the
// Segment element. Stores the parsed header in m_header and records where the Segment's
// contents start (m_segment_contents_position) and how long they are
// (m_segment_contents_size). Returns a Corrupted error if the first element is not an
// EBML header or the second one is not a Segment.
DecoderErrorOr<void> Reader::parse_initial_data()
{
    Streamer streamer { m_data };
    auto first_element_id = TRY_READ(streamer.read_variable_size_integer(false));
    dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id);
    if (first_element_id != EBML_MASTER_ELEMENT_ID)
        return DecoderError::corrupted("First element was not an EBML header"sv);

    m_header = TRY(parse_ebml_header(streamer));
    dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");

    auto root_element_id = TRY_READ(streamer.read_variable_size_integer(false));
    if (root_element_id != SEGMENT_ELEMENT_ID)
        return DecoderError::corrupted("Second element was not a segment element"sv);

    m_segment_contents_size = TRY_READ(streamer.read_variable_size_integer());
    m_segment_contents_position = streamer.position();
    // Gate this message on the debug flag like the other logs in this function instead of
    // printing it unconditionally.
    dbgln_if(MATROSKA_DEBUG, "Segment is at {} with size {}, available size is {}", m_segment_contents_position, m_segment_contents_size, m_data.size() - m_segment_contents_position);
    // Never trust a declared size that extends past the data we actually have.
    m_segment_contents_size = min(m_segment_contents_size, m_data.size() - m_segment_contents_position);
    return {};
}
2022-11-11 19:09:53 -06:00
2022-11-11 17:14:27 -06:00
// Parses a SeekHead element and records each of its Seek entries in `table`, keyed by the
// referenced element ID. The stored value is `base_position` plus the entry's SeekPosition.
// If an ID is already present in `table`, the later entry is ignored. Returns a Corrupted
// error if a Seek entry lacks its ID or position, or if the ID does not fit in 32 bits.
static DecoderErrorOr<void> parse_seek_head(Streamer& streamer, size_t base_position, HashMap<u32, size_t>& table)
{
    TRY(parse_master_element(streamer, "SeekHead"sv, [&](u64 seek_head_child_id) -> DecoderErrorOr<IterationDecision> {
        if (seek_head_child_id != SEEK_ELEMENT_ID) {
            dbgln_if(MATROSKA_TRACE_DEBUG, "Unknown SeekHead child element ID {:#010x}", seek_head_child_id);
            // The child's size and payload must be consumed, otherwise the next iteration
            // would interpret them as an element ID.
            TRY_READ(streamer.read_unknown_element());
            return IterationDecision::Continue;
        }

        Optional<u64> seek_id;
        Optional<u64> seek_position;
        TRY(parse_master_element(streamer, "Seek"sv, [&](u64 seek_entry_child_id) -> DecoderErrorOr<IterationDecision> {
            switch (seek_entry_child_id) {
            case SEEK_ID_ELEMENT_ID:
                seek_id = TRY_READ(streamer.read_u64());
                dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Element ID value {:#010x}", seek_id.value());
                break;
            case SEEK_POSITION_ELEMENT_ID:
                seek_position = TRY_READ(streamer.read_u64());
                dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Position value {}", seek_position.value());
                break;
            default:
                TRY_READ(streamer.read_unknown_element());
            }

            return IterationDecision::Continue;
        }));

        if (!seek_id.has_value())
            return DecoderError::corrupted("Seek entry is missing the element ID"sv);
        if (!seek_position.has_value())
            return DecoderError::corrupted("Seek entry is missing the seeking position"sv);
        // The table is keyed by u32, so larger IDs cannot be stored.
        if (seek_id.value() > NumericLimits<u32>::max())
            return DecoderError::corrupted("Seek entry's element ID is too large"sv);

        dbgln_if(MATROSKA_TRACE_DEBUG, "Seek entry found with ID {:#010x} and position {} offset from SeekHead at {}", seek_id.value(), seek_position.value(), base_position);
        // FIXME: SeekHead can reference another SeekHead, we should recursively parse all SeekHeads.

        if (table.contains(seek_id.value())) {
            dbgln_if(MATROSKA_DEBUG, "Warning: Duplicate seek entry with ID {:#010x} at position {}", seek_id.value(), seek_position.value());
            return IterationDecision::Continue;
        }

        DECODER_TRY_ALLOC(table.try_set(seek_id.release_value(), base_position + seek_position.release_value()));
        return IterationDecision::Continue;
    }));
    return {};
}
2022-11-11 17:14:27 -06:00
// Looks up the position of the first top-level element with the given ID.
//
// Positions already present in m_seek_entries are returned directly. Otherwise the
// Segment is scanned, resuming after the last element skipped by a previous scan (or from
// the start of the Segment's contents). A SeekHead found along the way replaces the
// lookup table; any other element is skipped and its position (the stream position just
// after its ID) is added to the table. Returns an empty Optional when the end of the
// Segment is reached without a match.
DecoderErrorOr<Optional<size_t>> Reader::find_first_top_level_element_with_id([[maybe_unused]] StringView element_name, u32 element_id)
{
    dbgln_if(MATROSKA_DEBUG, "====== Finding element {} with ID {:#010x} ======", element_name, element_id);

    if (auto cached_position = m_seek_entries.get(element_id); cached_position.has_value()) {
        dbgln_if(MATROSKA_TRACE_DEBUG, "Cache hit!");
        return cached_position.release_value();
    }

    Streamer streamer { m_data };
    auto const scan_start = m_last_top_level_element_position != 0
        ? m_last_top_level_element_position
        : m_segment_contents_position;
    TRY_READ(streamer.seek_to_position(scan_start));

    auto const segment_end = m_segment_contents_position + m_segment_contents_size;
    while (streamer.position() < segment_end) {
        auto current_id = TRY_READ(streamer.read_variable_size_integer(false));
        auto current_position = streamer.position();
        dbgln_if(MATROSKA_TRACE_DEBUG, "Found element ID {:#010x} with position {}.", current_id, current_position);

        if (current_id == SEEK_HEAD_ELEMENT_ID) {
            dbgln_if(MATROSKA_TRACE_DEBUG, "Found SeekHead, parsing it into the lookup table.");
            m_seek_entries.clear();
            TRY(parse_seek_head(streamer, current_position, m_seek_entries));
            m_last_top_level_element_position = 0;
            if (auto seek_head_position = m_seek_entries.get(element_id); seek_head_position.has_value()) {
                dbgln_if(MATROSKA_TRACE_DEBUG, "SeekHead hit!");
                return seek_head_position.release_value();
            }
            continue;
        }

        auto skip_result = streamer.read_unknown_element();
        if (skip_result.is_error())
            return DecoderError::format(DecoderErrorCategory::Corrupted, "While seeking to {}: {}", element_name, skip_result.release_error().string_literal());

        m_last_top_level_element_position = streamer.position();
        DECODER_TRY_ALLOC(m_seek_entries.try_set(current_id, current_position));

        if (current_id == element_id)
            return current_position;

        dbgln_if(MATROSKA_TRACE_DEBUG, "Skipped to position {}.", m_last_top_level_element_position);
    }

    return Optional<size_t> {};
}
2022-11-11 17:14:27 -06:00
// Reads the children of a Segment Information element: TimestampScale, MuxingApp,
// WritingApp and Duration are stored, everything else is skipped.
static DecoderErrorOr<SegmentInformation> parse_information(Streamer& streamer)
{
    SegmentInformation segment_information;
    TRY(parse_master_element(streamer, "Segment Information"sv, [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        switch (child_id) {
        case TIMESTAMP_SCALE_ID:
            segment_information.set_timestamp_scale(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", segment_information.timestamp_scale());
            return IterationDecision::Continue;
        case MUXING_APP_ID:
            segment_information.set_muxing_app(TRY_READ(streamer.read_string()));
            dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", segment_information.muxing_app().as_string());
            return IterationDecision::Continue;
        case WRITING_APP_ID:
            segment_information.set_writing_app(TRY_READ(streamer.read_string()));
            dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", segment_information.writing_app().as_string());
            return IterationDecision::Continue;
        case DURATION_ID:
            segment_information.set_duration_unscaled(TRY_READ(streamer.read_float()));
            dbgln_if(MATROSKA_DEBUG, "Read Duration attribute: {}", segment_information.duration_unscaled().value());
            return IterationDecision::Continue;
        default:
            TRY_READ(streamer.read_unknown_element());
            return IterationDecision::Continue;
        }
    }));
    return segment_information;
}
2022-11-11 17:14:27 -06:00
// Returns the Segment Information, parsing it on the first call and reusing the stored
// result afterwards. Returns a Corrupted error if no Segment Information element exists.
DecoderErrorOr<SegmentInformation> Reader::segment_information()
{
    if (!m_segment_information.has_value()) {
        auto information_position = TRY(find_first_top_level_element_with_id("Segment Information"sv, SEGMENT_INFORMATION_ELEMENT_ID));
        if (!information_position.has_value())
            return DecoderError::corrupted("No Segment Information element found"sv);

        Streamer streamer { m_data };
        TRY_READ(streamer.seek_to_position(information_position.release_value()));
        m_segment_information = TRY(parse_information(streamer));
    }
    return m_segment_information.value();
}
// Parses the Tracks element into m_tracks unless m_tracks already has entries.
// Returns a Corrupted error if no Tracks element exists.
DecoderErrorOr<void> Reader::ensure_tracks_are_parsed()
{
    if (m_tracks.is_empty()) {
        auto tracks_position = TRY(find_first_top_level_element_with_id("Tracks"sv, TRACK_ELEMENT_ID));
        if (!tracks_position.has_value())
            return DecoderError::corrupted("No Tracks element found"sv);

        Streamer streamer { m_data };
        TRY_READ(streamer.seek_to_position(tracks_position.release_value()));
        TRY(parse_tracks(streamer));
    }
    return {};
}
2022-11-11 19:09:53 -06:00
// Reads the children of a video track's Colour element: Primaries,
// TransferCharacteristics, MatrixCoefficients and BitsPerChannel are stored, everything
// else is skipped.
static DecoderErrorOr<TrackEntry::ColorFormat> parse_video_color_information(Streamer& streamer)
{
    TrackEntry::ColorFormat color_format {};

    TRY(parse_master_element(streamer, "Colour"sv, [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id == PRIMARIES_ID) {
            auto raw_primaries = TRY_READ(streamer.read_u64());
            color_format.color_primaries = static_cast<ColorPrimaries>(raw_primaries);
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's Primaries attribute: {}", color_primaries_to_string(color_format.color_primaries));
        } else if (child_id == TRANSFER_CHARACTERISTICS_ID) {
            auto raw_transfer = TRY_READ(streamer.read_u64());
            color_format.transfer_characteristics = static_cast<TransferCharacteristics>(raw_transfer);
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's TransferCharacteristics attribute: {}", transfer_characteristics_to_string(color_format.transfer_characteristics));
        } else if (child_id == MATRIX_COEFFICIENTS_ID) {
            auto raw_matrix = TRY_READ(streamer.read_u64());
            color_format.matrix_coefficients = static_cast<MatrixCoefficients>(raw_matrix);
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's MatrixCoefficients attribute: {}", matrix_coefficients_to_string(color_format.matrix_coefficients));
        } else if (child_id == BITS_PER_CHANNEL_ID) {
            color_format.bits_per_channel = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's BitsPerChannel attribute: {}", color_format.bits_per_channel);
        } else {
            TRY_READ(streamer.read_unknown_element());
        }
        return IterationDecision::Continue;
    }));

    return color_format;
}
2022-11-11 19:09:53 -06:00
// Reads the children of a track's Video element: PixelWidth, PixelHeight and the nested
// Colour element are stored, everything else is skipped.
static DecoderErrorOr<TrackEntry::VideoTrack> parse_video_track_information(Streamer& streamer)
{
    TrackEntry::VideoTrack video_track {};

    TRY(parse_master_element(streamer, "VideoTrack"sv, [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id == PIXEL_WIDTH_ID) {
            video_track.pixel_width = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", video_track.pixel_width);
        } else if (child_id == PIXEL_HEIGHT_ID) {
            video_track.pixel_height = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", video_track.pixel_height);
        } else if (child_id == COLOR_ENTRY_ID) {
            video_track.color_format = TRY(parse_video_color_information(streamer));
        } else {
            TRY_READ(streamer.read_unknown_element());
        }
        return IterationDecision::Continue;
    }));

    return video_track;
}
2022-11-11 19:09:53 -06:00
// Reads the children of a track's Audio element: Channels and BitDepth are stored,
// everything else is skipped.
static DecoderErrorOr<TrackEntry::AudioTrack> parse_audio_track_information(Streamer& streamer)
{
    TrackEntry::AudioTrack audio_track {};

    TRY(parse_master_element(streamer, "AudioTrack"sv, [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id == CHANNELS_ID) {
            audio_track.channels = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", audio_track.channels);
        } else if (child_id == BIT_DEPTH_ID) {
            audio_track.bit_depth = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", audio_track.bit_depth);
        } else {
            TRY_READ(streamer.read_unknown_element());
        }
        return IterationDecision::Continue;
    }));

    return audio_track;
}
2022-11-11 17:14:27 -06:00
// Reads the children of a single TrackEntry element. Number, UID, type, language,
// codec ID, timestamp scale, timestamp offset and the nested Video/Audio elements are
// stored; everything else is skipped.
static DecoderErrorOr<TrackEntry> parse_track_entry(Streamer& streamer)
{
    TrackEntry track_entry;
    TRY(parse_master_element(streamer, "Track"sv, [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        switch (child_id) {
        case TRACK_NUMBER_ID:
            track_entry.set_track_number(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_entry.track_number());
            return IterationDecision::Continue;
        case TRACK_UID_ID:
            track_entry.set_track_uid(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_entry.track_uid());
            return IterationDecision::Continue;
        case TRACK_TYPE_ID: {
            auto raw_track_type = TRY_READ(streamer.read_u64());
            track_entry.set_track_type(static_cast<TrackEntry::TrackType>(raw_track_type));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", to_underlying(track_entry.track_type()));
            return IterationDecision::Continue;
        }
        case TRACK_LANGUAGE_ID: {
            auto language = TRY_READ(streamer.read_string());
            track_entry.set_language(DECODER_TRY_ALLOC(FlyString::from_utf8(language.view())));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", track_entry.language());
            return IterationDecision::Continue;
        }
        case TRACK_CODEC_ID: {
            auto codec_id = TRY_READ(streamer.read_string());
            track_entry.set_codec_id(DECODER_TRY_ALLOC(FlyString::from_utf8(codec_id.view())));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", track_entry.codec_id());
            return IterationDecision::Continue;
        }
        case TRACK_TIMESTAMP_SCALE_ID:
            track_entry.set_timestamp_scale(TRY_READ(streamer.read_float()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackTimestampScale attribute: {}", track_entry.timestamp_scale());
            return IterationDecision::Continue;
        case TRACK_OFFSET_ID:
            track_entry.set_timestamp_offset(TRY_READ(streamer.read_variable_size_signed_integer()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackOffset attribute: {}", track_entry.timestamp_offset());
            return IterationDecision::Continue;
        case TRACK_VIDEO_ID:
            track_entry.set_video_track(TRY(parse_video_track_information(streamer)));
            return IterationDecision::Continue;
        case TRACK_AUDIO_ID:
            track_entry.set_audio_track(TRY(parse_audio_track_information(streamer)));
            return IterationDecision::Continue;
        default:
            TRY_READ(streamer.read_unknown_element());
            return IterationDecision::Continue;
        }
    }));
    return track_entry;
}
2022-11-11 17:14:27 -06:00
// Reads the children of the Tracks element and stores every TrackEntry in m_tracks,
// keyed by its track number. Children that are not TrackEntry elements are skipped.
DecoderErrorOr<void> Reader::parse_tracks(Streamer& streamer)
{
    TRY(parse_master_element(streamer, "Tracks"sv, [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id != TRACK_ENTRY_ID) {
            TRY_READ(streamer.read_unknown_element());
            return IterationDecision::Continue;
        }

        auto track_entry = TRY(parse_track_entry(streamer));
        dbgln_if(MATROSKA_DEBUG, "Parsed track {}", track_entry.track_number());
        DECODER_TRY_ALLOC(m_tracks.try_set(track_entry.track_number(), track_entry));
        return IterationDecision::Continue;
    }));
    return {};
}
2022-11-11 17:14:27 -06:00
// Invokes `callback` for every parsed track until it returns IterationDecision::Break
// or an error. The tracks are parsed first if that has not happened yet.
DecoderErrorOr<void> Reader::for_each_track(TrackEntryCallback callback)
{
    TRY(ensure_tracks_are_parsed());
    for (auto const& entry : m_tracks) {
        if (TRY(callback(entry.value)) == IterationDecision::Break)
            return {};
    }
    return {};
}
// Same as for_each_track(), but `callback` is only invoked for tracks whose type equals `type`.
DecoderErrorOr<void> Reader::for_each_track_of_type(TrackEntry::TrackType type, TrackEntryCallback callback)
{
    return for_each_track([&](TrackEntry const& track_entry) -> DecoderErrorOr<IterationDecision> {
        if (track_entry.track_type() == type)
            return callback(track_entry);
        return IterationDecision::Continue;
    });
}
// Returns the track stored under `track_number`, parsing the tracks first if needed.
// Returns an Invalid error if there is no such track.
DecoderErrorOr<TrackEntry> Reader::track_for_track_number(u64 track_number)
{
    TRY(ensure_tracks_are_parsed());

    auto found_track = m_tracks.get(track_number);
    if (found_track.has_value())
        return found_track.release_value();
    return DecoderError::format(DecoderErrorCategory::Invalid, "No track found with number {}", track_number);
}
// Returns the number of parsed tracks, parsing them first if needed.
DecoderErrorOr<size_t> Reader::track_count()
{
    auto parse_result = ensure_tracks_are_parsed();
    if (parse_result.is_error())
        return parse_result.release_error();
    return m_tracks.size();
}
// Returns how many octets of `element_id` remain after dropping its leading all-zero octets.
constexpr size_t get_element_id_size(u32 element_id)
{
    auto const leading_zero_octets = count_leading_zeroes(element_id) / 8;
    return sizeof(element_id) - leading_zero_octets;
}
2022-11-11 19:09:53 -06:00
2022-11-12 02:28:15 -06:00
// Reads the children of a Cluster until its Timestamp is found, then seeks the streamer
// back to the position reported by parse_master_element() so the cluster's children can
// be read again by the caller. The returned Cluster's timestamp is the raw Timestamp
// value multiplied by `timestamp_scale`, interpreted as nanoseconds.
static DecoderErrorOr<Cluster> parse_cluster(Streamer& streamer, u64 timestamp_scale)
{
    Optional<u64> raw_timestamp;

    auto children_start = TRY(parse_master_element(streamer, "Cluster"sv, [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id == TIMESTAMP_ID) {
            raw_timestamp = TRY_READ(streamer.read_u64());
            return IterationDecision::Break;
        }
        TRY_READ(streamer.read_unknown_element());
        return IterationDecision::Continue;
    }));

    if (!raw_timestamp.has_value())
        return DecoderError::corrupted("Cluster was missing a timestamp"sv);
    if (children_start == 0)
        return DecoderError::corrupted("Cluster had no children"sv);

    dbgln_if(MATROSKA_TRACE_DEBUG, "Seeking back to position {}", children_start);
    TRY_READ(streamer.seek_to_position(children_start));

    Cluster cluster;
    cluster.set_timestamp(Duration::from_nanoseconds(raw_timestamp.release_value() * timestamp_scale));
    return cluster;
}
2023-03-13 16:30:34 +01:00
// Parses the contents of a SimpleBlock element (everything after its element ID) into a Block.
//
// - streamer: Positioned at the SimpleBlock's size field; it is advanced past the frames that are read.
// - cluster_timestamp: The timestamp of the Cluster containing this block.
// - segment_timestamp_scale: The segment's timestamp scale, multiplied into the block's relative timestamp.
// - track: Supplies the track timestamp scale, codec delay and timestamp offset applied to the block's timestamp.
//
// Returns the parsed Block, or a Corrupted error if the stream ends early or the declared sizes are inconsistent.
static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Duration cluster_timestamp, u64 segment_timestamp_scale, TrackEntry track)
{
    Block block;

    auto content_size = TRY_READ(streamer.read_variable_size_integer());

    auto position_before_track_number = streamer.position();
    block.set_track_number(TRY_READ(streamer.read_variable_size_integer()));

    // https://www.matroska.org/technical/notes.html
    // Block Timestamps:
    //     The Block Element and SimpleBlock Element store their timestamps as signed integers,
    //     relative to the Cluster\Timestamp value of the Cluster they are stored in. To get the
    //     timestamp of a Block or SimpleBlock in nanoseconds you have to use the following formula:
    //         `( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale`
    //
    //     When a CodecDelay Element is set, its value MUST be subtracted from each Block timestamp
    //     of that track. To get the timestamp in nanoseconds of the first frame in a Block or
    //     SimpleBlock, the formula becomes:
    //         `( ( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale ) - CodecDelay`
    //
    // NOTE: The relative timestamp is converted to floating point before it is multiplied by the unsigned
    //       segment scale. Multiplying the two integers directly would convert a negative relative timestamp
    //       to a huge unsigned value.
    auto relative_timestamp = TRY_READ(streamer.read_i16());
    auto relative_nanoseconds = static_cast<double>(relative_timestamp) * static_cast<double>(segment_timestamp_scale) * track.timestamp_scale();
    Duration timestamp_offset = Duration::from_nanoseconds(static_cast<i64>(relative_nanoseconds));
    timestamp_offset -= Duration::from_nanoseconds(static_cast<i64>(track.codec_delay()));
    // This is only mentioned in the elements specification under TrackOffset.
    // https://www.matroska.org/technical/elements.html
    timestamp_offset += Duration::from_nanoseconds(static_cast<i64>(track.timestamp_offset()));
    block.set_timestamp(cluster_timestamp + timestamp_offset);

    // Flags octet: bit 7 is the keyframe flag, bit 3 the invisible flag, bits 1-2 the lacing type
    // and bit 0 the discardable flag.
    auto flags = TRY_READ(streamer.read_octet());
    block.set_only_keyframes((flags & (1u << 7u)) != 0);
    block.set_invisible((flags & (1u << 3u)) != 0);
    block.set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
    block.set_discardable((flags & 1u) != 0);

    // Everything in the block that follows the header (track number, timestamp and flags) is lacing data and frames.
    auto header_size = streamer.position() - position_before_track_number;
    if (content_size < header_size)
        return DecoderError::corrupted("SimpleBlock is too small to contain its own header"sv);
    auto total_frame_content_size = content_size - header_size;

    Vector<ReadonlyBytes> frames;

    if (block.lacing() == Block::Lacing::EBML) {
        auto octets_read_before_frame_sizes = streamer.octets_read();
        auto frame_count = TRY_READ(streamer.read_octet()) + 1;
        Vector<u64> frame_sizes;
        frame_sizes.ensure_capacity(frame_count);

        u64 frame_size_sum = 0;
        u64 previous_frame_size;
        auto first_frame_size = TRY_READ(streamer.read_variable_size_integer());
        frame_sizes.append(first_frame_size);
        frame_size_sum += first_frame_size;
        previous_frame_size = first_frame_size;

        // The sizes between the first and the last frame are stored as signed differences from the previous size.
        for (int i = 0; i < frame_count - 2; i++) {
            auto frame_size_difference = TRY_READ(streamer.read_variable_size_signed_integer());
            // Unsigned addition wraps around, so adding a negative difference yields the intended smaller size.
            u64 frame_size = previous_frame_size + frame_size_difference;
            frame_sizes.append(frame_size);
            frame_size_sum += frame_size;
            previous_frame_size = frame_size;
        }

        // The last frame's size is not stored; it is whatever remains after the lace header and the other frames.
        frame_sizes.append(total_frame_content_size - frame_size_sum - (streamer.octets_read() - octets_read_before_frame_sizes));

        for (int i = 0; i < frame_count; i++) {
            auto current_frame_size = frame_sizes.at(i);
            frames.append(TRY_READ(streamer.read_raw_octets(current_frame_size)));
        }
    } else if (block.lacing() == Block::Lacing::FixedSize) {
        auto frame_count = TRY_READ(streamer.read_octet()) + 1;
        // The frame count octet that was just read is part of the block's content but not of the frame data,
        // so it has to be excluded before the remaining size is split evenly between the frames.
        if (total_frame_content_size < 1)
            return DecoderError::corrupted("SimpleBlock is too small to contain its lacing header"sv);
        auto frame_data_size = total_frame_content_size - 1;
        if (frame_data_size % frame_count != 0)
            return DecoderError::corrupted("SimpleBlock with fixed-size lacing can't be split into equally sized frames"sv);
        auto individual_frame_size = frame_data_size / frame_count;
        for (int i = 0; i < frame_count; i++)
            frames.append(TRY_READ(streamer.read_raw_octets(individual_frame_size)));
    } else {
        // Any other lacing value is read as a single frame spanning the rest of the block.
        // NOTE(review): presumably this also covers Xiph lacing, which would need its own size parsing - confirm.
        frames.append(TRY_READ(streamer.read_raw_octets(total_frame_content_size)));
    }

    block.set_frames(move(frames));
    return block;
}
2022-11-11 17:14:27 -06:00
// Creates a SampleIterator for the given track that starts at the first Cluster found in the segment.
// Fails with a corrupted error when the segment has no Cluster, and propagates any error from looking up
// the first Cluster, the track or the segment information.
DecoderErrorOr<SampleIterator> Reader::create_sample_iterator(u64 track_number)
{
    auto first_cluster_position = TRY(find_first_top_level_element_with_id(" Cluster "sv, CLUSTER_ELEMENT_ID));
    if (!first_cluster_position.has_value())
        return DecoderError::corrupted(" No clusters are present in the segment "sv);

    ReadonlyBytes const segment_contents = m_data.slice(m_segment_contents_position, m_segment_contents_size);

    // Step back over the element ID so that the iterator reads the Cluster's ID first and knows where it is.
    // The result is made relative to the segment contents, since that is the view handed to the iterator.
    auto element_id_size = get_element_id_size(CLUSTER_ELEMENT_ID);
    auto position = first_cluster_position.value() - element_id_size - m_segment_contents_position;
    dbgln_if(MATROSKA_DEBUG, " Creating sample iterator starting at {} relative to segment at {} ", position, m_segment_contents_position);

    return SampleIterator(
        this->m_mapped_file,
        segment_contents,
        TRY(track_for_track_number(track_number)),
        TRY(segment_information()).timestamp_scale(),
        position);
}
2022-11-13 19:28:56 -06:00
// Parses a CueTrackPositions master element into a CueTrackPosition.
// The track number must be present and non-zero and the cluster position must be present,
// otherwise a corrupted error is returned. CueReference children are reported as not implemented.
static DecoderErrorOr<CueTrackPosition> parse_cue_track_position(Streamer& streamer)
{
    CueTrackPosition result;
    bool saw_cluster_position = false;

    TRY_READ(parse_master_element(streamer, " CueTrackPositions "sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
        if (element_id == CUE_TRACK_ID) {
            result.set_track_number(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, " Read CueTrackPositions track number {} ", result.track_number());
        } else if (element_id == CUE_CLUSTER_POSITION_ID) {
            result.set_cluster_position(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, " Read CueTrackPositions cluster position {} ", result.cluster_position());
            saw_cluster_position = true;
        } else if (element_id == CUE_RELATIVE_POSITION_ID) {
            result.set_block_offset(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, " Read CueTrackPositions relative position {} ", result.block_offset());
        } else if (element_id == CUE_CODEC_STATE_ID) {
            // Mandatory in spec, but not present in files? 0 means use TrackEntry's codec state.
            // FIXME: Do something with this value.
            dbgln_if(MATROSKA_DEBUG, " Found CodecState, skipping ");
            TRY_READ(streamer.read_unknown_element());
        } else if (element_id == CUE_REFERENCE_ID) {
            return DecoderError::not_implemented();
        } else {
            // Children we don't know about are skipped over.
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    }));

    if (result.track_number() == 0)
        return DecoderError::corrupted(" Track number was not present or 0 "sv);

    if (!saw_cluster_position)
        return DecoderError::corrupted(" Cluster was missing the cluster position "sv);

    return result;
}
// Parses a CuePoint master element: its CueTime, multiplied by timestamp_scale, and its CueTrackPositions,
// which are stored keyed by track number. Unknown children are skipped.
// Returns a corrupted error if the resulting timestamp is negative or no track positions were found.
static DecoderErrorOr<CuePoint> parse_cue_point(Streamer& streamer, u64 timestamp_scale)
{
    CuePoint parsed_cue_point;

    TRY(parse_master_element(streamer, " CuePoint "sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
        if (element_id == CUE_TIME_ID) {
            // On https://www.matroska.org/technical/elements.html, spec says of the CueTime element:
            // > Absolute timestamp of the seek point, expressed in Matroska Ticks -- ie in nanoseconds; see timestamp-ticks.
            // Matroska Ticks are specified in https://www.matroska.org/technical/notes.html:
            // > For such elements, the timestamp value is stored directly in nanoseconds.
            // However, my test files appear to use Segment Ticks, which uses the segment's timestamp scale, and Mozilla's nestegg parser agrees:
            // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1941
            // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L2411-L2416
            // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1383-L1392
            // Other fields that specify Matroska Ticks may also use Segment Ticks instead, who knows :^(
            auto scaled_cue_time = TRY_READ(streamer.read_u64()) * timestamp_scale;
            parsed_cue_point.set_timestamp(Duration::from_nanoseconds(static_cast<i64>(scaled_cue_time)));
            dbgln_if(MATROSKA_DEBUG, " Read CuePoint timestamp {}ms ", parsed_cue_point.timestamp().to_milliseconds());
        } else if (element_id == CUE_TRACK_POSITIONS_ID) {
            auto position = TRY_READ(parse_cue_track_position(streamer));
            DECODER_TRY_ALLOC(parsed_cue_point.track_positions().try_set(position.track_number(), position));
        } else {
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    }));

    if (parsed_cue_point.timestamp().is_negative())
        return DecoderError::corrupted(" CuePoint was missing a timestamp "sv);

    if (parsed_cue_point.track_positions().is_empty())
        return DecoderError::corrupted(" CuePoint was missing track positions "sv);

    return parsed_cue_point;
}
// Parses the Cues master element, replacing the contents of m_cues.
// Each CuePoint is appended to the cue list of every track that it holds a position for.
// A child that is not a CuePoint results in a corrupted error.
DecoderErrorOr<void> Reader::parse_cues(Streamer& streamer)
{
    m_cues.clear();

    TRY(parse_master_element(streamer, " Cues "sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
        if (element_id != CUE_POINT_ID)
            return DecoderError::format(DecoderErrorCategory::Corrupted, " Unknown Cues child ID {:#010x} ", element_id);

        auto cue_point = TRY(parse_cue_point(streamer, TRY(segment_information()).timestamp_scale()));

        // FIXME: Verify that these are already in order of timestamp. If they are not, return a corrupted error for now,
        //        but if it turns out that Matroska files with out-of-order cue points are valid, sort them instead.
        for (auto const& entry : cue_point.track_positions()) {
            auto const& track_number = entry.key;

            // Make sure the track has a list to append to before looking it up.
            if (!m_cues.contains(track_number))
                DECODER_TRY_ALLOC(m_cues.try_set(track_number, Vector<CuePoint>()));

            Vector<CuePoint>& track_cue_points = m_cues.get(track_number).release_value();
            track_cue_points.append(cue_point);
        }

        return IterationDecision::Continue;
    }));

    return {};
}
// Parses the segment's Cues element into m_cues the first time it is called; later calls return immediately.
// Returns a corrupted error if no Cues element can be found, and propagates errors from seeking and parsing.
// The parsed flag is only set after a successful parse, so a failed attempt is retried on the next call.
//
// NOTE(review): A missing Cues element is reported as an error here, which also makes has_cues_for_track()
//               fail for such files instead of returning false - confirm that this is intended.
DecoderErrorOr<void> Reader::ensure_cues_are_parsed()
{
    if (m_cues_have_been_parsed)
        return {};

    auto position = TRY(find_first_top_level_element_with_id(" Cues "sv, CUES_ID));
    if (!position.has_value())
        return DecoderError::corrupted(" No Cues element found "sv);

    Streamer streamer { m_data };
    TRY_READ(streamer.seek_to_position(position.release_value()));
    TRY(parse_cues(streamer));
    m_cues_have_been_parsed = true;
    return {};
}
2023-03-13 16:30:34 +01:00
// Moves the iterator to the last cue point of its track whose timestamp is not after the given timestamp.
// If every cue point is after the timestamp, the first cue point is used.
//
// The caller must already know that the iterator's track has cue points: the lookup result is unwrapped
// with MUST and release_value().
DecoderErrorOr<void> Reader::seek_to_cue_for_timestamp(SampleIterator& iterator, Duration const& timestamp)
{
    auto const& cue_points = MUST(cue_points_for_track(iterator.m_track.track_number())).release_value();

    // Take a guess at where in the cues the timestamp will be and correct from there.
    size_t index = 0;
    auto duration = TRY(segment_information()).duration();
    if (duration.has_value()) {
        auto timestamp_in_nanoseconds = timestamp.to_nanoseconds();
        auto duration_in_nanoseconds = duration->to_nanoseconds();
        // Only interpolate when both values are positive: a zero duration would divide by zero, and negative
        // values would be converted to huge unsigned numbers. Starting from the first cue is always valid,
        // since the search below corrects the guess in either direction.
        if (timestamp_in_nanoseconds > 0 && duration_in_nanoseconds > 0) {
            auto guessed_index = (static_cast<u64>(timestamp_in_nanoseconds) * cue_points.size()) / static_cast<u64>(duration_in_nanoseconds);
            index = clamp(guessed_index, 0, cue_points.size() - 1);
        }
    }

    CuePoint const* prev_cue_point = &cue_points[index];
    dbgln_if(MATROSKA_DEBUG, " Finding Matroska cue points for timestamp {}ms starting from cue at {}ms ", timestamp.to_milliseconds(), prev_cue_point->timestamp().to_milliseconds());

    if (prev_cue_point->timestamp() == timestamp) {
        TRY(iterator.seek_to_cue_point(*prev_cue_point));
        return {};
    }

    if (prev_cue_point->timestamp() > timestamp) {
        // The guess was too late, walk backwards until a cue at or before the timestamp is found.
        while (index > 0 && prev_cue_point->timestamp() > timestamp) {
            prev_cue_point = &cue_points[--index];
            dbgln_if(MATROSKA_DEBUG, " Checking previous cue point {}ms ", prev_cue_point->timestamp().to_milliseconds());
        }
        TRY(iterator.seek_to_cue_point(*prev_cue_point));
        return {};
    }

    // The guess was too early, walk forwards and keep the last cue that is not after the timestamp.
    while (++index < cue_points.size()) {
        auto const& cue_point = cue_points[index];
        dbgln_if(MATROSKA_DEBUG, " Checking future cue point {}ms ", cue_point.timestamp().to_milliseconds());
        if (cue_point.timestamp() > timestamp)
            break;
        prev_cue_point = &cue_point;
    }

    TRY(iterator.seek_to_cue_point(*prev_cue_point));
    return {};
}
2023-03-13 16:30:34 +01:00
// Reads blocks from the iterator until one has a timestamp after the given timestamp, remembering the
// iterator state from just before the most recent keyframe block. If a keyframe was seen, the iterator is
// rewound to that state so the next block it returns is that keyframe; otherwise it is left where it stopped.
// Errors from next_block(), including running out of blocks, are propagated.
static DecoderErrorOr<void> search_clusters_for_keyframe_before_timestamp(SampleIterator& iterator, Duration const& timestamp)
{
#if MATROSKA_DEBUG
    // Initialized so that blocks seen before the first keyframe don't increment an indeterminate value.
    size_t inter_frames_count = 0;
#endif
    Optional<SampleIterator> last_keyframe;

    while (true) {
        // Copy the iterator before reading, so that the state that leads to this block can be restored.
        SampleIterator rewind_iterator = iterator;
        auto block = TRY(iterator.next_block());

        if (block.only_keyframes()) {
            last_keyframe.emplace(rewind_iterator);
#if MATROSKA_DEBUG
            inter_frames_count = 0;
#endif
        }

        if (block.timestamp() > timestamp)
            break;

#if MATROSKA_DEBUG
        inter_frames_count++;
#endif
    }

    if (last_keyframe.has_value()) {
#if MATROSKA_DEBUG
        dbgln(" Seeked to a keyframe with {} inter frames to skip ", inter_frames_count);
#endif
        iterator = last_keyframe.release_value();
    }

    return {};
}
2022-11-13 19:28:56 -06:00
// Reports whether any cue points are stored for the given track number.
// The Cues element is parsed first if necessary, and a failure to do so is returned as the error.
DecoderErrorOr<bool> Reader::has_cues_for_track(u64 track_number)
{
    TRY(ensure_cues_are_parsed());

    bool const track_has_cues = m_cues.contains(track_number);
    return track_has_cues;
}
2023-03-13 16:30:34 +01:00
// Returns a copy of the given iterator that has been moved to a keyframe from which decoding can start
// in order to reach the given timestamp.
// Cue points are used when the track has them; otherwise the blocks are searched one by one.
DecoderErrorOr<SampleIterator> Reader::seek_to_random_access_point(SampleIterator iterator, Duration timestamp)
{
    bool const track_has_cues = TRY(has_cues_for_track(iterator.m_track.track_number()));
    if (track_has_cues) {
        TRY(seek_to_cue_for_timestamp(iterator, timestamp));
        VERIFY(iterator.last_timestamp().has_value());
        return iterator;
    }

    // If the timestamp is before the iterator's current position (or the iterator has no position yet),
    // then we need to start from the beginning of the Segment. Otherwise, searching can continue from
    // where the iterator already is.
    bool const must_start_over = !iterator.last_timestamp().has_value() || timestamp < iterator.last_timestamp().value();
    if (must_start_over)
        iterator = TRY(create_sample_iterator(iterator.m_track.track_number()));

    TRY(search_clusters_for_keyframe_before_timestamp(iterator, timestamp));
    return iterator;
}
2022-11-13 19:28:56 -06:00
// Looks up the cue points stored for the given track number, parsing the Cues element first if necessary.
// The returned Optional is whatever the m_cues lookup yields for that track number.
DecoderErrorOr<Optional<Vector<CuePoint> const&>> Reader::cue_points_for_track(u64 track_number)
{
    // Cues are parsed lazily, so make sure they are available before the lookup.
    TRY(ensure_cues_are_parsed());

    return m_cues.get(track_number);
}
2022-11-11 17:14:27 -06:00
// Reads elements starting at the iterator's position until a SimpleBlock belonging to the iterator's track
// is found, and returns that block. Cluster elements that are passed along the way become the current cluster,
// and every other element is skipped.
//
// The iterator's position is only advanced past elements that were read successfully. When the data runs out
// before a matching block is found, an EndOfStream error is returned.
DecoderErrorOr<Block> SampleIterator::next_block()
{
    if (m_position >= m_data.size())
        return DecoderError::with_description(DecoderErrorCategory::EndOfStream, " Still at end of stream :^) "sv);

    Streamer streamer { m_data };
    TRY_READ(streamer.seek_to_position(m_position));

    Optional<Block> block;

    while (streamer.has_octet()) {
#if MATROSKA_TRACE_DEBUG
        auto element_position = streamer.position();
#endif
        // Element IDs keep their length marker bits, so the VINT is read without masking.
        auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
#if MATROSKA_TRACE_DEBUG
        dbgln(" Iterator found element with ID {:#010x} at offset {} within the segment. ", element_id, element_position);
#endif

        if (element_id == CLUSTER_ELEMENT_ID) {
            dbgln_if(MATROSKA_DEBUG, " Iterator is parsing new cluster. ");
            m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
        } else if (element_id == SIMPLE_BLOCK_ID) {
            dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is parsing new block. ");
            // The block's timestamp is computed from its cluster's timestamp, so report a block that appears
            // without a current cluster as corruption instead of dereferencing an empty Optional.
            if (!m_current_cluster.has_value())
                return DecoderError::corrupted("SimpleBlock was found outside of a Cluster"sv);
            auto candidate_block = TRY(parse_simple_block(streamer, m_current_cluster->timestamp(), m_segment_timestamp_scale, m_track));
            // Blocks of other tracks are parsed to get past them, but are not returned.
            if (candidate_block.track_number() == m_track.track_number())
                block = move(candidate_block);
        } else {
            dbgln_if(MATROSKA_TRACE_DEBUG, " Iterator is skipping unknown element with ID {:#010x}. ", element_id);
            TRY_READ(streamer.read_unknown_element());
        }

        m_position = streamer.position();
        if (block.has_value()) {
            m_last_timestamp = block->timestamp();
            return block.release_value();
        }
    }

    m_current_cluster.clear();
    return DecoderError::with_description(DecoderErrorCategory::EndOfStream, " End of stream "sv);
}
2022-11-13 19:28:56 -06:00
// Repositions the iterator according to the given cue point's entry for the iterator's track:
// the cluster at the cue's cluster position becomes the current cluster, the position is set to the cue's
// block offset past the end of the parsed cluster data, and the last timestamp becomes the cue's timestamp.
// Returns a corrupted error if the cluster position does not point at a Cluster element.
DecoderErrorOr<void> SampleIterator::seek_to_cue_point(CuePoint const& cue_point)
{
    // This is a private function. The position getter can return optional, but the caller should already know that this track has a position.
    auto const& track_cue_position = cue_point.position_for_track(m_track.track_number()).release_value();

    Streamer streamer { m_data };
    TRY_READ(streamer.seek_to_position(track_cue_position.cluster_position()));

    // The ID is read unmasked so that it can be compared against the Cluster ID constant.
    auto const found_element_id = TRY_READ(streamer.read_variable_size_integer(false));
    if (found_element_id != CLUSTER_ELEMENT_ID)
        return DecoderError::corrupted(" Cue point's cluster position didn't point to a cluster "sv);

    m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
    dbgln_if(MATROSKA_DEBUG, " SampleIterator set to cue point at timestamp {}ms ", m_current_cluster->timestamp().to_milliseconds());

    auto const position_after_cluster_header = streamer.position();
    m_position = position_after_cluster_header + track_cue_position.block_offset();
    m_last_timestamp = cue_point.timestamp();
    return {};
}
2023-12-16 17:49:34 +03:30
// Reads a string element's contents: a variable-size length followed by that many octets.
// The returned string ends at the first null byte within those octets, if there is one,
// but the stream is always advanced past the full declared length.
ErrorOr<ByteString> Streamer::read_string()
{
    auto const declared_length = TRY(read_variable_size_integer());
    if (declared_length > remaining())
        return Error::from_string_literal(" String length extends past the end of the stream ");

    auto characters = data_as_chars();
    auto const length_without_padding = strnlen(characters, declared_length);
    ByteString string_value(characters, length_without_padding);

    // Consume the whole element, including anything after the first null byte.
    TRY(read_raw_octets(declared_length));
    return string_value;
}
2022-11-11 19:09:53 -06:00
// Reads a single octet and advances both the position and the current octets-read counter by one.
// Returns an error if no octet is left in the stream.
ErrorOr<u8> Streamer::read_octet()
{
    if (has_octet()) {
        u8 const octet = *data();
        m_octets_read.last()++;
        m_position++;
        return octet;
    }

    dbgln_if(MATROSKA_TRACE_DEBUG, " Ran out of stream data ");
    return Error::from_string_literal(" Stream is out of data ");
}
2022-11-11 19:09:53 -06:00
// Reads two octets and combines them into a signed 16-bit integer, with the first octet as the high byte.
// Returns an error if the stream runs out of data.
ErrorOr<i16> Streamer::read_i16()
{
    // The octets are read in separate statements because the evaluation order of the operands of `|` is
    // unspecified, so reading both within one expression would not guarantee which octet ends up as the high byte.
    auto const high_octet = TRY(read_octet());
    auto const low_octet = TRY(read_octet());
    return static_cast<i16>((high_octet << 8) | low_octet);
}
2022-11-11 19:09:53 -06:00
// Reads an EBML variable-size integer (VINT).
// The position of the first set bit in the first octet gives the total length in octets (1 to 8).
//
// - mask_length: When true, that length marker bit is cleared from the result. When false, the first octet
//                is kept as-is, which is how the callers in this file read element IDs.
//
// Returns an error if the first octet is zero or if the stream runs out of data.
ErrorOr<u64> Streamer::read_variable_size_integer(bool mask_length)
{
    dbgln_if(MATROSKA_TRACE_DEBUG, " Reading VINT from offset {:p} ", position());
    auto length_descriptor = TRY(read_octet());
    dbgln_if(MATROSKA_TRACE_DEBUG, " Reading VINT, first byte is {:#02x} ", length_descriptor);
    if (length_descriptor == 0)
        return Error::from_string_literal(" read_variable_size_integer: Length descriptor has no terminating set bit ");

    // Scan from the most significant bit down to find the length marker. The descriptor is known to be
    // non-zero, so the length always ends up between 1 and 8 and needs no further range check.
    size_t length = 1;
    while (length < 8 && ((length_descriptor >> (8 - length)) & 1) == 0)
        length++;
    dbgln_if(MATROSKA_TRACE_DEBUG, " Reading VINT of total length {} ", length);

    u64 result;
    if (mask_length)
        result = length_descriptor & ~(1u << (8 - length));
    else
        result = length_descriptor;
    dbgln_if(MATROSKA_TRACE_DEBUG, " Beginning of VINT is {:#02x} ", result);

    // The remaining octets follow in big-endian order.
    for (size_t i = 1; i < length; i++) {
        u8 next_octet = TRY(read_octet());
        dbgln_if(MATROSKA_TRACE_DEBUG, " Read octet of {:#02x} ", next_octet);
        result = (result << 8u) | next_octet;
        dbgln_if(MATROSKA_TRACE_DEBUG, " New result is {:#010x} ", result);
    }

    return result;
}
2022-11-11 19:09:53 -06:00
// Reads an EBML variable-size integer and interprets it as a signed value by subtracting
// 2^(7 * length - 1) - 1 from the unsigned value, where length is the VINT's total length in octets.
//
// Returns an error if the first octet is zero or if the stream runs out of data.
ErrorOr<i64> Streamer::read_variable_size_signed_integer()
{
    auto length_descriptor = TRY(read_octet());
    if (length_descriptor == 0)
        return Error::from_string_literal(" read_variable_sized_signed_integer: Length descriptor has no terminating set bit ");

    // The position of the first set bit, counted from the most significant bit, is the total length.
    // The descriptor is known to be non-zero, so the length always ends up between 1 and 8.
    i64 length = 1;
    while (length < 8 && ((length_descriptor >> (8 - length)) & 1) == 0)
        length++;

    // Clear the length marker bit, then append the remaining octets in big-endian order.
    i64 result = length_descriptor & ~(1u << (8 - length));
    for (i64 i = 1; i < length; i++) {
        u8 next_octet = TRY(read_octet());
        result = (result << 8u) | next_octet;
    }

    // Shift the unsigned range so that it is centered around zero.
    result -= AK::exp2<i64>(length * 7 - 1) - 1;
    return result;
}
2022-11-11 17:14:27 -06:00
// Returns a view of the next num_octets octets of the stream without copying them,
// and advances the position and the current octets-read counter past them.
// Returns an error if fewer than num_octets octets remain.
ErrorOr<ReadonlyBytes> Streamer::read_raw_octets(size_t num_octets)
{
    if (num_octets > remaining())
        return Error::from_string_literal(" Tried to drop octets past the end of the stream ");

    // Take the view before moving the position, since data() is relative to the current position.
    ReadonlyBytes octets { data(), num_octets };
    m_position += num_octets;
    m_octets_read.last() += num_octets;
    return octets;
}
2022-11-11 19:09:53 -06:00
// Reads an unsigned integer element's contents: a variable-size length followed by that many octets
// in big-endian order. A length of zero yields 0.
// Returns an error if the declared length is more than 8 octets or if the stream runs out of data.
ErrorOr<u64> Streamer::read_u64()
{
    auto integer_length = TRY(read_variable_size_integer());
    // More than 8 octets can't be represented in the result; without this check, the most significant
    // octets would silently be shifted out.
    if (integer_length > sizeof(u64))
        return Error::from_string_literal("read_u64: Integer length is larger than 8 octets");

    u64 result = 0;
    for (size_t i = 0; i < integer_length; i++)
        result = (result << 8u) + TRY(read_octet());
    return result;
}
2022-11-11 19:09:53 -06:00
// Reads a float element's contents: a variable-size length followed by a 4-octet or 8-octet big-endian
// floating point value, which is returned as a double.
// Returns an error if the declared length is neither 4 nor 8 or if the stream runs out of data.
ErrorOr<double> Streamer::read_float()
{
    auto length = TRY(read_variable_size_integer());
    if (length != 4u && length != 8u)
        return Error::from_string_literal(" Float size must be 4 or 8 bytes ");

    // Assemble the big-endian octets into an integer holding the value's bit pattern.
    u64 bits = 0;
    for (size_t i = 0; i < length; i++)
        bits = (bits << 8u) + TRY(read_octet());

    // Reinterpret the bit pattern with a bit cast instead of reading an inactive union member, which is
    // undefined behavior in C++ and, for the 4-octet case, only gave the right result on little-endian targets.
    if (length == 4u)
        return static_cast<double>(__builtin_bit_cast(float, static_cast<u32>(bits)));
    return __builtin_bit_cast(double, bits);
}
2022-11-11 19:09:53 -06:00
// Skips over an element's contents: reads its variable-size length and then discards that many octets.
// Returns an error if the element extends past the end of the stream.
ErrorOr<void> Streamer::read_unknown_element()
{
    auto const size_to_skip = TRY(read_variable_size_integer());
    dbgln_if(MATROSKA_TRACE_DEBUG, " Skipping unknown element of size {}. ", size_to_skip);

    // The octets themselves are not needed, reading them just moves the stream forward.
    TRY(read_raw_octets(size_to_skip));
    return {};
}
// Moves the stream to the given absolute position within its data.
// The position must be before the end of the data; otherwise an error is returned
// and the current position is left unchanged.
ErrorOr<void> Streamer::seek_to_position(size_t position)
{
    if (position < m_data.size()) {
        m_position = position;
        return {};
    }

    return Error::from_string_literal(" Attempted to seek past the end of the stream ");
}
}