Full Theora video support in VideoStreamPlayer

- Implement `set_stream_position` and `get_stream_length`.
- Don't show blank frame when stopping the video (smooth loops).
- Fix audio for videos with up to 8 channels.
- Improve internal audio handling.
This commit is contained in:
Bernat Arlandis 2025-01-28 17:39:46 +01:00
parent 4a44078451
commit b9bebf7081
6 changed files with 541 additions and 218 deletions

View file

@ -16,7 +16,6 @@
<return type="float" />
<description>
The length of the current stream, in seconds.
[b]Note:[/b] For [VideoStreamTheora] streams (the built-in format supported by Godot), this value will always be zero, as getting the stream length is not implemented yet. The feature may be supported by video formats implemented by a GDExtension add-on.
</description>
</method>
<method name="get_stream_name" qualifiers="const">
@ -79,7 +78,6 @@
</member>
<member name="stream_position" type="float" setter="set_stream_position" getter="get_stream_position">
The current position of the stream, in seconds.
[b]Note:[/b] Changing this value won't have any effect as seeking is not implemented yet, except in video formats implemented by a GDExtension add-on.
</member>
<member name="volume" type="float" setter="set_volume" getter="get_volume">
Audio volume as a linear value.

View file

@ -41,17 +41,15 @@ int VideoStreamPlaybackTheora::buffer_data() {
uint64_t bytes = file->get_buffer((uint8_t *)buffer, 4096);
ogg_sync_wrote(&oy, bytes);
return (bytes);
return bytes;
}
int VideoStreamPlaybackTheora::queue_page(ogg_page *page) {
if (theora_p) {
ogg_stream_pagein(&to, page);
if (to.e_o_s) {
theora_eos = true;
}
ogg_stream_pagein(&to, page);
if (to.e_o_s) {
theora_eos = true;
}
if (vorbis_p) {
if (has_audio) {
ogg_stream_pagein(&vo, page);
if (vo.e_o_s) {
vorbis_eos = true;
@ -60,6 +58,179 @@ int VideoStreamPlaybackTheora::queue_page(ogg_page *page) {
return 0;
}
// Pull the next page out of the Ogg sync layer, loading more file data as needed.
// Returns the positive result of ogg_sync_pageout() once a page has been produced,
// or 0 when buffer_data() reports that no more bytes could be read.
int VideoStreamPlaybackTheora::read_page(ogg_page *page) {
	for (;;) {
		const int status = ogg_sync_pageout(&oy, page);
		if (status > 0) {
			return status;
		}
		// No page available yet (or the sync layer skipped bytes): feed it another buffer.
		if (buffer_data() == 0) {
			return 0;
		}
	}
}
// Convert the granule position of a page into a time using the decoder of the stream
// the page belongs to (matched by serial number).
// Returns -1 when the page belongs to neither the selected video nor audio stream;
// otherwise returns whatever th_granule_time() / vorbis_granule_time() report.
double VideoStreamPlaybackTheora::get_page_time(ogg_page *page) {
	// Keep the granule position signed: it is handed to APIs that take a signed
	// 64-bit value, and the rest of this file treats negative values as "no position".
	const int64_t granulepos = ogg_page_granulepos(page);
	const int page_serialno = ogg_page_serialno(page);

	// The audio check comes first so that it wins if both serial numbers match,
	// which is the same precedence the sequential checks had before.
	if (has_audio && page_serialno == vo.serialno) {
		return vorbis_granule_time(&vd, granulepos);
	}
	if (page_serialno == to.serialno) {
		return th_granule_time(td, granulepos);
	}
	return -1;
}
// Read one buffer worth of pages and feed them to the streams.
int VideoStreamPlaybackTheora::feed_pages() {
int pages = 0;
ogg_page og;
while (pages == 0) {
while (ogg_sync_pageout(&oy, &og) > 0) {
queue_page(&og);
pages++;
}
if (pages == 0) {
int bytes = buffer_data();
if (bytes == 0) {
break;
}
}
}
return pages;
}
// Seek the video and audio streams simultaneously to find the granulepos where we should start decoding.
// It will return the position where we should start reading pages, and the video and audio granulepos.
//
// p_time: time to seek to; the value -1 is a special case used to locate the last pages of the file.
// cur_video_granulepos / cur_audio_granulepos: outputs. They receive the granulepos of the last page seen
// whose granulepos is below the target for each stream, or the start-of-stream defaults assigned at the
// end of this function.
// Returns the file position to start reading from, or -1 from the end-of-file check inside the scan loop.
int64_t VideoStreamPlaybackTheora::seek_streams(double p_time, int64_t &cur_video_granulepos, int64_t &cur_audio_granulepos) {
// Backtracking less than this is probably a waste of time.
const int64_t min_seek = 512 * 1024;
int64_t target_video_granulepos;
int64_t target_audio_granulepos;
double target_time = 0;
int64_t seek_pos;
// Make a guess where we should start reading in the file, and scan from there.
// We base the guess on the mean bitrate of the streams. It would be theoretically faster to use the bisect method but
// in practice there's a lot of linear scanning to do to find the right pages.
// We want to catch the previous keyframe to the seek time. Since we only know the max GOP, we use that.
if (p_time == -1) { // This is a special case to find the last packets and calculate the video length.
// Start min_seek before the end of the stream data; the unreachable INT64_MAX targets make the scan run to the end.
seek_pos = MAX(stream_data_size - min_seek, stream_data_offset);
target_video_granulepos = INT64_MAX;
target_audio_granulepos = INT64_MAX;
} else {
int64_t video_frame = (int64_t)(p_time / frame_duration);
// Step back by (1 << keyframe_granule_shift) frames, clamped to at least 1, and place the result in the upper part of the granulepos.
target_video_granulepos = MAX(1LL, video_frame - (1LL << ti.keyframe_granule_shift)) << ti.keyframe_granule_shift;
target_audio_granulepos = 0;
// Estimate the file position by scaling the target frame's time with stream_data_size / stream_length.
seek_pos = MAX(((target_video_granulepos >> ti.keyframe_granule_shift) - 1) * frame_duration * stream_data_size / stream_length, stream_data_offset);
target_time = th_granule_time(td, target_video_granulepos);
if (has_audio) {
target_audio_granulepos = video_frame * frame_duration * vi.rate;
// Use the earlier of the two target times as the reference for backtracking.
target_time = MIN(target_time, vorbis_granule_time(&vd, target_audio_granulepos));
}
}
int64_t video_seek_pos = seek_pos;
int64_t audio_seek_pos = seek_pos;
double backtrack_time = 0;
bool video_catch = false;
bool audio_catch = false;
int64_t last_video_granule_seek_pos = seek_pos;
int64_t last_audio_granule_seek_pos = seek_pos;
cur_video_granulepos = -1;
cur_audio_granulepos = -1;
while (!video_catch || (has_audio && !audio_catch)) { // Backtracking loop
// Never seek before the start of the stream data.
if (seek_pos < stream_data_offset) {
seek_pos = stream_data_offset;
}
file->seek(seek_pos);
ogg_sync_reset(&oy);
backtrack_time = 0;
last_video_granule_seek_pos = seek_pos;
last_audio_granule_seek_pos = seek_pos;
while (!video_catch || (has_audio && !audio_catch)) { // Page scanning loop
ogg_page page;
// File position minus the bytes buffered in the sync state plus the bytes it already returned;
// presumably the file offset where the page about to be read starts.
uint64_t last_seek_pos = file->get_position() - oy.fill + oy.returned;
int ret = read_page(&page);
if (ret <= 0) { // End of file.
// NOTE(review): seek_pos is clamped to stream_data_offset at the top of the backtracking loop and is not
// modified before this point, so this condition looks like it can never be true here. Confirm whether
// `<=` was intended, otherwise a stream without usable granule positions could rescan forever.
if (seek_pos < stream_data_offset) { // We've already searched the whole file
return -1;
}
seek_pos -= min_seek;
break;
}
int64_t cur_granulepos = ogg_page_granulepos(&page);
// Pages with a negative granulepos are ignored.
if (cur_granulepos >= 0) {
int page_serialno = ogg_page_serialno(&page);
if (!video_catch && page_serialno == to.serialno) {
if (cur_granulepos >= target_video_granulepos) {
video_catch = true;
// Reached the target without having seen an earlier video page: we started too late, request a backtrack.
if (cur_video_granulepos < 0) {
// Adding 1s helps catching the start of the page and avoids backtrack_time = 0.
backtrack_time = MAX(backtrack_time, 1 + th_granule_time(td, cur_granulepos) - target_time);
}
} else {
// Still before the target: remember this page's granulepos and the position recorded for the previous video page.
video_seek_pos = last_video_granule_seek_pos;
cur_video_granulepos = cur_granulepos;
}
last_video_granule_seek_pos = last_seek_pos;
}
if ((has_audio && !audio_catch) && page_serialno == vo.serialno) {
if (cur_granulepos >= target_audio_granulepos) {
audio_catch = true;
// Same as for video: no earlier audio page was seen, so request a backtrack.
if (cur_audio_granulepos < 0) {
// Adding 1s helps catching the start of the page and avoids backtrack_time = 0.
backtrack_time = MAX(backtrack_time, 1 + vorbis_granule_time(&vd, cur_granulepos) - target_time);
}
} else {
audio_seek_pos = last_audio_granule_seek_pos;
cur_audio_granulepos = cur_granulepos;
}
last_audio_granule_seek_pos = last_seek_pos;
}
}
}
if (backtrack_time > 0) {
// Already scanning from the start of the data: nothing earlier to look at.
if (seek_pos <= stream_data_offset) {
break;
}
// Convert the time to backtrack into bytes using the mean bitrate, but move back by at least min_seek.
int64_t delta_seek = MAX(backtrack_time * stream_data_size / stream_length, min_seek);
seek_pos -= delta_seek;
}
// A stream only counts as caught if a page before its target was recorded; otherwise the outer loop runs again.
video_catch = cur_video_granulepos != -1;
audio_catch = cur_audio_granulepos != -1;
}
// No usable earlier video page was found: start from the beginning of the stream data.
if (cur_video_granulepos < (1LL << ti.keyframe_granule_shift)) {
video_seek_pos = stream_data_offset;
cur_video_granulepos = 1LL << ti.keyframe_granule_shift;
}
if (has_audio) {
if (cur_audio_granulepos == -1) {
audio_seek_pos = stream_data_offset;
cur_audio_granulepos = 0;
}
// Start reading at whichever stream needs the earlier file position.
seek_pos = MIN(video_seek_pos, audio_seek_pos);
} else {
seek_pos = video_seek_pos;
}
return seek_pos;
}
void VideoStreamPlaybackTheora::video_write(th_ycbcr_buffer yuv) {
uint8_t *w = frame_data.ptrw();
char *dst = (char *)w;
@ -77,83 +248,53 @@ void VideoStreamPlaybackTheora::video_write(th_ycbcr_buffer yuv) {
Ref<Image> img;
img.instantiate(region.size.x, region.size.y, false, Image::FORMAT_RGBA8, frame_data); //zero copy image creation
texture->update(img); //zero copy send to rendering server
texture->update(img); // Zero-copy send to rendering server.
}
void VideoStreamPlaybackTheora::clear() {
if (file.is_null()) {
return;
if (!file.is_null()) {
file.unref();
}
if (vorbis_p) {
ogg_stream_clear(&vo);
if (vorbis_p >= 3) {
vorbis_block_clear(&vb);
vorbis_dsp_clear(&vd);
}
if (has_audio) {
vorbis_block_clear(&vb);
vorbis_dsp_clear(&vd);
vorbis_comment_clear(&vc);
vorbis_info_clear(&vi);
vorbis_p = 0;
ogg_stream_clear(&vo);
if (audio_buffer_size) {
memdelete_arr(audio_buffer);
}
}
if (theora_p) {
ogg_stream_clear(&to);
if (has_video) {
th_decode_free(td);
th_comment_clear(&tc);
th_info_clear(&ti);
theora_p = 0;
ogg_stream_clear(&to);
ogg_sync_clear(&oy);
}
ogg_sync_clear(&oy);
theora_p = 0;
vorbis_p = 0;
next_frame_time = 0;
current_frame_time = 0;
audio_buffer = nullptr;
playing = false;
has_video = false;
has_audio = false;
theora_eos = false;
vorbis_eos = false;
video_ready = false;
video_done = false;
audio_done = false;
file.unref();
playing = false;
}
void VideoStreamPlaybackTheora::set_file(const String &p_file) {
ERR_FAIL_COND(playing);
void VideoStreamPlaybackTheora::find_streams(th_setup_info *&ts) {
ogg_stream_state test;
ogg_packet op;
th_setup_info *ts = nullptr;
file_name = p_file;
file = FileAccess::open(p_file, FileAccess::READ);
ERR_FAIL_COND_MSG(file.is_null(), "Cannot open file '" + p_file + "'.");
ogg_sync_init(&oy);
/* init supporting Vorbis structures needed in header parsing */
vorbis_info_init(&vi);
vorbis_comment_init(&vc);
/* init supporting Theora structures needed in header parsing */
th_comment_init(&tc);
th_info_init(&ti);
theora_eos = false;
vorbis_eos = false;
/* Ogg file open; parse the headers */
/* Only interested in Vorbis/Theora streams */
ogg_page og;
int stateflag = 0;
int audio_track_skip = audio_track;
/* Only interested in Vorbis/Theora streams */
while (!stateflag) {
int ret = buffer_data();
if (ret == 0) {
if (!ret) {
break;
}
while (ogg_sync_pageout(&oy, &og) > 0) {
ogg_stream_state test;
/* is this a mandated initial header? If not, stop parsing */
if (!ogg_page_bos(&og)) {
/* don't leak the page; get it into the appropriate stream */
@ -167,11 +308,11 @@ void VideoStreamPlaybackTheora::set_file(const String &p_file) {
ogg_stream_packetout(&test, &op);
/* identify the codec: try theora */
if (!theora_p && th_decode_headerin(&ti, &tc, &ts, &op) >= 0) {
if (!has_video && th_decode_headerin(&ti, &tc, &ts, &op) >= 0) {
/* it is theora */
memcpy(&to, &test, sizeof(test));
theora_p = 1;
} else if (!vorbis_p && vorbis_synthesis_headerin(&vi, &vc, &op) >= 0) {
has_video = true;
} else if (!has_audio && vorbis_synthesis_headerin(&vi, &vc, &op) >= 0) {
/* it is vorbis */
if (audio_track_skip) {
vorbis_info_clear(&vi);
@ -179,141 +320,165 @@ void VideoStreamPlaybackTheora::set_file(const String &p_file) {
ogg_stream_clear(&test);
vorbis_info_init(&vi);
vorbis_comment_init(&vc);
audio_track_skip--;
} else {
memcpy(&vo, &test, sizeof(test));
vorbis_p = 1;
has_audio = true;
}
} else {
/* whatever it is, we don't care about it */
ogg_stream_clear(&test);
}
}
/* fall through to non-bos page parsing */
}
}
void VideoStreamPlaybackTheora::read_headers(th_setup_info *&ts) {
ogg_packet op;
int theora_header_packets = 1;
int vorbis_header_packets = 1;
/* we're expecting more header packets. */
while ((theora_p && theora_p < 3) || (vorbis_p && vorbis_p < 3)) {
int ret = 0;
while (theora_header_packets < 3 || (has_audio && vorbis_header_packets < 3)) {
/* look for further theora headers */
if (theora_p && theora_p < 3) {
ret = ogg_stream_packetout(&to, &op);
}
while (theora_p && theora_p < 3 && ret) {
if (ret < 0) {
fprintf(stderr, "Error parsing Theora stream headers; corrupt stream?\n");
clear();
return;
// The API says there can be more than three but only three are mandatory.
while (theora_header_packets < 3 && ogg_stream_packetout(&to, &op) > 0) {
if (th_decode_headerin(&ti, &tc, &ts, &op) > 0) {
theora_header_packets++;
}
if (!th_decode_headerin(&ti, &tc, &ts, &op)) {
fprintf(stderr, "Error parsing Theora stream headers; corrupt stream?\n");
clear();
return;
}
ret = ogg_stream_packetout(&to, &op);
theora_p++;
}
/* look for more vorbis header packets */
if (vorbis_p && vorbis_p < 3) {
ret = ogg_stream_packetout(&vo, &op);
while (has_audio && vorbis_header_packets < 3 && ogg_stream_packetout(&vo, &op) > 0) {
if (!vorbis_synthesis_headerin(&vi, &vc, &op)) {
vorbis_header_packets++;
}
}
while (vorbis_p && vorbis_p < 3 && ret) {
if (ret < 0) {
fprintf(stderr, "Error parsing Vorbis stream headers; corrupt stream?\n");
clear();
return;
}
ret = vorbis_synthesis_headerin(&vi, &vc, &op);
if (ret) {
fprintf(stderr, "Error parsing Vorbis stream headers; corrupt stream?\n");
clear();
return;
}
vorbis_p++;
if (vorbis_p == 3) {
/* The header pages/packets will arrive before anything else we care about, or the stream is not obeying spec */
if (theora_header_packets < 3 || (has_audio && vorbis_header_packets < 3)) {
ogg_page page;
if (read_page(&page)) {
queue_page(&page);
} else {
fprintf(stderr, "End of file while searching for codec headers.\n");
break;
}
ret = ogg_stream_packetout(&vo, &op);
}
}
/* The header pages/packets will arrive before anything else we
care about, or the stream is not obeying spec */
has_video = theora_header_packets == 3;
has_audio = vorbis_header_packets == 3;
}
if (ogg_sync_pageout(&oy, &og) > 0) {
queue_page(&og); /* demux into the appropriate stream */
} else {
int ret2 = buffer_data(); /* someone needs more data */
if (ret2 == 0) {
fprintf(stderr, "End of file while searching for codec headers.\n");
clear();
return;
}
void VideoStreamPlaybackTheora::set_file(const String &p_file) {
ERR_FAIL_COND(playing);
th_setup_info *ts = nullptr;
clear();
file = FileAccess::open(p_file, FileAccess::READ);
ERR_FAIL_COND_MSG(file.is_null(), "Cannot open file '" + p_file + "'.");
file_name = p_file;
ogg_sync_init(&oy);
/* init supporting Vorbis structures needed in header parsing */
vorbis_info_init(&vi);
vorbis_comment_init(&vc);
/* init supporting Theora structures needed in header parsing */
th_comment_init(&tc);
th_info_init(&ti);
/* Zero stream state structs so they can be checked later. */
memset(&to, 0, sizeof(to));
memset(&vo, 0, sizeof(vo));
/* Ogg file open; parse the headers */
find_streams(ts);
read_headers(ts);
if (!has_audio) {
vorbis_comment_clear(&vc);
vorbis_info_clear(&vi);
if (!ogg_stream_check(&vo)) {
ogg_stream_clear(&vo);
}
}
// One video stream is mandatory.
if (!has_video) {
th_setup_free(ts);
th_comment_clear(&tc);
th_info_clear(&ti);
if (!ogg_stream_check(&to)) {
ogg_stream_clear(&to);
}
file.unref();
return;
}
/* And now we have it all. Initialize decoders. */
if (theora_p) {
td = th_decode_alloc(&ti, ts);
px_fmt = ti.pixel_fmt;
switch (ti.pixel_fmt) {
case TH_PF_420:
//printf(" 4:2:0 video\n");
break;
case TH_PF_422:
//printf(" 4:2:2 video\n");
break;
case TH_PF_444:
//printf(" 4:4:4 video\n");
break;
case TH_PF_RSVD:
default:
printf(" video\n (UNKNOWN Chroma sampling!)\n");
break;
}
th_decode_ctl(td, TH_DECCTL_GET_PPLEVEL_MAX, &pp_level_max,
sizeof(pp_level_max));
pp_level = 0;
th_decode_ctl(td, TH_DECCTL_SET_PPLEVEL, &pp_level, sizeof(pp_level));
pp_inc = 0;
size.x = ti.frame_width;
size.y = ti.frame_height;
region.position.x = ti.pic_x;
region.position.y = ti.pic_y;
region.size.x = ti.pic_width;
region.size.y = ti.pic_height;
Ref<Image> img = Image::create_empty(region.size.x, region.size.y, false, Image::FORMAT_RGBA8);
texture->set_image(img);
frame_data.resize(region.size.x * region.size.y * 4);
frame_duration = (double)ti.fps_denominator / ti.fps_numerator;
} else {
/* tear down the partial theora setup */
th_info_clear(&ti);
th_comment_clear(&tc);
}
td = th_decode_alloc(&ti, ts);
th_setup_free(ts);
px_fmt = ti.pixel_fmt;
switch (ti.pixel_fmt) {
case TH_PF_420:
case TH_PF_422:
case TH_PF_444:
break;
default:
WARN_PRINT(" video\n (UNKNOWN Chroma sampling!)\n");
break;
}
th_decode_ctl(td, TH_DECCTL_GET_PPLEVEL_MAX, &pp_level_max, sizeof(pp_level_max));
pp_level = 0;
th_decode_ctl(td, TH_DECCTL_SET_PPLEVEL, &pp_level, sizeof(pp_level));
pp_inc = 0;
if (vorbis_p) {
size.x = ti.frame_width;
size.y = ti.frame_height;
region.position.x = ti.pic_x;
region.position.y = ti.pic_y;
region.size.x = ti.pic_width;
region.size.y = ti.pic_height;
Ref<Image> img = Image::create_empty(region.size.x, region.size.y, false, Image::FORMAT_RGBA8);
texture->set_image(img);
frame_data.resize(region.size.x * region.size.y * 4);
frame_duration = (double)ti.fps_denominator / ti.fps_numerator;
if (has_audio) {
vorbis_synthesis_init(&vd, &vi);
vorbis_block_init(&vd, &vb);
//_setup(vi.channels, vi.rate);
} else {
/* tear down the partial vorbis setup */
vorbis_info_clear(&vi);
vorbis_comment_clear(&vc);
audio_buffer_size = MIN(vi.channels, 8) * 1024;
audio_buffer = memnew_arr(float, audio_buffer_size);
}
playing = false;
buffering = true;
time = 0;
video_done = !theora_p;
audio_done = !vorbis_p;
stream_data_offset = file->get_position() - oy.fill + oy.returned;
stream_data_size = file->get_length() - stream_data_offset;
// Sync to last page to find video length.
int64_t seek_pos = MAX(stream_data_offset, (int64_t)file->get_length() - 64 * 1024);
int64_t video_granulepos = INT64_MAX;
int64_t audio_granulepos = INT64_MAX;
file->seek(seek_pos);
seek_pos = seek_streams(-1, video_granulepos, audio_granulepos);
file->seek(seek_pos);
ogg_sync_reset(&oy);
stream_length = 0;
ogg_page page;
while (read_page(&page) > 0) {
// Use MAX because, even though pages are ordered, page time can be -1
// for pages without full frames. Streams could be truncated too.
stream_length = MAX(stream_length, get_page_time(&page));
}
seek(0);
}
double VideoStreamPlaybackTheora::get_time() const {
@ -346,28 +511,32 @@ void VideoStreamPlaybackTheora::update(double p_delta) {
ogg_packet op;
while (!audio_ready && !audio_done) {
// Send remaining frames
if (!send_audio()) {
audio_ready = true;
break;
}
float **pcm;
int ret = vorbis_synthesis_pcmout(&vd, &pcm);
if (ret > 0) {
const int AUXBUF_LEN = 4096;
int to_read = ret;
float aux_buffer[AUXBUF_LEN];
while (to_read) {
int m = MIN(AUXBUF_LEN / vi.channels, to_read);
int frames_read = 0;
while (frames_read < ret) {
int m = MIN(audio_buffer_size / vi.channels, ret - frames_read);
int count = 0;
for (int j = 0; j < m; j++) {
for (int i = 0; i < vi.channels; i++) {
aux_buffer[count++] = pcm[i][j];
audio_buffer[count++] = pcm[i][frames_read + j];
}
}
int mixed = mix_callback(mix_udata, aux_buffer, m);
to_read -= mixed;
if (mixed != m) { //could mix no more
frames_read += m;
audio_ptr_end = m;
if (!send_audio()) {
audio_ready = true;
break;
}
}
vorbis_synthesis_read(&vd, ret - to_read);
vorbis_synthesis_read(&vd, frames_read);
} else {
/* no pending audio; is there a pending packet to decode? */
if (ogg_stream_packetout(&vo, &op) > 0) {
@ -383,17 +552,10 @@ void VideoStreamPlaybackTheora::update(double p_delta) {
while (!video_ready && !video_done) {
if (ogg_stream_packetout(&to, &op) > 0) {
/*HACK: This should be set after a seek or a gap, but we might not have
a granulepos for the first packet (we only have them for the last
packet on a page), so we just set it as often as we get it.
To do this right, we should back-track from the last packet on the
page and compute the correct granulepos for the first packet after
a seek or a gap.*/
if (op.granulepos >= 0) {
th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos,
sizeof(op.granulepos));
th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, sizeof(op.granulepos));
}
ogg_int64_t videobuf_granulepos;
int64_t videobuf_granulepos;
int ret = th_decode_packetin(td, &op, &videobuf_granulepos);
if (ret == 0 || ret == TH_DUPFRAME) {
next_frame_time = th_granule_time(td, videobuf_granulepos);
@ -412,12 +574,8 @@ void VideoStreamPlaybackTheora::update(double p_delta) {
}
if (!video_ready || !audio_ready) {
int ret = buffer_data();
if (ret > 0) {
while (ogg_sync_pageout(&oy, &og) > 0) {
queue_page(&og);
}
} else {
int ret = feed_pages();
if (ret == 0) {
vorbis_eos = true;
theora_eos = true;
break;
@ -452,10 +610,8 @@ void VideoStreamPlaybackTheora::update(double p_delta) {
}
void VideoStreamPlaybackTheora::play() {
if (!playing) {
time = 0;
} else {
stop();
if (playing) {
return;
}
playing = true;
@ -464,12 +620,8 @@ void VideoStreamPlaybackTheora::play() {
}
void VideoStreamPlaybackTheora::stop() {
if (playing) {
clear();
set_file(file_name); //reset
}
playing = false;
time = 0;
seek(0);
}
bool VideoStreamPlaybackTheora::is_playing() const {
@ -485,7 +637,7 @@ bool VideoStreamPlaybackTheora::is_paused() const {
}
double VideoStreamPlaybackTheora::get_length() const {
return 0;
return stream_length;
}
double VideoStreamPlaybackTheora::get_playback_position() const {
@ -493,7 +645,123 @@ double VideoStreamPlaybackTheora::get_playback_position() const {
}
// Move playback to p_time (in seconds).
//
// Does nothing when no file is open or when p_time is at or past stream_length.
// Otherwise it resets the playback state and both Ogg streams, asks seek_streams() where to
// start reading, and then reads packets forward until both audio and video have reached p_time.
// If a video frame was decoded on the way, it is drawn immediately while playing, or marked as
// ready otherwise.
void VideoStreamPlaybackTheora::seek(double p_time) {
	if (file.is_null()) {
		return;
	}
	if (p_time >= stream_length) {
		return;
	}

	// Reset playback state so that update() starts from scratch after the seek.
	video_ready = false;
	next_frame_time = 0;
	current_frame_time = -1;
	dup_frame = false;
	video_done = false;
	audio_done = !has_audio;
	theora_eos = false;
	vorbis_eos = false;
	audio_ptr_start = 0;
	audio_ptr_end = 0;

	ogg_stream_reset(&to);
	if (has_audio) {
		ogg_stream_reset(&vo);
		vorbis_synthesis_restart(&vd);
	}

	int64_t seek_pos;
	int64_t video_granulepos;
	int64_t audio_granulepos;
	// Find the granules we need so we can start playing at the seek time.
	seek_pos = seek_streams(p_time, video_granulepos, audio_granulepos);
	if (seek_pos < 0) {
		return;
	}
	file->seek(seek_pos);
	ogg_sync_reset(&oy);

	time = p_time;

	double last_audio_time = 0;
	double last_video_time = 0;
	bool first_frame_decoded = false;
	// These values are the ones seek_streams() assigns when a stream has to be read from its start.
	bool start_audio = (audio_granulepos == 0);
	bool start_video = (video_granulepos == (1LL << ti.keyframe_granule_shift));
	bool keyframe_found = false;
	uint64_t current_frame = 0;

	// Read from the streams skipping pages until we reach the granules we want. We won't skip pages from both video and
	// audio streams, only one of them, until decoding of both starts.
	// video_granulepos and audio_granulepos are guaranteed to be found by checking the granulepos in the packets, no
	// need to keep track of packets with granulepos == -1 until decoding starts.
	while ((has_audio && last_audio_time < p_time) || (last_video_time <= p_time)) {
		ogg_packet op;
		if (feed_pages() == 0) {
			break;
		}
		while (has_audio && last_audio_time < p_time && ogg_stream_packetout(&vo, &op) > 0) {
			if (start_audio) {
				if (vorbis_synthesis(&vb, &op) == 0) { /* test for success! */
					vorbis_synthesis_blockin(&vd, &vb);
					float **pcm;
					// Consume decoded samples only up to the seek time; the rest stays in the decoder.
					int samples_left = ceil((p_time - last_audio_time) * vi.rate);
					int samples_read = vorbis_synthesis_pcmout(&vd, &pcm);
					int samples_consumed = MIN(samples_left, samples_read);
					vorbis_synthesis_read(&vd, samples_consumed);
					last_audio_time += (double)samples_consumed / vi.rate;
				}
			} else if (op.granulepos >= audio_granulepos) {
				last_audio_time = vorbis_granule_time(&vd, op.granulepos);
				// Start tracking audio now. This won't produce any samples but will update the decoder state.
				if (vorbis_synthesis_trackonly(&vb, &op) == 0) {
					vorbis_synthesis_blockin(&vd, &vb);
				}
				start_audio = true;
			}
		}
		while (last_video_time <= p_time && ogg_stream_packetout(&to, &op) > 0) {
			if (!start_video && (op.granulepos >= video_granulepos || video_granulepos == (1LL << ti.keyframe_granule_shift))) {
				if (op.granulepos > 0) {
					current_frame = th_granule_frame(td, op.granulepos);
				}
				start_video = true;
			}
			// Don't start decoding until a keyframe is found, but count frames.
			if (start_video) {
				if (!keyframe_found && th_packet_iskeyframe(&op)) {
					keyframe_found = true;
					// Give the decoder a granule position built from the frames counted so far.
					int64_t cur_granulepos = (current_frame + 1) << ti.keyframe_granule_shift;
					th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &cur_granulepos, sizeof(cur_granulepos));
				}
				if (keyframe_found) {
					int64_t videobuf_granulepos;
					if (op.granulepos >= 0) {
						th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, sizeof(op.granulepos));
					}
					int ret = th_decode_packetin(td, &op, &videobuf_granulepos);
					if (ret == 0 || ret == TH_DUPFRAME) {
						last_video_time = th_granule_time(td, videobuf_granulepos);
						first_frame_decoded = true;
					}
				} else {
					current_frame++;
				}
			}
		}
	}

	if (first_frame_decoded) {
		if (is_playing()) {
			// Draw the current frame.
			th_ycbcr_buffer yuv;
			th_decode_ycbcr_out(td, yuv);
			video_write(yuv);
			current_frame_time = last_video_time;
		} else {
			// Not playing: flag the decoded frame as ready instead of drawing it here.
			next_frame_time = current_frame_time;
			video_ready = true;
		}
	}
}
int VideoStreamPlaybackTheora::get_channels() const {

View file

@ -51,8 +51,19 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
Point2i size;
Rect2i region;
float *audio_buffer = nullptr;
int audio_buffer_size = 0;
int audio_ptr_start = 0;
int audio_ptr_end = 0;
int buffer_data();
int queue_page(ogg_page *page);
int read_page(ogg_page *page);
int feed_pages();
double get_page_time(ogg_page *page);
int64_t seek_streams(double p_time, int64_t &video_granulepos, int64_t &audio_granulepos);
void find_streams(th_setup_info *&ts);
void read_headers(th_setup_info *&ts);
void video_write(th_ycbcr_buffer yuv);
double get_time() const;
@ -60,7 +71,6 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
bool vorbis_eos = false;
ogg_sync_state oy;
ogg_page og;
ogg_stream_state vo;
ogg_stream_state to;
th_info ti;
@ -71,19 +81,21 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
vorbis_block vb;
vorbis_comment vc;
th_pixel_fmt px_fmt;
double frame_duration;
double frame_duration = 0;
double stream_length = 0;
int64_t stream_data_offset = 0;
int64_t stream_data_size = 0;
int theora_p = 0;
int vorbis_p = 0;
int pp_level_max = 0;
int pp_level = 0;
int pp_inc = 0;
bool playing = false;
bool buffering = false;
bool paused = false;
bool dup_frame = false;
bool has_video = false;
bool has_audio = false;
bool video_ready = false;
bool video_done = false;
bool audio_done = false;
@ -100,6 +112,20 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
protected:
void clear();
// Push the frames pending in audio_buffer (from audio_ptr_start up to audio_ptr_end) to mix_callback.
// Returns true when nothing was pending or everything was accepted (the cursors are then reset to 0),
// and false when the callback took only part of the pending frames.
_FORCE_INLINE_ bool send_audio() {
	// Nothing buffered.
	if (audio_ptr_end <= 0) {
		return true;
	}
	const int pending = audio_ptr_end - audio_ptr_start;
	const int accepted = mix_callback(mix_udata, audio_buffer + audio_ptr_start * vi.channels, pending);
	audio_ptr_start += accepted;
	if (audio_ptr_start != audio_ptr_end) {
		// Frames remain; keep the cursors so the next call resumes from here.
		return false;
	}
	audio_ptr_start = 0;
	audio_ptr_end = 0;
	return true;
}
public:
virtual void play() override;
virtual void stop() override;

View file

@ -339,7 +339,6 @@ void VideoStreamPlayer::play() {
if (playback.is_null()) {
return;
}
playback->stop();
playback->play();
set_process_internal(true);
last_audio_time = 0;
@ -468,7 +467,9 @@ double VideoStreamPlayer::get_stream_position() const {
// Seek the current playback to p_position. Does nothing when there is no valid playback.
void VideoStreamPlayer::set_stream_position(double p_position) {
	if (!playback.is_valid()) {
		return;
	}
	// Flush the resampler before asking the playback to seek.
	resampler.flush();
	playback->seek(p_position);
	last_audio_time = 0;
}

View file

@ -75,23 +75,37 @@ uint32_t AudioRBResampler::_resample(AudioFrame *p_dest, int p_todo, int32_t p_i
p_dest[i] = AudioFrame(v0, v1);
}
// This will probably never be used, but added anyway
// Downmix to stereo. Apply -3dB to center, and sides, -6dB to rear.
// four channels - channel order: front left, front right, rear left, rear right
if constexpr (C == 4) {
float v0 = rb[(pos << 2) + 0];
float v1 = rb[(pos << 2) + 1];
float v0n = rb[(pos_next << 2) + 0];
float v1n = rb[(pos_next << 2) + 1];
float v0 = rb[(pos << 2) + 0] + rb[(pos << 2) + 2] / 2;
float v1 = rb[(pos << 2) + 1] + rb[(pos << 2) + 3] / 2;
float v0n = rb[(pos_next << 2) + 0] + rb[(pos_next << 2) + 2] / 2;
float v1n = rb[(pos_next << 2) + 1] + rb[(pos_next << 2) + 3] / 2;
v0 += (v0n - v0) * frac;
v1 += (v1n - v1) * frac;
p_dest[i] = AudioFrame(v0, v1);
}
// six channels - channel order: front left, center, front right, rear left, rear right, LFE
if constexpr (C == 6) {
float v0 = rb[(pos * 6) + 0];
float v1 = rb[(pos * 6) + 1];
float v0n = rb[(pos_next * 6) + 0];
float v1n = rb[(pos_next * 6) + 1];
float v0 = rb[(pos * 6) + 0] + rb[(pos * 6) + 1] / Math::SQRT2 + rb[(pos * 6) + 3] / 2;
float v1 = rb[(pos * 6) + 2] + rb[(pos * 6) + 1] / Math::SQRT2 + rb[(pos * 6) + 4] / 2;
float v0n = rb[(pos_next * 6) + 0] + rb[(pos_next * 6) + 1] / Math::SQRT2 + rb[(pos_next * 6) + 3] / 2;
float v1n = rb[(pos_next * 6) + 2] + rb[(pos_next * 6) + 1] / Math::SQRT2 + rb[(pos_next * 6) + 4] / 2;
v0 += (v0n - v0) * frac;
v1 += (v1n - v1) * frac;
p_dest[i] = AudioFrame(v0, v1);
}
// eight channels - channel order: front left, center, front right, side left, side right, rear left, rear
// right, LFE
if constexpr (C == 8) {
float v0 = rb[(pos << 3) + 0] + rb[(pos << 3) + 1] / Math::SQRT2 + rb[(pos << 3) + 3] / Math::SQRT2 + rb[(pos << 3) + 5] / 2;
float v1 = rb[(pos << 3) + 2] + rb[(pos << 3) + 1] / Math::SQRT2 + rb[(pos << 3) + 4] / Math::SQRT2 + rb[(pos << 3) + 6] / 2;
float v0n = rb[(pos_next << 3) + 0] + rb[(pos_next << 3) + 1] / Math::SQRT2 + rb[(pos_next << 3) + 3] / Math::SQRT2 + rb[(pos_next << 3) + 5] / 2;
float v1n = rb[(pos_next << 3) + 2] + rb[(pos_next << 3) + 1] / Math::SQRT2 + rb[(pos_next << 3) + 4] / Math::SQRT2 + rb[(pos_next << 3) + 6] / 2;
v0 += (v0n - v0) * frac;
v1 += (v1n - v1) * frac;
p_dest[i] = AudioFrame(v0, v1);
@ -125,6 +139,9 @@ bool AudioRBResampler::mix(AudioFrame *p_dest, int p_frames) {
case 6:
src_read = _resample<6>(p_dest, target_todo, increment);
break;
case 8:
src_read = _resample<8>(p_dest, target_todo, increment);
break;
}
if (src_read > read_space) {
@ -159,7 +176,7 @@ int AudioRBResampler::get_num_of_ready_frames() {
}
Error AudioRBResampler::setup(int p_channels, int p_src_mix_rate, int p_target_mix_rate, int p_buffer_msec, int p_minbuff_needed) {
ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6, ERR_INVALID_PARAMETER);
ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6 && p_channels != 8, ERR_INVALID_PARAMETER);
int desired_rb_bits = nearest_shift(MAX((p_buffer_msec / 1000.0) * p_src_mix_rate, p_minbuff_needed));

View file

@ -152,6 +152,19 @@ public:
wp = (wp + 1) & rb_mask;
}
} break;
case 8: {
for (uint32_t i = 0; i < p_frames; i++) {
rb[(wp << 3) + 0] = read_buf[(i << 3) + 0];
rb[(wp << 3) + 1] = read_buf[(i << 3) + 1];
rb[(wp << 3) + 2] = read_buf[(i << 3) + 2];
rb[(wp << 3) + 3] = read_buf[(i << 3) + 3];
rb[(wp << 3) + 4] = read_buf[(i << 3) + 4];
rb[(wp << 3) + 5] = read_buf[(i << 3) + 5];
rb[(wp << 3) + 6] = read_buf[(i << 3) + 6];
rb[(wp << 3) + 7] = read_buf[(i << 3) + 7];
wp = (wp + 1) & rb_mask;
}
} break;
}
rb_write_pos.set(wp);