Merge pull request #99504 from LunaCapra/audio-metadata

Add metadata tags to WAV and OGG audio streams
This commit is contained in:
Rémi Verschelde 2025-06-05 13:10:12 +02:00
commit 8131883b16
No known key found for this signature in database
GPG key ID: C3336907360768E1
9 changed files with 153 additions and 0 deletions

View file

@ -51,6 +51,13 @@
Override this method to customize the name assigned to this audio stream. Unused by the engine.
</description>
</method>
<method name="_get_tags" qualifiers="virtual const">
<return type="Dictionary" />
<description>
Override this method to customize the tags for this audio stream. Should return a [Dictionary] of strings with the tag as the key and its content as the value.
Commonly used tags include [code]title[/code], [code]artist[/code], [code]album[/code], [code]tracknumber[/code], and [code]date[/code].
</description>
</method>
<method name="_has_loop" qualifiers="virtual const">
<return type="bool" />
<description>

View file

@ -79,6 +79,12 @@
<member name="stereo" type="bool" setter="set_stereo" getter="is_stereo" default="false">
If [code]true[/code], audio is stereo.
</member>
<member name="tags" type="Dictionary" setter="set_tags" getter="get_tags" default="{}">
Contains user-defined tags if found in the WAV data.
Commonly used tags include [code]title[/code], [code]artist[/code], [code]album[/code], [code]tracknumber[/code], and [code]date[/code] ([code]date[/code] does not have a standard date format).
[b]Note:[/b] No tag is [i]guaranteed[/i] to be present in every file, so make sure to account for the keys not always existing.
[b]Note:[/b] Only WAV files using a [code]LIST[/code] chunk with an identifier of [code]INFO[/code] to encode the tags are currently supported.
</member>
</members>
<constants>
<constant name="FORMAT_8_BITS" value="0" enum="Format">

View file

@ -456,6 +456,23 @@ void AudioStreamOggVorbis::maybe_update_info() {
ERR_FAIL_COND_MSG(err != 0, "Error parsing header packet " + itos(i) + ": " + itos(err));
}
Dictionary dictionary;
for (int i = 0; i < comment.comments; i++) {
String c = String::utf8(comment.user_comments[i]);
int equals = c.find_char('=');
if (equals == -1) {
WARN_PRINT("Invalid comment in Ogg Vorbis file.");
continue;
}
String tag = c.substr(0, equals);
String tag_value = c.substr(equals + 1);
dictionary[tag.to_lower()] = tag_value;
}
tags = dictionary;
packet_sequence->set_sampling_rate(info.rate);
vorbis_comment_clear(&comment);
@ -524,6 +541,14 @@ int AudioStreamOggVorbis::get_bar_beats() const {
return bar_beats;
}
// Replaces this stream's metadata tag dictionary with `p_tags`.
// Bound as the setter of the `tags` property in `_bind_methods()`.
void AudioStreamOggVorbis::set_tags(const Dictionary &p_tags) {
tags = p_tags;
}
// Returns the metadata tag dictionary currently stored on this stream
// (filled from the Vorbis comments in `maybe_update_info()` or set via `set_tags()`).
Dictionary AudioStreamOggVorbis::get_tags() const {
return tags;
}
// Always reports `false` for this stream type.
bool AudioStreamOggVorbis::is_monophonic() const {
return false;
}
@ -692,10 +717,14 @@ void AudioStreamOggVorbis::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_bar_beats", "count"), &AudioStreamOggVorbis::set_bar_beats);
ClassDB::bind_method(D_METHOD("get_bar_beats"), &AudioStreamOggVorbis::get_bar_beats);
ClassDB::bind_method(D_METHOD("set_tags", "tags"), &AudioStreamOggVorbis::set_tags);
ClassDB::bind_method(D_METHOD("get_tags"), &AudioStreamOggVorbis::get_tags);
ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "packet_sequence", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_packet_sequence", "get_packet_sequence");
ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "bpm", PROPERTY_HINT_RANGE, "0,400,0.01,or_greater"), "set_bpm", "get_bpm");
ADD_PROPERTY(PropertyInfo(Variant::INT, "beat_count", PROPERTY_HINT_RANGE, "0,512,1,or_greater"), "set_beat_count", "get_beat_count");
ADD_PROPERTY(PropertyInfo(Variant::INT, "bar_beats", PROPERTY_HINT_RANGE, "2,32,1,or_greater"), "set_bar_beats", "get_bar_beats");
ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "tags", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_tags", "get_tags");
ADD_PROPERTY(PropertyInfo(Variant::BOOL, "loop"), "set_loop", "has_loop");
ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "loop_offset"), "set_loop_offset", "get_loop_offset");
}

View file

@ -133,6 +133,7 @@ class AudioStreamOggVorbis : public AudioStream {
double bpm = 0;
int beat_count = 0;
int bar_beats = 4;
Dictionary tags;
protected:
static void _bind_methods();
@ -156,6 +157,9 @@ public:
void set_bar_beats(int p_bar_beats);
virtual int get_bar_beats() const override;
void set_tags(const Dictionary &p_tags);
virtual Dictionary get_tags() const override;
virtual Ref<AudioStreamPlayback> instantiate_playback() override;
virtual String get_stream_name() const override;

View file

@ -41,5 +41,10 @@
<member name="packet_sequence" type="OggPacketSequence" setter="set_packet_sequence" getter="get_packet_sequence">
Contains the raw Ogg data for this stream.
</member>
<member name="tags" type="Dictionary" setter="set_tags" getter="get_tags" default="{}">
Contains user-defined tags if found in the Ogg Vorbis data.
Commonly used tags include [code]title[/code], [code]artist[/code], [code]album[/code], [code]tracknumber[/code], and [code]date[/code] ([code]date[/code] does not have a standard date format).
[b]Note:[/b] No tag is [i]guaranteed[/i] to be present in every file, so make sure to account for the keys not always existing.
</member>
</members>
</class>

View file

@ -477,6 +477,18 @@ bool AudioStreamWAV::is_stereo() const {
return stereo;
}
// Replaces this stream's metadata tag dictionary with `p_tags`.
// Bound as the setter of the `tags` property in `_bind_methods()`.
void AudioStreamWAV::set_tags(const Dictionary &p_tags) {
tags = p_tags;
}
// Returns the metadata tag dictionary currently stored on this stream.
Dictionary AudioStreamWAV::get_tags() const {
return tags;
}
// Looks up `p_tag_id` (a RIFF INFO identifier such as "INAM") in `tag_id_remaps`, the table
// filled in by the constructor, and returns the result of the lookup as an iterator.
// Callers test the returned iterator for truthiness before reading `->value`.
HashMap<String, String>::ConstIterator AudioStreamWAV::remap_tag_id(const String &p_tag_id) {
return tag_id_remaps.find(p_tag_id);
}
double AudioStreamWAV::get_length() const {
int len = data_bytes;
switch (format) {
@ -703,6 +715,8 @@ Ref<AudioStreamWAV> AudioStreamWAV::load_from_buffer(const Vector<uint8_t> &p_st
Vector<float> data;
HashMap<String, String> tag_map;
while (!file->eof_reached()) {
/* chunk */
char chunk_id[4];
@ -858,6 +872,40 @@ Ref<AudioStreamWAV> AudioStreamWAV::load_from_buffer(const Vector<uint8_t> &p_st
loop_end = file->get_32();
}
}
if (chunk_id[0] == 'L' && chunk_id[1] == 'I' && chunk_id[2] == 'S' && chunk_id[3] == 'T') {
	// RIFF 'LIST' chunk.
	// See https://www.recordingblogs.com/wiki/list-chunk-of-a-wave-file
	char list_id[4];
	file->get_buffer((uint8_t *)&list_id, 4);

	if (list_id[0] == 'I' && list_id[1] == 'N' && list_id[2] == 'F' && list_id[3] == 'O') {
		// 'INFO' list type: a sequence of entries, each made of a 4-byte ID, a 32-bit
		// size and `size` bytes of text. The size of an entry can be arbitrary.
		//
		// The payload of this chunk ends at `file_pos + chunksize`, the same position the
		// seek to the next chunk is based on. Computed in 64 bits so a bogus `chunksize`
		// cannot wrap around.
		const uint64_t end_of_chunk = (uint64_t)file_pos + chunksize;

		// Every entry needs at least its 8-byte header. Also stop at end of file so a chunk
		// that claims to be longer than the data cannot keep the loop spinning.
		while (!file->eof_reached() && file->get_position() + 8 <= end_of_chunk) {
			char info_id[4];
			file->get_buffer((uint8_t *)&info_id, 4);
			const uint32_t text_size = file->get_32();

			// The size comes from the file and is untrusted: an entry that would run past
			// the end of the chunk is malformed, so stop instead of allocating for it.
			if (text_size > end_of_chunk - file->get_position()) {
				break;
			}

			// An empty entry carries no value, and indexing an empty vector is invalid.
			if (text_size == 0) {
				continue;
			}

			Vector<char> text;
			text.resize(text_size);
			file->get_buffer((uint8_t *)text.ptrw(), text_size);

			// RIFF pads odd-sized chunks with one byte that is not counted in the size.
			// Skip it so the next entry header is read from the right offset.
			if (text_size & 1) {
				file->get_8();
			}

			// The data is always an ASCII string. ASCII is a subset of UTF-8.
			String tag;
			tag.append_utf8(&info_id[0], 4);

			String tag_value;
			tag_value.append_utf8(text.ptr(), text_size);

			tag_map[tag] = tag_value;
		}
	}
}
// Move to the start of the next chunk. Note that RIFF requires a padding byte for odd
// chunk sizes.
file->seek(file_pos + chunksize + (chunksize & 1));
@ -1097,6 +1145,18 @@ Ref<AudioStreamWAV> AudioStreamWAV::load_from_buffer(const Vector<uint8_t> &p_st
sample->set_loop_begin(loop_begin);
sample->set_loop_end(loop_end);
sample->set_stereo(format_channels == 2);
// Convert the raw RIFF INFO entries collected while parsing into the stream's tag dictionary.
// Known INFO IDs are translated to the unified tag names (e.g. "INAM" -> "title"); IDs without
// a remap keep their raw four-character ID as the key.
//
// The output key is computed locally instead of rewriting keys inside `tag_map`: the map is
// being iterated here, and the result should not depend on how the container behaves when
// one of its keys is replaced mid-iteration.
Dictionary tag_dictionary;
for (const KeyValue<String, String> &E : tag_map) {
	HashMap<String, String>::ConstIterator remap = sample->remap_tag_id(E.key);
	const String &tag_name = remap ? remap->value : E.key;
	tag_dictionary[tag_name] = E.value;
}
sample->set_tags(tag_dictionary);
return sample;
}
@ -1131,6 +1191,9 @@ void AudioStreamWAV::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_stereo", "stereo"), &AudioStreamWAV::set_stereo);
ClassDB::bind_method(D_METHOD("is_stereo"), &AudioStreamWAV::is_stereo);
ClassDB::bind_method(D_METHOD("set_tags", "tags"), &AudioStreamWAV::set_tags);
ClassDB::bind_method(D_METHOD("get_tags"), &AudioStreamWAV::get_tags);
ClassDB::bind_method(D_METHOD("save_to_wav", "path"), &AudioStreamWAV::save_to_wav);
ADD_PROPERTY(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "data", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_data", "get_data");
@ -1140,6 +1203,7 @@ void AudioStreamWAV::_bind_methods() {
ADD_PROPERTY(PropertyInfo(Variant::INT, "loop_end"), "set_loop_end", "get_loop_end");
ADD_PROPERTY(PropertyInfo(Variant::INT, "mix_rate"), "set_mix_rate", "get_mix_rate");
ADD_PROPERTY(PropertyInfo(Variant::BOOL, "stereo"), "set_stereo", "is_stereo");
ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "tags", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_tags", "get_tags");
BIND_ENUM_CONSTANT(FORMAT_8_BITS);
BIND_ENUM_CONSTANT(FORMAT_16_BITS);
@ -1151,3 +1215,22 @@ void AudioStreamWAV::_bind_methods() {
BIND_ENUM_CONSTANT(LOOP_PINGPONG);
BIND_ENUM_CONSTANT(LOOP_BACKWARD);
}
// Fills `tag_id_remaps` with the translation from RIFF INFO identifiers to the tag names
// used to make the metadata tags more unified across different AudioStreams.
// See https://www.recordingblogs.com/wiki/list-chunk-of-a-wave-file
AudioStreamWAV::AudioStreamWAV() {
	// { RIFF INFO identifier, unified tag name }
	static const char *const info_id_to_tag[][2] = {
		{ "IARL", "location" },
		{ "IART", "artist" },
		{ "ICMS", "organization" },
		{ "ICMT", "comments" },
		{ "ICOP", "copyright" },
		{ "ICRD", "date" },
		{ "IGNR", "genre" },
		{ "IKEY", "keywords" },
		{ "IMED", "medium" },
		{ "INAM", "title" },
		{ "IPRD", "album" },
		{ "ISBJ", "description" },
		{ "ISFT", "software" },
		{ "ITRK", "tracknumber" },
	};

	for (const auto &entry : info_id_to_tag) {
		tag_id_remaps[entry[0]] = entry[1];
	}
}

View file

@ -124,6 +124,9 @@ private:
LocalVector<uint8_t> data;
uint32_t data_bytes = 0;
HashMap<String, String> tag_id_remaps;
Dictionary tags;
protected:
static void _bind_methods();
@ -149,6 +152,11 @@ public:
void set_stereo(bool p_enable);
bool is_stereo() const;
void set_tags(const Dictionary &p_tags);
virtual Dictionary get_tags() const override;
HashMap<String, String>::ConstIterator remap_tag_id(const String &p_tag_id);
virtual double get_length() const override; //if supported, otherwise return 0
virtual bool is_monophonic() const override;
@ -284,6 +292,8 @@ public:
dst_ptr += qoa_encode_frame(data16.ptr(), p_desc, frame_len, dst_ptr);
}
}
AudioStreamWAV();
};
VARIANT_ENUM_CAST(AudioStreamWAV::Format)

View file

@ -297,6 +297,12 @@ int AudioStream::get_beat_count() const {
return ret;
}
// Base implementation of the tag query: starts from an empty dictionary, hands it to the
// `_get_tags` virtual through GDVIRTUAL_CALL, and returns whatever ends up in it.
// NOTE(review): presumably the dictionary stays empty when `_get_tags` is not overridden;
// confirm against the GDVIRTUAL_CALL macro.
Dictionary AudioStream::get_tags() const {
	Dictionary script_tags;
	GDVIRTUAL_CALL(_get_tags, script_tags);
	return script_tags;
}
void AudioStream::tag_used(float p_offset) {
if (tagged_frame != AudioServer::get_singleton()->get_mixed_frames()) {
offset_count = 0;
@ -350,6 +356,7 @@ void AudioStream::_bind_methods() {
GDVIRTUAL_BIND(_is_monophonic);
GDVIRTUAL_BIND(_get_bpm)
GDVIRTUAL_BIND(_get_beat_count)
GDVIRTUAL_BIND(_get_tags);
GDVIRTUAL_BIND(_get_parameter_list)
GDVIRTUAL_BIND(_has_loop);
GDVIRTUAL_BIND(_get_bar_beats);

View file

@ -178,6 +178,7 @@ protected:
GDVIRTUAL0RC(bool, _has_loop)
GDVIRTUAL0RC(int, _get_bar_beats)
GDVIRTUAL0RC(int, _get_beat_count)
GDVIRTUAL0RC(Dictionary, _get_tags);
GDVIRTUAL0RC(TypedArray<Dictionary>, _get_parameter_list)
public:
@ -188,6 +189,7 @@ public:
virtual bool has_loop() const;
virtual int get_bar_beats() const;
virtual int get_beat_count() const;
virtual Dictionary get_tags() const;
virtual double get_length() const;
virtual bool is_monophonic() const;