From 987f5e325920704f8490725572ae82658c0a6c94 Mon Sep 17 00:00:00 2001 From: Victor Gaydov Date: Sat, 20 Jul 2024 20:17:28 +0400 Subject: [PATCH] [spo] gh-731: PLC plugin refinements PLC sample rate & channel layout actually match network encoding, not local frame encoding. So now we pass roc_media_encoding dynamically to PLC instead of stating that encoding is the same as receiver_config.frame_encoding. Since PLC encoding does not match frame encoding, we don't use format from frame encoding, but instead we always use ROC_FORMAT_PCM_FLOAT32 and state this in documentation. Sponsored-by: waspd --- .../roc_pipeline/receiver_session.cpp | 21 +--- .../roc_pipeline/receiver_session.h | 2 - src/public_api/examples/plugin_plc.c | 97 +++++++++++++++---- src/public_api/include/roc/plugin.h | 32 ++++-- src/public_api/src/adapters.cpp | 77 ++++++++++++++- src/public_api/src/adapters.h | 7 +- src/public_api/src/plugin_plc.cpp | 12 ++- src/tests/public_api/test_plugin_plc.cpp | 7 +- 8 files changed, 200 insertions(+), 55 deletions(-) diff --git a/src/internal_modules/roc_pipeline/receiver_session.cpp b/src/internal_modules/roc_pipeline/receiver_session.cpp index 35c62ef19..72aa3493c 100644 --- a/src/internal_modules/roc_pipeline/receiver_session.cpp +++ b/src/internal_modules/roc_pipeline/receiver_session.cpp @@ -186,31 +186,12 @@ ReceiverSession::ReceiverSession(const ReceiverSessionConfig& session_config, return; } - if (plc_->sample_spec() != out_spec) { - plc_pre_mapper_.reset(new (plc_pre_mapper_) audio::PcmMapperReader( - *frm_reader, frame_factory, out_spec, plc_->sample_spec())); - if ((init_status_ = plc_pre_mapper_->init_status()) != status::StatusOK) { - return; - } - frm_reader = plc_pre_mapper_.get(); - } - plc_reader_.reset(new (plc_reader_) audio::PlcReader( - *frm_reader, frame_factory, *plc_, plc_->sample_spec())); + *frm_reader, frame_factory, *plc_, out_spec)); if ((init_status_ = plc_reader_->init_status()) != status::StatusOK) { return; } frm_reader = plc_reader_.get(); - - if (plc_->sample_spec() != out_spec) { - plc_post_mapper_.reset(new (plc_post_mapper_) audio::PcmMapperReader( - *frm_reader, frame_factory, plc_->sample_spec(), out_spec)); - if ((init_status_ = plc_post_mapper_->init_status()) - != status::StatusOK) { - return; - } - frm_reader = plc_post_mapper_.get(); - } } if (session_config.watchdog.no_playback_timeout >= 0 diff --git a/src/internal_modules/roc_pipeline/receiver_session.h b/src/internal_modules/roc_pipeline/receiver_session.h index 1256166f7..3d5f534ed 100644 --- a/src/internal_modules/roc_pipeline/receiver_session.h +++ b/src/internal_modules/roc_pipeline/receiver_session.h @@ -147,10 +147,8 @@ class ReceiverSession : public core::RefCounted timestamp_injector_; core::Optional depacketizer_; - core::Optional plc_pre_mapper_; core::ScopedPtr plc_; core::Optional plc_reader_; - core::Optional plc_post_mapper_; core::Optional watchdog_; core::Optional channel_mapper_reader_; core::SharedPtr resampler_; diff --git a/src/public_api/examples/plugin_plc.c b/src/public_api/examples/plugin_plc.c index ac38a132a..5c65fc731 100644 --- a/src/public_api/examples/plugin_plc.c +++ b/src/public_api/examples/plugin_plc.c @@ -6,7 +6,7 @@ * repair lost packets. * * Building: - * cc -o plugin_plc plugin_plc.c -lroc + * cc -o plugin_plc plugin_plc.c -lroc -lm * * Running: * ./plugin_plc @@ -15,6 +15,7 @@ * public domain */ +#include #include #include #include @@ -29,9 +30,10 @@ /* Audio parameters. */ #define MY_SAMPLE_RATE 44100 #define MY_CHANNEL_COUNT 2 +#define MY_SINE_RATE 440 /* How much sample after a gap PLC needs for interpolation. */ -#define MY_LOOKAHEAD_SIZE 4410 /* 100 ms */ +#define MY_LOOKAHEAD_LEN_MS 100 #define oops() \ do { \ @@ -46,15 +48,49 @@ struct my_plc { /* Here we could put state needed for interpolation. */ unsigned int history_frame_counter; unsigned int lost_frame_counter; + unsigned int sample_rate; + unsigned int channel_count; }; /* Create plugin instance. */ -static void* my_plc_new(roc_plugin_plc* plugin) { - return calloc(1, sizeof(struct my_plc)); +static void* my_plc_new(roc_plugin_plc* plugin, const roc_media_encoding* encoding) { + printf("creating plc plugin instance\n"); + + /* Note that sample rate and channel layout may have arbitrary values, + * depending on the encoding used by the connection for which this + * instance is created. + * + * Sample format is, however, always ROC_FORMAT_PCM_FLOAT32. + */ + printf("using encoding:\n"); + printf(" sample_format = %u\n", encoding->format); + printf(" sample_rate = %u\n", encoding->rate); + printf(" channel_layout = %u\n", encoding->channels); + + struct my_plc* plc = calloc(1, sizeof(struct my_plc)); + + plc->sample_rate = encoding->rate; + + switch (encoding->channels) { + case ROC_CHANNEL_LAYOUT_MONO: + plc->channel_count = 1; + break; + case ROC_CHANNEL_LAYOUT_STEREO: + plc->channel_count = 2; + break; + default: + printf("unsupported channel layout"); + free(plc); + return NULL; + } + + return plc; } /* Delete plugin instance. */ static void my_plc_delete(void* plugin_instance) { + printf("deleting plc plugin instance\n"); + struct my_plc* plc = (struct my_plc*)plugin_instance; free(plc); @@ -65,7 +101,10 @@ static void my_plc_delete(void* plugin_instance) { * Returned value is measured as the number of samples per channel, * e.g. if sample rate is 44100Hz, length 4410 is 100ms */ static unsigned int my_plc_lookahead_len(void* plugin_instance) { - return MY_LOOKAHEAD_SIZE; + struct my_plc* plc = (struct my_plc*)plugin_instance; + + /* Convert milliseconds to number of samples. */ + return (unsigned int)(plc->sample_rate / 1000.0f * MY_LOOKAHEAD_LEN_MS); } /* Called when next frame is good (no loss). */ @@ -73,9 +112,24 @@ static void my_plc_process_history(void* plugin_instance, const roc_frame* history_frame) { struct my_plc* plc = (struct my_plc*)plugin_instance; - /* Here we can copy samples from history_frame to ring buffer. - * In this example we just ignore frame. */ + /* Here we can copy samples from history_frame to a ring buffer. + * In this example we just history ignore frame. + * Remember that history_frame will be invalidated after the callback + * returns, so we'd need to do a deep copy if we want to use it later. + */ plc->history_frame_counter++; + +#if 0 + /* Debug logs. In production code, it's not recommended to call functions like + * printf() from processing callbacks, because they may block real-time + * pipeline thread and cause priority inversion problems. You can either avoid + * logging in processing callbacks or use a lock-free logger if you have one. + */ + if (plc->history_frame_counter % 100 == 0) { + printf("plc: history_frame_counter=%u lost_frame_counter=%u\n", + plc->history_frame_counter, plc->lost_frame_counter); + } +#endif } /* Called when next frame is lost and we must fill it with interpolated data. @@ -91,20 +145,26 @@ static void my_plc_process_loss(void* plugin_instance, const roc_frame* lookahead_frame) { struct my_plc* plc = (struct my_plc*)plugin_instance; - /* Here we can implement interpolation. - * In this example we just fill frame with constants. - * Samples are float because we use ROC_FORMAT_PCM_FLOAT32. - * There are two channels because we use ROC_CHANNEL_LAYOUT_STEREO. */ + /* Here we can implement interpolation. In this example we just fill lost frame + * with a sine wave, thus we turn a loss into a beep. + * + * PLC plugin always uses ROC_FORMAT_PCM_FLOAT32, so we cast samples to float. + * + * PLC plugin may be asked to use arbitrary sample rate and channel layout, + * so we use plc->sample_rate and plc->channel_count instead of + * MY_SAMPLE_RATE and MY_CHANNEL_COUNT. + */ float* lost_samples = lost_frame->samples; size_t lost_sample_count = - lost_frame->samples_size / sizeof(float) / MY_CHANNEL_COUNT; + lost_frame->samples_size / sizeof(float) / plc->channel_count; for (unsigned ns = 0; ns < lost_sample_count; ns++) { - for (unsigned c = 0; c < MY_CHANNEL_COUNT; c++) { - lost_samples[0] = 0.123f; /* left channel */ - lost_samples[1] = 0.456f; /* right channel */ + const float s = + (float)sin(2 * 3.14159265359 * MY_SINE_RATE / plc->sample_rate * ns) * 0.1f; + + for (unsigned nc = 0; nc < plc->channel_count; nc++) { + *lost_samples++ = s; } - lost_samples += MY_CHANNEL_COUNT; } plc->lost_frame_counter++; @@ -140,9 +200,8 @@ int main() { roc_receiver_config receiver_config; memset(&receiver_config, 0, sizeof(receiver_config)); - /* Setup frame format. - * This format applies to frames that we read from receiver, as well as to - * the frames passed to PLC plugin. */ + /* Setup frame encoding that we read from receiver. + * Note that this encoding is different from the encoding used by PLC plugin. */ receiver_config.frame_encoding.rate = MY_SAMPLE_RATE; receiver_config.frame_encoding.format = ROC_FORMAT_PCM_FLOAT32; receiver_config.frame_encoding.channels = ROC_CHANNEL_LAYOUT_STEREO; diff --git a/src/public_api/include/roc/plugin.h b/src/public_api/include/roc/plugin.h index 2de3964f1..b65d27507 100644 --- a/src/public_api/include/roc/plugin.h +++ b/src/public_api/include/roc/plugin.h @@ -14,6 +14,7 @@ #ifndef ROC_PLUGIN_H_ #define ROC_PLUGIN_H_ +#include "roc/config.h" #include "roc/frame.h" #include "roc/platform.h" @@ -93,6 +94,19 @@ enum { * If lookahead_len_cb() returns non-zero, process_loss_cb() will be provided * with the frame following the lost one, if it is available. * + * **Encoding** + * + * Media encoding used for frames passed to PLC plugin is determined dynamically. + * When new_cb() is invoked, \c encoding argument defines what to use: + * + * - \c rate and \c channels are the same as used in network packets of + * this particular connection; PLC plugin must be ready to work with + * arbitrary values, unless it's known that only certain packet encoding + * may be used by sender + * + * - \c format is always \ref ROC_FORMAT_PCM_FLOAT32, PLC plugin doesn't + * need to support other formats + * * **Registration** * * PLC plugin should be registered using roc_context_register_plc() and then @@ -118,8 +132,10 @@ typedef struct roc_plugin_plc { * **Parameters** * - \p plugin is a pointer to plugin callback table passed to * roc_context_register_plc() + * - \p encoding defines encoding of the frames that will be passed to + * plugin callbacks */ - void* (*new_cb)(struct roc_plugin_plc* plugin); + void* (*new_cb)(struct roc_plugin_plc* plugin, const roc_media_encoding* encoding); /** Callback to delete plugin instance. * @@ -130,7 +146,7 @@ typedef struct roc_plugin_plc { */ void (*delete_cb)(void* plugin_instance); - /** Obtain PLC look-ahead length, as number of samples per channel. + /** Callback to obtain PLC look-ahead length, as number of samples per channel. * * Returned value defines how many samples following immediately after the lost frame * PLC wants to use for interpolation. See process_loss_cb() for details. @@ -146,11 +162,11 @@ typedef struct roc_plugin_plc { * If plugin wants to store frame for later use, it should copy its samples. * * The size of \p history_frame is arbitrary and may vary each call. The format of - * the frame is defined by \c frame_encoding field of \ref roc_receiver_config. + * the frame is defined by \c encoding argument of new_cb(). * * **Parameters** * - \p plugin_instance is a pointer to the instance returned by new_cb() - * - \p history_frame is a pointer to a read-only frame with decoded data + * - \p history_frame points to read-only frame with decoded data * * **Ownership** * - frame and its data can't be used after the callback returns @@ -170,14 +186,12 @@ typedef struct roc_plugin_plc { * It's present only if packets following the loss happened to arrive early enough. * * The size of both frames is arbitrary and may vary each call. The format of the - * frames is defined by \c frame_encoding field of \ref roc_receiver_config. + * frames is defined by \c encoding argument of new_cb(). * * **Parameters** * - \p plugin_instance is a pointer to the instance returned by new_cb() - * - \p lost_frame is a pointer to a writable frame to be filled with the - * interpolated data - * - \p lookahead_frame is a pointer to a read-only frame following the - * lost one + * - \p lost_frame points to writable frame to be filled with the interpolation + * - \p lookahead_frame points to read-only frame following the lost one * * **Ownership** * - frames and their data can't be used after the callback returns diff --git a/src/public_api/src/adapters.cpp b/src/public_api/src/adapters.cpp index 40c522b11..d56464f00 100644 --- a/src/public_api/src/adapters.cpp +++ b/src/public_api/src/adapters.cpp @@ -339,6 +339,26 @@ bool sample_spec_from_user(audio::SampleSpec& out, return true; } +bool sample_spec_to_user(roc_media_encoding& out, const audio::SampleSpec& in) { + memset(&out, 0, sizeof(out)); + + if (!in.is_valid()) { + return false; + } + + out.rate = (unsigned int)in.sample_rate(); + + if (!sample_format_to_user(out.format, in)) { + return false; + } + + if (!channel_set_to_user(out.channels, out.tracks, in.channel_set())) { + return false; + } + + return true; +} + ROC_ATTR_NO_SANITIZE_UB bool sample_format_from_user(audio::SampleSpec& out, roc_format in, bool is_network) { switch (enum_from_user(in)) { @@ -353,13 +373,35 @@ bool sample_format_from_user(audio::SampleSpec& out, roc_format in, bool is_netw return false; } +bool sample_format_to_user(roc_format& out, const audio::SampleSpec& in) { + if (in.sample_format() != audio::SampleFormat_Pcm) { + return false; + } + + const audio::PcmTraits traits = audio::pcm_format_traits(in.pcm_format()); + if (!traits.is_valid || !traits.is_native) { + return false; + } + + switch (traits.native_alias) { + case audio::PcmFormat_Float32: + out = ROC_FORMAT_PCM_FLOAT32; + return true; + + default: + break; + } + + return false; +} + ROC_ATTR_NO_SANITIZE_UB bool channel_set_from_user(audio::ChannelSet& out, - roc_channel_layout in, + roc_channel_layout in_layout, unsigned int in_tracks) { out.clear(); - switch (enum_from_user(in)) { + switch (enum_from_user(in_layout)) { case ROC_CHANNEL_LAYOUT_MULTITRACK: out.set_layout(audio::ChanLayout_Multitrack); out.set_order(audio::ChanOrder_None); @@ -382,6 +424,37 @@ bool channel_set_from_user(audio::ChannelSet& out, return false; } +bool channel_set_to_user(roc_channel_layout& out_layout, + unsigned int& out_tracks, + const audio::ChannelSet& in) { + switch (in.layout()) { + case audio::ChanLayout_Surround: + if (in.order() == audio::ChanOrder_Smpte) { + if (in.is_equal(audio::ChanMask_Surround_Mono)) { + out_layout = ROC_CHANNEL_LAYOUT_MONO; + out_tracks = 0; + return true; + } + if (in.is_equal(audio::ChanMask_Surround_Stereo)) { + out_layout = ROC_CHANNEL_LAYOUT_STEREO; + out_tracks = 0; + return true; + } + } + break; + + case audio::ChanLayout_Multitrack: + out_layout = ROC_CHANNEL_LAYOUT_MULTITRACK; + out_tracks = (unsigned int)in.num_channels(); + return true; + + case audio::ChanLayout_None: + break; + } + + return false; +} + ROC_ATTR_NO_SANITIZE_UB bool clock_source_from_user(bool& out_timing, roc_clock_source in) { switch (enum_from_user(in)) { diff --git a/src/public_api/src/adapters.h b/src/public_api/src/adapters.h index e4da26b12..933c59805 100644 --- a/src/public_api/src/adapters.h +++ b/src/public_api/src/adapters.h @@ -36,12 +36,17 @@ bool interface_config_from_user(netio::UdpConfig& out, const roc_interface_confi bool sample_spec_from_user(audio::SampleSpec& out, const roc_media_encoding& in, bool is_network); +bool sample_spec_to_user(roc_media_encoding& out, const audio::SampleSpec& in); bool sample_format_from_user(audio::SampleSpec& out, roc_format in, bool is_network); +bool sample_format_to_user(roc_format& out, const audio::SampleSpec& in); bool channel_set_from_user(audio::ChannelSet& out, - roc_channel_layout in, + roc_channel_layout in_layout, unsigned int in_tracks); +bool channel_set_to_user(roc_channel_layout& out_layout, + unsigned int& out_tracks, + const audio::ChannelSet& in); bool clock_source_from_user(bool& out_timing, roc_clock_source in); diff --git a/src/public_api/src/plugin_plc.cpp b/src/public_api/src/plugin_plc.cpp index 4376301f1..2f4f2e6cb 100644 --- a/src/public_api/src/plugin_plc.cpp +++ b/src/public_api/src/plugin_plc.cpp @@ -7,6 +7,7 @@ */ #include "plugin_plc.h" +#include "adapters.h" #include "roc_core/log.h" #include "roc_core/panic.h" @@ -63,11 +64,20 @@ PluginPlc::PluginPlc(roc_plugin_plc* plugin, const audio::SampleSpec& sample_spe roc_panic_if(!plugin_); roc_panic_if(!validate(plugin_)); - plugin_instance_ = plugin_->new_cb(plugin_); + roc_media_encoding encoding; + if (!api::sample_spec_to_user(encoding, sample_spec_)) { + roc_log( + LogError, + "roc_plugin_plc: failed to create plugin instance: unsupported sample spec"); + return; + } + + plugin_instance_ = plugin_->new_cb(plugin_, &encoding); if (!plugin_instance_) { roc_log( LogError, "roc_plugin_plc: failed to create plugin instance: new_cb() returned null"); + return; } } diff --git a/src/tests/public_api/test_plugin_plc.cpp b/src/tests/public_api/test_plugin_plc.cpp index b6756705e..3f4cf52e1 100644 --- a/src/tests/public_api/test_plugin_plc.cpp +++ b/src/tests/public_api/test_plugin_plc.cpp @@ -71,8 +71,13 @@ struct TestPlc { } }; -void* test_plc_new(roc_plugin_plc* plugin) { +void* test_plc_new(roc_plugin_plc* plugin, const roc_media_encoding* encoding) { roc_panic_if_not(plugin); + roc_panic_if_not(encoding); + + roc_panic_if_not(encoding->format == ROC_FORMAT_PCM_FLOAT32); + roc_panic_if_not(encoding->rate == test::SampleRate); + roc_panic_if_not(encoding->channels == ROC_CHANNEL_LAYOUT_STEREO); return new TestPlc((TestPlugin*)plugin); }