Skip to content

Commit

Permalink
feat(encoder): add support for more encoder options
Browse files Browse the repository at this point in the history
Signed-off-by: k4yt3x <[email protected]>
  • Loading branch information
k4yt3x committed Nov 22, 2024
1 parent 169509b commit 324c238
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 60 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Automatic selection of the most suitable pixel format for the output video.
- Support for specifying arbitrary `AVOptions` for the encoder.
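- Support for more `AVCodecContext` options (rate control, quantizer limits, GOP structure, reference frames, thread count, and delay).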

### Fixed

Expand Down
39 changes: 33 additions & 6 deletions include/libvideo2x/libvideo2x.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,43 @@ struct FilterConfig {
} config;
};

// Encoder configuration
// Encoder configurations
struct EncoderConfig {
int out_width;
int out_height;
bool copy_streams;
// Non-AVCodecContext options
enum AVCodecID codec;
bool copy_streams;

// Basic video options
int width;
int height;
enum AVPixelFormat pix_fmt;
const char *preset;

// Rate control and compression
int64_t bit_rate;
float crf;
int rc_buffer_size;
int rc_min_rate;
int rc_max_rate;
int qmin;
int qmax;

// GOP and frame structure
int gop_size;
int max_b_frames;
int keyint_min;
int refs;

// Performance and threading
int thread_count;

// Latency and buffering
int delay;

// Extra AVOptions
struct {
const char *key;
const char *value;
} *extra_options;
size_t nb_extra_options;
};

// Video processing context
Expand Down
49 changes: 38 additions & 11 deletions src/encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,40 @@ int Encoder::init(
enc_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx);
}

// Set encoding parameters
enc_ctx_->height = encoder_config->out_height;
enc_ctx_->width = encoder_config->out_width;
enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
enc_ctx_->bit_rate = encoder_config->bit_rate;

// Set the color properties
// Copy the color properties from the decoder context
enc_ctx_->color_range = dec_ctx->color_range;
enc_ctx_->color_primaries = dec_ctx->color_primaries;
enc_ctx_->color_trc = dec_ctx->color_trc;
enc_ctx_->colorspace = dec_ctx->colorspace;
enc_ctx_->chroma_sample_location = dec_ctx->chroma_sample_location;

// Extra options copied from the decoder context
enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;

// Set basic video options
enc_ctx_->width = encoder_config->width;
enc_ctx_->height = encoder_config->height;

// Set rate control and compression options
enc_ctx_->bit_rate = encoder_config->bit_rate;
enc_ctx_->rc_buffer_size = encoder_config->rc_buffer_size;
enc_ctx_->rc_min_rate = encoder_config->rc_min_rate;
enc_ctx_->rc_max_rate = encoder_config->rc_max_rate;
enc_ctx_->qmin = encoder_config->qmin;
enc_ctx_->qmax = encoder_config->qmax;

// Set GOP and frame structure options
enc_ctx_->gop_size = encoder_config->gop_size;
enc_ctx_->max_b_frames = encoder_config->max_b_frames;
enc_ctx_->keyint_min = encoder_config->keyint_min;
enc_ctx_->refs = encoder_config->refs;

// Set performance and threading options
enc_ctx_->thread_count = encoder_config->thread_count;

// Set latency and buffering options
enc_ctx_->delay = encoder_config->delay;

// Set the pixel format
if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) {
// Use the specified pixel format
Expand Down Expand Up @@ -114,10 +135,16 @@ int Encoder::init(
enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr);
}

// Set the CRF and preset for any codecs that support it
std::string crf_str = std::to_string(encoder_config->crf);
av_opt_set(enc_ctx_->priv_data, "crf", crf_str.c_str(), 0);
av_opt_set(enc_ctx_->priv_data, "preset", encoder_config->preset, 0);
// Set extra AVOptions
for (size_t i = 0; i < encoder_config->nb_extra_options; i++) {
const char *key = encoder_config->extra_options[i].key;
const char *value = encoder_config->extra_options[i].value;
spdlog::debug("Setting encoder option '{}' to '{}'", key, value);

if (av_opt_set(enc_ctx_->priv_data, key, value, 0) < 0) {
spdlog::warn("Failed to set encoder option '{}' to '{}'", key, value);
}
}

// Use global headers if necessary
if (ofmt_ctx_->oformat->flags & AVFMT_GLOBALHEADER) {
Expand Down
4 changes: 2 additions & 2 deletions src/libvideo2x.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ extern "C" int process_video(
spdlog::debug("Output video dimensions: {}x{}", output_width, output_height);

// Update encoder configuration with output dimensions
encoder_config->out_width = output_width;
encoder_config->out_height = output_height;
encoder_config->width = output_width;
encoder_config->height = output_height;

// Initialize the encoder
Encoder encoder;
Expand Down
161 changes: 120 additions & 41 deletions tools/video2x/src/video2x.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstring>
#include <filesystem>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
Expand Down Expand Up @@ -69,15 +70,25 @@ struct Arguments {

// Encoder options
StringType codec = STR("libx264");
StringType preset = STR("slow");
StringType pix_fmt;
int64_t bitrate = 0;
float crf = 20.0f;
int64_t bit_rate = 0;
int rc_buffer_size = 0;
int rc_min_rate = 0;
int rc_max_rate = 0;
int qmin = -1;
int qmax = -1;
int gop_size = -1;
int max_b_frames = -1;
int keyint_min = -1;
int refs = -1;
int thread_count = 0;
int delay = 0;
std::vector<std::pair<StringType, StringType>> extra_options;

// libplacebo options
std::filesystem::path shader_path;
int out_width = 0;
int out_height = 0;
int width = 0;
int height = 0;

// RealESRGAN options
StringType model_name;
Expand Down Expand Up @@ -348,22 +359,34 @@ int main(int argc, char **argv) {
("input,i", PO_STR_VALUE<StringType>(), "Input video file path")
("output,o", PO_STR_VALUE<StringType>(), "Output video file path")
("filter,f", PO_STR_VALUE<StringType>(&arguments.filter_type), "Filter to use: 'libplacebo' or 'realesrgan'")
("gpuid,g", po::value<uint32_t>(&arguments.gpuid)->default_value(0), "Vulkan GPU ID (default: 0)")
("hwaccel,a", PO_STR_VALUE<StringType>(&arguments.hwaccel)->default_value(STR("none"), "none"), "Hardware acceleration method (default: none)")
("nocopystreams", po::bool_switch(&arguments.nocopystreams), "Do not copy audio and subtitle streams")
("gpuid,g", po::value<uint32_t>(&arguments.gpuid)->default_value(0), "Vulkan GPU ID")
("hwaccel,a", PO_STR_VALUE<StringType>(&arguments.hwaccel)->default_value(STR("none"), "none"), "Hardware acceleration method")
("benchmark", po::bool_switch(&arguments.benchmark), "Discard processed frames and calculate average FPS")

// Encoder options
("codec,c", PO_STR_VALUE<StringType>(&arguments.codec)->default_value(STR("libx264"), "libx264"), "Output codec (default: libx264)")
("preset,p", PO_STR_VALUE<StringType>(&arguments.preset)->default_value(STR("slow"), "slow"), "Encoder preset (default: slow)")
("pixfmt,x", PO_STR_VALUE<StringType>(&arguments.pix_fmt), "Output pixel format (default: auto)")
("bitrate,b", po::value<int64_t>(&arguments.bitrate)->default_value(0), "Bitrate in bits per second (default: 0 (VBR))")
("crf,q", po::value<float>(&arguments.crf)->default_value(20.0f), "Constant Rate Factor (default: 20.0)")
("codec,c", PO_STR_VALUE<StringType>(&arguments.codec)->default_value(STR("libx264"), "libx264"), "Output codec")
("no_copy_streams", po::bool_switch(&arguments.nocopystreams), "Do not copy audio and subtitle streams")
("pix_fmt", PO_STR_VALUE<StringType>(&arguments.pix_fmt), "Output pixel format")
("bit_rate", po::value<int64_t>(&arguments.bit_rate)->default_value(0), "Bitrate in bits per second")
("rc_buffer_size", po::value<int>(&arguments.rc_buffer_size)->default_value(0), "Rate control buffer size in bits")
("rc_min_rate", po::value<int>(&arguments.rc_min_rate)->default_value(0), "Minimum rate control")
("rc_max_rate", po::value<int>(&arguments.rc_max_rate)->default_value(0), "Maximum rate control")
("qmin", po::value<int>(&arguments.qmin)->default_value(-1), "Minimum quantizer")
("qmax", po::value<int>(&arguments.qmax)->default_value(-1), "Maximum quantizer")
("gop_size", po::value<int>(&arguments.gop_size)->default_value(-1), "GOP size")
("max_b_frames", po::value<int>(&arguments.max_b_frames)->default_value(-1), "Maximum number of B-frames")
("keyint_min", po::value<int>(&arguments.keyint_min)->default_value(-1), "Minimum interval between keyframes")
("refs", po::value<int>(&arguments.refs)->default_value(-1), "Number of reference frames")
("thread_count", po::value<int>(&arguments.thread_count)->default_value(0), "Number of threads for encoding")
("delay", po::value<int>(&arguments.delay)->default_value(0), "Delay in milliseconds for encoder")

// Extra AVOptions (key-value pairs for codec settings)
("extra_option,e", po::value<std::vector<std::string>>()->multitoken(), "Additional AVOption(s) for codec settings (key=value)")

// libplacebo options
("shader,s", PO_STR_VALUE<StringType>(), "Name or path of the GLSL shader file to use")
("width,w", po::value<int>(&arguments.out_width), "Output width")
("height,h", po::value<int>(&arguments.out_height), "Output height")
("width,w", po::value<int>(&arguments.width), "Output width")
("height,h", po::value<int>(&arguments.height), "Output height")

// RealESRGAN options
("model,m", PO_STR_VALUE<StringType>(&arguments.model_name), "Name of the model to use")
Expand Down Expand Up @@ -417,6 +440,21 @@ int main(int argc, char **argv) {
return 1;
}

// Parse extra AVOptions given as key=value pairs
if (vm.count("extra_option")) {
for (const auto &opt : vm["extra_option"].as<std::vector<std::string>>()) {
size_t eq_pos = opt.find('=');
if (eq_pos != std::string::npos) {
StringType key = opt.substr(0, eq_pos);
StringType value = opt.substr(eq_pos + 1);
arguments.extra_options.push_back(std::make_pair(key, value));
} else {
spdlog::critical("Invalid extra AVOption format: {}", opt);
return 1;
}
}
}

if (vm.count("shader")) {
arguments.shader_path = std::filesystem::path(vm["shader"].as<StringType>());
}
Expand All @@ -440,8 +478,7 @@ int main(int argc, char **argv) {

// Additional validations
if (arguments.filter_type == STR("libplacebo")) {
if (arguments.shader_path.empty() || arguments.out_width == 0 ||
arguments.out_height == 0) {
if (arguments.shader_path.empty() || arguments.width == 0 || arguments.height == 0) {
spdlog::critical(
"For libplacebo, shader name/path (-s), width (-w), "
"and height (-h) are required."
Expand Down Expand Up @@ -473,17 +510,11 @@ int main(int argc, char **argv) {
}

// Validate bitrate
if (arguments.bitrate < 0) {
if (arguments.bit_rate < 0) {
spdlog::critical("Invalid bitrate specified.");
return 1;
}

// Validate CRF
if (arguments.crf < 0.0f || arguments.crf > 51.0f) {
spdlog::critical("CRF must be between 0 and 51.");
return 1;
}

// Parse codec to AVCodec
const AVCodec *codec = avcodec_find_encoder_by_name(wstring_to_utf8(arguments.codec).c_str());
if (!codec) {
Expand Down Expand Up @@ -544,8 +575,8 @@ int main(int argc, char **argv) {
FilterConfig filter_config;
if (arguments.filter_type == STR("libplacebo")) {
filter_config.filter_type = FILTER_LIBPLACEBO;
filter_config.config.libplacebo.out_width = arguments.out_width;
filter_config.config.libplacebo.out_height = arguments.out_height;
filter_config.config.libplacebo.out_width = arguments.width;
filter_config.config.libplacebo.out_height = arguments.height;
filter_config.config.libplacebo.shader_path = shader_path_str.c_str();
} else if (arguments.filter_type == STR("realesrgan")) {
filter_config.filter_type = FILTER_REALESRGAN;
Expand All @@ -554,18 +585,59 @@ int main(int argc, char **argv) {
filter_config.config.realesrgan.model_name = arguments.model_name.c_str();
}

std::string preset_str = wstring_to_utf8(arguments.preset);

// Setup encoder configuration
EncoderConfig encoder_config;
encoder_config.out_width = 0;
encoder_config.out_height = 0;
encoder_config.copy_streams = !arguments.nocopystreams;
encoder_config.codec = codec->id;
encoder_config.copy_streams = !arguments.nocopystreams;
encoder_config.width = arguments.width;
encoder_config.height = arguments.height;
encoder_config.pix_fmt = pix_fmt;
encoder_config.preset = preset_str.c_str();
encoder_config.bit_rate = arguments.bitrate;
encoder_config.crf = arguments.crf;
encoder_config.bit_rate = arguments.bit_rate;
encoder_config.rc_buffer_size = arguments.rc_buffer_size;
encoder_config.rc_max_rate = arguments.rc_max_rate;
encoder_config.rc_min_rate = arguments.rc_min_rate;
encoder_config.qmin = arguments.qmin;
encoder_config.qmax = arguments.qmax;
encoder_config.gop_size = arguments.gop_size;
encoder_config.max_b_frames = arguments.max_b_frames;
encoder_config.keyint_min = arguments.keyint_min;
encoder_config.refs = arguments.refs;
encoder_config.thread_count = arguments.thread_count;
encoder_config.delay = arguments.delay;

// Handle extra AVOptions
encoder_config.nb_extra_options = arguments.extra_options.size();
encoder_config.extra_options = static_cast<decltype(encoder_config.extra_options)>(malloc(
static_cast<unsigned long>(encoder_config.nb_extra_options + 1) *
sizeof(encoder_config.extra_options[0])
));
if (encoder_config.extra_options == nullptr) {
spdlog::critical("Failed to allocate memory for extra AVOptions.");
return 1;
}

// Copy extra AVOptions to the encoder configuration
for (size_t i = 0; i < encoder_config.nb_extra_options; i++) {
const std::string key = wstring_to_utf8(arguments.extra_options[i].first);
const std::string value = wstring_to_utf8(arguments.extra_options[i].second);
encoder_config.extra_options[i].key = strdup(key.c_str());
encoder_config.extra_options[i].value = strdup(value.c_str());
}

// Custom deleter for extra AVOptions
auto extra_options_deleter = [&](decltype(encoder_config.extra_options) *extra_options_ptr) {
auto extra_options = *extra_options_ptr;
for (size_t i = 0; i < encoder_config.nb_extra_options; i++) {
free(const_cast<char *>(extra_options[i].key));
free(const_cast<char *>(extra_options[i].value));
}
free(extra_options);
*extra_options_ptr = nullptr;
};

// Define a unique_ptr to automatically free extra_options
std::unique_ptr<decltype(encoder_config.extra_options), decltype(extra_options_deleter)>
extra_options_guard(&encoder_config.extra_options, extra_options_deleter);

// Parse hardware acceleration method
enum AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE;
Expand Down Expand Up @@ -753,20 +825,27 @@ int main(int argc, char **argv) {
std::lock_guard<std::mutex> lock(proc_ctx_mutex);
processed_frames = proc_ctx.processed_frames;
}
int64_t time_elapsed = timer.get_elapsed_time() / 1000;
int time_elapsed = static_cast<int>(timer.get_elapsed_time() / 1000);
int hours_elapsed = time_elapsed / 3600;
int minutes_elapsed = (time_elapsed % 3600) / 60;
int seconds_elapsed = time_elapsed % 60;
float average_speed_fps = static_cast<float>(processed_frames) /
(time_elapsed > 0 ? static_cast<float>(time_elapsed) : 1);

// Print processing summary
printf("====== Video2X %s summary ======\n", arguments.benchmark ? "Benchmark" : "Processing");
printf("Video file processed: %s\n", arguments.in_fname.u8string().c_str());
printf("Total frames processed: %ld\n", proc_ctx.processed_frames);
printf("Total time taken: %ld s\n", time_elapsed);
printf("Average processing speed: %.2f FPS\n", average_speed_fps);
std::cout << "====== Video2X " << (arguments.benchmark ? "Benchmark" : "Processing")
<< " summary ======" << std::endl;
std::cout << "Video file processed: " << arguments.in_fname.u8string() << std::endl;
std::cout << "Total frames processed: " << processed_frames << std::endl;
std::cout << "Total time taken: " << std::setw(2) << std::setfill('0') << hours_elapsed << ":"
<< std::setw(2) << std::setfill('0') << minutes_elapsed << ":" << std::setw(2)
<< std::setfill('0') << seconds_elapsed << std::endl;
std::cout << "Average processing speed: " << std::fixed << std::setprecision(2)
<< average_speed_fps << " FPS" << std::endl;

// Print additional information if not in benchmark mode
if (!arguments.benchmark) {
printf("Output written to: %s\n", arguments.out_fname.u8string().c_str());
std::cout << "Output written to: " << arguments.out_fname.u8string() << std::endl;
}

return 0;
Expand Down

0 comments on commit 324c238

Please sign in to comment.