diff --git a/include/decoder.h b/include/decoder.h index dc55d6dbd..1315d9f29 100644 --- a/include/decoder.h +++ b/include/decoder.h @@ -2,10 +2,12 @@ #define DECODER_H #include -#include #include +#include int init_decoder( + AVHWDeviceType hw_type, + AVBufferRef *hw_ctx, const char *input_filename, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx, diff --git a/include/encoder.h b/include/encoder.h index a8284705a..72296a85b 100644 --- a/include/encoder.h +++ b/include/encoder.h @@ -3,10 +3,12 @@ #include #include +#include #include "libvideo2x.h" int init_encoder( + AVBufferRef *hw_ctx, const char *output_filename, AVFormatContext **ofmt_ctx, AVCodecContext **enc_ctx, diff --git a/include/filter.h b/include/filter.h index 9179e2d2f..a77d373d9 100644 --- a/include/filter.h +++ b/include/filter.h @@ -6,14 +6,15 @@ extern "C" { #include #include +#include } // Abstract base class for filters class Filter { public: virtual ~Filter() {} - virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) = 0; - virtual AVFrame *process_frame(AVFrame *input_frame) = 0; + virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0; + virtual int process_frame(AVFrame *input_frame, AVFrame **output_frame) = 0; virtual int flush(std::vector &processed_frames) = 0; }; diff --git a/include/libplacebo.h b/include/libplacebo.h index 71e3591af..8e618d441 100644 --- a/include/libplacebo.h +++ b/include/libplacebo.h @@ -8,10 +8,10 @@ #include int init_libplacebo( + AVBufferRef *hw_ctx, AVFilterGraph **filter_graph, AVFilterContext **buffersrc_ctx, AVFilterContext **buffersink_ctx, - AVBufferRef **device_ctx, AVCodecContext *dec_ctx, int output_width, int output_height, diff --git a/include/libplacebo_filter.h b/include/libplacebo_filter.h index a36dd21ee..6ceddd84f 100644 --- a/include/libplacebo_filter.h +++ b/include/libplacebo_filter.h @@ -4,6 +4,7 @@ #include #include +#include #include "filter.h" @@ -13,7 +14,6 @@ class LibplaceboFilter : 
public Filter { AVFilterGraph *filter_graph; AVFilterContext *buffersrc_ctx; AVFilterContext *buffersink_ctx; - AVBufferRef *device_ctx; int output_width; int output_height; const std::filesystem::path shader_path; @@ -27,10 +27,10 @@ class LibplaceboFilter : public Filter { virtual ~LibplaceboFilter(); // Initializes the filter with decoder and encoder contexts - int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) override; + int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override; // Processes an input frame and returns the processed frame - AVFrame *process_frame(AVFrame *input_frame) override; + int process_frame(AVFrame *input_frame, AVFrame **output_frame) override; // Flushes any remaining frames int flush(std::vector &processed_frames) override; diff --git a/include/libvideo2x.h b/include/libvideo2x.h index 33c117621..fa3a270f8 100644 --- a/include/libvideo2x.h +++ b/include/libvideo2x.h @@ -1,12 +1,13 @@ #ifndef LIBVIDEO2X_H #define LIBVIDEO2X_H -#include #include #include #include #include +#include +#include #ifdef _WIN32 #ifdef LIBVIDEO2X_EXPORTS @@ -74,6 +75,7 @@ struct ProcessingStatus { LIBVIDEO2X_API int process_video( const char *input_filename, const char *output_filename, + enum AVHWDeviceType hw_device_type, const struct FilterConfig *filter_config, struct EncoderConfig *encoder_config, struct ProcessingStatus *status diff --git a/include/realesrgan_filter.h b/include/realesrgan_filter.h index 60b58df00..6ebf578b0 100644 --- a/include/realesrgan_filter.h +++ b/include/realesrgan_filter.h @@ -3,6 +3,9 @@ #include +#include +#include + #include "filter.h" #include "realesrgan.h" @@ -35,10 +38,10 @@ class RealesrganFilter : public Filter { virtual ~RealesrganFilter(); // Initializes the filter with decoder and encoder contexts - int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) override; + int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override; // Processes an input 
frame and returns the processed frame - AVFrame *process_frame(AVFrame *input_frame) override; + int process_frame(AVFrame *input_frame, AVFrame **output_frame) override; // Flushes any remaining frames (if necessary) int flush(std::vector &processed_frames) override; diff --git a/src/conversions.cpp b/src/conversions.cpp index 21ff5d3df..b79a72e3b 100644 --- a/src/conversions.cpp +++ b/src/conversions.cpp @@ -77,10 +77,11 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) { converted_frame = convert_avframe_pix_fmt(frame, AV_PIX_FMT_BGR24); if (!converted_frame) { fprintf(stderr, "Failed to convert AVFrame to BGR24.\n"); - return ncnn::Mat(); // Return an empty ncnn::Mat on failure + return ncnn::Mat(); } } else { - converted_frame = frame; // If the frame is already in BGR24, use it directly + // If the frame is already in BGR24, use it directly + converted_frame = frame; } // Allocate a new ncnn::Mat and copy the data @@ -146,10 +147,7 @@ AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) { return nullptr; } - // Copy data from ncnn::Mat to the BGR AVFrame - // mat.to_pixels(bgr_frame->data[0], ncnn::Mat::PIXEL_BGR); - - // Manually copy the pixel data from ncnn::Mat to the BGR AVFrame + // Copy the pixel data from ncnn::Mat to the BGR AVFrame for (int y = 0; y < mat.h; y++) { uint8_t *dst_row = bgr_frame->data[0] + y * bgr_frame->linesize[0]; const uint8_t *src_row = mat.row(y); diff --git a/src/decoder.cpp b/src/decoder.cpp index 0b3779437..f6bf37222 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -8,12 +8,28 @@ extern "C" { #include #include #include +#include #include #include #include } +static enum AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE; + +// Callback function to choose the hardware-accelerated pixel format +static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts) { + for (const enum AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) { + if (*p == hw_pix_fmt) { + return *p; + } + 
} + fprintf(stderr, "Failed to get HW surface format.\n"); + return AV_PIX_FMT_NONE; +} + int init_decoder( + AVHWDeviceType hw_type, + AVBufferRef *hw_ctx, const char *input_filename, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx, @@ -44,18 +60,45 @@ int init_decoder( AVStream *video_stream = ifmt_ctx->streams[stream_index]; // Set up the decoder - const AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id); - if (!dec) { + const AVCodec *decoder = avcodec_find_decoder(video_stream->codecpar->codec_id); + if (!decoder) { fprintf(stderr, "Failed to find decoder for stream #%u\n", stream_index); return AVERROR_DECODER_NOT_FOUND; } - codec_ctx = avcodec_alloc_context3(dec); + codec_ctx = avcodec_alloc_context3(decoder); if (!codec_ctx) { fprintf(stderr, "Failed to allocate the decoder context\n"); return AVERROR(ENOMEM); } + // Set hardware device context + if (hw_ctx != nullptr) { + codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); + codec_ctx->get_format = get_hw_format; + + // Automatically determine the hardware pixel format + for (int i = 0;; i++) { + const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i); + if (config == nullptr) { + fprintf( + stderr, + "Decoder %s does not support device type %s.\n", + decoder->name, + av_hwdevice_get_type_name(hw_type) + ); + avcodec_free_context(&codec_ctx); + avformat_close_input(&ifmt_ctx); + return AVERROR(ENOSYS); + } + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && + config->device_type == hw_type) { + hw_pix_fmt = config->pix_fmt; + break; + } + } + } + if ((ret = avcodec_parameters_to_context(codec_ctx, video_stream->codecpar)) < 0) { fprintf(stderr, "Failed to copy decoder parameters to input decoder context\n"); return ret; @@ -66,7 +109,7 @@ int init_decoder( codec_ctx->pkt_timebase = video_stream->time_base; codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, video_stream, NULL); - if ((ret = avcodec_open2(codec_ctx, dec, NULL)) < 0) { + if ((ret = 
avcodec_open2(codec_ctx, decoder, NULL)) < 0) { fprintf(stderr, "Failed to open decoder for stream #%u\n", stream_index); return ret; } diff --git a/src/encoder.cpp b/src/encoder.cpp index 40abc8d92..cc2a35bc4 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -10,15 +10,27 @@ extern "C" { #include #include #include +#include #include #include +#include #include } #include "conversions.h" #include "libvideo2x.h" +static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) { + const enum AVPixelFormat *p = encoder->pix_fmts; + if (!p) { + fprintf(stderr, "No pixel formats supported by encoder\n"); + return AV_PIX_FMT_NONE; + } + return *p; +} + int init_encoder( + AVBufferRef *hw_ctx, const char *output_filename, AVFormatContext **ofmt_ctx, AVCodecContext **enc_ctx, @@ -36,8 +48,8 @@ int init_encoder( } // Create a new video stream - const AVCodec *enc = avcodec_find_encoder(encoder_config->codec); - if (!enc) { + const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec); + if (!encoder) { fprintf(stderr, "Necessary encoder not found\n"); return AVERROR_ENCODER_NOT_FOUND; } @@ -48,24 +60,40 @@ int init_encoder( return AVERROR_UNKNOWN; } - codec_ctx = avcodec_alloc_context3(enc); + codec_ctx = avcodec_alloc_context3(encoder); if (!codec_ctx) { fprintf(stderr, "Failed to allocate the encoder context\n"); return AVERROR(ENOMEM); } + // Set hardware device context + if (hw_ctx != nullptr) { + codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); + } + // Set encoding parameters codec_ctx->height = encoder_config->output_height; codec_ctx->width = encoder_config->output_width; codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; - codec_ctx->pix_fmt = encoder_config->pix_fmt; codec_ctx->time_base = av_inv_q(dec_ctx->framerate); + if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) { + // Use the specified pixel format + codec_ctx->pix_fmt = encoder_config->pix_fmt; + } else { + // Fall back to the default pixel format + 
codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder); + if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) { + fprintf(stderr, "Could not get the default pixel format for the encoder\n"); + return AVERROR(EINVAL); + } + } + if (codec_ctx->time_base.num == 0 || codec_ctx->time_base.den == 0) { codec_ctx->time_base = av_inv_q(av_guess_frame_rate(fmt_ctx, out_stream, NULL)); } - // Set the bit rate and other encoder parameters if needed + // Set the bit rate and other encoder parameters codec_ctx->bit_rate = encoder_config->bit_rate; codec_ctx->gop_size = 60; // Keyframe interval codec_ctx->max_b_frames = 3; // B-frames @@ -73,16 +101,16 @@ int init_encoder( char crf_str[16]; snprintf(crf_str, sizeof(crf_str), "%.f", encoder_config->crf); - if (encoder_config->codec == AV_CODEC_ID_H264 || encoder_config->codec == AV_CODEC_ID_HEVC) { - av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0); - av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0); - } + + // Set the CRF and preset for any codecs that support it + av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0); + av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0); if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) { codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } - if ((ret = avcodec_open2(codec_ctx, enc, NULL)) < 0) { + if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) { fprintf(stderr, "Cannot open video encoder\n"); return ret; } diff --git a/src/libplacebo.cpp b/src/libplacebo.cpp index 798119211..8f51053aa 100644 --- a/src/libplacebo.cpp +++ b/src/libplacebo.cpp @@ -19,10 +19,10 @@ extern "C" { #include "fsutils.h" int init_libplacebo( + AVBufferRef *hw_ctx, AVFilterGraph **filter_graph, AVFilterContext **buffersrc_ctx, AVFilterContext **buffersink_ctx, - AVBufferRef **device_ctx, AVCodecContext *dec_ctx, int output_width, int output_height, @@ -31,14 +31,6 @@ int init_libplacebo( char args[512]; int ret; - // Initialize the Vulkan hardware device - AVBufferRef 
*hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN); - ret = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VULKAN, NULL, NULL, 0); - if (ret < 0) { - fprintf(stderr, "Unable to initialize Vulkan device\n"); - return ret; - } - AVFilterGraph *graph = avfilter_graph_alloc(); if (!graph) { fprintf(stderr, "Unable to create filter graph.\n"); @@ -67,7 +59,6 @@ int init_libplacebo( ret = avfilter_graph_create_filter(buffersrc_ctx, buffersrc, "in", args, NULL, graph); if (ret < 0) { fprintf(stderr, "Cannot create buffer source\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } @@ -78,7 +69,6 @@ int init_libplacebo( const AVFilter *libplacebo_filter = avfilter_get_by_name("libplacebo"); if (!libplacebo_filter) { fprintf(stderr, "Filter 'libplacebo' not found\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return AVERROR_FILTER_NOT_FOUND; } @@ -108,19 +98,19 @@ int init_libplacebo( ); if (ret < 0) { fprintf(stderr, "Cannot create libplacebo filter\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } // Set the hardware device context to Vulkan - libplacebo_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx); + if (hw_ctx != nullptr) { + libplacebo_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); + } // Link buffersrc to libplacebo ret = avfilter_link(last_filter, 0, libplacebo_ctx, 0); if (ret < 0) { fprintf(stderr, "Error connecting buffersrc to libplacebo filter\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } @@ -132,7 +122,6 @@ int init_libplacebo( ret = avfilter_graph_create_filter(buffersink_ctx, buffersink, "out", NULL, NULL, graph); if (ret < 0) { fprintf(stderr, "Cannot create buffer sink\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } @@ -141,7 +130,6 @@ int init_libplacebo( ret = avfilter_link(last_filter, 0, *buffersink_ctx, 0); if (ret < 0) { fprintf(stderr, "Error connecting libplacebo filter to 
buffersink\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } @@ -150,12 +138,10 @@ int init_libplacebo( ret = avfilter_graph_config(graph, NULL); if (ret < 0) { fprintf(stderr, "Error configuring the filter graph\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } *filter_graph = graph; - *device_ctx = hw_device_ctx; return 0; } diff --git a/src/libplacebo_filter.cpp b/src/libplacebo_filter.cpp index b1758bd3f..0bb5b3ceb 100644 --- a/src/libplacebo_filter.cpp +++ b/src/libplacebo_filter.cpp @@ -15,7 +15,6 @@ LibplaceboFilter::LibplaceboFilter(int width, int height, const std::filesystem: : filter_graph(nullptr), buffersrc_ctx(nullptr), buffersink_ctx(nullptr), - device_ctx(nullptr), output_width(width), output_height(height), shader_path(std::move(shader_path)) {} @@ -29,17 +28,13 @@ LibplaceboFilter::~LibplaceboFilter() { avfilter_free(buffersink_ctx); buffersink_ctx = nullptr; } - if (device_ctx) { - av_buffer_unref(&device_ctx); - device_ctx = nullptr; - } if (filter_graph) { avfilter_graph_free(&filter_graph); filter_graph = nullptr; } } -int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { +int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) { // Construct the shader path std::filesystem::path shader_full_path; if (filepath_is_readable(shader_path)) { @@ -51,14 +46,20 @@ int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { find_resource_file(std::filesystem::path("models") / (shader_path.string() + ".glsl")); } + // Check if the shader file exists (string() narrows the path so "%s" is also valid where path::c_str() is wchar_t*) + if (!std::filesystem::exists(shader_full_path)) { + fprintf(stderr, "libplacebo shader file not found: %s\n", shader_full_path.string().c_str()); + return -1; + } + // Save the output time base output_time_base = enc_ctx->time_base; return init_libplacebo( + hw_ctx, &filter_graph, &buffersrc_ctx, &buffersink_ctx, - &device_ctx, dec_ctx, output_width, output_height,
@@ -66,45 +67,41 @@ int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { ); } -AVFrame *LibplaceboFilter::process_frame(AVFrame *input_frame) { +int LibplaceboFilter::process_frame(AVFrame *input_frame, AVFrame **output_frame) { int ret; // Get the filtered frame - AVFrame *output_frame = av_frame_alloc(); - if (output_frame == nullptr) { + *output_frame = av_frame_alloc(); + if (*output_frame == nullptr) { fprintf(stderr, "Failed to allocate output frame\n"); - return nullptr; + return -1; } // Feed the frame to the filter graph ret = av_buffersrc_add_frame(buffersrc_ctx, input_frame); if (ret < 0) { fprintf(stderr, "Error while feeding the filter graph\n"); - return nullptr; + // Release the frame allocated above: the caller does not free it on error, so it would leak + av_frame_free(output_frame); + return ret; } - ret = av_buffersink_get_frame(buffersink_ctx, output_frame); + ret = av_buffersink_get_frame(buffersink_ctx, *output_frame); if (ret < 0) { - av_frame_free(&output_frame); - if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - av_strerror(ret, errbuf, sizeof(errbuf)); - fprintf(stderr, "Error getting frame from filter graph: %s\n", errbuf); - return nullptr; - } - return (AVFrame *)-1; + av_frame_free(output_frame); + return ret; } // Rescale PTS to encoder's time base - output_frame->pts = - av_rescale_q(output_frame->pts, buffersink_ctx->inputs[0]->time_base, output_time_base); + (*output_frame)->pts = + av_rescale_q((*output_frame)->pts, buffersink_ctx->inputs[0]->time_base, output_time_base); // Return the processed frame to the caller - return output_frame; + return 0; } int LibplaceboFilter::flush(std::vector &processed_frames) { - int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr); // Signal EOF to the filter graph + int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr); if (ret < 0) { fprintf(stderr, "Error while flushing filter graph\n"); return ret; diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 181d9e2ac..e7b274eac 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@
-16,10 +16,21 @@ extern "C" { #include "libvideo2x.h" #include "realesrgan_filter.h" -// Function to process frames using the selected filter (same as before) +/** + * @brief Process frames using the selected filter. + * + * @param[in,out] status Struct containing the processing status + * @param[in] fmt_ctx Input format context + * @param[in] ofmt_ctx Output format context + * @param[in] dec_ctx Decoder context + * @param[in] enc_ctx Encoder context + * @param[in] filter Filter instance + * @param[in] video_stream_index Index of the video stream in the input format context + * @return int 0 on success, negative value on error + */ int process_frames( ProcessingStatus *status, - AVFormatContext *fmt_ctx, + AVFormatContext *ifmt_ctx, AVFormatContext *ofmt_ctx, AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, @@ -32,7 +43,7 @@ int process_frames( char errbuf[AV_ERROR_MAX_STRING_SIZE]; // Get the total number of frames in the video - AVStream *video_stream = fmt_ctx->streams[video_stream_index]; + AVStream *video_stream = ifmt_ctx->streams[video_stream_index]; status->total_frames = video_stream->nb_frames; // If nb_frames is not set, calculate total frames using duration and frame rate @@ -58,7 +69,7 @@ int process_frames( // Read frames from the input file while (1) { - ret = av_read_frame(fmt_ctx, &packet); + ret = av_read_frame(ifmt_ctx, &packet); if (ret < 0) { break; // End of file or error } @@ -85,8 +96,9 @@ int process_frames( } // Process the frame using the selected filter - AVFrame *processed_frame = filter->process_frame(frame); - if (processed_frame != nullptr && processed_frame != (AVFrame *)-1) { + AVFrame *processed_frame = nullptr; + ret = filter->process_frame(frame, &processed_frame); + if (ret == 0 && processed_frame != nullptr) { // Encode and write the processed frame ret = encode_and_write_frame(processed_frame, enc_ctx, ofmt_ctx); if (ret < 0) { @@ -98,8 +110,8 @@ int process_frames( av_frame_free(&processed_frame); 
status->processed_frames++; - } else if (processed_frame != (AVFrame *)-1) { - fprintf(stderr, "Error processing frame\n"); + } else if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) { + fprintf(stderr, "Filter returned an error\n"); goto end; } @@ -163,16 +175,23 @@ int process_frames( return ret; } -// Cleanup helper function +// Cleanup resources after processing the video void cleanup( - AVFormatContext *fmt_ctx, + AVFormatContext *ifmt_ctx, AVFormatContext *ofmt_ctx, AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, + AVBufferRef *hw_ctx, Filter *filter ) { - if (filter) { - delete filter; + if (ifmt_ctx) { + avformat_close_input(&ifmt_ctx); + } + if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) { + avio_closep(&ofmt_ctx->pb); + } + if (ofmt_ctx) { + avformat_free_context(ofmt_ctx); } if (dec_ctx) { avcodec_free_context(&dec_ctx); @@ -180,38 +199,57 @@ void cleanup( if (enc_ctx) { avcodec_free_context(&enc_ctx); } - if (fmt_ctx) { - avformat_close_input(&fmt_ctx); - } - if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) { - avio_closep(&ofmt_ctx->pb); + if (hw_ctx) { + av_buffer_unref(&hw_ctx); } - if (ofmt_ctx) { - avformat_free_context(ofmt_ctx); + if (filter) { + delete filter; } } -// Main function to process the video +/** + * @brief Process a video file using the selected filter and encoder settings. 
+ * + * @param[in] input_filename Path to the input video file + * @param[in] output_filename Path to the output video file + * @param[in] hw_type Hardware device type + * @param[in] filter_config Filter configurations + * @param[in] encoder_config Encoder configurations + * @param[in,out] status Video processing status + * @return int 0 on success, non-zero value on error + */ extern "C" int process_video( const char *input_filename, const char *output_filename, + AVHWDeviceType hw_type, const FilterConfig *filter_config, EncoderConfig *encoder_config, ProcessingStatus *status ) { - AVFormatContext *fmt_ctx = nullptr; + AVFormatContext *ifmt_ctx = nullptr; AVFormatContext *ofmt_ctx = nullptr; AVCodecContext *dec_ctx = nullptr; AVCodecContext *enc_ctx = nullptr; + AVBufferRef *hw_ctx = nullptr; Filter *filter = nullptr; int video_stream_index = -1; - int ret = 0; // Initialize ret with 0 to assume success + int ret = 0; + + // Initialize hardware device context + if (hw_type != AV_HWDEVICE_TYPE_NONE) { + ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0); + if (ret < 0) { + fprintf(stderr, "Unable to initialize hardware device context\n"); + return ret; + } + } // Initialize input - if (init_decoder(input_filename, &fmt_ctx, &dec_ctx, &video_stream_index) < 0) { + ret = init_decoder(hw_type, hw_ctx, input_filename, &ifmt_ctx, &dec_ctx, &video_stream_index); + if (ret < 0) { fprintf(stderr, "Failed to initialize decoder\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return ret; } // Initialize output based on Libplacebo or RealESRGAN configuration @@ -230,17 +268,19 @@ extern "C" int process_video( // Initialize output encoder encoder_config->output_width = output_width; encoder_config->output_height = output_height; - if (init_encoder(output_filename, &ofmt_ctx, &enc_ctx, dec_ctx, encoder_config) < 0) { + ret = init_encoder(hw_ctx, output_filename, &ofmt_ctx, 
&enc_ctx, dec_ctx, encoder_config); + if (ret < 0) { fprintf(stderr, "Failed to initialize encoder\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return ret; } // Write the output file header - if (avformat_write_header(ofmt_ctx, NULL) < 0) { + ret = avformat_write_header(ofmt_ctx, NULL); + if (ret < 0) { fprintf(stderr, "Error occurred when opening output file\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return ret; } // Create and initialize the appropriate filter @@ -251,15 +291,15 @@ extern "C" int process_video( // Validate shader path if (!config.shader_path) { fprintf(stderr, "Shader path must be provided for the libplacebo filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return -1; } // Validate output dimensions if (config.output_width <= 0 || config.output_height <= 0) { fprintf(stderr, "Output dimensions must be provided for the libplacebo filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return -1; } filter = new LibplaceboFilter( @@ -273,15 +313,15 @@ extern "C" int process_video( // Validate model name if (!config.model) { fprintf(stderr, "Model name must be provided for the RealESRGAN filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return -1; } // Validate scaling factor if (config.scaling_factor <= 0) { fprintf(stderr, "Scaling factor must be provided for the RealESRGAN filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return -1; } filter = new RealesrganFilter( @@ -291,37 +331,37 
@@ extern "C" int process_video( } default: fprintf(stderr, "Unknown filter type\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return -1; } // Initialize the filter - if (filter->init(dec_ctx, enc_ctx) < 0) { + ret = filter->init(dec_ctx, enc_ctx, hw_ctx); + if (ret < 0) { fprintf(stderr, "Failed to initialize filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return ret; } // Process frames - if ((ret = - process_frames(status, fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter, video_stream_index) - ) < 0) { + ret = process_frames(status, ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter, video_stream_index); + if (ret < 0) { fprintf(stderr, "Error processing frames\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); + return ret; } // Write the output file trailer av_write_trailer(ofmt_ctx); // Cleanup before returning - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, filter); if (ret < 0 && ret != AVERROR_EOF) { char errbuf[AV_ERROR_MAX_STRING_SIZE]; av_strerror(ret, errbuf, sizeof(errbuf)); fprintf(stderr, "Error occurred: %s\n", errbuf); - return 1; + return ret; } return 0; } diff --git a/src/realesrgan_filter.cpp b/src/realesrgan_filter.cpp index 7ccc2dcfb..ffedec361 100644 --- a/src/realesrgan_filter.cpp +++ b/src/realesrgan_filter.cpp @@ -6,6 +6,7 @@ extern "C" { #include #include +#include #include } @@ -37,7 +38,7 @@ RealesrganFilter::~RealesrganFilter() { } } -int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { +int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) { // Construct the model paths using std::filesystem std::filesystem::path model_param_path; 
std::filesystem::path model_bin_path; @@ -62,6 +63,18 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { std::filesystem::path model_param_full_path = find_resource_file(model_param_path); std::filesystem::path model_bin_full_path = find_resource_file(model_bin_path); + // Check if the model files exist (string() narrows the paths so "%s" is also valid where path::c_str() is wchar_t*) + if (!std::filesystem::exists(model_param_full_path)) { + fprintf( + stderr, "RealESRGAN model param file not found: %s\n", model_param_full_path.string().c_str() + ); + return -1; + } + if (!std::filesystem::exists(model_bin_full_path)) { + fprintf(stderr, "RealESRGAN model bin file not found: %s\n", model_bin_full_path.string().c_str()); + return -1; + } + // Create a new RealESRGAN instance realesrgan = new RealESRGAN(gpuid, tta_mode); @@ -95,12 +108,14 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { return 0; } -AVFrame *RealesrganFilter::process_frame(AVFrame *input_frame) { +int RealesrganFilter::process_frame(AVFrame *input_frame, AVFrame **output_frame) { + int ret; + // Convert the input frame to RGB24 ncnn::Mat input_mat = avframe_to_ncnn_mat(input_frame); if (input_mat.empty()) { fprintf(stderr, "Failed to convert AVFrame to ncnn::Mat\n"); - return nullptr; + return -1; } // Allocate space for ouptut ncnn::Mat @@ -108,19 +123,25 @@ AVFrame *RealesrganFilter::process_frame(AVFrame *input_frame) { int output_height = input_mat.h * realesrgan->scale; ncnn::Mat output_mat = ncnn::Mat(output_width, output_height, (size_t)3, 3); - if (realesrgan->process(input_mat, output_mat) != 0) { + ret = realesrgan->process(input_mat, output_mat); + if (ret != 0) { fprintf(stderr, "RealESRGAN processing failed\n"); - return nullptr; + return ret; } // Convert ncnn::Mat to AVFrame - AVFrame *output_frame = ncnn_mat_to_avframe(output_mat, output_pix_fmt); + *output_frame = ncnn_mat_to_avframe(output_mat, output_pix_fmt); + // ncnn_mat_to_avframe() returns nullptr on failure; stop before the pts write dereferences it + if (*output_frame == nullptr) { + fprintf(stderr, "Failed to convert ncnn::Mat to AVFrame\n"); + return -1; + } // Rescale PTS to encoder's time base - output_frame->pts = av_rescale_q(input_frame->pts, input_time_base,
output_time_base); + (*output_frame)->pts = av_rescale_q(input_frame->pts, input_time_base, output_time_base); // Return the processed frame to the caller - return output_frame; + return ret; } int RealesrganFilter::flush(std::vector &processed_frames) { diff --git a/src/video2x.c b/src/video2x.c index 8ae077c87..2c39deddc 100644 --- a/src/video2x.c +++ b/src/video2x.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -17,8 +18,9 @@ static struct option long_options[] = { {"input", required_argument, NULL, 'i'}, {"output", required_argument, NULL, 'o'}, {"filter", required_argument, NULL, 'f'}, + {"hwaccel", required_argument, NULL, 'a'}, {"version", no_argument, NULL, 'v'}, - {"help", no_argument, NULL, 0}, + {"help", no_argument, NULL, '?'}, // Encoder options {"codec", required_argument, NULL, 'c'}, @@ -45,6 +47,7 @@ struct arguments { const char *input_filename; const char *output_filename; const char *filter_type; + const char *hwaccel; // Encoder options const char *codec; @@ -88,14 +91,15 @@ void print_help() { printf(" -i, --input Input video file path\n"); printf(" -o, --output Output video file path\n"); printf(" -f, --filter Filter to use: 'libplacebo' or 'realesrgan'\n"); + printf(" -a, --hwaccel Hardware acceleration method (default: none)\n"); printf(" -v, --version Print program version\n"); - printf(" --help Display this help page\n"); + printf(" -?, --help Display this help page\n"); printf("\nEncoder Options (Optional):\n"); printf(" -c, --codec Output codec (default: libx264)\n"); printf(" -p, --preset Encoder preset (default: veryslow)\n"); - printf(" -x, --pixfmt Output pixel format (default: yuv420p)\n"); - printf(" -b, --bitrate Bitrate in bits per second (default: 2000000)\n"); + printf(" -x, --pixfmt Output pixel format (default: auto)\n"); + printf(" -b, --bitrate Bitrate in bits per second (default: 0 (VBR))\n"); printf(" -q, --crf Constant Rate Factor (default: 17.0)\n"); printf("\nlibplacebo Options:\n"); @@ -117,12 
+121,13 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { arguments->input_filename = NULL; arguments->output_filename = NULL; arguments->filter_type = NULL; + arguments->hwaccel = "none"; // Encoder options arguments->codec = "libx264"; arguments->preset = "veryslow"; - arguments->pix_fmt = "yuv420p"; - arguments->bitrate = 2 * 1000 * 1000; + arguments->pix_fmt = NULL; + arguments->bitrate = 0; arguments->crf = 17.0; // libplacebo options @@ -135,8 +140,9 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { arguments->model = NULL; arguments->scaling_factor = 0; - while ((c = getopt_long(argc, argv, "i:o:f:c:x:p:b:q:s:w:h:r:m:v", long_options, &option_index) - ) != -1) { + while ((c = getopt_long( + argc, argv, "i:o:f:a:c:x:p:b:q:s:w:h:r:m:v", long_options, &option_index + )) != -1) { switch (c) { case 'i': arguments->input_filename = optarg; @@ -147,6 +153,9 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { case 'f': arguments->filter_type = optarg; break; + case 'a': + arguments->hwaccel = optarg; + break; case 'c': arguments->codec = optarg; break; @@ -283,10 +292,13 @@ int main(int argc, char **argv) { } // Parse pixel format to AVPixelFormat - enum AVPixelFormat pix_fmt = av_get_pix_fmt(arguments.pix_fmt); - if (pix_fmt == AV_PIX_FMT_NONE) { - fprintf(stderr, "Error: Invalid pixel format '%s'.\n", arguments.pix_fmt); - return 1; + enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; + if (arguments.pix_fmt) { + pix_fmt = av_get_pix_fmt(arguments.pix_fmt); + if (pix_fmt == AV_PIX_FMT_NONE) { + fprintf(stderr, "Error: Invalid pixel format '%s'.\n", arguments.pix_fmt); + return 1; + } } // Setup encoder configuration @@ -300,6 +312,18 @@ int main(int argc, char **argv) { .crf = arguments.crf, }; + // Parse hardware acceleration method + enum AVHWDeviceType hw_device_type; + if (strcmp(arguments.hwaccel, "none") == 0) { + hw_device_type = AV_HWDEVICE_TYPE_NONE; + } else { + hw_device_type = 
av_hwdevice_find_type_by_name(arguments.hwaccel); + if (hw_device_type == AV_HWDEVICE_TYPE_NONE) { + fprintf(stderr, "Error: Invalid hardware device type '%s'.\n", arguments.hwaccel); + return 1; + } + } + // Setup struct to store processing status struct ProcessingStatus status = {0}; @@ -307,11 +331,12 @@ int main(int argc, char **argv) { if (process_video( arguments.input_filename, arguments.output_filename, + hw_device_type, &filter_config, &encoder_config, &status )) { - fprintf(stderr, "Video processing failed.\n"); + fprintf(stderr, "Video processing failed\n"); return 1; }