feat: added hwaccel for encoder and decoder

Signed-off-by: k4yt3x <[email protected]>
k4yt3x · Oct 8, 2024 · 7c20feb · 7c20feb
1 parent a2e590f
commit 7c20feb
Show file tree

Hide file tree

Showing 15 changed files with 286 additions and 145 deletions.
diff --git a/include/decoder.h b/include/decoder.h
@@ -2,10 +2,12 @@
 #define DECODER_H
 
 #include <libavcodec/avcodec.h>
-#include <libavfilter/avfilter.h>
 #include <libavformat/avformat.h>
+#include <libavutil/buffer.h>
 
 int init_decoder(
+    AVHWDeviceType hw_type,
+    AVBufferRef *hw_ctx,
     const char *input_filename,
     AVFormatContext **fmt_ctx,
     AVCodecContext **dec_ctx,

diff --git a/include/encoder.h b/include/encoder.h
@@ -3,10 +3,12 @@
 
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
+#include <libavutil/buffer.h>
 
 #include "libvideo2x.h"
 
 int init_encoder(
+    AVBufferRef *hw_ctx,
     const char *output_filename,
     AVFormatContext **ofmt_ctx,
     AVCodecContext **enc_ctx,

diff --git a/include/filter.h b/include/filter.h
@@ -6,14 +6,15 @@
 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavfilter/avfilter.h>
+#include <libavutil/buffer.h>
 }
 
 // Abstract base class for filters
 class Filter {
    public:
     virtual ~Filter() {}
-    virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) = 0;
-    virtual AVFrame *process_frame(AVFrame *input_frame) = 0;
+    virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0;
+    virtual int process_frame(AVFrame *input_frame, AVFrame **output_frame) = 0;
     virtual int flush(std::vector<AVFrame *> &processed_frames) = 0;
 };
 

diff --git a/include/libplacebo.h b/include/libplacebo.h
@@ -8,10 +8,10 @@
 #include <libavutil/buffer.h>
 
 int init_libplacebo(
+    AVBufferRef *hw_ctx,
     AVFilterGraph **filter_graph,
     AVFilterContext **buffersrc_ctx,
     AVFilterContext **buffersink_ctx,
-    AVBufferRef **device_ctx,
     AVCodecContext *dec_ctx,
     int output_width,
     int output_height,

diff --git a/include/libplacebo_filter.h b/include/libplacebo_filter.h
@@ -4,6 +4,7 @@
 #include <filesystem>
 
 #include <libavutil/buffer.h>
+#include <libavutil/frame.h>
 
 #include "filter.h"
 
@@ -13,7 +14,6 @@ class LibplaceboFilter : public Filter {
     AVFilterGraph *filter_graph;
     AVFilterContext *buffersrc_ctx;
     AVFilterContext *buffersink_ctx;
-    AVBufferRef *device_ctx;
     int output_width;
     int output_height;
     const std::filesystem::path shader_path;
@@ -27,10 +27,10 @@ class LibplaceboFilter : public Filter {
     virtual ~LibplaceboFilter();
 
     // Initializes the filter with decoder and encoder contexts
-    int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) override;
+    int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
 
     // Processes an input frame and returns the processed frame
-    AVFrame *process_frame(AVFrame *input_frame) override;
+    int process_frame(AVFrame *input_frame, AVFrame **output_frame) override;
 
     // Flushes any remaining frames
     int flush(std::vector<AVFrame *> &processed_frames) override;

diff --git a/include/libvideo2x.h b/include/libvideo2x.h
@@ -1,12 +1,13 @@
 #ifndef LIBVIDEO2X_H
 #define LIBVIDEO2X_H
 
-#include <libavutil/pixfmt.h>
 #include <stdint.h>
 #include <time.h>
 
 #include <libavcodec/avcodec.h>
 #include <libavcodec/codec_id.h>
+#include <libavutil/hwcontext.h>
+#include <libavutil/pixfmt.h>
 
 #ifdef _WIN32
 #ifdef LIBVIDEO2X_EXPORTS
@@ -74,6 +75,7 @@ struct ProcessingStatus {
 LIBVIDEO2X_API int process_video(
     const char *input_filename,
     const char *output_filename,
+    enum AVHWDeviceType hw_device_type,
     const struct FilterConfig *filter_config,
     struct EncoderConfig *encoder_config,
     struct ProcessingStatus *status

diff --git a/include/realesrgan_filter.h b/include/realesrgan_filter.h
@@ -3,6 +3,9 @@
 
 #include <filesystem>
 
+#include <libavutil/buffer.h>
+#include <libavutil/frame.h>
+
 #include "filter.h"
 #include "realesrgan.h"
 
@@ -35,10 +38,10 @@ class RealesrganFilter : public Filter {
     virtual ~RealesrganFilter();
 
     // Initializes the filter with decoder and encoder contexts
-    int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) override;
+    int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
 
     // Processes an input frame and returns the processed frame
-    AVFrame *process_frame(AVFrame *input_frame) override;
+    int process_frame(AVFrame *input_frame, AVFrame **output_frame) override;
 
     // Flushes any remaining frames (if necessary)
     int flush(std::vector<AVFrame *> &processed_frames) override;

diff --git a/src/conversions.cpp b/src/conversions.cpp
@@ -77,10 +77,11 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
         converted_frame = convert_avframe_pix_fmt(frame, AV_PIX_FMT_BGR24);
         if (!converted_frame) {
             fprintf(stderr, "Failed to convert AVFrame to BGR24.\n");
-            return ncnn::Mat();  // Return an empty ncnn::Mat on failure
+            return ncnn::Mat();
         }
     } else {
-        converted_frame = frame;  // If the frame is already in BGR24, use it directly
+        // If the frame is already in BGR24, use it directly
+        converted_frame = frame;
     }
 
     // Allocate a new ncnn::Mat and copy the data
@@ -146,10 +147,7 @@ AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) {
         return nullptr;
     }
 
-    // Copy data from ncnn::Mat to the BGR AVFrame
-    // mat.to_pixels(bgr_frame->data[0], ncnn::Mat::PIXEL_BGR);
-
-    // Manually copy the pixel data from ncnn::Mat to the BGR AVFrame
+    // Copy the pixel data from ncnn::Mat to the BGR AVFrame
     for (int y = 0; y < mat.h; y++) {
         uint8_t *dst_row = bgr_frame->data[0] + y * bgr_frame->linesize[0];
         const uint8_t *src_row = mat.row<const uint8_t>(y);

diff --git a/src/decoder.cpp b/src/decoder.cpp
@@ -8,12 +8,28 @@ extern "C" {
 #include <libavfilter/buffersink.h>
 #include <libavfilter/buffersrc.h>
 #include <libavformat/avformat.h>
+#include <libavutil/hwcontext.h>
 #include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
 #include <libavutil/rational.h>
 }
 
+static enum AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
+
+// get_format callback assigned to the decoder context. Scans the
+// AV_PIX_FMT_NONE-terminated list offered in pix_fmts for the file-level
+// hw_pix_fmt and returns it when present; otherwise logs an error and returns
+// AV_PIX_FMT_NONE. The ctx argument is not consulted.
+static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts) {
+    const enum AVPixelFormat *candidate = pix_fmts;
+
+    // Advance until the wanted format or the list terminator is reached
+    while (*candidate != AV_PIX_FMT_NONE && *candidate != hw_pix_fmt) {
+        candidate++;
+    }
+
+    if (*candidate == AV_PIX_FMT_NONE) {
+        fprintf(stderr, "Failed to get HW surface format.\n");
+    }
+    return *candidate;
+}
+
 int init_decoder(
+    AVHWDeviceType hw_type,
+    AVBufferRef *hw_ctx,
     const char *input_filename,
     AVFormatContext **fmt_ctx,
     AVCodecContext **dec_ctx,
@@ -44,18 +60,45 @@ int init_decoder(
     AVStream *video_stream = ifmt_ctx->streams[stream_index];
 
     // Set up the decoder
-    const AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id);
-    if (!dec) {
+    const AVCodec *decoder = avcodec_find_decoder(video_stream->codecpar->codec_id);
+    if (!decoder) {
         fprintf(stderr, "Failed to find decoder for stream #%u\n", stream_index);
         return AVERROR_DECODER_NOT_FOUND;
     }
 
-    codec_ctx = avcodec_alloc_context3(dec);
+    codec_ctx = avcodec_alloc_context3(decoder);
     if (!codec_ctx) {
         fprintf(stderr, "Failed to allocate the decoder context\n");
         return AVERROR(ENOMEM);
     }
 
+    // Set hardware device context
+    if (hw_ctx != nullptr) {
+        codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
+        codec_ctx->get_format = get_hw_format;
+
+        // Automatically determine the hardware pixel format
+        for (int i = 0;; i++) {
+            const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i);
+            if (config == nullptr) {
+                fprintf(
+                    stderr,
+                    "Decoder %s does not support device type %s.\n",
+                    decoder->name,
+                    av_hwdevice_get_type_name(hw_type)
+                );
+                avcodec_free_context(&codec_ctx);
+                avformat_close_input(&ifmt_ctx);
+                return AVERROR(ENOSYS);
+            }
+            if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
+                config->device_type == hw_type) {
+                hw_pix_fmt = config->pix_fmt;
+                break;
+            }
+        }
+    }
+
     if ((ret = avcodec_parameters_to_context(codec_ctx, video_stream->codecpar)) < 0) {
         fprintf(stderr, "Failed to copy decoder parameters to input decoder context\n");
         return ret;
@@ -66,7 +109,7 @@ int init_decoder(
     codec_ctx->pkt_timebase = video_stream->time_base;
     codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, video_stream, NULL);
 
-    if ((ret = avcodec_open2(codec_ctx, dec, NULL)) < 0) {
+    if ((ret = avcodec_open2(codec_ctx, decoder, NULL)) < 0) {
         fprintf(stderr, "Failed to open decoder for stream #%u\n", stream_index);
         return ret;
     }

diff --git a/src/encoder.cpp b/src/encoder.cpp
@@ -10,15 +10,27 @@ extern "C" {
 #include <libavfilter/buffersink.h>
 #include <libavfilter/buffersrc.h>
 #include <libavformat/avformat.h>
+#include <libavutil/buffer.h>
 #include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
+#include <libavutil/pixfmt.h>
 #include <libavutil/rational.h>
 }
 
 #include "conversions.h"
 #include "libvideo2x.h"
 
+// Returns the first entry of the encoder's pix_fmts list, or AV_PIX_FMT_NONE
+// (after logging to stderr) when the encoder exposes no list at all.
+static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) {
+    const enum AVPixelFormat *supported_formats = encoder->pix_fmts;
+    if (supported_formats != nullptr) {
+        return supported_formats[0];
+    }
+
+    fprintf(stderr, "No pixel formats supported by encoder\n");
+    return AV_PIX_FMT_NONE;
+}
+
 int init_encoder(
+    AVBufferRef *hw_ctx,
     const char *output_filename,
     AVFormatContext **ofmt_ctx,
     AVCodecContext **enc_ctx,
@@ -36,8 +48,8 @@ int init_encoder(
     }
 
     // Create a new video stream
-    const AVCodec *enc = avcodec_find_encoder(encoder_config->codec);
-    if (!enc) {
+    const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec);
+    if (!encoder) {
         fprintf(stderr, "Necessary encoder not found\n");
         return AVERROR_ENCODER_NOT_FOUND;
     }
@@ -48,41 +60,57 @@ int init_encoder(
         return AVERROR_UNKNOWN;
     }
 
-    codec_ctx = avcodec_alloc_context3(enc);
+    codec_ctx = avcodec_alloc_context3(encoder);
     if (!codec_ctx) {
         fprintf(stderr, "Failed to allocate the encoder context\n");
         return AVERROR(ENOMEM);
     }
 
+    // Set hardware device context
+    if (hw_ctx != nullptr) {
+        codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
+    }
+
     // Set encoding parameters
     codec_ctx->height = encoder_config->output_height;
     codec_ctx->width = encoder_config->output_width;
     codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
-    codec_ctx->pix_fmt = encoder_config->pix_fmt;
     codec_ctx->time_base = av_inv_q(dec_ctx->framerate);
 
+    if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) {
+        // Use the specified pixel format
+        codec_ctx->pix_fmt = encoder_config->pix_fmt;
+    } else {
+        // Fall back to the default pixel format
+        codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder);
+        if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) {
+            fprintf(stderr, "Could not get the default pixel format for the encoder\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
     if (codec_ctx->time_base.num == 0 || codec_ctx->time_base.den == 0) {
         codec_ctx->time_base = av_inv_q(av_guess_frame_rate(fmt_ctx, out_stream, NULL));
     }
 
-    // Set the bit rate and other encoder parameters if needed
+    // Set the bit rate and other encoder parameters
     codec_ctx->bit_rate = encoder_config->bit_rate;
     codec_ctx->gop_size = 60;     // Keyframe interval
     codec_ctx->max_b_frames = 3;  // B-frames
     codec_ctx->keyint_min = 60;   // Minimum GOP size
 
     char crf_str[16];
     snprintf(crf_str, sizeof(crf_str), "%.f", encoder_config->crf);
-    if (encoder_config->codec == AV_CODEC_ID_H264 || encoder_config->codec == AV_CODEC_ID_HEVC) {
-        av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0);
-        av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0);
-    }
+
+    // Set the CRF and preset on any codec that supports them
+    av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0);
+    av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0);
 
     if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
         codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
     }
 
-    if ((ret = avcodec_open2(codec_ctx, enc, NULL)) < 0) {
+    if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) {
         fprintf(stderr, "Cannot open video encoder\n");
         return ret;
     }