From aba246fb890a35eb8ab3e0e3f924d4644c4af2a8 Mon Sep 17 00:00:00 2001
From: Robert Edmonds
Date: Sun, 17 Mar 2024 23:09:46 -0400
Subject: [PATCH] Add H265RtpDepacketizer

This commit adds an H265 depacketizer which takes incoming H265 RTP packets and emits H265 access units. It is closely based on the `H264RtpDepacketizer` added by @Sean-Der in https://github.com/paullouisageneau/libdatachannel/pull/1082.

I originally started with a version of this commit that was closer to the `H264RtpDepacketizer` and which emitted individual H265 NALUs in `H265RtpDepacketizer::buildFrames()`. This resulted in calling my `Track::onFrame()` callback for each NALU, which did not work well with the decoder that I'm using, which wants to see the VPS/SPS/PPS NALUs as a unit before initializing the decoder (https://intel.github.io/libvpl/v2.10/API_ref/VPL_func_vid_decode.html#mfxvideodecode-decodeheader).

So for the `H265RtpDepacketizer` I've tried to make it emit access units rather than NALUs. An "access unit" is (RFC 7798):

> A set of NAL units that are associated with each other according to a specified classification rule, that are consecutive in decoding order, *and that contain exactly one coded picture.*

"Exactly one coded picture" seems to correspond with what a caller might expect an "onFrame" callback to do. Maybe the `H264RtpDepacketizer` should be revised to similarly emit H264 access units rather than NALUs, too. At least, I could not find a way to receive individual NALUs from the depacketizer and run the VPL decoder without needing to do my own buffering/copying of the NALUs.

With this commit I can now do the following:

* Generate encoded bitstream output from the Intel VPL encoder.
* Pass the output of the encoder one frame at a time to libdatachannel's `Track::send()` on a track with an `H265RtpPacketizer` media handler.
* Transport the video track over a WebRTC connection to a libdatachannel peer.
* Depacketize it with the `H265RtpDepacketizer` media handler in this commit. * Pass the depacketized output via my `Track::onFrame()` callback to the Intel VPL decoder in "complete frame" mode (https://intel.github.io/libvpl/v2.10/API_ref/VPL_enums.html#_CPPv428MFX_BITSTREAM_COMPLETE_FRAME). Each "onFrame" callback corresponds to a single call to the decoder API to decode a frame. --- CMakeLists.txt | 2 + include/rtc/h265rtpdepacketizer.hpp | 43 ++++++++ include/rtc/rtc.hpp | 1 + src/h265nalunit.cpp | 2 +- src/h265rtpdepacketizer.cpp | 152 ++++++++++++++++++++++++++++ 5 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 include/rtc/h265rtpdepacketizer.hpp create mode 100644 src/h265rtpdepacketizer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 93727c63c..55dbeea57 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,7 @@ set(LIBDATACHANNEL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/h264rtpdepacketizer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/nalunit.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/h265rtppacketizer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/h265rtpdepacketizer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/h265nalunit.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/av1rtppacketizer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/nalunit.cpp @@ -117,6 +118,7 @@ set(LIBDATACHANNEL_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h264rtpdepacketizer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/nalunit.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265rtppacketizer.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265rtpdepacketizer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265nalunit.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/av1rtppacketizer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/nalunit.hpp diff --git a/include/rtc/h265rtpdepacketizer.hpp b/include/rtc/h265rtpdepacketizer.hpp new file mode 100644 index 000000000..688dc0c91 --- /dev/null +++ b/include/rtc/h265rtpdepacketizer.hpp @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2020 Staz Modrzynski + * Copyright (c) 2020-2024 
Paul-Louis Ageneau + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +#ifndef RTC_H265_RTP_DEPACKETIZER_H +#define RTC_H265_RTP_DEPACKETIZER_H + +#if RTC_ENABLE_MEDIA + +#include "common.hpp" +#include "mediahandler.hpp" +#include "message.hpp" +#include "rtp.hpp" + +#include + +namespace rtc { + +/// RTP depacketization for H265 +class RTC_CPP_EXPORT H265RtpDepacketizer : public MediaHandler { +public: + H265RtpDepacketizer() = default; + virtual ~H265RtpDepacketizer() = default; + + void incoming(message_vector &messages, const message_callback &send) override; + +private: + std::vector mRtpBuffer; + + message_vector buildFrames(message_vector::iterator firstPkt, message_vector::iterator lastPkt, + uint32_t timestamp); +}; + +} // namespace rtc + +#endif // RTC_ENABLE_MEDIA + +#endif // RTC_H265_RTP_DEPACKETIZER_H diff --git a/include/rtc/rtc.hpp b/include/rtc/rtc.hpp index 683219fc9..02bfc7cd5 100644 --- a/include/rtc/rtc.hpp +++ b/include/rtc/rtc.hpp @@ -32,6 +32,7 @@ #include "h264rtppacketizer.hpp" #include "h264rtpdepacketizer.hpp" #include "h265rtppacketizer.hpp" +#include "h265rtpdepacketizer.hpp" #include "mediahandler.hpp" #include "plihandler.hpp" #include "rtcpnackresponder.hpp" diff --git a/src/h265nalunit.cpp b/src/h265nalunit.cpp index 5fda10545..6f6b7e12a 100644 --- a/src/h265nalunit.cpp +++ b/src/h265nalunit.cpp @@ -34,7 +34,7 @@ H265NalUnitFragment::fragmentsFrom(shared_ptr nalu, uint16_t maxFra auto fragments_count = ceil(double(nalu->size()) / maxFragmentSize); maxFragmentSize = uint16_t(int(ceil(nalu->size() / fragments_count))); - // 3 bytes for FU indicator and FU header + // 3 bytes for NALU header and FU header maxFragmentSize -= (H265_NAL_HEADER_SIZE + H265_FU_HEADER_SIZE); auto f = nalu->forbiddenBit(); uint8_t nuhLayerId = nalu->nuhLayerId() & 0x3F; // 6 bits diff --git 
a/src/h265rtpdepacketizer.cpp b/src/h265rtpdepacketizer.cpp
new file mode 100644
index 000000000..b8a3b4d4a
--- /dev/null
+++ b/src/h265rtpdepacketizer.cpp
@@ -0,0 +1,182 @@
+/**
+ * Copyright (c) 2023-2024 Paul-Louis Ageneau
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ */
+
+#if RTC_ENABLE_MEDIA
+
+#include "h265rtpdepacketizer.hpp"
+#include "h265nalunit.hpp"
+
+#include "impl/internals.hpp"
+
+#include <algorithm>
+#include <iterator>
+#include <stdexcept>
+
+namespace rtc {
+
+// Annex B start code emitted in front of every NAL unit of an access unit.
+static const rtc::binary naluStartCode = {byte{0}, byte{0}, byte{0}, byte{1}};
+
+// Payload structure types that are unpacked rather than forwarded (RFC 7798).
+static const uint8_t naluTypeAP = 48; // aggregation packet
+static const uint8_t naluTypeFU = 49; // fragmentation unit
+
+/// Assembles the RTP packets in [begin, end) into a single access unit.
+///
+/// Each NAL unit is written after a start code: fragmentation units are reassembled
+/// into one NAL unit, aggregation packets are split into the NAL units they carry,
+/// and packets with a NAL unit type below 48 are copied as they are. Other payload
+/// types are skipped.
+///
+/// @param begin first packet of the group
+/// @param end one past the last packet of the group
+/// @param timestamp RTP timestamp stored in the FrameInfo attached to the result
+/// @return zero or one binary message holding the access unit
+/// @throws std::runtime_error if an aggregation packet declares a NAL unit that does
+///         not fit in the packet
+message_vector H265RtpDepacketizer::buildFrames(message_vector::iterator begin,
+                                                message_vector::iterator end, uint32_t timestamp) {
+	message_vector out = {};
+	auto accessUnit = std::vector<std::byte>{};
+	auto frameInfo = std::make_shared<FrameInfo>(timestamp);
+	auto nFrags = 0;
+	size_t reassembledHeaderOffset = 0;
+
+	for (auto it = begin; it != end; ++it) {
+		auto pkt = it->get();
+		auto pktParsed = reinterpret_cast<const rtc::RtpHeader *>(pkt->data());
+		auto headerSize = pktParsed->getSize() + pktParsed->getExtensionHeaderSize();
+		auto nalUnitHeader = H265NalUnitHeader{std::to_integer<uint8_t>(pkt->at(headerSize)),
+		                                       std::to_integer<uint8_t>(pkt->at(headerSize + 1))};
+
+		if (nFrags != 0 || nalUnitHeader.unitType() == naluTypeFU) {
+			if (nFrags++ == 0) {
+				std::copy(naluStartCode.begin(), naluStartCode.end(),
+				          std::back_inserter(accessUnit));
+
+				// Placeholder for the contents of the reassembled NALU header.
+				reassembledHeaderOffset = accessUnit.size();
+				accessUnit.emplace_back(std::byte(0));
+				accessUnit.emplace_back(std::byte(0));
+			}
+
+			auto nalUnitFragmentHeader = H265NalUnitFragmentHeader{
+			    std::to_integer<uint8_t>(pkt->at(headerSize + sizeof(H265NalUnitHeader)))};
+
+			std::copy(pkt->begin() + headerSize + sizeof(H265NalUnitHeader) +
+			              sizeof(H265NalUnitFragmentHeader),
+			          pkt->end(), std::back_inserter(accessUnit));
+
+			if (nalUnitFragmentHeader.isEnd()) {
+				auto reassembledNalUnitHeader = reinterpret_cast<H265NalUnitHeader *>(
+				    accessUnit.data() + reassembledHeaderOffset);
+
+				*reassembledNalUnitHeader = nalUnitHeader;
+				reassembledNalUnitHeader->setUnitType(nalUnitFragmentHeader.unitType());
+
+				// The fragmented NALU is complete. Reset the counter so the following
+				// packets are dispatched on their own type instead of being parsed as
+				// further fragments.
+				nFrags = 0;
+			}
+		} else if (nalUnitHeader.unitType() == naluTypeAP) {
+			auto currOffset = headerSize + sizeof(H265NalUnitHeader);
+
+			while (currOffset + sizeof(uint16_t) < pkt->size()) {
+				auto naluSize = std::to_integer<uint16_t>(pkt->at(currOffset)) << 8 |
+				                std::to_integer<uint16_t>(pkt->at(currOffset + 1));
+
+				currOffset += sizeof(uint16_t);
+
+				if (pkt->size() < currOffset + naluSize) {
+					throw std::runtime_error("AP declared size is larger than buffer");
+				}
+
+				std::copy(naluStartCode.begin(), naluStartCode.end(),
+				          std::back_inserter(accessUnit));
+
+				std::copy(pkt->begin() + currOffset, pkt->begin() + currOffset + naluSize,
+				          std::back_inserter(accessUnit));
+
+				currOffset += naluSize;
+			}
+		} else if (nalUnitHeader.unitType() < naluTypeAP) {
+			// "NAL units with NAL unit type values in the range of 0 to 47, inclusive, may be
+			// passed to the decoder."
+			std::copy(naluStartCode.begin(), naluStartCode.end(), std::back_inserter(accessUnit));
+			std::copy(pkt->begin() + headerSize, pkt->end(), std::back_inserter(accessUnit));
+		} else {
+			// "NAL-unit-like structures with NAL unit type values in the range of 48 to 63,
+			// inclusive, MUST NOT be passed to the decoder."
+		}
+	}
+
+	if (!accessUnit.empty()) {
+		out.emplace_back(make_message(accessUnit.begin(), accessUnit.end(), Message::Binary, 0,
+		                              nullptr, frameInfo));
+	}
+
+	return out;
+}
+
+/// Buffers incoming RTP packets and replaces them in `messages` with access units.
+///
+/// Control messages are left in `messages`. Every other message is removed from it:
+/// those shorter than an RTP header are dropped, the rest are appended to the buffer.
+/// A group of buffered packets sharing a timestamp is turned into an access unit only
+/// once a packet with a different timestamp follows it; the last group stays buffered.
+void H265RtpDepacketizer::incoming(message_vector &messages, const message_callback &) {
+	messages.erase(std::remove_if(messages.begin(), messages.end(),
+	                              [&](message_ptr message) {
+		                              if (message->type == Message::Control) {
+			                              return false;
+		                              }
+
+		                              if (message->size() < sizeof(RtpHeader)) {
+			                              PLOG_VERBOSE << "RTP packet is too small, size="
+			                                           << message->size();
+			                              return true;
+		                              }
+
+		                              mRtpBuffer.push_back(std::move(message));
+		                              return true;
+	                              }),
+	               messages.end());
+
+	while (!mRtpBuffer.empty()) {
+		// Read the timestamp from the first buffered packet. 0 is a valid RTP timestamp,
+		// so it cannot serve as a "not set yet" marker.
+		const auto firstHeader =
+		    reinterpret_cast<const rtc::RtpHeader *>(mRtpBuffer.front()->data());
+		const uint32_t currentTimestamp = firstHeader->timestamp();
+
+		const auto groupEnd =
+		    std::find_if(mRtpBuffer.begin(), mRtpBuffer.end(), [&](const message_ptr &pkt) {
+			    auto header = reinterpret_cast<const rtc::RtpHeader *>(pkt->data());
+			    return header->timestamp() != currentTimestamp;
+		    });
+
+		// No packet with another timestamp follows yet: keep the group buffered.
+		if (groupEnd == mRtpBuffer.end()) {
+			break;
+		}
+
+		// Take the group out of the buffer before parsing it, so that a packet which
+		// makes buildFrames() throw is not parsed again on every later call.
+		message_vector group(std::make_move_iterator(mRtpBuffer.begin()),
+		                     std::make_move_iterator(groupEnd));
+		mRtpBuffer.erase(mRtpBuffer.begin(), groupEnd);
+
+		auto frames = buildFrames(group.begin(), group.end(), currentTimestamp);
+		messages.insert(messages.end(), frames.begin(), frames.end());
+	}
+}
+
+} // namespace rtc
+
+#endif // RTC_ENABLE_MEDIA