forked from paullouisageneau/libdatachannel
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit adds an H265 depacketizer which takes incoming H265 RTP packets and emits H265 access units. It is closely based on the `H264RtpDepacketizer` added by @Sean-Der in paullouisageneau#1082.

I originally started with a version of this commit that was closer to the `H264RtpDepacketizer` and which emitted individual H265 NALUs in `H265RtpDepacketizer::buildFrames()`. This resulted in my `Track::onFrame()` callback being called once per NALU, which did not work well with the decoder that I'm using: it wants to see the VPS/SPS/PPS NALUs as a unit before initializing the decoder (https://intel.github.io/libvpl/v2.10/API_ref/VPL_func_vid_decode.html#mfxvideodecode-decodeheader). So for the `H265RtpDepacketizer` I've tried to make it emit access units rather than NALUs.

An "access unit" is (RFC 7798):

> A set of NAL units that are associated with each other according to a specified classification rule, that are consecutive in decoding order, *and that contain exactly one coded picture.*

"Exactly one coded picture" seems to correspond with what a caller might expect an "onFrame" callback to deliver. Maybe the `H264RtpDepacketizer` should be revised to similarly emit H264 access units rather than NALUs, too. At least, I could not find a way to receive individual NALUs from the depacketizer and run the VPL decoder without needing to do my own buffering/copying of the NALUs.

With this commit I can now do the following:

* Generate encoded bitstream output from the Intel VPL encoder.
* Pass the output of the encoder one frame at a time to libdatachannel's `Track::send()` on a track with an `H265RtpPacketizer` media handler.
* Transport the video track over a WebRTC connection to a libdatachannel peer.
* Depacketize it with the `H265RtpDepacketizer` media handler in this commit.
* Pass the depacketized output via my `Track::onFrame()` callback to the Intel VPL decoder in "complete frame" mode (https://intel.github.io/libvpl/v2.10/API_ref/VPL_enums.html#_CPPv428MFX_BITSTREAM_COMPLETE_FRAME). Each "onFrame" callback corresponds to a single call to the decoder API to decode a frame.
- Loading branch information
Showing
5 changed files
with
190 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/** | ||
* Copyright (c) 2020 Staz Modrzynski | ||
* Copyright (c) 2020-2024 Paul-Louis Ageneau | ||
* | ||
* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
*/ | ||
|
||
#ifndef RTC_H265_RTP_DEPACKETIZER_H | ||
#define RTC_H265_RTP_DEPACKETIZER_H | ||
|
||
#if RTC_ENABLE_MEDIA | ||
|
||
#include "common.hpp" | ||
#include "mediahandler.hpp" | ||
#include "message.hpp" | ||
#include "rtp.hpp" | ||
|
||
#include <iterator> | ||
|
||
namespace rtc { | ||
|
||
/// RTP depacketization for H265
///
/// Media handler that collects incoming H265 RTP packets and, once all packets
/// sharing an RTP timestamp have been gathered, replaces them with a single
/// binary message holding the reassembled NAL units of that timestamp.
class RTC_CPP_EXPORT H265RtpDepacketizer : public MediaHandler {
public:
	H265RtpDepacketizer() = default;
	virtual ~H265RtpDepacketizer() = default;

	/// Consumes the RTP packets found in `messages` and appends the access units
	/// that could be completed. Control messages are left in place.
	///
	/// @param messages in/out list of messages travelling through the handler chain
	/// @param send     callback towards the remote peer (not used by this handler)
	void incoming(message_vector &messages, const message_callback &send) override;

private:
	/// RTP packets received so far that have not yet been turned into a frame,
	/// kept in arrival order across calls to incoming().
	std::vector<message_ptr> mRtpBuffer;

	/// Builds the output message(s) for the packets in the half-open range
	/// [begin, end).
	///
	/// @param begin     iterator to the first packet of the access unit
	/// @param end       iterator one past the last packet of the access unit
	/// @param timestamp RTP timestamp shared by the packets in the range
	/// @return the reassembled access unit, or an empty vector if the packets
	///         carried nothing that may be passed to a decoder
	message_vector buildFrames(message_vector::iterator begin, message_vector::iterator end,
	                           uint32_t timestamp);
};
|
||
} // namespace rtc | ||
|
||
#endif // RTC_ENABLE_MEDIA | ||
|
||
#endif // RTC_H265_RTP_DEPACKETIZER_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
/** | ||
* Copyright (c) 2023-2024 Paul-Louis Ageneau | ||
* | ||
* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
*/ | ||
|
||
#if RTC_ENABLE_MEDIA | ||
|
||
#include "h265rtpdepacketizer.hpp" | ||
#include "h265nalunit.hpp" | ||
|
||
#include "impl/internals.hpp" | ||
|
||
namespace rtc { | ||
|
||
// Four-byte start code (00 00 00 01) written in front of every NAL unit that is
// appended to an access unit.
const binary naluStartCode{byte{0}, byte{0}, byte{0}, byte{1}};

// RTP payload structure types handled specially by the depacketizer (RFC 7798):
// 48 is an Aggregation Packet (several NAL units in one RTP packet) and
// 49 is a Fragmentation Unit (one NAL unit split across several RTP packets).
constexpr uint8_t naluTypeAP = 48;
constexpr uint8_t naluTypeFU = 49;
|
||
/// Reassembles the RTP packets in [begin, end) into one H265 access unit.
///
/// Every NAL unit recovered from the packets is appended to a single buffer,
/// each one preceded by `naluStartCode`:
///  - Fragmentation Units (type 49): the two-byte NAL unit header is rebuilt
///    from the payload header and the type stored in the FU header, then the
///    fragment payloads are concatenated behind it.
///  - Aggregation Packets (type 48): each 16-bit length-prefixed NAL unit is
///    extracted.
///  - Types below 48: the RTP payload is copied unchanged.
///  - Anything else is dropped.
///
/// @param begin     iterator to the first packet of the access unit
/// @param end       iterator one past the last packet of the access unit
/// @param timestamp value stored in the FrameInfo attached to the output message
/// @return a vector holding one binary message with the access unit, or an
///         empty vector when no NAL unit data was recovered
/// @throws std::runtime_error if an Aggregation Packet declares a NAL unit size
///         that exceeds the packet
/// @note The header bytes are read with at(); a packet too short to hold them
///       presumably raises std::out_of_range (verify against the message type).
message_vector H265RtpDepacketizer::buildFrames(message_vector::iterator begin,
                                                message_vector::iterator end, uint32_t timestamp) {
	message_vector out = {};
	auto accessUnit = binary{};
	auto frameInfo = std::make_shared<FrameInfo>(timestamp);
	// Set once the first FU packet of this access unit has been handled.
	bool sawFragment = false;

	for (auto it = begin; it != end; ++it) {
		auto pkt = it->get();
		auto pktParsed = reinterpret_cast<const rtc::RtpHeader *>(pkt->data());
		auto rtpHeaderSize = pktParsed->getSize() + pktParsed->getExtensionHeaderSize();
		auto nalUnitHeader =
		    H265NalUnitHeader{std::to_integer<uint8_t>(pkt->at(rtpHeaderSize)),
		                      std::to_integer<uint8_t>(pkt->at(rtpHeaderSize + 1))};

		if (nalUnitHeader.unitType() == naluTypeFU) {
			const auto fuHeaderByte =
			    std::to_integer<uint8_t>(pkt->at(rtpHeaderSize + sizeof(H265NalUnitHeader)));
			auto nalUnitFragmentHeader = H265NalUnitFragmentHeader{fuHeaderByte};

			// RFC 7798: the most significant bit of the FU header is the S bit, set on
			// the first fragment of a fragmented NAL unit.
			const bool startsNalUnit = (fuHeaderByte & 0x80) != 0;

			// An access unit may contain several fragmented NAL units, so a start code
			// and a rebuilt NAL unit header are needed for each of them, not only for
			// the first one. The first FU packet of the access unit always opens a NAL
			// unit, even if its start fragment is missing, so that its payload is never
			// glued onto the preceding NAL unit.
			if (startsNalUnit || !sawFragment) {
				std::copy(naluStartCode.begin(), naluStartCode.end(),
				          std::back_inserter(accessUnit));

				nalUnitHeader.setUnitType(nalUnitFragmentHeader.unitType());
				accessUnit.emplace_back(byte(nalUnitHeader._first));
				accessUnit.emplace_back(byte(nalUnitHeader._second));
			}
			sawFragment = true;

			// Skip the payload header and the FU header; the rest is NAL unit payload.
			std::copy(pkt->begin() + rtpHeaderSize + sizeof(H265NalUnitHeader) +
			              sizeof(H265NalUnitFragmentHeader),
			          pkt->end(), std::back_inserter(accessUnit));
		} else if (nalUnitHeader.unitType() == naluTypeAP) {
			auto currOffset = rtpHeaderSize + sizeof(H265NalUnitHeader);

			// Walk the aggregated NAL units: big-endian 16-bit size, then the unit itself.
			while (currOffset + sizeof(uint16_t) < pkt->size()) {
				auto naluSize = std::to_integer<uint16_t>(pkt->at(currOffset)) << 8 |
				                std::to_integer<uint16_t>(pkt->at(currOffset + 1));

				currOffset += sizeof(uint16_t);

				if (pkt->size() < currOffset + naluSize) {
					throw std::runtime_error("H265 AP declared size is larger than buffer");
				}

				std::copy(naluStartCode.begin(), naluStartCode.end(),
				          std::back_inserter(accessUnit));

				std::copy(pkt->begin() + currOffset, pkt->begin() + currOffset + naluSize,
				          std::back_inserter(accessUnit));

				currOffset += naluSize;
			}
		} else if (nalUnitHeader.unitType() < naluTypeAP) {
			// "NAL units with NAL unit type values in the range of 0 to 47, inclusive, may be
			// passed to the decoder."
			std::copy(naluStartCode.begin(), naluStartCode.end(), std::back_inserter(accessUnit));
			std::copy(pkt->begin() + rtpHeaderSize, pkt->end(), std::back_inserter(accessUnit));
		} else {
			// "NAL-unit-like structures with NAL unit type values in the range of 48 to 63,
			// inclusive, MUST NOT be passed to the decoder."
		}
	}

	if (!accessUnit.empty()) {
		out.emplace_back(make_message(accessUnit.begin(), accessUnit.end(), Message::Binary, 0,
		                              nullptr, frameInfo));
	}

	return out;
}
|
||
/// Buffers incoming RTP packets and emits every access unit that is complete.
///
/// All non-control messages are removed from `messages`: packets smaller than
/// an RTP header are dropped, the others are appended to mRtpBuffer. The buffer
/// is then consumed from the front in runs of packets sharing the same RTP
/// timestamp. A run is only converted once a packet with a different timestamp
/// follows it, so the most recent run always stays buffered until a later call.
///
/// @param messages in/out message list; control messages are kept, RTP packets
///                 are replaced by the access units built from them
/// @note Exceptions raised by buildFrames() propagate to the caller. The packets
///       of the failing access unit are discarded beforehand so a malformed
///       packet cannot make every later call fail again.
void H265RtpDepacketizer::incoming(message_vector &messages, const message_callback &) {
	messages.erase(std::remove_if(messages.begin(), messages.end(),
	                              [&](message_ptr message) {
		                              if (message->type == Message::Control) {
			                              return false;
		                              }

		                              if (message->size() < sizeof(RtpHeader)) {
			                              PLOG_VERBOSE << "RTP packet is too small, size="
			                                           << message->size();
			                              return true;
		                              }

		                              mRtpBuffer.push_back(std::move(message));
		                              return true;
	                              }),
	               messages.end());

	while (!mRtpBuffer.empty()) {
		// Take the timestamp from the first buffered packet instead of using 0 as a
		// "not set" marker: 0 is a valid RTP timestamp.
		const uint32_t current_timestamp =
		    reinterpret_cast<const rtc::RtpHeader *>(mRtpBuffer.front()->data())->timestamp();

		const auto frameEnd =
		    std::find_if(mRtpBuffer.begin(), mRtpBuffer.end(), [&](const message_ptr &pkt) {
			    auto p = reinterpret_cast<const rtc::RtpHeader *>(pkt->data());
			    return p->timestamp() != current_timestamp;
		    });

		// No packet with another timestamp yet: the access unit may still be incomplete.
		if (frameEnd == mRtpBuffer.end()) {
			break;
		}

		// Detach the packets from the buffer before building the frame, so that they
		// are gone even if buildFrames() throws on malformed input.
		message_vector framePackets(std::make_move_iterator(mRtpBuffer.begin()),
		                            std::make_move_iterator(frameEnd));
		mRtpBuffer.erase(mRtpBuffer.begin(), frameEnd);

		auto frames = buildFrames(framePackets.begin(), framePackets.end(), current_timestamp);
		messages.insert(messages.end(), frames.begin(), frames.end());
	}
}
|
||
} // namespace rtc | ||
|
||
#endif // RTC_ENABLE_MEDIA |