From 24a90db237accffbc0ed3888d861562c747a23c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Monnom?= Date: Tue, 6 Feb 2024 18:30:54 +0100 Subject: [PATCH] fix ptr align (#160) --- examples/face_landmark/face_landmark.py | 23 ---- .../livekit/rtc/_proto/video_frame_pb2.py | 64 +++++----- .../livekit/rtc/_proto/video_frame_pb2.pyi | 15 +-- livekit-rtc/livekit/rtc/video_frame.py | 109 +++++++++++++----- livekit-rtc/rust-sdks | 2 +- 5 files changed, 119 insertions(+), 94 deletions(-) diff --git a/examples/face_landmark/face_landmark.py b/examples/face_landmark/face_landmark.py index edc20f36..a72b7bd1 100644 --- a/examples/face_landmark/face_landmark.py +++ b/examples/face_landmark/face_landmark.py @@ -15,20 +15,6 @@ tasks = set() -# You can download a face landmark model file from https://developers.google.com/mediapipe/solutions/vision/face_landmarker#models -model_file = "face_landmarker.task" -model_path = os.path.dirname(os.path.realpath(__file__)) + "/" + model_file - -BaseOptions = mp.tasks.BaseOptions -FaceLandmarker = mp.tasks.vision.FaceLandmarker -FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions -VisionRunningMode = mp.tasks.vision.RunningMode - -options = FaceLandmarkerOptions( - base_options=BaseOptions(model_asset_path=model_path), - running_mode=VisionRunningMode.VIDEO, -) - async def main(room: rtc.Room) -> None: video_stream = None @@ -103,7 +89,6 @@ def draw_landmarks_on_image(rgb_image, detection_result): async def frame_loop(video_stream: rtc.VideoStream) -> None: - landmarker = FaceLandmarker.create_from_options(options) cv2.namedWindow("livekit_video", cv2.WINDOW_AUTOSIZE) cv2.startWindowThread() async for frame_event in video_stream: @@ -112,19 +97,11 @@ async def frame_loop(video_stream: rtc.VideoStream) -> None: arr = np.frombuffer(buffer.data, dtype=np.uint8) arr = arr.reshape((buffer.height, buffer.width, 3)) - mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=arr) - detection_result = landmarker.detect_for_video( 
- mp_image, frame_event.timestamp_us - ) - - draw_landmarks_on_image(arr, detection_result) - arr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR) cv2.imshow("livekit_video", arr) if cv2.waitKey(1) & 0xFF == ord("q"): break - landmarker.close() cv2.destroyAllWindows() diff --git a/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.py b/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.py index ad7c0c96..7acb92a9 100644 --- a/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.py +++ b/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.py @@ -15,7 +15,7 @@ from . import handle_pb2 as handle__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11video_frame.proto\x12\rlivekit.proto\x1a\x0chandle.proto\"\xb5\x01\n\x15NewVideoStreamRequest\x12\x14\n\x0ctrack_handle\x18\x01 \x01(\x04\x12,\n\x04type\x18\x02 \x01(\x0e\x32\x1e.livekit.proto.VideoStreamType\x12\x33\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1e.livekit.proto.VideoBufferTypeH\x00\x88\x01\x01\x12\x18\n\x10normalize_stride\x18\x04 \x01(\x08\x42\t\n\x07_format\"I\n\x16NewVideoStreamResponse\x12/\n\x06stream\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoStream\"\x7f\n\x15NewVideoSourceRequest\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoSourceType\x12\x38\n\nresolution\x18\x02 \x01(\x0b\x32$.livekit.proto.VideoSourceResolution\"I\n\x16NewVideoSourceResponse\x12/\n\x06source\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoSource\"\xa7\x01\n\x18\x43\x61ptureVideoFrameRequest\x12\x15\n\rsource_handle\x18\x01 \x01(\x04\x12.\n\x06\x62uffer\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoBufferInfo\x12\x14\n\x0ctimestamp_us\x18\x03 \x01(\x03\x12.\n\x08rotation\x18\x04 \x01(\x0e\x32\x1c.livekit.proto.VideoRotation\"\x1b\n\x19\x43\x61ptureVideoFrameResponse\"\x87\x01\n\x13VideoConvertRequest\x12\x0e\n\x06\x66lip_y\x18\x01 \x01(\x08\x12.\n\x06\x62uffer\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoBufferInfo\x12\x30\n\x08\x64st_type\x18\x03 
\x01(\x0e\x32\x1e.livekit.proto.VideoBufferType\"e\n\x14VideoConvertResponse\x12\x12\n\x05\x65rror\x18\x01 \x01(\tH\x00\x88\x01\x01\x12/\n\x06\x62uffer\x18\x02 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoBufferB\x08\n\x06_error\"D\n\x0fVideoResolution\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\x12\x12\n\nframe_rate\x18\x03 \x01(\x01\"\x93\x02\n\x0fVideoBufferInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoBufferType\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x10\n\x08\x64\x61ta_ptr\x18\x04 \x01(\x04\x12\x10\n\x08\x64\x61ta_len\x18\x05 \x01(\r\x12\x0e\n\x06stride\x18\x06 \x01(\r\x12@\n\ncomponents\x18\x07 \x03(\x0b\x32,.livekit.proto.VideoBufferInfo.ComponentInfo\x1a=\n\rComponentInfo\x12\x0e\n\x06offset\x18\x01 \x01(\r\x12\x0e\n\x06stride\x18\x02 \x01(\r\x12\x0c\n\x04size\x18\x03 \x01(\r\"o\n\x10OwnedVideoBuffer\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoBufferInfo\"?\n\x0fVideoStreamInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoStreamType\"o\n\x10OwnedVideoStream\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoStreamInfo\"\x9f\x01\n\x10VideoStreamEvent\x12\x15\n\rstream_handle\x18\x01 \x01(\x04\x12;\n\x0e\x66rame_received\x18\x02 \x01(\x0b\x32!.livekit.proto.VideoFrameReceivedH\x00\x12,\n\x03\x65os\x18\x03 \x01(\x0b\x32\x1d.livekit.proto.VideoStreamEOSH\x00\x42\t\n\x07message\"\x8b\x01\n\x12VideoFrameReceived\x12/\n\x06\x62uffer\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoBuffer\x12\x14\n\x0ctimestamp_us\x18\x02 \x01(\x03\x12.\n\x08rotation\x18\x03 \x01(\x0e\x32\x1c.livekit.proto.VideoRotation\"\x10\n\x0eVideoStreamEOS\"6\n\x15VideoSourceResolution\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\"?\n\x0fVideoSourceInfo\x12,\n\x04type\x18\x01 
\x01(\x0e\x32\x1e.livekit.proto.VideoSourceType\"o\n\x10OwnedVideoSource\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoSourceInfo*1\n\nVideoCodec\x12\x07\n\x03VP8\x10\x00\x12\x08\n\x04H264\x10\x01\x12\x07\n\x03\x41V1\x10\x02\x12\x07\n\x03VP9\x10\x03*l\n\rVideoRotation\x12\x14\n\x10VIDEO_ROTATION_0\x10\x00\x12\x15\n\x11VIDEO_ROTATION_90\x10\x01\x12\x16\n\x12VIDEO_ROTATION_180\x10\x02\x12\x16\n\x12VIDEO_ROTATION_270\x10\x03*\x81\x01\n\x0fVideoBufferType\x12\x08\n\x04RGBA\x10\x00\x12\x08\n\x04\x41\x42GR\x10\x01\x12\x08\n\x04\x41RGB\x10\x02\x12\x08\n\x04\x42GRA\x10\x03\x12\t\n\x05RGB24\x10\x04\x12\x08\n\x04I420\x10\x05\x12\t\n\x05I420A\x10\x06\x12\x08\n\x04I422\x10\x07\x12\x08\n\x04I444\x10\x08\x12\x08\n\x04I010\x10\t\x12\x08\n\x04NV12\x10\n*Y\n\x0fVideoStreamType\x12\x17\n\x13VIDEO_STREAM_NATIVE\x10\x00\x12\x16\n\x12VIDEO_STREAM_WEBGL\x10\x01\x12\x15\n\x11VIDEO_STREAM_HTML\x10\x02**\n\x0fVideoSourceType\x12\x17\n\x13VIDEO_SOURCE_NATIVE\x10\x00\x42\x10\xaa\x02\rLiveKit.Protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11video_frame.proto\x12\rlivekit.proto\x1a\x0chandle.proto\"\xb5\x01\n\x15NewVideoStreamRequest\x12\x14\n\x0ctrack_handle\x18\x01 \x01(\x04\x12,\n\x04type\x18\x02 \x01(\x0e\x32\x1e.livekit.proto.VideoStreamType\x12\x33\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1e.livekit.proto.VideoBufferTypeH\x00\x88\x01\x01\x12\x18\n\x10normalize_stride\x18\x04 \x01(\x08\x42\t\n\x07_format\"I\n\x16NewVideoStreamResponse\x12/\n\x06stream\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoStream\"\x7f\n\x15NewVideoSourceRequest\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoSourceType\x12\x38\n\nresolution\x18\x02 \x01(\x0b\x32$.livekit.proto.VideoSourceResolution\"I\n\x16NewVideoSourceResponse\x12/\n\x06source\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoSource\"\xa7\x01\n\x18\x43\x61ptureVideoFrameRequest\x12\x15\n\rsource_handle\x18\x01 
\x01(\x04\x12.\n\x06\x62uffer\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoBufferInfo\x12\x14\n\x0ctimestamp_us\x18\x03 \x01(\x03\x12.\n\x08rotation\x18\x04 \x01(\x0e\x32\x1c.livekit.proto.VideoRotation\"\x1b\n\x19\x43\x61ptureVideoFrameResponse\"\x87\x01\n\x13VideoConvertRequest\x12\x0e\n\x06\x66lip_y\x18\x01 \x01(\x08\x12.\n\x06\x62uffer\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoBufferInfo\x12\x30\n\x08\x64st_type\x18\x03 \x01(\x0e\x32\x1e.livekit.proto.VideoBufferType\"e\n\x14VideoConvertResponse\x12\x12\n\x05\x65rror\x18\x01 \x01(\tH\x00\x88\x01\x01\x12/\n\x06\x62uffer\x18\x02 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoBufferB\x08\n\x06_error\"D\n\x0fVideoResolution\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\x12\x12\n\nframe_rate\x18\x03 \x01(\x01\"\x83\x02\n\x0fVideoBufferInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoBufferType\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x10\n\x08\x64\x61ta_ptr\x18\x04 \x01(\x04\x12\x0e\n\x06stride\x18\x06 \x01(\r\x12@\n\ncomponents\x18\x07 \x03(\x0b\x32,.livekit.proto.VideoBufferInfo.ComponentInfo\x1a?\n\rComponentInfo\x12\x10\n\x08\x64\x61ta_ptr\x18\x01 \x01(\x04\x12\x0e\n\x06stride\x18\x02 \x01(\r\x12\x0c\n\x04size\x18\x03 \x01(\r\"o\n\x10OwnedVideoBuffer\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoBufferInfo\"?\n\x0fVideoStreamInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoStreamType\"o\n\x10OwnedVideoStream\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoStreamInfo\"\x9f\x01\n\x10VideoStreamEvent\x12\x15\n\rstream_handle\x18\x01 \x01(\x04\x12;\n\x0e\x66rame_received\x18\x02 \x01(\x0b\x32!.livekit.proto.VideoFrameReceivedH\x00\x12,\n\x03\x65os\x18\x03 
\x01(\x0b\x32\x1d.livekit.proto.VideoStreamEOSH\x00\x42\t\n\x07message\"\x8b\x01\n\x12VideoFrameReceived\x12/\n\x06\x62uffer\x18\x01 \x01(\x0b\x32\x1f.livekit.proto.OwnedVideoBuffer\x12\x14\n\x0ctimestamp_us\x18\x02 \x01(\x03\x12.\n\x08rotation\x18\x03 \x01(\x0e\x32\x1c.livekit.proto.VideoRotation\"\x10\n\x0eVideoStreamEOS\"6\n\x15VideoSourceResolution\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\"?\n\x0fVideoSourceInfo\x12,\n\x04type\x18\x01 \x01(\x0e\x32\x1e.livekit.proto.VideoSourceType\"o\n\x10OwnedVideoSource\x12-\n\x06handle\x18\x01 \x01(\x0b\x32\x1d.livekit.proto.FfiOwnedHandle\x12,\n\x04info\x18\x02 \x01(\x0b\x32\x1e.livekit.proto.VideoSourceInfo*1\n\nVideoCodec\x12\x07\n\x03VP8\x10\x00\x12\x08\n\x04H264\x10\x01\x12\x07\n\x03\x41V1\x10\x02\x12\x07\n\x03VP9\x10\x03*l\n\rVideoRotation\x12\x14\n\x10VIDEO_ROTATION_0\x10\x00\x12\x15\n\x11VIDEO_ROTATION_90\x10\x01\x12\x16\n\x12VIDEO_ROTATION_180\x10\x02\x12\x16\n\x12VIDEO_ROTATION_270\x10\x03*\x81\x01\n\x0fVideoBufferType\x12\x08\n\x04RGBA\x10\x00\x12\x08\n\x04\x41\x42GR\x10\x01\x12\x08\n\x04\x41RGB\x10\x02\x12\x08\n\x04\x42GRA\x10\x03\x12\t\n\x05RGB24\x10\x04\x12\x08\n\x04I420\x10\x05\x12\t\n\x05I420A\x10\x06\x12\x08\n\x04I422\x10\x07\x12\x08\n\x04I444\x10\x08\x12\x08\n\x04I010\x10\t\x12\x08\n\x04NV12\x10\n*Y\n\x0fVideoStreamType\x12\x17\n\x13VIDEO_STREAM_NATIVE\x10\x00\x12\x16\n\x12VIDEO_STREAM_WEBGL\x10\x01\x12\x15\n\x11VIDEO_STREAM_HTML\x10\x02**\n\x0fVideoSourceType\x12\x17\n\x13VIDEO_SOURCE_NATIVE\x10\x00\x42\x10\xaa\x02\rLiveKit.Protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,16 +23,16 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['DESCRIPTOR']._options = None _globals['DESCRIPTOR']._serialized_options = b'\252\002\rLiveKit.Proto' - _globals['_VIDEOCODEC']._serialized_start=2148 - _globals['_VIDEOCODEC']._serialized_end=2197 - _globals['_VIDEOROTATION']._serialized_start=2199 - 
_globals['_VIDEOROTATION']._serialized_end=2307 - _globals['_VIDEOBUFFERTYPE']._serialized_start=2310 - _globals['_VIDEOBUFFERTYPE']._serialized_end=2439 - _globals['_VIDEOSTREAMTYPE']._serialized_start=2441 - _globals['_VIDEOSTREAMTYPE']._serialized_end=2530 - _globals['_VIDEOSOURCETYPE']._serialized_start=2532 - _globals['_VIDEOSOURCETYPE']._serialized_end=2574 + _globals['_VIDEOCODEC']._serialized_start=2132 + _globals['_VIDEOCODEC']._serialized_end=2181 + _globals['_VIDEOROTATION']._serialized_start=2183 + _globals['_VIDEOROTATION']._serialized_end=2291 + _globals['_VIDEOBUFFERTYPE']._serialized_start=2294 + _globals['_VIDEOBUFFERTYPE']._serialized_end=2423 + _globals['_VIDEOSTREAMTYPE']._serialized_start=2425 + _globals['_VIDEOSTREAMTYPE']._serialized_end=2514 + _globals['_VIDEOSOURCETYPE']._serialized_start=2516 + _globals['_VIDEOSOURCETYPE']._serialized_end=2558 _globals['_NEWVIDEOSTREAMREQUEST']._serialized_start=51 _globals['_NEWVIDEOSTREAMREQUEST']._serialized_end=232 _globals['_NEWVIDEOSTREAMRESPONSE']._serialized_start=234 @@ -52,25 +52,25 @@ _globals['_VIDEORESOLUTION']._serialized_start=953 _globals['_VIDEORESOLUTION']._serialized_end=1021 _globals['_VIDEOBUFFERINFO']._serialized_start=1024 - _globals['_VIDEOBUFFERINFO']._serialized_end=1299 - _globals['_VIDEOBUFFERINFO_COMPONENTINFO']._serialized_start=1238 - _globals['_VIDEOBUFFERINFO_COMPONENTINFO']._serialized_end=1299 - _globals['_OWNEDVIDEOBUFFER']._serialized_start=1301 - _globals['_OWNEDVIDEOBUFFER']._serialized_end=1412 - _globals['_VIDEOSTREAMINFO']._serialized_start=1414 - _globals['_VIDEOSTREAMINFO']._serialized_end=1477 - _globals['_OWNEDVIDEOSTREAM']._serialized_start=1479 - _globals['_OWNEDVIDEOSTREAM']._serialized_end=1590 - _globals['_VIDEOSTREAMEVENT']._serialized_start=1593 - _globals['_VIDEOSTREAMEVENT']._serialized_end=1752 - _globals['_VIDEOFRAMERECEIVED']._serialized_start=1755 - _globals['_VIDEOFRAMERECEIVED']._serialized_end=1894 - 
_globals['_VIDEOSTREAMEOS']._serialized_start=1896 - _globals['_VIDEOSTREAMEOS']._serialized_end=1912 - _globals['_VIDEOSOURCERESOLUTION']._serialized_start=1914 - _globals['_VIDEOSOURCERESOLUTION']._serialized_end=1968 - _globals['_VIDEOSOURCEINFO']._serialized_start=1970 - _globals['_VIDEOSOURCEINFO']._serialized_end=2033 - _globals['_OWNEDVIDEOSOURCE']._serialized_start=2035 - _globals['_OWNEDVIDEOSOURCE']._serialized_end=2146 + _globals['_VIDEOBUFFERINFO']._serialized_end=1283 + _globals['_VIDEOBUFFERINFO_COMPONENTINFO']._serialized_start=1220 + _globals['_VIDEOBUFFERINFO_COMPONENTINFO']._serialized_end=1283 + _globals['_OWNEDVIDEOBUFFER']._serialized_start=1285 + _globals['_OWNEDVIDEOBUFFER']._serialized_end=1396 + _globals['_VIDEOSTREAMINFO']._serialized_start=1398 + _globals['_VIDEOSTREAMINFO']._serialized_end=1461 + _globals['_OWNEDVIDEOSTREAM']._serialized_start=1463 + _globals['_OWNEDVIDEOSTREAM']._serialized_end=1574 + _globals['_VIDEOSTREAMEVENT']._serialized_start=1577 + _globals['_VIDEOSTREAMEVENT']._serialized_end=1736 + _globals['_VIDEOFRAMERECEIVED']._serialized_start=1739 + _globals['_VIDEOFRAMERECEIVED']._serialized_end=1878 + _globals['_VIDEOSTREAMEOS']._serialized_start=1880 + _globals['_VIDEOSTREAMEOS']._serialized_end=1896 + _globals['_VIDEOSOURCERESOLUTION']._serialized_start=1898 + _globals['_VIDEOSOURCERESOLUTION']._serialized_end=1952 + _globals['_VIDEOSOURCEINFO']._serialized_start=1954 + _globals['_VIDEOSOURCEINFO']._serialized_end=2017 + _globals['_OWNEDVIDEOSOURCE']._serialized_start=2019 + _globals['_OWNEDVIDEOSOURCE']._serialized_end=2130 # @@protoc_insertion_point(module_scope) diff --git a/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.pyi b/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.pyi index 1d00f750..393bb975 100644 --- a/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.pyi +++ b/livekit-rtc/livekit/rtc/_proto/video_frame_pb2.pyi @@ -345,35 +345,33 @@ class VideoBufferInfo(google.protobuf.message.Message): class 
ComponentInfo(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - OFFSET_FIELD_NUMBER: builtins.int + DATA_PTR_FIELD_NUMBER: builtins.int STRIDE_FIELD_NUMBER: builtins.int SIZE_FIELD_NUMBER: builtins.int - offset: builtins.int + data_ptr: builtins.int stride: builtins.int size: builtins.int def __init__( self, *, - offset: builtins.int = ..., + data_ptr: builtins.int = ..., stride: builtins.int = ..., size: builtins.int = ..., ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["offset", b"offset", "size", b"size", "stride", b"stride"]) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["data_ptr", b"data_ptr", "size", b"size", "stride", b"stride"]) -> None: ... TYPE_FIELD_NUMBER: builtins.int WIDTH_FIELD_NUMBER: builtins.int HEIGHT_FIELD_NUMBER: builtins.int DATA_PTR_FIELD_NUMBER: builtins.int - DATA_LEN_FIELD_NUMBER: builtins.int STRIDE_FIELD_NUMBER: builtins.int COMPONENTS_FIELD_NUMBER: builtins.int type: global___VideoBufferType.ValueType width: builtins.int height: builtins.int data_ptr: builtins.int - data_len: builtins.int stride: builtins.int - """for packed formats""" + """only for packed formats""" @property def components(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___VideoBufferInfo.ComponentInfo]: ... def __init__( @@ -383,11 +381,10 @@ class VideoBufferInfo(google.protobuf.message.Message): width: builtins.int = ..., height: builtins.int = ..., data_ptr: builtins.int = ..., - data_len: builtins.int = ..., stride: builtins.int = ..., components: collections.abc.Iterable[global___VideoBufferInfo.ComponentInfo] | None = ..., ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["components", b"components", "data_len", b"data_len", "data_ptr", b"data_ptr", "height", b"height", "stride", b"stride", "type", b"type", "width", b"width"]) -> None: ... 
+ def ClearField(self, field_name: typing_extensions.Literal["components", b"components", "data_ptr", b"data_ptr", "height", b"height", "stride", b"stride", "type", b"type", "width", b"width"]) -> None: ... global___VideoBufferInfo = VideoBufferInfo diff --git a/livekit-rtc/livekit/rtc/video_frame.py b/livekit-rtc/livekit/rtc/video_frame.py index 2ed8887f..225243d1 100644 --- a/livekit-rtc/livekit/rtc/video_frame.py +++ b/livekit-rtc/livekit/rtc/video_frame.py @@ -53,7 +53,8 @@ def data(self) -> memoryview: @staticmethod def _from_owned_info(owned_info: proto_video.OwnedVideoBuffer) -> "VideoFrame": info = owned_info.info - cdata = (ctypes.c_uint8 * info.data_len).from_address(info.data_ptr) + data_len = _get_plane_length(info.type, info.width, info.height) + cdata = (ctypes.c_uint8 * data_len).from_address(info.data_ptr) data = bytearray(cdata) frame = VideoFrame( width=info.width, @@ -66,12 +67,14 @@ def _from_owned_info(owned_info: proto_video.OwnedVideoBuffer) -> "VideoFrame": def _proto_info(self) -> proto_video.VideoBufferInfo: info = proto_video.VideoBufferInfo() - info.components.extend(_get_plane_infos(self.type, self.width, self.height)) + addr = get_address(self.data) + info.components.extend( + _get_plane_infos(addr, self.type, self.width, self.height) + ) info.width = self.width info.height = self.height info.type = self.type - info.data_ptr = get_address(self.data) - info.data_len = len(self.data) + info.data_ptr = addr if self.type in [ proto_video.VideoBufferType.ARGB, @@ -86,14 +89,15 @@ def _proto_info(self) -> proto_video.VideoBufferInfo: return info def get_plane(self, plane_nth: int) -> Optional[memoryview]: - plane_infos = _get_plane_infos(self.type, self.width, self.height) + plane_infos = _get_plane_infos( + get_address(self.data), self.type, self.width, self.height + ) if plane_nth >= len(plane_infos): return None plane_info = plane_infos[plane_nth] - return memoryview(self.data)[ - plane_info.offset : plane_info.offset + plane_info.size - 
] + cdata = (ctypes.c_uint8 * plane_info.size).from_address(plane_info.data_ptr) + return memoryview(cdata) def convert( self, type: proto_video.VideoBufferType.ValueType, *, flip_y: bool = False @@ -110,63 +114,110 @@ def convert( def _component_info( - offset: int, stride: int, size: int + data_ptr: int, stride: int, size: int ) -> proto_video.VideoBufferInfo.ComponentInfo: cmpt = proto_video.VideoBufferInfo.ComponentInfo() - cmpt.offset = offset + cmpt.data_ptr = data_ptr cmpt.stride = stride cmpt.size = size return cmpt -def _get_plane_infos( +def _get_plane_length( type: proto_video.VideoBufferType.ValueType, width: int, height: int +) -> int: + """ + Return the total size in bytes of a buffer of the given video buffer type, computed from its width and height only (strides are ignored); raises Exception for an unsupported type + """ + if type in [ + proto_video.VideoBufferType.ARGB, + proto_video.VideoBufferType.ABGR, + proto_video.VideoBufferType.RGBA, + proto_video.VideoBufferType.BGRA, + ]: + return width * height * 4 + elif type == proto_video.VideoBufferType.RGB24: + return width * height * 3 + elif type == proto_video.VideoBufferType.I420: + chroma_width = (width + 1) // 2 + chroma_height = (height + 1) // 2 + return width * height + chroma_width * chroma_height * 2 + elif type == proto_video.VideoBufferType.I420A: + chroma_width = (width + 1) // 2 + return width * height * 2 + chroma_width * ((height + 1) // 2) * 2 + elif type == proto_video.VideoBufferType.I422: + chroma_width = (width + 1) // 2 + return width * height + chroma_width * height * 2 + elif type == proto_video.VideoBufferType.I444: + return width * height * 3 + elif type == proto_video.VideoBufferType.I010: + chroma_width = (width + 1) // 2 + chroma_height = (height + 1) // 2 + return width * height * 2 + chroma_width * chroma_height * 4 + elif type == proto_video.VideoBufferType.NV12: + chroma_width = (width + 1) // 2 + chroma_height = (height + 1) // 2 + return width * height + chroma_width * chroma_height * 2 + + raise Exception(f"unsupported video buffer type: 
{type}") + + +def _get_plane_infos( + addr: int, type: proto_video.VideoBufferType.ValueType, width: int, height: int ) -> List[proto_video.VideoBufferInfo.ComponentInfo]: if type == proto_video.VideoBufferType.I420: chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 - y = _component_info(0, width, width * height) - u = _component_info(y.size, chroma_width, chroma_width * chroma_height) + y = _component_info(addr, width, width * height) + u = _component_info( + y.data_ptr + y.size, chroma_width, chroma_width * chroma_height + ) v = _component_info( - u.offset + u.size, chroma_width, chroma_width * chroma_height + u.data_ptr + u.size, chroma_width, chroma_width * chroma_height ) return [y, u, v] elif type == proto_video.VideoBufferType.I420A: chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 - y = _component_info(0, width, width * height) - u = _component_info(y.size, chroma_width, chroma_width * chroma_height) + y = _component_info(addr, width, width * height) + u = _component_info( + y.data_ptr + y.size, chroma_width, chroma_width * chroma_height + ) v = _component_info( - u.offset + u.size, chroma_width, chroma_width * chroma_height + u.data_ptr + u.size, chroma_width, chroma_width * chroma_height ) - a = _component_info(v.offset + v.size, width, width * height) + a = _component_info(v.data_ptr + v.size, width, width * height) return [y, u, v, a] elif type == proto_video.VideoBufferType.I422: chroma_width = (width + 1) // 2 - y = _component_info(0, width, width * height) - u = _component_info(y.size, chroma_width, chroma_width * height) - v = _component_info(u.offset + u.size, chroma_width, chroma_width * height) + y = _component_info(addr, width, width * height) + u = _component_info(y.data_ptr + y.size, chroma_width, chroma_width * height) + v = _component_info( + u.data_ptr + u.size, chroma_width, chroma_width * height + ) return [y, u, v] elif type == proto_video.VideoBufferType.I444: - y = _component_info(0, width, 
width * height) - u = _component_info(y.size, width, width * height) - v = _component_info(u.offset + u.size, width, width * height) + y = _component_info(addr, width, width * height) + u = _component_info(y.data_ptr + y.size, width, width * height) + v = _component_info(u.data_ptr + u.size, width, width * height) return [y, u, v] elif type == proto_video.VideoBufferType.I010: chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 - y = _component_info(0, width * 2, width * height * 2) - u = _component_info(y.size, chroma_width * 2, chroma_width * chroma_height * 2) + y = _component_info(addr, width * 2, width * height * 2) + u = _component_info( + y.data_ptr + y.size, chroma_width * 2, chroma_width * chroma_height * 2 + ) v = _component_info( - u.offset + u.size, chroma_width * 2, chroma_width * chroma_height * 2 + u.data_ptr + u.size, chroma_width * 2, chroma_width * chroma_height * 2 ) return [y, u, v] elif type == proto_video.VideoBufferType.NV12: chroma_width = (width + 1) // 2 chroma_height = (height + 1) // 2 - y = _component_info(0, width, width * height) + y = _component_info(addr, width, width * height) uv = _component_info( - y.stride * height, chroma_width * 2, chroma_width * chroma_height * 2 + y.data_ptr + y.size, chroma_width * 2, chroma_width * chroma_height * 2 ) return [y, uv] diff --git a/livekit-rtc/rust-sdks b/livekit-rtc/rust-sdks index 4ca41298..4450c6ca 160000 --- a/livekit-rtc/rust-sdks +++ b/livekit-rtc/rust-sdks @@ -1 +1 @@ -Subproject commit 4ca412988e9f4bfd4c5d2eb5485083ff25850bfd +Subproject commit 4450c6ca5cf269873db5debf5fc06115490a44ea