feat: new video buffer api (#155)

livekit · Feb 2, 2024 · 50bdb28 · 50bdb28
1 parent b5ac94c
commit 50bdb28
Show file tree

Hide file tree

Showing 16 changed files with 441 additions and 1,370 deletions.
diff --git a/examples/face_landmark/face_landmark.py b/examples/face_landmark/face_landmark.py
@@ -42,7 +42,7 @@ def on_track_subscribed(track: rtc.Track, *_):
                 return
 
             print("subscribed to track: " + track.name)
-            video_stream = rtc.VideoStream(track)
+            video_stream = rtc.VideoStream(track, format=rtc.VideoBufferType.RGB24)
             task = asyncio.create_task(frame_loop(video_stream))
             tasks.add(task)
             task.add_done_callback(tasks.remove)
@@ -104,35 +104,22 @@ def draw_landmarks_on_image(rgb_image, detection_result):
 
 async def frame_loop(video_stream: rtc.VideoStream) -> None:
     landmarker = FaceLandmarker.create_from_options(options)
-    argb_frame = None
     cv2.namedWindow("livekit_video", cv2.WINDOW_AUTOSIZE)
     cv2.startWindowThread()
-    async for frame in video_stream:
-        buffer = frame.buffer
-
-        if (
-            argb_frame is None
-            or argb_frame.width != buffer.width
-            or argb_frame.height != buffer.height
-        ):
-            argb_frame = rtc.ArgbFrame.create(
-                rtc.VideoFormatType.FORMAT_ABGR, buffer.width, buffer.height
-            )
-
-        buffer.to_argb(argb_frame)
+    async for frame_event in video_stream:
+        buffer = frame_event.frame
 
-        arr = np.frombuffer(argb_frame.data, dtype=np.uint8)
-        arr = arr.reshape((argb_frame.height, argb_frame.width, 4))
-        arr = cv2.cvtColor(arr, cv2.COLOR_RGBA2RGB)
+        arr = np.frombuffer(buffer.data, dtype=np.uint8)
+        arr = arr.reshape((buffer.height, buffer.width, 3))
 
         mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=arr)
-
-        detection_result = landmarker.detect_for_video(mp_image, frame.timestamp_us)
+        detection_result = landmarker.detect_for_video(
+            mp_image, frame_event.timestamp_us
+        )
 
         draw_landmarks_on_image(arr, detection_result)
 
         arr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
-
         cv2.imshow("livekit_video", arr)
         if cv2.waitKey(1) & 0xFF == ord("q"):
             break

diff --git a/examples/publish_hue.py b/examples/publish_hue.py
@@ -47,8 +47,8 @@ async def main(room: rtc.Room):
 
 
 async def draw_color_cycle(source: rtc.VideoSource):
-    argb_frame = rtc.ArgbFrame.create(rtc.VideoFormatType.FORMAT_ARGB, WIDTH, HEIGHT)
-    arr = np.frombuffer(argb_frame.data, dtype=np.uint8)
+    argb_frame = bytearray(WIDTH * HEIGHT * 4)
+    arr = np.frombuffer(argb_frame, dtype=np.uint8)
 
     framerate = 1 / 30
     hue = 0.0
@@ -65,8 +65,7 @@ async def draw_color_cycle(source: rtc.VideoSource):
         arr.flat[2::4] = argb_color[2]
         arr.flat[3::4] = argb_color[3]
 
-        frame = rtc.VideoFrame(argb_frame.to_i420())
-
+        frame = rtc.VideoFrame(WIDTH, HEIGHT, rtc.VideoBufferType.RGBA, argb_frame)
         source.capture_frame(frame)
         hue = (hue + framerate / 3) % 1.0
 

diff --git a/livekit-rtc/livekit/rtc/__init__.py b/livekit-rtc/livekit/rtc/__init__.py
@@ -26,11 +26,11 @@
 )
 from ._proto.e2ee_pb2 import EncryptionType, EncryptionState
 from ._proto.track_pb2 import StreamState, TrackKind, TrackSource
-from ._proto.video_frame_pb2 import VideoFormatType, VideoFrameBufferType, VideoRotation
+from ._proto.video_frame_pb2 import VideoBufferType, VideoRotation
 from ._proto import stats_pb2 as stats
 from .audio_frame import AudioFrame
 from .audio_source import AudioSource
-from .audio_stream import AudioStream
+from .audio_stream import AudioStream, AudioFrameEvent
 from .participant import LocalParticipant, Participant, RemoteParticipant
 from .room import ConnectError, Room, RoomOptions, RtcConfiguration, DataPacket
 from .track import (
@@ -57,21 +57,10 @@
     TrackPublication,
 )
 from .video_frame import (
-    ArgbFrame,
-    I010Buffer,
-    I420ABuffer,
-    I420Buffer,
-    I422Buffer,
-    NativeVideoBuffer,
-    NV12Buffer,
-    PlanarYuv8Buffer,
-    PlanarYuv16Buffer,
-    PlanarYuvBuffer,
     VideoFrame,
-    VideoFrameBuffer,
 )
 from .video_source import VideoSource
-from .video_stream import VideoStream
+from .video_stream import VideoStream, VideoFrameEvent
 from .chat import ChatManager, ChatMessage
 
 from .version import __version__
@@ -89,13 +78,13 @@
     "StreamState",
     "TrackKind",
     "TrackSource",
-    "VideoFormatType",
-    "VideoFrameBufferType",
+    "VideoBufferType",
     "VideoRotation",
     "stats",
     "AudioFrame",
     "AudioSource",
     "AudioStream",
+    "AudioFrameEvent",
     "LocalParticipant",
     "Participant",
     "RemoteParticipant",
@@ -121,20 +110,10 @@
     "LocalTrackPublication",
     "RemoteTrackPublication",
     "TrackPublication",
-    "ArgbFrame",
-    "I010Buffer",
-    "I420ABuffer",
-    "I420Buffer",
-    "I422Buffer",
-    "NativeVideoBuffer",
-    "NV12Buffer",
-    "PlanarYuv8Buffer",
-    "PlanarYuv16Buffer",
-    "PlanarYuvBuffer",
     "VideoFrame",
-    "VideoFrameBuffer",
     "VideoSource",
     "VideoStream",
+    "VideoFrameEvent",
     "ChatManager",
     "ChatMessage",
     "__version__",

diff --git a/livekit-rtc/livekit/rtc/_proto/audio_frame_pb2.py b/livekit-rtc/livekit/rtc/_proto/audio_frame_pb2.py