diff --git a/benchmark.py b/benchmark.py
deleted file mode 100644
index 2c6d1967137185..00000000000000
--- a/benchmark.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import os
-import time
-
-import cv2
-import av
-import numpy as np
-from numba import jit, cuda
-from decord import VideoReader, cpu, gpu
-
-import torch
-from torchvision import io
-
-
-video_dir = "/raid/raushan/temp_dir/"
-NUM_FRAMES = 32
-
-
-# @jit(nopython=True, target_backend='cuda') # <-- If you have a cuda GPU
-def process_video_cv2(video: cv2.VideoCapture, indices: np.array, length: int):
-    index = 0
-    frames = []
-    while video.isOpened():
-        success, frame = video.read()
-        if index in indices:
-            # Channel 0:B 1:G 2:R
-            height, width, channel = frame.shape
-            frames.append(frame[0:height, 0:width, 0:channel])
-        if success:
-            index += 1
-        if index >= length:
-            break
-
-    video.release()
-    return frames
-
-
-def read_video_opencv(video_path, num_frames=NUM_FRAMES):
-    '''
-    Decode the video with open-cv decoder.
-
-    Args:
-        video_path (str): Path to the video file.
-        num_frames (int): Number of frames to sample uniformly. Defaults to NUM_FRAMES
-
-    Returns:
-        np.ndarray: np array of decoded frames of shape (num_frames, height, width, 3).
-    '''
-    video = cv2.VideoCapture(video_path)
-    fps = int(video.get(cv2.CAP_PROP_FPS))
-    total_num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-    indices = np.arange(0, total_num_frames, total_num_frames / num_frames).astype(int)
-    frames = process_video_cv2(video, indices, total_num_frames)
-    return np.stack(frames)
-
-
-
-def read_video_decord(video_path, num_frames=NUM_FRAMES):
-    '''
-    Decode the video with Decord decoder.
-
-    Args:
-        video_path (str): Path to the video file.
-        num_frames (int): Number of frames to sample uniformly. Defaults to NUM_FRAMES
-
-    Returns:
-        np.ndarray: np array of decoded frames of shape (num_frames, height, width, 3).
-    '''
-    vr = VideoReader(uri=video_path, ctx=cpu(0))  # you need to install from source to use gpu ctx
-    indices = np.arange(0, len(vr), len(vr) / num_frames).astype(int)
-    frames = vr.get_batch(indices).asnumpy()
-    return frames
-
-
-def read_video_pyav(video_path, num_frames=NUM_FRAMES):
-    '''
-    Decode the video with PyAV decoder.
-
-    Args:
-        video_path (str): Path to the video file.
-        num_frames (int): Number of frames to sample uniformly. Defaults to NUM_FRAMES
-
-    Returns:
-        np.ndarray: np array of decoded frames of shape (num_frames, height, width, 3).
-    '''
-    container = av.open(video_path)
-
-    # sample uniformly "num_frames" frames from the video
-    total_frames = container.streams.video[0].frames
-    indices = np.arange(0, total_frames, total_frames / num_frames).astype(int)
-
-    frames = []
-    container.seek(0)
-    start_index = indices[0]
-    end_index = indices[-1]
-    for i, frame in enumerate(container.decode(video=0)):
-        if i > end_index:
-            break
-        if i >= start_index and i in indices:
-            frames.append(frame)
-    return np.stack([x.to_ndarray(format="rgb24") for x in frames])
-
-
-
-def read_video_torchvision(video_path, num_frames=NUM_FRAMES):
-    video, _, info = io.read_video(
-        video_path,
-        start_pts=0.0,
-        end_pts=None,
-        pts_unit="sec",
-        output_format="TCHW",
-    )
-
-    idx = torch.linspace(0, video.size(0) - 1, num_frames, dtype=torch.int64)
-    return video[idx]
-
-
-decoders = {"decord": read_video_decord, "opencv": read_video_opencv, "av": read_video_pyav, "torchvision": read_video_torchvision}
-for name, fn in decoders.items():
-    start = time.perf_counter()
-    for video_file in os.listdir(video_dir):
-        path = f"{video_dir}/{video_file}"
-        output = fn(path)
-
-    end = time.perf_counter()
-    print(f"Time taken for {name}: {(end-start):.04f} sec")
-
-
-# Time taken for decord: 475.2979 sec
-# Time taken for opencv: 614.6062 sec
-# Time taken for av: 1067.0860 sec
-# Time taken for torchvision: 1924.0433 sec
-
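For reference, decord was the fastest decoder in the timings above. A minimal sketch of the same uniform-sampling pattern outside the benchmark harness, assuming decord is installed and using a hypothetical local video.mp4 path:

    import numpy as np
    from decord import VideoReader, cpu

    vr = VideoReader("video.mp4", ctx=cpu(0))  # hypothetical path; gpu ctx needs a source build
    # np.linspace is bounds-safe here; the np.arange float step used in the
    # benchmark can occasionally yield one extra index for some frame counts
    indices = np.linspace(0, len(vr) - 1, num=32).astype(int)
    frames = vr.get_batch(indices).asnumpy()  # shape (32, height, width, 3), RGB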