Skip to content

Commit

Permalink
Removed video source check, using yt-dlp for all video downloads instead
Browse files Browse the repository at this point in the history
  • Loading branch information
Aleksandr Movchan committed Jan 8, 2024
1 parent c33c956 commit a2a8391
Show file tree
Hide file tree
Showing 12 changed files with 73 additions and 185 deletions.
1 change: 0 additions & 1 deletion aana/configs/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ class Settings(BaseSettings):
"""A pydantic model for SDK settings."""

tmp_data_dir: Path = Path("/tmp/aana_data") # noqa: S108
youtube_video_dir = tmp_data_dir / "youtube_videos"
image_dir = tmp_data_dir / "images"
video_dir = tmp_data_dir / "videos"

Expand Down
6 changes: 4 additions & 2 deletions aana/exceptions/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,18 +82,20 @@ class DownloadException(BaseException):
url (str): the URL of the file that caused the exception
"""

def __init__(self, url: str):
def __init__(self, url: str, msg: str = ""):
"""Initialize the exception.
Args:
url (str): the URL of the file that caused the exception
msg (str): the error message
"""
super().__init__(url=url)
self.url = url
self.msg = msg

def __reduce__(self):
"""Used for pickling."""
return (self.__class__, (self.url,))
return (self.__class__, (self.url, self.msg))


class VideoException(BaseException):
Expand Down
Empty file added aana/models/__init__.py
Empty file.
12 changes: 11 additions & 1 deletion aana/models/core/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@ class Video(Media):
media_dir: Path | None = settings.video_dir

def validate(self):
"""Validate the video."""
"""Validate the video.
Raises:
ValueError: if none of 'path', 'url', or 'content' is provided
VideoReadingException: if the video is not valid
"""
# validate the parent class
super().validate()

Expand All @@ -46,6 +51,10 @@ def validate(self):
"At least one of 'path', 'url' or 'content' must be provided."
)

# check that the video is valid
if self.path and not self.is_video():
raise VideoReadingException(video=self)

def is_video(self) -> bool:
"""Checks if it's a valid video."""
if not self.path:
Expand All @@ -64,6 +73,7 @@ def save_from_url(self, file_path):
Raises:
DownloadException: if the media can't be downloaded
VideoReadingException: if the media is not a valid video
"""
super().save_from_url(file_path)
# check that the file is a video
Expand Down
35 changes: 0 additions & 35 deletions aana/models/core/video_source.py

This file was deleted.

2 changes: 1 addition & 1 deletion aana/models/pydantic/asr_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from faster_whisper.transcribe import (
Word as WhisperWord,
)
from pydantic import BaseModel, Field

from aana.models.pydantic.base import BaseListModel
from aana.models.pydantic.time_interval import TimeInterval
from pydantic import BaseModel, Field


class AsrWord(BaseModel):
Expand Down
3 changes: 2 additions & 1 deletion aana/models/pydantic/captions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from types import MappingProxyType

from aana.models.pydantic.base import BaseListModel
from pydantic import BaseModel

from aana.models.pydantic.base import BaseListModel


class Caption(BaseModel):
"""A model for a caption."""
Expand Down
4 changes: 2 additions & 2 deletions aana/models/pydantic/image_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from types import MappingProxyType

import numpy as np
from pydantic import BaseModel, Field, ValidationError, root_validator, validator
from pydantic.error_wrappers import ErrorWrapper

from aana.models.core.image import Image
from aana.models.pydantic.base import BaseListModel
from pydantic import BaseModel, Field, ValidationError, root_validator, validator
from pydantic.error_wrappers import ErrorWrapper


class ImageInput(BaseModel):
Expand Down
5 changes: 3 additions & 2 deletions aana/models/pydantic/video_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
from pathlib import Path
from types import MappingProxyType

from aana.models.core.video import Video
from aana.models.pydantic.base import BaseListModel
from pydantic import BaseModel, Field, ValidationError, root_validator, validator
from pydantic.error_wrappers import ErrorWrapper

from aana.models.core.video import Video
from aana.models.pydantic.base import BaseListModel


class VideoInput(BaseModel):
"""A video input.
Expand Down
49 changes: 16 additions & 33 deletions aana/tests/test_video.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# ruff: noqa: S101
import hashlib
from importlib import resources
from pathlib import Path

Expand All @@ -11,26 +12,7 @@
from aana.utils.video import download_video


def mocked_download_file(url: str) -> bytes:
"""Mock download_file to return different content based on URL."""
if url == "http://example.com/squirrel.mp4":
path = resources.path("aana.tests.files.videos", "squirrel.mp4")
elif url == "http://example.com/Starry_Night.jpeg":
path = resources.path("aana.tests.files.images", "Starry_Night.jpeg")
else:
raise DownloadException(url)
return path.read_bytes()


@pytest.fixture
def mock_download_file(mocker):
"""Mock download_file to return different content based on URL."""
mock = mocker.patch("aana.models.core.media.download_file", autospec=True)
mock.side_effect = mocked_download_file
return mock


def test_video(mock_download_file):
def test_video():
"""Test that the video can be created from path, url, or content."""
# Test creation from a path
try:
Expand All @@ -47,7 +29,7 @@ def test_video(mock_download_file):

# Test creation from a URL
try:
url = "http://example.com/squirrel.mp4"
url = "https://mobius-public.s3.eu-west-1.amazonaws.com/squirrel.mp4"
video = Video(url=url, save_on_disk=False)
assert video.path is None
assert video.content is None
Expand All @@ -72,12 +54,12 @@ def test_video(mock_download_file):
video.cleanup()


def test_media_dir(mock_download_file):
def test_media_dir():
"""Test that the media_dir is set correctly."""
# Test saving from URL to disk
video_dir = settings.video_dir
try:
url = "http://example.com/squirrel.mp4"
url = "https://mobius-public.s3.eu-west-1.amazonaws.com/squirrel.mp4"
video = Video(url=url, save_on_disk=True)
assert video.media_dir == video_dir
assert video.content is None
Expand All @@ -95,7 +77,7 @@ def test_video_path_not_exist():
Video(path=path)


def test_save_video(mock_download_file):
def test_save_video():
"""Test that save_on_disk works for video."""
# Test that the video is saved to disk when save_on_disk is True
try:
Expand All @@ -111,7 +93,7 @@ def test_save_video(mock_download_file):

# Test saving from URL to disk
try:
url = "http://example.com/squirrel.mp4"
url = "https://mobius-public.s3.eu-west-1.amazonaws.com/squirrel.mp4"
video = Video(url=url, save_on_disk=True)
assert video.content is None
assert video.url == url
Expand All @@ -131,10 +113,10 @@ def test_save_video(mock_download_file):
video.cleanup()


def test_cleanup(mock_download_file):
def test_cleanup():
"""Test that cleanup works for video."""
try:
url = "http://example.com/squirrel.mp4"
url = "https://mobius-public.s3.eu-west-1.amazonaws.com/squirrel.mp4"
video = Video(url=url, save_on_disk=True)
assert video.path.exists()
finally:
Expand All @@ -161,7 +143,7 @@ def test_at_least_one_input():
Video(save_on_disk=True)


def test_download_video(mock_download_file):
def test_download_video():
"""Test download_video."""
# Test VideoInput with path
path = resources.path("aana.tests.files.videos", "squirrel.mp4")
Expand All @@ -174,7 +156,7 @@ def test_download_video(mock_download_file):

# Test VideoInput with url
try:
url = "http://example.com/squirrel.mp4"
url = "https://mobius-public.s3.eu-west-1.amazonaws.com/squirrel.mp4"
video_input = VideoInput(url=url)
video = download_video(video_input)
assert isinstance(video, Video)
Expand All @@ -188,8 +170,9 @@ def test_download_video(mock_download_file):

# Test Youtube URL
youtube_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
youtube_video_dir = settings.youtube_video_dir
expected_path = youtube_video_dir / "dQw4w9WgXcQ.mp4"
youtube_url_hash = hashlib.md5(youtube_url.encode()).hexdigest() # noqa: S324
video_dir = settings.video_dir
expected_path = video_dir / f"{youtube_url_hash}.webm"
# remove the file if it exists
expected_path.unlink(missing_ok=True)

Expand All @@ -201,7 +184,7 @@ def test_download_video(mock_download_file):
assert video.path is not None
assert video.path.exists()
assert video.content is None
assert video.url is None
assert video.url == youtube_url
assert video.media_id == "dQw4w9WgXcQ"
assert (
video.title
Expand All @@ -221,7 +204,7 @@ def test_download_video(mock_download_file):
download_video(youtube_video_input)

# Test url that doesn't contain a video
url = "http://example.com/Starry_Night.jpeg"
url = "https://mobius-public.s3.eu-west-1.amazonaws.com/Starry_Night.jpeg"
video_input = VideoInput(url=url)
with pytest.raises(VideoReadingException):
download_video(video_input)
Expand Down
69 changes: 0 additions & 69 deletions aana/tests/test_video_source.py

This file was deleted.

Loading

0 comments on commit a2a8391

Please sign in to comment.