Skip to content

Commit

Permalink
Check for empty playlists after filtering, and after downloading videos
Browse files: browse the repository at this point in the history
  • Loading branch information
benoit74 committed Nov 1, 2024
1 parent bc1de9e commit 7ef215d
Showing 1 changed file with 21 additions and 9 deletions.
30 changes: 21 additions & 9 deletions scraper/src/youtube2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,18 +565,19 @@ def extract_videos_list(self):
# we only return video_ids that we'll use later on. per-playlist JSON stored
for playlist in self.playlists:
videos_json = get_videos_json(playlist.playlist_id)
if len(videos_json) == 0:
logger.warning(
f"Playlist '{playlist.playlist_id}' is empty, will be ignored"
)
empty_playlists.append(playlist)
# keep only videos within the date range; filter away deleted and non-public videos
skip_outofrange = functools.partial(
skip_outofrange_videos, self.dateafter
)
filter_videos = filter(skip_outofrange, videos_json)
filter_videos = filter(skip_deleted_videos, filter_videos)
filter_videos = filter(skip_non_public_videos, filter_videos)
filter_videos = list(filter_videos)
if len(filter_videos) == 0:
logger.warning(
f"Playlist '{playlist.playlist_id}' is empty, will be ignored"
)
empty_playlists.append(playlist)
all_videos.update(
{v["contentDetails"]["videoId"]: v for v in filter_videos}
)
Expand Down Expand Up @@ -1154,10 +1155,21 @@ def get_playlist_slug(playlist) -> str:
home_playlist_list = []

main_playlist_slug = None
if len(self.playlists) > 0:
main_playlist_slug = get_playlist_slug(
self.playlists[0]
) # set first playlist as main playlist
empty_playlists = list(
filter(lambda playlist: len(get_videos_list(playlist)) == 0, self.playlists)
)
for empty_playlist in empty_playlists:
logger.warning(
f"Removing finally empty playlist {empty_playlist.playlist_id}"
)
self.playlists.remove(empty_playlist)

if len(self.playlists) == 0:
raise Exception("No playlist succeeded to download")

main_playlist_slug = get_playlist_slug(
self.playlists[0]
) # set first playlist as main playlist

for playlist in self.playlists:
playlist_slug = get_playlist_slug(playlist)
Expand Down

0 comments on commit 7ef215d

Please sign in to comment.