Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add API endpoint for fetching transcripts from YouTube #4788

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions src/invidious/routes/api/v1/videos.cr
Original file line number Diff line number Diff line change
Expand Up @@ -411,4 +411,90 @@ module Invidious::Routes::API::V1::Videos
end
end
end

# Fetches transcripts from YouTube
#
# Use the `lang` and `autogen` query parameter to select which transcript to fetch
# Request without any URL parameters to see all the available transcripts.
def self.transcripts(env)
env.response.content_type = "application/json"

id = env.params.url["id"]
lang = env.params.query["lang"]?
label = env.params.query["label"]?
auto_generated = env.params.query["autogen"]? ? true : false

# Return all available transcript options when none is given
if !label && !lang
begin
video = get_video(id)
rescue ex : NotFoundException
return error_json(404, ex)
rescue ex
return error_json(500, ex)
end

response = JSON.build do |json|
# The amount of transcripts available to fetch is the
# same as the amount of captions available.
available_transcripts = video.captions

json.object do
json.field "transcripts" do
json.array do
available_transcripts.each do |transcript|
json.object do
json.field "label", transcript.name
json.field "languageCode", transcript.language_code
json.field "autoGenerated", transcript.auto_generated

if transcript.auto_generated
json.field "url", "/api/v1/transcripts/#{id}?lang=#{URI.encode_www_form(transcript.language_code)}&autogen"
else
json.field "url", "/api/v1/transcripts/#{id}?lang=#{URI.encode_www_form(transcript.language_code)}"
end
end
end
end
end
end
end

return response
end

# If lang is not given then we attempt to fetch
# the transcript through the given label
if lang.nil?
begin
video = get_video(id)
rescue ex : NotFoundException
return error_json(404, ex)
rescue ex
return error_json(500, ex)
end

target_transcript = video.captions.select(&.name.== label)
if target_transcript.empty?
return error_json(404, NotFoundException.new("Requested transcript does not exist"))
else
target_transcript = target_transcript[0]
lang, auto_generated = target_transcript.language_code, target_transcript.auto_generated
end
end

params = Invidious::Videos::Transcript.generate_param(id, lang, auto_generated)

begin
transcript = Invidious::Videos::Transcript.from_raw(
YoutubeAPI.get_transcript(params), lang, auto_generated
)
rescue ex : NotFoundException
return error_json(404, ex)
rescue ex
return error_json(500, ex)
end

return transcript.to_json
end
end
1 change: 1 addition & 0 deletions src/invidious/routing.cr
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ module Invidious::Routing
get "/api/v1/annotations/:id", {{namespace}}::Videos, :annotations
get "/api/v1/comments/:id", {{namespace}}::Videos, :comments
get "/api/v1/clips/:id", {{namespace}}::Videos, :clips
get "/api/v1/transcripts/:id", {{namespace}}::Videos, :transcripts

# Feeds
get "/api/v1/trending", {{namespace}}::Feeds, :trending
Expand Down
35 changes: 35 additions & 0 deletions src/invidious/videos/transcript.cr
Original file line number Diff line number Diff line change
Expand Up @@ -122,5 +122,40 @@ module Invidious::Videos

return vtt
end

def to_json(json : JSON::Builder)
json.field "languageCode", @language_code
json.field "autoGenerated", @auto_generated
json.field "label", @label
json.field "body" do
json.array do
@lines.each do |line|
json.object do
if line.is_a? HeadingLine
json.field "type", "heading"
else
json.field "type", "regular"
end

json.field "startMs", line.start_ms.total_milliseconds
json.field "endMs", line.end_ms.total_milliseconds
json.field "line", line.line
end
end
end
end
end

def to_json
JSON.build do |json|
json.object do
json.field "transcript" do
json.object do
to_json(json)
end
end
end
end
end
end
end
Loading