Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
liamwhite committed Mar 4, 2024
2 parents 8aa3632 + 55eac3c commit 8292ce9
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 67 deletions.
18 changes: 14 additions & 4 deletions lib/philomena/images/thumbnailer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,26 @@ defmodule Philomena.Images.Thumbnailer do
file = download_image_file(image)
{:ok, analysis} = Analyzers.analyze(file)

apply_edit_script(image, Processors.process(analysis, file, generated_sizes(image)))
file = apply_edit_script(image, file, Processors.process(analysis, file, generated_sizes(image)))
generate_dupe_reports(image)
recompute_meta(image, file, &Image.thumbnail_changeset/2)

apply_edit_script(image, Processors.post_process(analysis, file))
file = apply_edit_script(image, file, Processors.post_process(analysis, file))
recompute_meta(image, file, &Image.process_changeset/2)
end

# Runs every entry of the edit script `changes` against `image` through
# `apply_change/2` and returns the per-change results as a list, in the
# same order as the script.
defp apply_edit_script(image, changes) do
  Enum.map(changes, fn change -> apply_change(image, change) end)
end
# Applies each change of the edit script to `image`, in order, while
# tracking which file is the current working file. A
# `{:replace_original, new_file}` change makes `new_file` the working file;
# every other change leaves the working file untouched. Returns the working
# file in effect after the last change (`file` itself when nothing replaced
# it, including for an empty script).
defp apply_edit_script(image, file, changes),
  do: Enum.reduce(changes, file, &apply_and_track_file(image, &1, &2))

# Applies a single change, then yields the working file that follows it.
defp apply_and_track_file(image, {:replace_original, new_file} = change, _current_file) do
  apply_change(image, change)
  new_file
end

defp apply_and_track_file(image, change, current_file) do
  apply_change(image, change)
  current_file
end

# Edit-script clause for `{:intensities, payload}` changes: forwards the
# payload to `ImageIntensities.create_image_intensity/2` for `image`.
defp apply_change(image, {:intensities, payload}) do
  ImageIntensities.create_image_intensity(image, payload)
end
Expand Down
75 changes: 12 additions & 63 deletions lib/philomena/scrapers/twitter.ex
Original file line number Diff line number Diff line change
@@ -1,78 +1,27 @@
defmodule Philomena.Scrapers.Twitter do
# Matches tweet permalinks on twitter.com (optionally with the "mobile."
# prefix), anchored at the start of the string. Capture group 1 is the
# account handle, capture group 2 is the numeric status id.
# The dot in "twitter\.com" is escaped so it only matches a literal ".";
# unescaped it would match any character (e.g. a host like "twitterxcom").
@url_regex ~r|\Ahttps?://(?:mobile\.)?twitter\.com/([A-Za-z\d_]+)/status/([\d]+)/?|
@script_regex ~r|="(https://abs.twimg.com/responsive-web/client-web(?:-legacy)?/main\.[\da-z]+\.js)"|
@bearer_regex ~r|"(AAAAAAAAAAAAA[^"]*)"|
@activate_url "https://api.twitter.com/1.1/guest/activate.json"

@doc """
Returns `true` when `url` matches this scraper's `@url_regex`, `false`
otherwise. The parsed URI argument is ignored.
"""
@spec can_handle?(URI.t(), String.t()) :: true | false
def can_handle?(_uri, url), do: Regex.match?(@url_regex, url)

@doc """
Scrapes the tweet at `url`: fetches the tweet via `api_response!/1` and
converts it into the scraper result map with `extract_data/1`. The parsed
URI argument is ignored.
"""
def scrape(_uri, url) do
  tweet = api_response!(url)
  extract_data(tweet)
end

# Converts a decoded tweet map into the scraper result map.
#
# Every entry under tweet["entities"]["media"] becomes one image with the
# full-size JPEG variant of its "media_url_https" as `url` and the Camo
# proxy URL of the plain media URL as `camo_url`. The description comes
# from "text", falling back to "full_text".
defp extract_data(tweet) do
  media_entries = tweet["entities"]["media"]

  images =
    Enum.map(media_entries, fn media ->
      media_url = media["media_url_https"]

      %{
        url: media_url <> "?format=jpg&name=4096x4096",
        camo_url: Camo.Image.image_url(media_url)
      }
    end)

  %{
    source_url: tweet["url"],
    author_name: tweet["user"],
    description: tweet["text"] || tweet["full_text"],
    images: images
  }
end

# We'd like to use the API anonymously. In order to do this, we need to
# extract the anonymous bearer token. Fortunately, this is pretty easy
# to identify in the minified mobile script source.
#
# NOTE(review): this span is taken from a rendered diff and appears to
# interleave lines from two revisions of the scraper (the guest-token API
# flow and the d.fxtwitter.com image check) -- confirm against the real file
# before relying on it.
def api_response!(url) do
# The two capture groups of @url_regex are the account handle and the status
# id; a URL that does not match makes this pattern match fail.
[user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first)

page_url = "https://twitter.com/#{user}/status/#{status_id}"

api_url =
"https://api.twitter.com/2/timeline/conversation/#{status_id}.json?tweet_mode=extended"

# Rebinds `url` to the URL rebuilt from the captured handle and status id.
url = "https://twitter.com/#{user}/status/#{status_id}"
image_url = "https://d.fxtwitter.com/#{user}/status/#{status_id}.jpg"

# Fetch the tweet page and derive the guest token and bearer token from it.
{gt, bearer} =
Philomena.Http.get(page_url)
|> extract_guest_token_and_bearer()
# Require a 200 response to a HEAD request for the image URL; any other
# result fails this match.
{:ok, %Tesla.Env{status: 200}} = Philomena.Http.head(image_url)

{:ok, api_resp} =
Philomena.Http.get(api_url, [{"Authorization", "Bearer #{bearer}"}, {"x-guest-token", gt}])

# Decode the JSON body, select this tweet under "globalObjects" -> "tweets",
# then attach the handle and the rebuilt URL under the "user" and "url" keys.
api_resp
|> Map.get(:body)
|> Jason.decode!()
|> Map.get("globalObjects")
|> Map.get("tweets")
|> Map.get(status_id)
|> Map.put("user", user)
|> Map.put("url", url)
end

# Given a successful page response, finds the main client script URL in the
# page body (@script_regex), downloads that script, extracts the bearer token
# from it (@bearer_regex), and posts to @activate_url with that bearer to
# obtain a "guest_token".
#
# NOTE(review): this span is taken from a rendered diff and appears to
# interleave two revisions. The map after `{gt, bearer}` references `user`,
# `status_id` and `image_url`, none of which are bound in this function as
# shown -- confirm against the real file.
defp extract_guest_token_and_bearer({:ok, %Tesla.Env{body: page}}) do
# Only the first captured script URL is used.
[script | _] = Regex.run(@script_regex, page, capture: :all_but_first)
{:ok, %{body: body}} = Philomena.Http.get(script)

# Expects exactly one capture; a script without a matching token fails here.
[bearer] = Regex.run(@bearer_regex, body, capture: :all_but_first)

# `body` is rebound to the activation response body.
{:ok, %{body: body}} =
Philomena.Http.post(@activate_url, nil, [{"Authorization", "Bearer #{bearer}"}])

# Raises if the decoded response has no "guest_token" key.
gt = Map.fetch!(Jason.decode!(body), "guest_token")

{gt, bearer}
%{
source_url: "https://twitter.com/#{user}/status/#{status_id}",
author_name: user,
images: [
%{
url: image_url,
camo_url: Camo.Image.image_url(image_url),
},
]
}
end
end

0 comments on commit 8292ce9

Please sign in to comment.