diff --git a/lib/philomena/images/thumbnailer.ex b/lib/philomena/images/thumbnailer.ex index 6d96256ef..4750847a1 100644 --- a/lib/philomena/images/thumbnailer.ex +++ b/lib/philomena/images/thumbnailer.ex @@ -76,16 +76,26 @@ defmodule Philomena.Images.Thumbnailer do file = download_image_file(image) {:ok, analysis} = Analyzers.analyze(file) - apply_edit_script(image, Processors.process(analysis, file, generated_sizes(image))) + file = apply_edit_script(image, file, Processors.process(analysis, file, generated_sizes(image))) generate_dupe_reports(image) recompute_meta(image, file, &Image.thumbnail_changeset/2) - apply_edit_script(image, Processors.post_process(analysis, file)) + file = apply_edit_script(image, file, Processors.post_process(analysis, file)) recompute_meta(image, file, &Image.process_changeset/2) end - defp apply_edit_script(image, changes), - do: Enum.map(changes, &apply_change(image, &1)) + defp apply_edit_script(image, file, changes) do + Enum.reduce(changes, file, fn change, existing_file -> + apply_change(image, change) + + case change do + {:replace_original, new_file} -> + new_file + _ -> + existing_file + end + end) + end defp apply_change(image, {:intensities, intensities}), do: ImageIntensities.create_image_intensity(image, intensities) diff --git a/lib/philomena/scrapers/twitter.ex b/lib/philomena/scrapers/twitter.ex index a3585d150..10b17c133 100644 --- a/lib/philomena/scrapers/twitter.ex +++ b/lib/philomena/scrapers/twitter.ex @@ -1,8 +1,5 @@ defmodule Philomena.Scrapers.Twitter do @url_regex ~r|\Ahttps?://(?:mobile\.)?twitter.com/([A-Za-z\d_]+)/status/([\d]+)/?| - @script_regex ~r|="(https://abs.twimg.com/responsive-web/client-web(?:-legacy)?/main\.[\da-z]+\.js)"| - @bearer_regex ~r|"(AAAAAAAAAAAAA[^"]*)"| - @activate_url "https://api.twitter.com/1.1/guest/activate.json" @spec can_handle?(URI.t(), String.t()) :: true | false def can_handle?(_uri, url) do @@ -10,69 +7,21 @@ defmodule Philomena.Scrapers.Twitter do end def scrape(_uri, url) do - api_response!(url) - |> extract_data() - end - - defp extract_data(tweet) do - images = - tweet["entities"]["media"] - |> Enum.map( - &%{ - url: &1["media_url_https"] <> "?format=jpg&name=4096x4096", - camo_url: Camo.Image.image_url(&1["media_url_https"]) - } - ) - - %{ - source_url: tweet["url"], - author_name: tweet["user"], - description: tweet["text"] || tweet["full_text"], - images: images - } - end - - # We'd like to use the API anonymously. In order to do this, we need to - # extract the anonymous bearer token. Fortunately, this is pretty easy - # to identify in the minified mobile script source. - def api_response!(url) do [user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first) - page_url = "https://twitter.com/#{user}/status/#{status_id}" - - api_url = - "https://api.twitter.com/2/timeline/conversation/#{status_id}.json?tweet_mode=extended" - - url = "https://twitter.com/#{user}/status/#{status_id}" + image_url = "https://d.fxtwitter.com/#{user}/status/#{status_id}.jpg" - {gt, bearer} = - Philomena.Http.get(page_url) - |> extract_guest_token_and_bearer() + {:ok, %Tesla.Env{status: 200}} = Philomena.Http.head(image_url) - {:ok, api_resp} = - Philomena.Http.get(api_url, [{"Authorization", "Bearer #{bearer}"}, {"x-guest-token", gt}]) - - api_resp - |> Map.get(:body) - |> Jason.decode!() - |> Map.get("globalObjects") - |> Map.get("tweets") - |> Map.get(status_id) - |> Map.put("user", user) - |> Map.put("url", url) - end - - defp extract_guest_token_and_bearer({:ok, %Tesla.Env{body: page}}) do - [script | _] = Regex.run(@script_regex, page, capture: :all_but_first) - {:ok, %{body: body}} = Philomena.Http.get(script) - - [bearer] = Regex.run(@bearer_regex, body, capture: :all_but_first) - - {:ok, %{body: body}} = - Philomena.Http.post(@activate_url, nil, [{"Authorization", "Bearer #{bearer}"}]) - - gt = Map.fetch!(Jason.decode!(body), "guest_token") - - {gt, bearer} + %{ + source_url: "https://twitter.com/#{user}/status/#{status_id}", + author_name: user, + images: [ + %{ + url: image_url, + camo_url: Camo.Image.image_url(image_url), + }, + ] + } end end