Skip to content

Commit

Permalink
Add Bluesky scraper (#290)
Browse files Browse the repository at this point in the history
* add bluesky scraper

* use com.atproto.sync.getBlob to get original bluesky image

* ignore data after bluesky post id

* fix json access and missing function

* fix bluesky fullsize image regex
  • Loading branch information
Yay295 authored Oct 28, 2024
1 parent ece6f09 commit 6fbc444
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/philomena_proxy/scrapers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ defmodule PhilomenaProxy.Scrapers do
}

@scrapers [
PhilomenaProxy.Scrapers.Bluesky,
PhilomenaProxy.Scrapers.Deviantart,
PhilomenaProxy.Scrapers.Pillowfort,
PhilomenaProxy.Scrapers.Twitter,
Expand Down
42 changes: 42 additions & 0 deletions lib/philomena_proxy/scrapers/bluesky.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
defmodule PhilomenaProxy.Scrapers.Bluesky do
  @moduledoc false

  alias PhilomenaProxy.Scrapers.Scraper
  alias PhilomenaProxy.Scrapers

  @behaviour Scraper

  # Matches a Bluesky post permalink and captures the profile path segment and
  # the post id. The post id stops at `/`, `?` or `#`, so anything trailing the
  # id (query string, fragment, extra path) is ignored.
  @url_regex ~r|https://bsky\.app/profile/([^/]+)/post/([^/?#]+)|

  # Captures the two path segments following `/img/feed_fullsize/plain/` (the
  # second one up to the first `@`). The replacement pattern below substitutes
  # them as the `did` and `cid` parameters of a `com.atproto.sync.getBlob` URL.
  @fullsize_image_regex ~r|.*/img/feed_fullsize/plain/([^/]+)/([^@]+).*|
  @blob_image_url_pattern "https://bsky.social/xrpc/com.atproto.sync.getBlob/?did=\\1&cid=\\2"

  @doc """
  Returns `true` when `url` contains a Bluesky post permalink.

  The parsed URI is ignored; only the raw `url` string is matched.
  """
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  @doc """
  Scrapes the Bluesky post referenced by `url`.

  Returns a map with `:source_url`, `:author_name`, `:description` and
  `:images`, where each image has a `:url` (blob URL derived from the
  `fullsize` image URL) and a `:camo_url` (built from the `thumb` URL).

  Raises if `url` does not match the post permalink pattern, if an API call
  does not return `{:ok, %{status: 200}}`, if the expected keys are missing
  from the JSON response, or if the post has no `embed.images` list.
  """
  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
  def scrape(_uri, url) do
    [handle, id] = Regex.run(@url_regex, url, capture: :all_but_first)

    post_json =
      handle
      |> resolve_did()
      |> fetch_post(id)

    %{
      source_url: url,
      author_name: post_json["author"]["handle"],
      description: post_json["record"]["text"],
      images: Enum.map(post_json["embed"]["images"], &image/1)
    }
  end

  # The profile segment of a permalink may already be a DID
  # (e.g. `did:plc:...`); in that case there is nothing to resolve.
  defp resolve_did("did:" <> _rest = did), do: did

  # Resolves a handle to its DID through the public `resolveHandle` endpoint.
  defp resolve_did(handle) do
    "https://public.api.bsky.app/xrpc/com.atproto.identity.resolveHandle?handle=#{handle}"
    |> PhilomenaProxy.Http.get()
    |> json!()
    # Jason decodes object keys as strings by default, so the key must be a
    # string; an atom key would always raise KeyError.
    |> Map.fetch!("did")
  end

  # Fetches the post record for `did`/`id` and returns the first entry of the
  # `posts` list. Raises if the list is empty.
  defp fetch_post(did, id) do
    "https://public.api.bsky.app/xrpc/app.bsky.feed.getPosts?uris=at://#{did}/app.bsky.feed.post/#{id}"
    |> PhilomenaProxy.Http.get()
    |> json!()
    |> Map.fetch!("posts")
    |> hd()
  end

  # Builds the scrape-result entry for a single embedded image.
  defp image(image_json) do
    %{
      url:
        String.replace(image_json["fullsize"], @fullsize_image_regex, @blob_image_url_pattern),
      camo_url: PhilomenaProxy.Camo.image_url(image_json["thumb"])
    }
  end

  # Decodes the body of a successful response. Only `{:ok, %{status: 200}}` is
  # matched; any other result raises FunctionClauseError.
  defp json!({:ok, %{body: body, status: 200}}), do: Jason.decode!(body)
end

0 comments on commit 6fbc444

Please sign in to comment.