forked from glitch-soc/mastodon
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #44 from NeuromatchAcademy/fetch-all-replies-service
Fetch All Replies v2 - Service Edition
- Loading branch information
Showing
11 changed files
with
558 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# frozen_string_literal: true | ||
|
||
# Scopes and a predicate that decide whether the replies to a status
# should be (re)fetched, based on locality, age and the last fetch time.
module Status::FetchRepliesConcern
  extend ActiveSupport::Concern

  # Enable/disable fetching all replies.
  # Enabled when the variable is unset; otherwise only the literal 'true' enables it.
  FETCH_REPLIES_ENABLED = ENV.fetch('FETCH_REPLIES_ENABLED', 'true') == 'true'

  # Debounce fetching all replies to minimize DoS (values are in minutes)
  FETCH_REPLIES_DEBOUNCE = (ENV['FETCH_REPLIES_DEBOUNCE'] || 15).to_i.minutes
  CREATED_RECENTLY_DEBOUNCE = (ENV['FETCH_REPLIES_CREATED_RECENTLY'] || 5).to_i.minutes

  included do
    scope :created_recently, -> { where(created_at: CREATED_RECENTLY_DEBOUNCE.ago..) }
    scope :not_created_recently, -> { where(created_at: ..CREATED_RECENTLY_DEBOUNCE.ago) }
    scope :fetched_recently, -> { where(fetched_replies_at: FETCH_REPLIES_DEBOUNCE.ago..) }
    scope :not_fetched_recently, -> { where(fetched_replies_at: ..FETCH_REPLIES_DEBOUNCE.ago).or(where(fetched_replies_at: nil)) }

    # A status must be skipped when ANY of the conditions holds (De Morgan
    # negation of should_fetch_replies), so the conditions are OR-ed rather
    # than AND-ed. Conditions already present on the receiver are common to
    # every branch and therefore stay AND-ed.
    scope :shouldnt_fetch_replies, -> { local.or(created_recently.or(fetched_recently)) }

    # `invert_where` negates every WHERE condition accumulated so far, which
    # includes conditions on the relation this scope is chained onto (e.g.
    # `where(uri: uris).should_fetch_replies`), so it is avoided here.
    # NOTE(review): relies on the including model defining a `remote` scope
    # that is the complement of `local` - confirm against Status.
    scope :should_fetch_replies, -> { remote.not_created_recently.not_fetched_recently }
  end

  # Instance-level counterpart of the should_fetch_replies scope.
  #
  # @return [Boolean] true when fetching is enabled, the status is not local,
  #   it was created at least CREATED_RECENTLY_DEBOUNCE ago, and its replies
  #   were never fetched or were last fetched at least FETCH_REPLIES_DEBOUNCE ago
  def should_fetch_replies?
    # we aren't brand new, and we haven't fetched replies since the debounce window
    FETCH_REPLIES_ENABLED && !local? && created_at <= CREATED_RECENTLY_DEBOUNCE.ago && (
      fetched_replies_at.nil? || fetched_replies_at <= FETCH_REPLIES_DEBOUNCE.ago
    )
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# frozen_string_literal: true | ||
|
||
# Resolves a replies collection into reply URIs, drops the ones whose
# statuses should not be refreshed, and enqueues a FetchReplyWorker job
# for each remaining URI.
class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
  include JsonLdHelper

  # Limit of replies to fetch per status
  MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_SINGLE'] || 500).to_i

  # @param collection_or_uri the replies collection, or a reference to it,
  #   handed to collection_items (not defined in this file; presumably inherited)
  # @param allow_synchronous_requests [Boolean] stored for use by the parent service
  # @param request_id [String, nil] forwarded to every enqueued FetchReplyWorker job
  # @return [Array<String>, nil] the URIs that were enqueued, or nil when the
  #   collection yielded no items
  def call(collection_or_uri, allow_synchronous_requests: true, request_id: nil)
    @allow_synchronous_requests = allow_synchronous_requests
    @filter_by_host = false
    @collection_or_uri = collection_or_uri

    @items = collection_items(collection_or_uri)
    @items = filtered_replies
    return if @items.nil?

    FetchReplyWorker.push_bulk(@items) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }

    @items
  end

  private

  # Reduces @items to at most MAX_REPLIES unique URIs worth fetching.
  #
  # @return [Array<String>, nil] nil when @items is nil
  def filtered_replies
    return if @items.nil?

    # Find all statuses that we *shouldn't* update the replies for, and use that as a filter.
    # We don't assume that we have the statuses before they're created,
    # hence the negative filter -
    # "keep all these uris except the ones we already have"
    # instead of
    # "keep all these uris that match some conditions on existing Status objects"
    #
    # Typically we assume the number of replies we *shouldn't* fetch is smaller than the
    # replies we *should* fetch, so we also minimize the number of uris we should load here.
    #
    # Entries without a usable id are dropped: a nil inside `where(uri: [...])`
    # would also match rows whose uri IS NULL, and a nil must never be enqueued.
    # Duplicates are removed so each reply is enqueued at most once.
    uris = @items.filter_map { |item| value_or_id(item) }.uniq

    # A Set keeps the membership test in the reject below constant-time
    dont_update = Status.where(uri: uris).shouldnt_fetch_replies.pluck(:uri).to_set

    # Reject all statuses we shouldn't refresh, then apply the per-status limit
    uris = uris.reject { |uri| dont_update.include?(uri) }.take(MAX_REPLIES)

    # Touch the existing statuses we're about to update. This runs after the
    # limit is applied so that statuses cut off by MAX_REPLIES are not marked
    # as fetched without actually being fetched.
    Status.where(uri: uris).should_fetch_replies.touch_all(:fetched_replies_at)

    Rails.logger.debug { "FetchAllRepliesService - #{@collection_or_uri}: Fetching filtered statuses: #{uris}" }
    uris
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# frozen_string_literal: true | ||
|
||
# Fetch all replies to a status, querying recursively through
# ActivityPub replies collections, fetching any statuses that
# we either don't already have or we haven't checked for new replies
# in the Status::FETCH_REPLIES_DEBOUNCE interval
class ActivityPub::FetchAllRepliesWorker
  include Sidekiq::Worker
  include ExponentialBackoff
  include JsonLdHelper

  sidekiq_options queue: 'pull', retry: 3

  # Global max replies to fetch per request (all replies, recursively)
  MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_GLOBAL'] || 1000).to_i

  # @param parent_status_id [Integer] id of the status whose reply tree is walked
  # @param options [Hash] forwarded (with symbolized keys) as keyword arguments
  #   to ActivityPub::FetchAllRepliesService#call
  # @return [Set<String>, nil] every reply URI discovered, or nil when the
  #   status no longer exists or no replies could be resolved for it
  def perform(parent_status_id, options = {})
    # The status may have been deleted between enqueueing and execution;
    # raising here would only trigger retries that cannot succeed.
    @parent_status = Status.find_by(id: parent_status_id)
    return if @parent_status.nil?

    Rails.logger.debug { "FetchAllRepliesWorker - #{@parent_status.uri}: Fetching all replies for status: #{@parent_status}" }

    uris_to_fetch = get_replies(@parent_status.uri, options)
    return if uris_to_fetch.nil?

    @parent_status.touch(:fetched_replies_at)

    # to_set builds a new collection, so popping from the queue below
    # never affects the record of what has already been seen
    fetched_uris = uris_to_fetch.to_set

    until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES
      next_reply = uris_to_fetch.pop
      next if next_reply.nil?

      new_reply_uris = get_replies(next_reply, options)
      next if new_reply_uris.nil?

      # Only follow URIs not seen before, so shared or cyclic references
      # cannot be queued twice
      new_reply_uris = new_reply_uris.reject { |uri| fetched_uris.include?(uri) }

      uris_to_fetch.concat(new_reply_uris)
      fetched_uris.merge(new_reply_uris)
    end

    Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{fetched_uris.length} replies" }
    fetched_uris
  end

  private

  # Resolves the replies collection of the given status and runs the
  # fetch service on it.
  #
  # @return [Array<String>, nil] the service result, or nil when the status
  #   exposes no replies collection
  def get_replies(status_uri, options = {})
    replies_collection_or_uri = get_replies_uri(status_uri)
    return if replies_collection_or_uri.nil?

    ActivityPub::FetchAllRepliesService.new.call(replies_collection_or_uri, **options.deep_symbolize_keys)
  end

  # Fetches the ActivityPub object for a status and returns its 'replies' value.
  #
  # @return the 'replies' value, or nil when the object could not be fetched
  #   or has no 'replies' key
  # @raise [StandardError] re-raises a fetch error only for the top-level status
  def get_replies_uri(parent_status_uri)
    json_status = fetch_resource(parent_status_uri, true)
    if json_status.nil?
      Rails.logger.debug { "FetchAllRepliesWorker - #{@parent_status.uri}: error getting replies URI for #{parent_status_uri}, returned nil" }
      nil
    elsif !json_status.key?('replies')
      Rails.logger.debug { "FetchAllRepliesWorker - #{@parent_status.uri}: no replies collection found in ActivityPub object: #{json_status}" }
      nil
    else
      json_status['replies']
    end
  rescue => e
    Rails.logger.warn { "FetchAllRepliesWorker - #{@parent_status.uri}: caught exception fetching replies URI: #{e}" }
    # Raise if we can't get the collection for top-level status to trigger retry
    raise if parent_status_uri == @parent_status.uri

    nil
  end
end
7 changes: 7 additions & 0 deletions
7
db/migrate/20240918233930_add_fetched_replies_at_to_status.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# frozen_string_literal: true | ||
|
||
# Adds a nullable `fetched_replies_at` datetime column to `statuses`.
class AddFetchedRepliesAtToStatus < ActiveRecord::Migration[7.1]
  def change
    change_table :statuses do |t|
      t.datetime :fetched_replies_at, null: true
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.