Skip to content

Commit

Permalink
ATProto fetch_blobs: check Content-Type against blob lexicon accept, if any
Browse files (browse the repository at this point in the history)
  • Loading branch information
snarfed committed Sep 29, 2024
1 parent 36c612f commit 5073f0f
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
15 changes: 9 additions & 6 deletions atproto.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,29 +733,32 @@ def _convert(cls, obj, fetch_blobs=False, from_user=None):

blobs = {} # maps str URL to dict blob object
if fetch_blobs:
def fetch_blob(url, max_size):
def fetch_blob(url, blob_field, check_size=True):
if url and url not in blobs:
max_size = blob_field.get('maxSize') if check_size else None
accept = blob_field.get('accept')
try:
blob = AtpRemoteBlob.get_or_create(
url=url, get_fn=util.requests_get, max_size=max_size)
url=url, get_fn=util.requests_get, max_size=max_size,
accept_types=accept)
blobs[url] = blob.as_object()
except (RequestException, ValidationError) as e:
logging.info(f'failed, skipping {url} : {e}')

for o in obj.as1, as1.get_object(obj.as1):
for url in util.get_urls(o, 'image'):
fetch_blob(url, None)
# TODO: maybe eventually check image maxSize? the current
# TODO: maybe eventually check size? the current
# 1MB limit feels too small though, and the AppView doesn't
# seem to validate, it's happily allowing bigger image blobs
# as of 9/29/2024:
# https://github.com/snarfed/bridgy-fed/issues/1348#issuecomment-2381056468
# appview.defs['app.bsky.embed.images#image']['properties']['image']['maxSize'])
fetch_blob(url, appview.defs['app.bsky.embed.images#image']['properties']['image'],
check_size=False)

for att in util.get_list(o, 'attachments'):
if isinstance(att, dict):
fetch_blob(att.get('stream', {}).get('url'),
appview.defs['app.bsky.embed.video']['properties']['video']['maxSize'])
appview.defs['app.bsky.embed.video']['properties']['video'])

inner_obj = as1.get_object(obj.as1) or obj.as1
orig_url = as1.get_url(inner_obj) or inner_obj.get('id')
Expand Down
22 changes: 22 additions & 0 deletions tests/test_atproto.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,28 @@ def test_convert_fetch_blobs_true_video_over_maxSize(self, mock_get):
}],
}), fetch_blobs=True))

self.assertEqual(0, AtpRemoteBlob.query().count())
mock_get.assert_has_calls([self.req('https://my/vid')])

# Converting a note with a video attachment when the fetched blob's
# Content-Type is 'not/ok'. Per the test name, that type is presumably not in
# the video blob lexicon's `accept` list (the list itself isn't visible here;
# confirm against the lexicon). The expected record is a plain
# app.bsky.feed.post with no embed, ie the video is skipped.
@patch('requests.get', return_value=requests_response(
'blob contents', content_type='not/ok'))
def test_convert_fetch_blobs_true_video_type_not_in_accept(self, mock_get):
# expected output has only text fields; the attachment is not included
self.assertEqual({
'$type': 'app.bsky.feed.post',
'text': 'foo bar',
'createdAt': '2022-01-02T03:04:05.000Z',
'bridgyOriginalText': 'foo bar',
}, ATProto.convert(Object(our_as1={
'objectType': 'note',
'content': 'foo bar',
'attachments': [{
'objectType': 'video',
'stream': {'url': 'https://my/vid'},
'displayName': 'my alt',
}],
}), fetch_blobs=True))

# no AtpRemoteBlob should have been stored...
self.assertEqual(0, AtpRemoteBlob.query().count())
# ...even though the video URL itself was requested
mock_get.assert_has_calls([self.req('https://my/vid')])

@patch('requests.get', side_effect=[
Expand Down

0 comments on commit 5073f0f

Please sign in to comment.