From 287bba91b8c06ab856e08e7aac4a11f63d9f6447 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Tue, 8 Mar 2022 16:57:00 +0000 Subject: [PATCH 01/20] add list fields --- twarc/expansions.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/twarc/expansions.py b/twarc/expansions.py index 9b5d9715..8d8e9e57 100644 --- a/twarc/expansions.py +++ b/twarc/expansions.py @@ -92,6 +92,15 @@ "place_type", ] +LIST_FIELDS = [ + "owner_id", + "created_at", + "member_count", + "follower_count", + "private", + "description", +] + def extract_includes(response, expansion, _id="id"): if "includes" in response and expansion in response["includes"]: From 093eb5c72c8ebccdd42bbcce3bf556da0fca6e37 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Tue, 8 Mar 2022 16:57:28 +0000 Subject: [PATCH 02/20] update list memberships to use expansions.py --- twarc/client2.py | 61 ++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index 4084bdb7..e864edab 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -21,6 +21,7 @@ MEDIA_FIELDS, POLL_FIELDS, PLACE_FIELDS, + LIST_FIELDS, ensure_flattened, ) from twarc.decorators2 import * @@ -156,6 +157,13 @@ def _prepare_params(self, **kwargs): else ",".join(PLACE_FIELDS) ) + if "list_fields" in kwargs: + params["list.fields"] = ( + kwargs.pop("list_fields") + if kwargs["list_fields"] + else ",".join(LIST_FIELDS) + ) + # Format start_time and end_time if "start_time" in kwargs: start_time = kwargs["start_time"] @@ -296,46 +304,53 @@ def _search( def list_memberships( self, - id, + user, expansions=None, list_fields=None, max_results=None, pagination_token=None, - user_field=None - ): + user_fields=None, + ): """ - Function allows to get all the membership list from an specific user ID + Returns all Lists a specified user is a member of. - Calls [GET /2/users/:id/list_memberships](https://developer.twitter.com/en/docs/twitter-api/lists/list-members/introduction) + Calls [GET /2/users/:id/list_memberships](https://developer.twitter.com/en/docs/twitter-api/lists/list-members/api-reference/get-users-id-list_memberships) Args: + user (int): ID of the user. expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. - list.fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. - max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. - pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. - user.fields( enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. - """ - user_id = self._ensure_user_id(id) + Returns: + generator[dict]: A generator, dict for each page of results. + """ + user_id = self._ensure_user_id(user) url = f"https://api.twitter.com/2/users/{user_id}/list_memberships" params = self._prepare_params( - list_fields=list_fields, - max_results=max_results, - pagination_token=pagination_token, - user_field=user_field + list_fields=list_fields, + max_results=max_results, + pagination_token=pagination_token, + user_fields=user_fields, ) if expansions: - params["expansions"] = "owner_id" - - resp = self.get(url, params=params) - data = resp.json() - - return data - + params["expansions"] = "owner_id" + count = 0 + for response in self.get_paginated(url, params=params): + # can return without 'data' if there are no results + if "data" in response: + count += len(response["data"]) + yield response + else: + log.info( + f"Retrieved an empty page of results for list memberships of {user_id}" + ) def search_recent( self, @@ -1263,7 +1278,7 @@ def get_paginated(self, *args, **kwargs): Returns: generator[dict]: A generator, dict for each page of results. """ - + resp = self.get(*args, **kwargs) page = resp.json() From 045cb3f57da92bd94a01719693bbbc7c0c7f6200 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Tue, 8 Mar 2022 18:08:08 +0000 Subject: [PATCH 03/20] remove unused count --- twarc/client2.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index e864edab..cdf42726 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -341,11 +341,9 @@ def list_memberships( if expansions: params["expansions"] = "owner_id" - count = 0 for response in self.get_paginated(url, params=params): # can return without 'data' if there are no results if "data" in response: - count += len(response["data"]) yield response else: log.info( @@ -952,11 +950,9 @@ def _timeline( if len(excludes) > 0: params["exclude"] = ",".join(excludes) - count = 0 for response in self.get_paginated(url, params=params): # can return without 'data' if there are no results if "data" in response: - count += len(response["data"]) yield response else: log.info(f"Retrieved an empty page of results for timeline {user_id}") From d981369f389029db5556014200ed73bf61a19003 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Tue, 8 Mar 2022 23:26:19 +0000 Subject: [PATCH 04/20] pagination for lists --- twarc/client2.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/twarc/client2.py b/twarc/client2.py index cdf42726..749429bc 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -1285,6 +1285,7 @@ def get_paginated(self, *args, **kwargs): yield page + # Todo: Maybe this should be backwards.. check for `next_token` endings = [ "mentions", "tweets", @@ -1293,6 +1294,10 @@ def get_paginated(self, *args, **kwargs): "liked_tweets", "liking_users", "retweeted_by", + "members", + "memberships", + "followed_lists", + "owned_lists", ] # The search endpoints only take a next_token, but the timeline From d804a20da8713c3e63a2366a3bf2c154d288872f Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 9 Mar 2022 00:11:53 +0000 Subject: [PATCH 05/20] add pinned lists --- twarc/client2.py | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index 749429bc..3edaa41d 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -338,17 +338,38 @@ def list_memberships( user_fields=user_fields, ) - if expansions: - params["expansions"] = "owner_id" + def pinned_lists( + self, + user, + expansions=None, + list_fields=None, + max_results=None, + pagination_token=None, + user_fields=None, + ): + """ + Returns the Lists pinned by a specified user. - for response in self.get_paginated(url, params=params): - # can return without 'data' if there are no results - if "data" in response: - yield response - else: - log.info( - f"Retrieved an empty page of results for list memberships of {user_id}" - ) + Calls [GET /2/users/:id/pinned_lists](https://developer.twitter.com/en/docs/twitter-api/lists/pinned-lists/api-reference/get-users-id-pinned_lists) + + Args: + user (int): ID of the user. + expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + + Returns: + generator[dict]: A generator, dict for each page of results. + """ + user_id = self._ensure_user_id(user) + url = f"https://api.twitter.com/2/users/{user_id}/pinned_lists" + + return self._lists( + url, expansions, list_fields, max_results, pagination_token, user_fields + ) def search_recent( self, @@ -1298,6 +1319,7 @@ def get_paginated(self, *args, **kwargs): "memberships", "followed_lists", "owned_lists", + "pinned_lists", ] # The search endpoints only take a next_token, but the timeline From 2a9c1cc0c1ed236afe16ca29f5d0ef91f8f4026e Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 9 Mar 2022 00:12:37 +0000 Subject: [PATCH 06/20] add more lists endpoints --- twarc/client2.py | 175 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 5 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index 3edaa41d..c6e0f2af 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -302,6 +302,108 @@ def _search( log.info(f"No more results for search {query}.") + def _lists( + self, + url, + expansions=None, + list_fields=None, + max_results=None, + pagination_token=None, + user_fields=None, + ): + """ + Paginates and returns lists + """ + params = self._prepare_params( + list_fields=list_fields, + max_results=max_results, + pagination_token=pagination_token, + user_fields=user_fields, + ) + + if expansions: + params["expansions"] = "owner_id" + + for response in self.get_paginated(url, params=params): + # can return without 'data' if there are no results + if "data" in response: + yield response + else: + log.info(f"Retrieved an empty page of results of lists for {url}") + + def lists_followers( + self, + list_id, + expansions, + max_results, + pagination_token, + tweet_fields, + user_fields, + ): + """ + Returns a list of users who are followers of the specified List. + + Calls [GET /2/lists/:id/followers](https://developer.twitter.com/en/docs/twitter-api/lists/list-follows/api-reference/get-lists-id-followers) + + Args: + list_id (int): ID of the list. + expansions enum (pinned_tweet_id): Expansions, include pinned tweets. + max_results (int): the maximum number of results to retrieve. Between 1 and 100. Default is 100. + + Returns: + generator[dict]: A generator, dict for each page of results. + + """ + params = self._prepare_params( + tweet_fields=tweet_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) + + if expansions: + params["expansions"] = "pinned_tweet_id" + + url = f"https://api.twitter.com/2/lists/{list_id}/followers" + return self.get_paginated(url, params=params) + + def lists_members( + self, + list_id, + expansions, + max_results, + pagination_token, + tweet_fields, + user_fields, + ): + """ + Returns a list of users who are members of the specified List. + + Calls [GET /2/lists/:id/members](https://developer.twitter.com/en/docs/twitter-api/lists/list-members/api-reference/get-lists-id-members) + + Args: + list_id (int): ID of the list. + expansions enum (pinned_tweet_id): Expansions, include pinned tweets. + max_results (int): the maximum number of results to retrieve. Between 1 and 100. Default is 100. + + Returns: + generator[dict]: A generator, dict for each page of results. + + """ + + params = self._prepare_params( + tweet_fields=tweet_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) + + if expansions: + params["expansions"] = "pinned_tweet_id" + + url = f"https://api.twitter.com/2/lists/{list_id}/members" + return self.get_paginated(url, params=params) + def list_memberships( self, user, @@ -331,11 +433,74 @@ def list_memberships( user_id = self._ensure_user_id(user) url = f"https://api.twitter.com/2/users/{user_id}/list_memberships" - params = self._prepare_params( - list_fields=list_fields, - max_results=max_results, - pagination_token=pagination_token, - user_fields=user_fields, + return self._lists( + url, expansions, list_fields, max_results, pagination_token, user_fields + ) + + def followed_lists( + self, + user, + expansions=None, + list_fields=None, + max_results=None, + pagination_token=None, + user_fields=None, + ): + """ + Returns all Lists a specified user follows. + + Calls [GET /2/users/:id/followed_lists](https://developer.twitter.com/en/docs/twitter-api/lists/list-follows/api-reference/get-users-id-followed_lists) + + Args: + user (int): ID of the user. + expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + + Returns: + generator[dict]: A generator, dict for each page of results. + """ + user_id = self._ensure_user_id(user) + url = f"https://api.twitter.com/2/users/{user_id}/followed_lists" + + return self._lists( + url, expansions, list_fields, max_results, pagination_token, user_fields + ) + + def owned_lists( + self, + user, + expansions=None, + list_fields=None, + max_results=None, + pagination_token=None, + user_fields=None, + ): + """ + Returns all Lists owned by the specified user. + + Calls [GET /2/users/:id/owned_lists](https://developer.twitter.com/en/docs/twitter-api/lists/list-lookup/api-reference/get-users-id-owned_lists) + + Args: + user (int): ID of the user. + expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + + Returns: + generator[dict]: A generator, dict for each page of results. + """ + user_id = self._ensure_user_id(user) + url = f"https://api.twitter.com/2/users/{user_id}/owned_lists" + + return self._lists( + url, expansions, list_fields, max_results, pagination_token, user_fields ) def pinned_lists( From e6a8bc2fec4a4e113cac0ba104d53608cd3e5b67 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 9 Mar 2022 01:24:59 +0000 Subject: [PATCH 07/20] add list_lookup --- twarc/client2.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/twarc/client2.py b/twarc/client2.py index c6e0f2af..a0ef2a18 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -536,6 +536,33 @@ def pinned_lists( url, expansions, list_fields, max_results, pagination_token, user_fields ) + def list_lookup(self, list_id, expansions, list_fields, user_fields): + """ + Returns the details of a specified List. + + Calls [GET /2/lists/:id](https://developer.twitter.com/en/docs/twitter-api/lists/list-lookup/api-reference/get-lists-id) + + Args: + list_id (int): ID of the list. + expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + + Returns: + dict: Result dictionary. + """ + + params = self._prepare_params( + list_fields=list_fields, + user_fields=user_fields, + ) + + if expansions: + params["expansions"] = "owner_id" + url = f"https://api.twitter.com/2/lists/{list_id}" + return self.get(url, params=params) + def search_recent( self, query, From adfda44882f8590765fd812db043a9ad8f135e70 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 9 Mar 2022 03:43:39 +0000 Subject: [PATCH 08/20] list tests and params --- test_twarc2.py | 41 +++++++++++++++++++++++++++++++++++++++++ twarc/client2.py | 30 +++++++++++++++--------------- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/test_twarc2.py b/test_twarc2.py index 46908391..5aecf082 100644 --- a/test_twarc2.py +++ b/test_twarc2.py @@ -577,6 +577,47 @@ def test_liked_tweets(): break +def test_list_lookup(): + parks_list = T.list_lookup(715919216927322112) + assert "data" in parks_list + assert parks_list["data"]["name"] == "National-parks" + + +def test_list_members(): + response = list(T.list_members(715919216927322112)) + assert len(response) == 1 + members = twarc.expansions.flatten(response[0]) + assert len(members) == 8 + + +def test_list_followers(): + response = list(T.list_followers(715919216927322112)) + assert len(response) >= 2 + followers = twarc.expansions.flatten(response[0]) + assert len(followers) > 50 + + +def test_list_memberships(): + response = list(T.list_memberships("64flavors")) + assert len(response) == 1 + lists = twarc.expansions.flatten(response[0]) + assert len(lists) >= 9 + + +def test_followed_lists(): + response = list(T.followed_lists("nasa")) + assert len(response) == 1 + lists = twarc.expansions.flatten(response[0]) + assert len(lists) >= 1 + + +def test_owned_lists(): + response = list(T.owned_lists("nasa")) + assert len(response) >= 1 + lists = twarc.expansions.flatten(response[0]) + assert len(lists) >= 11 + + def test_twarc_metadata(): # With metadata (default) diff --git a/twarc/client2.py b/twarc/client2.py index a0ef2a18..f4f845ae 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -331,14 +331,14 @@ def _lists( else: log.info(f"Retrieved an empty page of results of lists for {url}") - def lists_followers( + def list_followers( self, list_id, - expansions, - max_results, - pagination_token, - tweet_fields, - user_fields, + expansions=None, + max_results=None, + pagination_token=None, + tweet_fields=None, + user_fields=None, ): """ Returns a list of users who are followers of the specified List. @@ -367,14 +367,14 @@ def lists_followers( url = f"https://api.twitter.com/2/lists/{list_id}/followers" return self.get_paginated(url, params=params) - def lists_members( + def list_members( self, list_id, - expansions, - max_results, - pagination_token, - tweet_fields, - user_fields, + expansions=None, + max_results=None, + pagination_token=None, + tweet_fields=None, + user_fields=None, ): """ Returns a list of users who are members of the specified List. @@ -513,7 +513,7 @@ def pinned_lists( user_fields=None, ): """ - Returns the Lists pinned by a specified user. + Returns the Lists pinned by the authenticating user. Does not work with a Bearer token. Calls [GET /2/users/:id/pinned_lists](https://developer.twitter.com/en/docs/twitter-api/lists/pinned-lists/api-reference/get-users-id-pinned_lists) @@ -536,7 +536,7 @@ def pinned_lists( url, expansions, list_fields, max_results, pagination_token, user_fields ) - def list_lookup(self, list_id, expansions, list_fields, user_fields): + def list_lookup(self, list_id, expansions=None, list_fields=None, user_fields=None): """ Returns the details of a specified List. @@ -561,7 +561,7 @@ def list_lookup(self, list_id, expansions, list_fields, user_fields): if expansions: params["expansions"] = "owner_id" url = f"https://api.twitter.com/2/lists/{list_id}" - return self.get(url, params=params) + return self.get(url, params=params).json() def search_recent( self, From cb11ef29b3f810b5e332b018a9ba997b9843a18e Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Thu, 10 Mar 2022 05:09:21 +0000 Subject: [PATCH 09/20] add list tweets --- test_twarc2.py | 7 +++++++ twarc/client2.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/test_twarc2.py b/test_twarc2.py index 5aecf082..bc9af51f 100644 --- a/test_twarc2.py +++ b/test_twarc2.py @@ -618,6 +618,13 @@ def test_owned_lists(): assert len(lists) >= 11 +def test_list_tweets(): + response = next(T.list_tweets(715919216927322112)) + assert "data" in response + tweets = twarc.expansions.flatten(response) + assert len(tweets) >= 90 + + def test_twarc_metadata(): # With metadata (default) diff --git a/twarc/client2.py b/twarc/client2.py index f4f845ae..bae1ac0a 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -563,6 +563,42 @@ def list_lookup(self, list_id, expansions=None, list_fields=None, user_fields=No url = f"https://api.twitter.com/2/lists/{list_id}" return self.get(url, params=params).json() + def list_tweets( + self, + list_id, + expansions=None, + tweet_fields=None, + user_fields=None, + max_results=None, + pagination_token=None, + ): + """ + Returns Tweets from the specified List. + + Calls [GET /2/lists/:id/tweets](https://developer.twitter.com/en/docs/twitter-api/lists/list-tweets/api-reference/get-lists-id-tweets) + + Args: + list_id (int): ID of the list. + expansions enum (author_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + + Returns: + generator[dict]: A generator, dict for each page of results. + """ + + params = self._prepare_params( + max_results=max_results, + expansions=expansions, + tweet_fields=tweet_fields, + user_fields=user_fields, + pagination_token=pagination_token, + ) + + url = f"https://api.twitter.com/2/lists/{list_id}/tweets" + return self.get_paginated(url, params=params) + def search_recent( self, query, From 243d043b98e0561b8f5998c9f3e68db05cc91d1e Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Thu, 10 Mar 2022 18:17:17 +0000 Subject: [PATCH 10/20] rearrange params --- twarc/client2.py | 69 +++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index bae1ac0a..0bb4ddfd 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -307,18 +307,18 @@ def _lists( url, expansions=None, list_fields=None, + user_fields=None, max_results=None, pagination_token=None, - user_fields=None, ): """ Paginates and returns lists """ params = self._prepare_params( list_fields=list_fields, + user_fields=user_fields, max_results=max_results, pagination_token=pagination_token, - user_fields=user_fields, ) if expansions: @@ -335,10 +335,10 @@ def list_followers( self, list_id, expansions=None, - max_results=None, - pagination_token=None, tweet_fields=None, user_fields=None, + max_results=None, + pagination_token=None, ): """ Returns a list of users who are followers of the specified List. @@ -371,10 +371,10 @@ def list_members( self, list_id, expansions=None, - max_results=None, - pagination_token=None, tweet_fields=None, user_fields=None, + max_results=None, + pagination_token=None, ): """ Returns a list of users who are members of the specified List. @@ -384,7 +384,8 @@ def list_members( Args: list_id (int): ID of the list. expansions enum (pinned_tweet_id): Expansions, include pinned tweets. - max_results (int): the maximum number of results to retrieve. Between 1 and 100. Default is 100. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. Returns: generator[dict]: A generator, dict for each page of results. @@ -409,9 +410,9 @@ def list_memberships( user, expansions=None, list_fields=None, + user_fields=None, max_results=None, pagination_token=None, - user_fields=None, ): """ Returns all Lists a specified user is a member of. @@ -422,10 +423,10 @@ def list_memberships( user (int): ID of the user. expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. - max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. - pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. Returns: generator[dict]: A generator, dict for each page of results. @@ -434,7 +435,12 @@ def list_memberships( url = f"https://api.twitter.com/2/users/{user_id}/list_memberships" return self._lists( - url, expansions, list_fields, max_results, pagination_token, user_fields + url=url, + expansions=expansions, + list_fields=list_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, ) def followed_lists( @@ -442,9 +448,9 @@ def followed_lists( user, expansions=None, list_fields=None, + user_fields=None, max_results=None, pagination_token=None, - user_fields=None, ): """ Returns all Lists a specified user follows. @@ -455,10 +461,10 @@ def followed_lists( user (int): ID of the user. expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. - max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. - pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. Returns: generator[dict]: A generator, dict for each page of results. @@ -467,7 +473,12 @@ def followed_lists( url = f"https://api.twitter.com/2/users/{user_id}/followed_lists" return self._lists( - url, expansions, list_fields, max_results, pagination_token, user_fields + url=url, + expansions=expansions, + list_fields=list_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, ) def owned_lists( @@ -475,9 +486,9 @@ def owned_lists( user, expansions=None, list_fields=None, + user_fields=None, max_results=None, pagination_token=None, - user_fields=None, ): """ Returns all Lists owned by the specified user. @@ -488,10 +499,10 @@ def owned_lists( user (int): ID of the user. expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. - max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. - pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. Returns: generator[dict]: A generator, dict for each page of results. @@ -500,7 +511,12 @@ def owned_lists( url = f"https://api.twitter.com/2/users/{user_id}/owned_lists" return self._lists( - url, expansions, list_fields, max_results, pagination_token, user_fields + url=url, + expansions=expansions, + list_fields=list_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, ) def pinned_lists( @@ -508,9 +524,9 @@ def pinned_lists( user, expansions=None, list_fields=None, + user_fields=None, max_results=None, pagination_token=None, - user_fields=None, ): """ Returns the Lists pinned by the authenticating user. Does not work with a Bearer token. @@ -521,10 +537,10 @@ def pinned_lists( user (int): ID of the user. expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. - max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. - pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. Returns: generator[dict]: A generator, dict for each page of results. @@ -533,7 +549,12 @@ def pinned_lists( url = f"https://api.twitter.com/2/users/{user_id}/pinned_lists" return self._lists( - url, expansions, list_fields, max_results, pagination_token, user_fields + url=url, + expansions=expansions, + list_fields=list_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, ) def list_lookup(self, list_id, expansions=None, list_fields=None, user_fields=None): @@ -589,10 +610,10 @@ def list_tweets( """ params = self._prepare_params( - max_results=max_results, expansions=expansions, tweet_fields=tweet_fields, user_fields=user_fields, + max_results=max_results, pagination_token=pagination_token, ) From c655a312d0c85e34d5ef28f9d0d125c0cfe65a31 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Thu, 10 Mar 2022 18:30:14 +0000 Subject: [PATCH 11/20] add list lookup command --- twarc/command2.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/twarc/command2.py b/twarc/command2.py index 56c7e0d5..86b43424 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -35,6 +35,7 @@ MEDIA_FIELDS, POLL_FIELDS, PLACE_FIELDS, + LIST_FIELDS, ) from click import command, option, Option, UsageError from click_config_file import configuration_option @@ -1916,6 +1917,44 @@ def stream(T, outfile, limit, **kwargs): log.info("archived %s", result["data"]["id"]) +@twarc2.group() +@click.pass_obj +def lists(T): + """ + Lists API support. + """ + pass + + +@lists.command("lookup") +@click.argument("list_id", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option("--pretty", is_flag=True, default=False, help="Pretty print the JSON") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.pass_obj +@cli_api_error +def lists_lookup(T, list_id, outfile, pretty, **kwargs): + """ + Look up a list using its list id or URL. + """ + + kwargs = _process_expansions_shortcuts(kwargs) + + if "https" in list_id: + list_id = list_id.split("/")[-1] + if not re.match("^\d+$", list_id): + click.echo(click.style("Please enter a List URL or ID", fg="red"), err=True) + result = T.list_lookup(list_id, **kwargs) + _write(result, outfile, pretty=pretty) + + @twarc2.group() @click.pass_obj def stream_rules(T): From a9ad8ba74944b44c1400b573be167f3d97e70ad4 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Fri, 11 Mar 2022 03:55:58 +0000 Subject: [PATCH 12/20] bulk lookup for lists --- twarc/command2.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/twarc/command2.py b/twarc/command2.py index 86b43424..b6f115e7 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -1942,7 +1942,7 @@ def lists(T): @cli_api_error def lists_lookup(T, list_id, outfile, pretty, **kwargs): """ - Look up a list using its list id or URL. + Look up a single list using its list id or URL. """ kwargs = _process_expansions_shortcuts(kwargs) @@ -1955,6 +1955,42 @@ def lists_lookup(T, list_id, outfile, pretty, **kwargs): _write(result, outfile, pretty=pretty) +@lists.command("bulk-lookup") +@command_line_input_output_file_arguments +@command_line_progressbar_option +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.pass_obj +@cli_api_error +def lists_bulk_lookup(T, infile, outfile, hide_progress, **kwargs): + """ + Look up many lists given a file of IDs or URLs. + """ + + kwargs = _process_expansions_shortcuts(kwargs) + + with FileLineProgressBar(infile, outfile, disable=hide_progress) as progress: + for list_id in infile: + progress.update() + + if "https" in list_id: + list_id = list_id.split("/")[-1] + if not re.match("^\d+$", list_id): + click.echo( + click.style("Skipping invalid List URL or ID: {line}", fg="red"), + err=True, + ) + continue + result = T.list_lookup(list_id.strip(), **kwargs) + _write(result, outfile) + + @twarc2.group() @click.pass_obj def stream_rules(T): From 16ead26be231cda4f02a2c8284f2b875f5068794 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Sun, 13 Mar 2022 00:54:32 +0000 Subject: [PATCH 13/20] add stub methods --- twarc/command2.py | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/twarc/command2.py b/twarc/command2.py index b6f115e7..9d83544f 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -1991,6 +1991,74 @@ def lists_bulk_lookup(T, infile, outfile, hide_progress, **kwargs): _write(result, outfile) +def _lists_owned(T, user, **kwargs): + pass + + +def _lists_subscribed(T, user, **kwargs): + pass + + +@lists.command("all") +@click.argument("user", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_all(T, user, outfile, hide_progress, **kwargs): + """ + Get all Lists that a user created or is subscribed to. + """ + pass + +@lists.command("owned") +@click.argument("user", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_owned(T, user, outfile, hide_progress, **kwargs): + """ + Get all Lists that a user created. + """ + pass + +@lists.command("subscribed") +@click.argument("user", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_subscribed(T, user, outfile, hide_progress, **kwargs): + """ + Get all Lists that a user is subscribed to. + """ + + @twarc2.group() @click.pass_obj def stream_rules(T): From 327b73e097a5768b5a705d338382b8d8910d41f4 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Mon, 14 Mar 2022 04:08:41 +0000 Subject: [PATCH 14/20] rearrange functions --- twarc/client2.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index 0bb4ddfd..674df31b 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -443,7 +443,7 @@ def list_memberships( pagination_token=pagination_token, ) - def followed_lists( + def owned_lists( self, user, expansions=None, @@ -453,9 +453,9 @@ def followed_lists( pagination_token=None, ): """ - Returns all Lists a specified user follows. + Returns all Lists owned by the specified user. - Calls [GET /2/users/:id/followed_lists](https://developer.twitter.com/en/docs/twitter-api/lists/list-follows/api-reference/get-users-id-followed_lists) + Calls [GET /2/users/:id/owned_lists](https://developer.twitter.com/en/docs/twitter-api/lists/list-lookup/api-reference/get-users-id-owned_lists) Args: user (int): ID of the user. @@ -470,7 +470,7 @@ def followed_lists( generator[dict]: A generator, dict for each page of results. """ user_id = self._ensure_user_id(user) - url = f"https://api.twitter.com/2/users/{user_id}/followed_lists" + url = f"https://api.twitter.com/2/users/{user_id}/owned_lists" return self._lists( url=url, @@ -481,7 +481,7 @@ def followed_lists( pagination_token=pagination_token, ) - def owned_lists( + def followed_lists( self, user, expansions=None, @@ -491,9 +491,9 @@ def owned_lists( pagination_token=None, ): """ - Returns all Lists owned by the specified user. + Returns all Lists a specified user follows. - Calls [GET /2/users/:id/owned_lists](https://developer.twitter.com/en/docs/twitter-api/lists/list-lookup/api-reference/get-users-id-owned_lists) + Calls [GET /2/users/:id/followed_lists](https://developer.twitter.com/en/docs/twitter-api/lists/list-follows/api-reference/get-users-id-followed_lists) Args: user (int): ID of the user. @@ -508,7 +508,7 @@ def owned_lists( generator[dict]: A generator, dict for each page of results. """ user_id = self._ensure_user_id(user) - url = f"https://api.twitter.com/2/users/{user_id}/owned_lists" + url = f"https://api.twitter.com/2/users/{user_id}/followed_lists" return self._lists( url=url, From a7cef387a63f39cc47d0f8936f1458b949851590 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Mon, 14 Mar 2022 04:08:52 +0000 Subject: [PATCH 15/20] followed and subscribed lists --- twarc/command2.py | 60 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/twarc/command2.py b/twarc/command2.py index d1936d7f..8cee9afc 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -2025,12 +2025,21 @@ def lists_bulk_lookup(T, infile, outfile, hide_progress, **kwargs): _write(result, outfile) -def _lists_owned(T, user, **kwargs): - pass - - -def _lists_subscribed(T, user, **kwargs): - pass +def _get_lists(func, user, outfile, limit, hide_progress, **kwargs): + """ + Get owned or followed lists + """ + count = 0 + with tqdm(disable=hide_progress, total=1) as progress: + _lists = func(user, **kwargs) + for result in _lists: + _write(result, outfile) + count += len(result["data"]) + if limit != 0 and count >= limit: + # Display message when stopped early + progress.desc = f"Set --limit of {limit} reached" + break + progress.update() @lists.command("all") @@ -2044,14 +2053,23 @@ def _lists_subscribed(T, user, **kwargs): help="Comma separated list of tweet fields to retrieve. Default is all available.", callback=_validate_expansions, ) +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) @command_line_progressbar_option @click.pass_obj @cli_api_error -def lists_all(T, user, outfile, hide_progress, **kwargs): +def lists_all(T, user, outfile, limit, hide_progress, **kwargs): """ Get all Lists that a user created or is subscribed to. """ - pass + hide_progress = True if (outfile.name == "") else hide_progress + _get_lists(T.owned_lists, user, outfile, limit, hide_progress, **kwargs) + _get_lists(T.followed_lists, user, outfile, limit, hide_progress, **kwargs) + @lists.command("owned") @click.argument("user", type=str) @@ -2064,16 +2082,24 @@ def lists_all(T, user, outfile, hide_progress, **kwargs): help="Comma separated list of tweet fields to retrieve. Default is all available.", callback=_validate_expansions, ) +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) @command_line_progressbar_option @click.pass_obj @cli_api_error -def lists_owned(T, user, outfile, hide_progress, **kwargs): +def lists_owned(T, user, outfile, limit, hide_progress, **kwargs): """ Get all Lists that a user created. """ - pass + hide_progress = True if (outfile.name == "") else hide_progress + _get_lists(T.owned_lists, user, outfile, limit, hide_progress, **kwargs) + -@lists.command("subscribed") +@lists.command("followed") @click.argument("user", type=str) @click.argument("outfile", type=click.File("w"), default="-") @click.option( @@ -2084,13 +2110,21 @@ def lists_owned(T, user, outfile, hide_progress, **kwargs): help="Comma separated list of tweet fields to retrieve. Default is all available.", callback=_validate_expansions, ) +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) @command_line_progressbar_option @click.pass_obj @cli_api_error -def lists_subscribed(T, user, outfile, hide_progress, **kwargs): +def lists_followed(T, user, outfile, limit, hide_progress, **kwargs): """ - Get all Lists that a user is subscribed to. + Get all Lists that a user is following. """ + hide_progress = True if (outfile.name == "") else hide_progress + _get_lists(T.followed_lists, user, outfile, limit, hide_progress, **kwargs) @twarc2.group() From b8f1f8e985c41af5f2b6b0a6a11d976fb404fac5 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Tue, 15 Mar 2022 00:31:31 +0000 Subject: [PATCH 16/20] add memberships --- twarc/command2.py | 49 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/twarc/command2.py b/twarc/command2.py index 8cee9afc..e3638a53 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -2025,12 +2025,12 @@ def lists_bulk_lookup(T, infile, outfile, hide_progress, **kwargs): _write(result, outfile) -def _get_lists(func, user, outfile, limit, hide_progress, **kwargs): +def _get_lists(func, user, outfile, limit, hide_progress, default_total=1, **kwargs): """ Get owned or followed lists """ count = 0 - with tqdm(disable=hide_progress, total=1) as progress: + with tqdm(disable=hide_progress, total=default_total) as progress: _lists = func(user, **kwargs) for result in _lists: _write(result, outfile) @@ -2040,6 +2040,7 @@ def _get_lists(func, user, outfile, limit, hide_progress, **kwargs): progress.desc = f"Set --limit of {limit} reached" break progress.update() + progress.update(progress.total - progress.n) @lists.command("all") @@ -2127,6 +2128,50 @@ def lists_followed(T, user, outfile, limit, hide_progress, **kwargs): _get_lists(T.followed_lists, user, outfile, limit, hide_progress, **kwargs) +@lists.command("memberships") +@click.argument("user", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_memberships(T, user, outfile, limit, hide_progress, **kwargs): + """ + Get all Lists that a user is following. + """ + hide_progress = True if (outfile.name == "") else hide_progress + user_object = T._ensure_user(user) + listed_count = 1 + if "public_metrics" in user_object: + if ( + "listed_count" in user_object["public_metrics"] + and user_object["public_metrics"]["listed_count"] > 0 + ): + listed_count = user_object["public_metrics"]["listed_count"] + _get_lists( + T.list_memberships, + user, + outfile, + limit, + hide_progress, + default_total=listed_count, + **kwargs, + ) + + @twarc2.group() @click.pass_obj def stream_rules(T): From fe681e208cfb1a8cf6cf9dd7926879843c319e22 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Tue, 15 Mar 2022 04:12:52 +0000 Subject: [PATCH 17/20] refactor with _write_with_progress --- twarc/client2.py | 1 + twarc/command2.py | 316 ++++++++++++++++++++++++++++++---------------- 2 files changed, 211 insertions(+), 106 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index 674df31b..da025c81 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -1457,6 +1457,7 @@ def liked_tweets( Retrieve the tweets liked by the given user_id. """ + user_id = self._ensure_user_id(user_id) url = f"https://api.twitter.com/2/users/{user_id}/liked_tweets" params = self._prepare_params( diff --git a/twarc/command2.py b/twarc/command2.py index e3638a53..54e0f941 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -9,6 +9,7 @@ import time import twarc import click +import inspect import logging import pathlib import datetime @@ -811,26 +812,26 @@ def followers(T, user, outfile, limit, max_results, hide_progress): """ Get the followers for a given user. """ - count = 0 user_id = None - lookup_total = 0 + lookup_total = 1 - if outfile is not None and (outfile.name == ""): - hide_progress = True + hide_progress = True if (outfile.name == "") else hide_progress if not hide_progress: target_user = T._ensure_user(user) user_id = target_user["id"] lookup_total = target_user["public_metrics"]["followers_count"] - with tqdm(disable=hide_progress, total=lookup_total) as progress: - for result in T.followers(user, user_id=user_id, max_results=max_results): - _write(result, outfile) - count += len(result["data"]) - progress.update(len(result["data"])) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.followers, + user=user, + user_id=user_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + max_results=max_results, + ) @twarc2.command("following") @@ -855,26 +856,26 @@ def following(T, user, outfile, limit, max_results, hide_progress): """ Get the users that a given user is following. """ - count = 0 user_id = None - lookup_total = 0 + lookup_total = 1 - if outfile is not None and (outfile.name == ""): - hide_progress = True + hide_progress = True if (outfile.name == "") else hide_progress if not hide_progress: target_user = T._ensure_user(user) user_id = target_user["id"] lookup_total = target_user["public_metrics"]["following_count"] - with tqdm(disable=hide_progress, total=lookup_total) as progress: - for result in T.following(user, user_id=user_id, max_results=max_results): - _write(result, outfile) - count += len(result["data"]) - progress.update(len(result["data"])) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.following, + user=user, + user_id=user_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + max_results=max_results, + ) @twarc2.command("liking-users") @@ -902,14 +903,12 @@ def liking_users(T, tweet_id, outfile, limit, max_results, hide_progress): Note that the progress bar is approximate. """ - count = 0 - lookup_total = 0 + lookup_total = 1 if not re.match("^\d+$", str(tweet_id)): click.echo(click.style("Please enter a tweet ID", fg="red"), err=True) - if outfile is not None and (outfile.name == ""): - hide_progress = True + hide_progress = True if (outfile.name == "") else hide_progress if not hide_progress: # TODO: we could probably do this everytime, and avoid doing any lookups @@ -918,14 +917,15 @@ def liking_users(T, tweet_id, outfile, limit, max_results, hide_progress): if "data" in target_tweet: lookup_total = target_tweet["data"][0]["public_metrics"]["like_count"] - with tqdm(disable=hide_progress, total=lookup_total) as progress: - for result in T.liking_users(tweet_id, max_results=max_results): - _write(result, outfile) - count += len(result.get("data", [])) - progress.update(len(result.get("data", []))) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.liking_users, + tweet_id=tweet_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + max_results=max_results, + ) @twarc2.command("retweeted-by") @@ -953,14 +953,12 @@ def retweeted_by(T, tweet_id, outfile, limit, max_results, hide_progress): Note that the progress bar is approximate. """ - count = 0 lookup_total = 0 if not re.match("^\d+$", str(tweet_id)): click.echo(click.style("Please enter a tweet ID", fg="red"), err=True) - if outfile is not None and (outfile.name == ""): - hide_progress = True + hide_progress = True if (outfile.name == "") else hide_progress if not hide_progress: # TODO: we could probably do this everytime, and avoid doing any lookups @@ -969,14 +967,15 @@ def retweeted_by(T, tweet_id, outfile, limit, max_results, hide_progress): if "data" in target_tweet: lookup_total = target_tweet["data"][0]["public_metrics"]["retweet_count"] - with tqdm(disable=hide_progress, total=lookup_total) as progress: - for result in T.retweeted_by(tweet_id, max_results=max_results): - _write(result, outfile) - count += len(result.get("data", [])) - progress.update(len(result.get("data", []))) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.retweeed_by, + tweet_id=tweet_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + max_results=max_results, + ) @twarc2.command("liked-tweets") @@ -1004,26 +1003,18 @@ def liked_tweets(T, user_id, outfile, limit, max_results, hide_progress): Note that the progress bar is approximate. """ - count = 0 - lookup_total = 0 - - if not re.match("^\d+$", str(user_id)): - click.echo(click.style("Please enter a user ID", fg="red"), err=True) - - if outfile is not None and (outfile.name == ""): - hide_progress = True # NB: there doesn't appear to be anyway to get the total count of likes # a user has made, so the progress bar isn't very useful in this case... - - with tqdm(disable=hide_progress) as progress: - for result in T.liked_tweets(user_id, max_results=max_results): - _write(result, outfile) - count += len(result.get("data", [])) - progress.update(len(result.get("data", []))) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.liked_tweets, + user_id=user_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + max_results=max_results, + ) @twarc2.command("sample") @@ -1228,7 +1219,7 @@ def mentions(T, user_id, outfile, hide_progress, **kwargs): with tqdm(disable=hide_progress, total=800) as progress: for result in T.mentions(user_id, **kwargs): _write(result, outfile) - progress.update(len(result["data"])) + progress.update(len(result.get("data", []))) else: if progress.n > 800: progress.desc = f"API limit reached with {progress.n} tweets" @@ -2025,24 +2016,6 @@ def lists_bulk_lookup(T, infile, outfile, hide_progress, **kwargs): _write(result, outfile) -def _get_lists(func, user, outfile, limit, hide_progress, default_total=1, **kwargs): - """ - Get owned or followed lists - """ - count = 0 - with tqdm(disable=hide_progress, total=default_total) as progress: - _lists = func(user, **kwargs) - for result in _lists: - _write(result, outfile) - count += len(result["data"]) - if limit != 0 and count >= limit: - # Display message when stopped early - progress.desc = f"Set --limit of {limit} reached" - break - progress.update() - progress.update(progress.total - progress.n) - - @lists.command("all") @click.argument("user", type=str) @click.argument("outfile", type=click.File("w"), default="-") @@ -2067,9 +2040,25 @@ def lists_all(T, user, outfile, limit, hide_progress, **kwargs): """ Get all Lists that a user created or is subscribed to. """ - hide_progress = True if (outfile.name == "") else hide_progress - _get_lists(T.owned_lists, user, outfile, limit, hide_progress, **kwargs) - _get_lists(T.followed_lists, user, outfile, limit, hide_progress, **kwargs) + kwargs = _process_expansions_shortcuts(kwargs) + _write_with_progress( + func=T.owned_lists, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + **kwargs, + ) + _write_with_progress( + func=T.followed_lists, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + **kwargs, + ) @lists.command("owned") @@ -2096,8 +2085,16 @@ def lists_owned(T, user, outfile, limit, hide_progress, **kwargs): """ Get all Lists that a user created. """ - hide_progress = True if (outfile.name == "") else hide_progress - _get_lists(T.owned_lists, user, outfile, limit, hide_progress, **kwargs) + kwargs = _process_expansions_shortcuts(kwargs) + _write_with_progress( + func=T.owned_lists, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + **kwargs, + ) @lists.command("followed") @@ -2124,8 +2121,16 @@ def lists_followed(T, user, outfile, limit, hide_progress, **kwargs): """ Get all Lists that a user is following. """ - hide_progress = True if (outfile.name == "") else hide_progress - _get_lists(T.followed_lists, user, outfile, limit, hide_progress, **kwargs) + kwargs = _process_expansions_shortcuts(kwargs) + _write_with_progress( + func=T.followed_lists, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + **kwargs, + ) @lists.command("memberships") @@ -2150,24 +2155,100 @@ def lists_followed(T, user, outfile, limit, hide_progress, **kwargs): @cli_api_error def lists_memberships(T, user, outfile, limit, hide_progress, **kwargs): """ - Get all Lists that a user is following. + Get all Lists that a user is a member of. """ + kwargs = _process_expansions_shortcuts(kwargs) + lookup_total = 1 + hide_progress = True if (outfile.name == "") else hide_progress - user_object = T._ensure_user(user) - listed_count = 1 - if "public_metrics" in user_object: - if ( - "listed_count" in user_object["public_metrics"] - and user_object["public_metrics"]["listed_count"] > 0 - ): - listed_count = user_object["public_metrics"]["listed_count"] - _get_lists( - T.list_memberships, - user, - outfile, - limit, - hide_progress, - default_total=listed_count, + + if not hide_progress: + target_user = T._ensure_user(user) + lookup_total = target_user["public_metrics"]["listed_count"] + + _write_with_progress( + func=T.list_memberships, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + **kwargs, + ) + + +@lists.command("followers") +@click.argument("list-id", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_expansions_options +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_followers(T, list_id, outfile, limit, hide_progress, **kwargs): + """ + Get all Users that are following (subscribed) to a list. + """ + kwargs = _process_expansions_shortcuts(kwargs) + # Also remove media poll and place from kwargs, these are not valid for this endpoint: + kwargs.pop("media_fields", None) + kwargs.pop("poll_fields", None) + kwargs.pop("place_fields", None) + + _list = ensure_flattened(T.list_lookup(list_id))[-1] + list_id = _list["id"] + lookup_total = int(_list["follower_count"]) + + _write_with_progress( + func=T.list_followers, + list_id=list_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + **kwargs, + ) + + +@lists.command("members") +@click.argument("list-id", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_expansions_options +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_members(T, list_id, outfile, limit, hide_progress, **kwargs): + """ + Get all Users that are members of a list. + """ + kwargs = _process_expansions_shortcuts(kwargs) + # Also remove media poll and place from kwargs, these are not valid for this endpoint: + kwargs.pop("media_fields", None) + kwargs.pop("poll_fields", None) + kwargs.pop("place_fields", None) + + _list = ensure_flattened(T.list_lookup(list_id))[-1] + list_id = _list["id"] + lookup_total = int(_list["member_count"]) + + _write_with_progress( + func=T.list_members, + list_id=list_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, **kwargs, ) @@ -2770,3 +2851,26 @@ def _error_str(errors): def _write(results, outfile, pretty=False): indent = 2 if pretty else None click.echo(json.dumps(results, indent=indent), file=outfile) + + +def _write_with_progress( + func, outfile, limit, hide_progress, progress_total=1, **kwargs +): + """ + Get results page by page and write them out with a progress bar + """ + count = 0 + hide_progress = True if (outfile.name == "") else hide_progress + + with tqdm(disable=hide_progress, total=progress_total) as progress: + results = func(**kwargs) + for result in results: + _write(result, outfile) + count += len(result.get("data", [])) + progress.update(len(result.get("data", []))) + if limit != 0 and count >= limit: + # Display message when stopped early + progress.desc = f"Set --limit of {limit} reached" + break + # Finish the progress bar + progress.update(progress.total - progress.n) From f047fe72a627217150892e1d0e7da6e6e5ee88e5 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Tue, 15 Mar 2022 04:26:40 +0000 Subject: [PATCH 18/20] list tweets command --- twarc/command2.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/twarc/command2.py b/twarc/command2.py index 54e0f941..4d034751 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -9,7 +9,6 @@ import time import twarc import click -import inspect import logging import pathlib import datetime @@ -2253,6 +2252,40 @@ def lists_members(T, list_id, outfile, limit, hide_progress, **kwargs): ) +@lists.command("tweets") +@click.argument("list-id", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--limit", + default=0, + help="Maximum number of tweets to save. Default and max is last 800.", + type=int, +) +@command_line_expansions_options +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_tweets(T, list_id, outfile, limit, hide_progress, **kwargs): + """ + Get all Users that are members of a list. + """ + kwargs = _process_expansions_shortcuts(kwargs) + # Also remove media poll and place from kwargs, these are not valid for this endpoint: + kwargs.pop("media_fields", None) + kwargs.pop("poll_fields", None) + kwargs.pop("place_fields", None) + + _write_with_progress( + func=T.list_tweets, + list_id=list_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=800, + **kwargs, + ) + + @twarc2.group() @click.pass_obj def stream_rules(T): From 42f0be4e5bc7baeb728f51d7a21117ae77f41337 Mon Sep 17 00:00:00 2001 From: Sam Hames Date: Thu, 17 Mar 2022 10:21:33 +1000 Subject: [PATCH 19/20] Minor docstring tweaks for clarity --- twarc/command2.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/twarc/command2.py b/twarc/command2.py index 4d034751..97fbaba9 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -2038,6 +2038,11 @@ def lists_bulk_lookup(T, infile, outfile, hide_progress, **kwargs): def lists_all(T, user, outfile, limit, hide_progress, **kwargs): """ Get all Lists that a user created or is subscribed to. + + You can use the `owned` or `followed` command to get just the lists + created by the user, or just the lists followed by the user + respectively. + """ kwargs = _process_expansions_shortcuts(kwargs) _write_with_progress( @@ -2267,7 +2272,7 @@ def lists_members(T, list_id, outfile, limit, hide_progress, **kwargs): @cli_api_error def lists_tweets(T, list_id, outfile, limit, hide_progress, **kwargs): """ - Get all Users that are members of a list. + Get up to the most recent 800 tweets posted by members of a list. """ kwargs = _process_expansions_shortcuts(kwargs) # Also remove media poll and place from kwargs, these are not valid for this endpoint: From 9c1f0c46fef0a6f3e3a1e39549a83091108768d8 Mon Sep 17 00:00:00 2001 From: Sam Hames Date: Thu, 17 Mar 2022 10:31:54 +1000 Subject: [PATCH 20/20] Tweak docstring and help text for list bulk-lookup command --- twarc/command2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/twarc/command2.py b/twarc/command2.py index 97fbaba9..88fcf4ca 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -1987,14 +1987,14 @@ def lists_lookup(T, list_id, outfile, pretty, **kwargs): default=",".join(LIST_FIELDS), type=click.STRING, is_eager=True, - help="Comma separated list of tweet fields to retrieve. Default is all available.", + help="Comma separated list of fields about a list to retrieve. Default is all available.", callback=_validate_expansions, ) @click.pass_obj @cli_api_error def lists_bulk_lookup(T, infile, outfile, hide_progress, **kwargs): """ - Look up many lists given a file of IDs or URLs. + Look up the details of many lists given a file of IDs or URLs. """ kwargs = _process_expansions_shortcuts(kwargs)