From 58a8868e5a82888c6ba1a9e9f75442aad891ef05 Mon Sep 17 00:00:00 2001 From: Christoph Thiede Date: Wed, 22 Dec 2021 14:33:59 +0100 Subject: [PATCH] Try to make twitter tasks more robust by raising retry_count twint fails sporadically with a RefreshTokenException on our VM because Twitter blocks IPs that issue too many requests. See: https://github.com/twintproject/twint/issues/957. --- luigi.cfg | 2 ++ src/extended_twitter_collection/collect_tweets.py | 5 +++++ src/twitter.py | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/luigi.cfg b/luigi.cfg index 74efe2fb..b497b477 100644 --- a/luigi.cfg +++ b/luigi.cfg @@ -25,6 +25,8 @@ password=${SMTP_PASSWORD} [worker] timeout=600 # (600 seconds = 10 minutes) +keep-alive=True +# Required for using per-task retry policy (retry_count) [core] log_level=INFO diff --git a/src/extended_twitter_collection/collect_tweets.py b/src/extended_twitter_collection/collect_tweets.py index 7505d508..e1caad60 100644 --- a/src/extended_twitter_collection/collect_tweets.py +++ b/src/extended_twitter_collection/collect_tweets.py @@ -116,6 +116,11 @@ class TwitterCollectCandidateTweets(DataPreparationTask): # for a given keyword-interval collection_r_limit = luigi.IntParameter(default=50) + # twint fails sporadically with RefreshTokenException on our VM as Twitter + # is blocking too many accesses from certain IPs. + # See https://github.com/twintproject/twint/issues/957. + retry_count = 3 + def requires(self): return KeywordIntervalsToDB() diff --git a/src/twitter.py b/src/twitter.py index 032b675f..919ab0fa 100644 --- a/src/twitter.py +++ b/src/twitter.py @@ -119,6 +119,11 @@ class FetchTwitter(DataPreparationTask): default=dt.timedelta(weeks=2), description="For how many days tweets should be fetched") + # twint fails sporadically with RefreshTokenException on our VM as Twitter + # is blocking too many accesses from certain IPs. + # See https://github.com/twintproject/twint/issues/957. 
+ retry_count = 3 + def output(self): return luigi.LocalTarget( f'{self.output_dir}/twitter/raw_tweets.csv',