Skip to content

Commit

Permalink
Merge branch 'retry-twitter' into 'master'
Browse files Browse the repository at this point in the history
Try to make the Twitter tasks more robust by raising `retry_count`

See merge request Museum-Barberini/Barberini-Analytics!429
  • Loading branch information
LinqLover committed Jan 2, 2022
2 parents 42b5848 + 08a1569 commit 20565a8
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 0 deletions.
2 changes: 2 additions & 0 deletions luigi.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ password=${SMTP_PASSWORD}
[worker]
timeout=600
# (600 seconds = 10 minutes)
keep-alive=True
# Required for using per-task retry policy (retry_count)

[core]
log_level=INFO
Expand Down
5 changes: 5 additions & 0 deletions src/extended_twitter_collection/collect_tweets.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ class TwitterCollectCandidateTweets(DataPreparationTask):
# for a given keyword-interval
collection_r_limit = luigi.IntParameter(default=50)

# twint fails sporadically with RefreshTokenException on our VM because
# Twitter blocks certain IPs that issue too many requests.
# See https://github.com/twintproject/twint/issues/957.
retry_count = 3

def requires(self):
return KeywordIntervalsToDB()

Expand Down
5 changes: 5 additions & 0 deletions src/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ class FetchTwitter(DataPreparationTask):
default=dt.timedelta(weeks=2),
description="For how many days tweets should be fetched")

# twint fails sporadically with RefreshTokenException on our VM because
# Twitter blocks certain IPs that issue too many requests.
# See https://github.com/twintproject/twint/issues/957.
retry_count = 3

def output(self):
return luigi.LocalTarget(
f'{self.output_dir}/twitter/raw_tweets.csv',
Expand Down

0 comments on commit 20565a8

Please sign in to comment.