Skip to content

Commit

Permalink
feat: Remove empty spider directories
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Aug 19, 2024
1 parent 7574e10 commit fb6a588
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions data_registry/process_manager/task/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,13 @@ def wipe(self):
  # 2001-02-03 04:05:06 Kingfisher Collect's Kingfisher Process API extension
  # 2001-02-03T04:05:06 Kingfisher Process
  data_version = data_version.translate(str.maketrans(" T", "__", "-:"))
- path = f"{settings.KINGFISHER_COLLECT_FILES_STORE}/{self.spider}/{data_version}"
- if os.path.exists(path):
+ spider_path = os.path.join(settings.KINGFISHER_COLLECT_FILES_STORE, self.spider)
+ crawl_path = os.path.join(spider_path, data_version)
+ if os.path.exists(crawl_path):
      try:
-         shutil.rmtree(path)
+         shutil.rmtree(crawl_path)
+         with os.scandir(spider_path) as it:
+             if not any(it):
+                 os.rmdir(spider_path)
      except OSError:
-         raise RecoverableException(f"Unable to wipe the Scrapyd job {scrapyd_job_id} at {path}")
+         raise RecoverableException(f"Unable to wipe the Scrapyd job {scrapyd_job_id} at {crawl_path}")

0 comments on commit fb6a588

Please sign in to comment.