From 9e835ad75546fc3948100d7c22363b9fbe545127 Mon Sep 17 00:00:00 2001 From: Alon Albalak Date: Fri, 31 May 2024 09:08:19 -0700 Subject: [PATCH] run precommit hooks --- pubmedcentral/to-dolma.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pubmedcentral/to-dolma.py b/pubmedcentral/to-dolma.py index 9c1b17c..e0019a9 100644 --- a/pubmedcentral/to-dolma.py +++ b/pubmedcentral/to-dolma.py @@ -4,6 +4,7 @@ import json import os +from licensed_pile import logs from licensed_pile.licenses import PermissiveLicenses from licensed_pile.write import to_dolma @@ -50,8 +51,12 @@ def format_dolma( ): file, journal, accessionID, _, lic = file.split("\t") file = os.path.basename(file).replace("tar.gz", "md") - with open(os.path.join(data_dir, file)) as f: - text = f.read() + try: + with open(os.path.join(data_dir, file)) as f: + text = f.read() + except FileNotFoundError: + logger = logs.get_logger("pubmedcentral") + logger.error(f"File {os.path.join(data_dir, file)} does not exist") with open( os.path.join(args.metadata_dir, f"{os.path.splitext(file)[0]}.json"),