Skip to content

Commit

Permalink
run precommit hooks
Browse files — browse the repository at this point in the history
  • Loading branch information
alon-albalak committed May 31, 2024
1 parent 796b6cc commit 9e835ad
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions pubmedcentral/to-dolma.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import os

+from licensed_pile import logs
from licensed_pile.licenses import PermissiveLicenses
from licensed_pile.write import to_dolma

Expand Down Expand Up @@ -50,8 +51,12 @@ def format_dolma(
):
file, journal, accessionID, _, lic = file.split("\t")
file = os.path.basename(file).replace("tar.gz", "md")
-with open(os.path.join(data_dir, file)) as f:
-text = f.read()
+try:
+with open(os.path.join(data_dir, file)) as f:
+text = f.read()
+except FileNotFoundError:
+logger = logs.get_logger("pubmedcentral")
+logger.error(f"File {os.path.join(data_dir, file)} does not exist")

with open(
os.path.join(args.metadata_dir, f"{os.path.splitext(file)[0]}.json"),
Expand Down

0 comments on commit 9e835ad

Please sign in to comment.