Skip to content

Commit

Permalink
correct load unfinished atlas expansion accounting for key errors wit…
Browse files Browse the repository at this point in the history
…h atlas history
  • Loading branch information
Nathaniel Imel authored and Nathaniel Imel committed Dec 30, 2023
1 parent afe99ef commit bc72e06
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 3 deletions.
5 changes: 5 additions & 0 deletions src/examples/scratch/outputs/default_atlas_dir/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# README

This atlas dir is a good place to develop. A command one can use is:

python main.py --bibtex_fp data/Imeletal2022a.bib --atlas_dir outputs/default_atlas_dir --target_size 100 --max_pubs_per_expand 20
4 changes: 3 additions & 1 deletion src/sciterra/mapping/atlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(
publications: list[Publication],
projection: Projection = None,
bad_ids: set[str] = set(),
history: dict[str, Any] = dict(),
history: dict[str, Any] = None,
center: Publication = None,
) -> None:
if not isinstance(publications, list):
Expand Down Expand Up @@ -104,6 +104,8 @@ def save(
fp = os.path.join(atlas_dirpath, fn)
if os.path.isfile(fp):
warnings.warn(f"Overwriting existing file at {fp}.")
else:
warnings.warn(f"Writing to {fp}.")
write_pickle(fp, attributes[attribute])
else:
warnings.warn(f"No {attribute} to save, skipping.")
Expand Down
10 changes: 8 additions & 2 deletions src/sciterra/mapping/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,14 @@ def iterate_expand(

# Expansion loop
failures = 0
its = 0
# Count previous iterations from loaded atlas as part of total
its = len(atl.history['pubs_per_update']) if atl.history is not None else 0
while not converged:
its += 1
len_prev = len(atl)

print(f"\nExpansion {its}\n-------------------------------")

# Retrieve up to n_pubs_max citations and references.
atl = crt.expand(
atl,
Expand Down Expand Up @@ -153,7 +156,9 @@ def __init__(
f"Loaded atlas has {len(atl)} publications and {len(atl.projection) if atl.projection is not None else 'None'} embeddings.\n"
)
# Crucial step: align the history of crt with atl
self.cartographer.pubs_per_update = atl.history["pubs_per_update"]
if atl.history is not None:
self.cartographer.pubs_per_update = atl.history["pubs_per_update"]
print(f"Loaded atlas at expansion iteration {len(atl.history['pubs_per_update'])}.")
else:
print(f"Initializing atlas.")

Expand All @@ -162,6 +167,7 @@ def __init__(

# Get center from file
atl_center = self.cartographer.bibtex_to_atlas(bibtex_fp)
atl_center = self.cartographer.project(atl_center)

num_entries = len(atl_center.publications.values())
if num_entries > 1:
Expand Down
3 changes: 3 additions & 0 deletions src/tests/test_cartography.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def test_dummy_projection(self):
"identifier": f"id_{i}",
"abstract": "blah blah blah",
"publication_date": datetime(2023, 1, 1),
"fields_of_study": ["dummy_field"],
}
)
for i in range(10)
Expand All @@ -180,6 +181,7 @@ def test_dummy_projection_partial(self):
"identifier": f"id_{0}",
"abstract": "We use cosmological hydrodynamic simulations with stellar feedback from the FIRE (Feedback In Realistic Environments) project to study the physical nature of Lyman limit systems (LLSs) at z ≤ 1.", # everything here should be in the Word2Vec default vocab, since it trains on this abstract.
"publication_date": datetime(2023, 1, 1),
"fields_of_study": ["dummy_field"],
}
),
Publication(
Expand All @@ -194,6 +196,7 @@ def test_dummy_projection_partial(self):
"identifier": f"id_{2}",
"abstract": "We use cosmological hydrodynamic simulations with stellar feedback from the FIRE (Feedback In Realistic Environments) project to study the physical nature of Lyman limit systems (LLSs) at z ≤ 1.",
"publication_date": datetime(2023, 1, 1),
"fields_of_study": ["dummy_field"],
}
),
]
Expand Down

0 comments on commit bc72e06

Please sign in to comment.