diff --git a/.gitignore b/.gitignore index b5b13f8..e4ae4d9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,9 @@ *.py[cod] *$py.class +# no DS_Store! +**/.DS_Store + # C extensions *.so diff --git a/src/examples/.DS_Store b/src/examples/.DS_Store deleted file mode 100644 index 29e7c00..0000000 Binary files a/src/examples/.DS_Store and /dev/null differ diff --git a/src/examples/quickstart.ipynb b/src/examples/quickstart.ipynb index 3c1d35a..6b8ff9b 100644 --- a/src/examples/quickstart.ipynb +++ b/src/examples/quickstart.ipynb @@ -14,16 +14,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -46,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -60,8 +53,44 @@ "name": "stderr", "output_type": "stream", "text": [ - "progress using call_size=10: 100%|██████████| 1/1 [00:02<00:00, 2.83s/it]\n", - "100%|██████████| 1/1 [00:00<00:00, 24244.53it/s]\n" + "progress using call_size=10: 100%|██████████| 1/1 [00:02<00:00, 2.77s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:00<00:00, 20867.18it/s]\n" ] } ], @@ -72,14 +101,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "embedding documents: 64it [00:00, 251.67it/s] \n" + "embedding documents: 64it [00:00, 238.48it/s] \n" ] }, { @@ -88,7 +117,7 @@ "(array([0]), array([], dtype=int64))" ] }, - "execution_count": 3, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -110,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -126,7 +155,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "embedding documents: 64it [00:00, 232.51it/s] \n" + "embedding documents: 64it [00:00, 153.23it/s] \n" ] }, { @@ -141,21 +170,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "progress using call_size=10: 86%|████████▌ | 250/291 [02:41<00:41, 1.02s/it]" + "progress using call_size=10: 17%|█▋ | 50/291 [00:42<04:30, 1.12s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Had to call .get_papers at 0x284c6c7c0> 2 times to get a response.\n" + "Had to call .get_papers at 0x2b6e88540> 2 times to get a response.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "progress using call_size=10: 100%|██████████| 291/291 [03:04<00:00, 1.57it/s]\n" + "progress using call_size=10: 100%|██████████| 291/291 [02:56<00:00, 1.64it/s]\n" ] }, { @@ -192,13 +221,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 291/291 [00:00<00:00, 2941066.18it/s]\n", - "Recursively creating atlas data directory at atlas.\n", - "Writing to atlas/publications.pkl.\n", - "Writing to atlas/projection.pkl.\n", - "Writing to atlas/bad_ids.pkl.\n", + "100%|██████████| 291/291 [00:00<00:00, 2506247.36it/s]\n", + "Overwriting existing file at atlas/publications.pkl.\n", + "Overwriting existing file at atlas/projection.pkl.\n", + "Overwriting existing file at atlas/bad_ids.pkl.\n", "No history to save, skipping.\n", - "Writing to atlas/center.pkl.\n", + "Overwriting existing file at atlas/center.pkl.\n", "18 publications were filtered due to missing crucial data or incorrect field of study. There are now 18 total ids that will be excluded in the future.\n", "Found 273 publications not contained in Atlas projection.\n", "Embedding 273 total documents.\n" @@ -215,11 +243,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "embedding documents: 320it [01:27, 3.67it/s] \n", - "Overwriting existing file at atlas/publications.pkl.\n", - "Overwriting existing file at atlas/projection.pkl.\n", - "Overwriting existing file at atlas/bad_ids.pkl.\n", - "Overwriting existing file at atlas/center.pkl.\n" + "embedding documents: 320it [01:34, 3.38it/s] \n" ] }, { @@ -236,108 +260,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:00<00:00, 312.70it/s]\n", - "calculating converged kernel size: 100%|██████████| 274/274 [00:00<00:00, 34918.55it/s]\n", - "Overwriting existing file at atlas/publications.pkl.\n", - "Overwriting existing file at atlas/projection.pkl.\n", - "Overwriting existing file at atlas/bad_ids.pkl.\n", - "Writing to atlas/history.pkl.\n", - "Overwriting existing file at atlas/center.pkl.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Expansion 2\n", - "-------------------------------\n", - "Expansion will include 500 new publications.\n", - "Querying Semantic Scholar for 500 total papers.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 100%|██████████| 500/500 [02:50<00:00, 2.94it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 500/500 [00:00<00:00, 3052623.00it/s]\n", - "Overwriting existing file at atlas/publications.pkl.\n", - "Overwriting existing file at atlas/projection.pkl.\n", - "Overwriting existing file at atlas/bad_ids.pkl.\n", - "No history to save, skipping.\n", - "Overwriting existing file at atlas/center.pkl.\n", - "222 publications were filtered due to missing crucial data or incorrect field of study. There are now 240 total ids that will be excluded in the future.\n", - "Found 278 publications not contained in Atlas projection.\n", - "Embedding 278 total documents.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Atlas has 774 publications and 274 embeddings.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "embedding documents: 320it [01:28, 3.60it/s] \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Atlas has 552 publications and 552 embeddings.\n", - "Tracking atlas...\n", - "Calculating degree of convergence for all publications.\n", - "computing cosine similarity for 552 embeddings with batch size 552.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:00<00:00, 158.11it/s]\n", - "calculating converged kernel size: 100%|██████████| 552/552 [00:00<00:00, 19278.38it/s]\n", + "100%|██████████| 1/1 [00:00<00:00, 274.87it/s]\n", + "calculating converged kernel size: 100%|██████████| 274/274 [00:00<00:00, 35255.98it/s]\n", "Overwriting existing file at atlas/publications.pkl.\n", "Overwriting existing file at atlas/projection.pkl.\n", "Overwriting existing file at atlas/bad_ids.pkl.\n", @@ -350,271 +274,17 @@ "output_type": "stream", "text": [ "\n", - "Expansion 3\n", - "-------------------------------\n", - "Expansion will include 500 new publications.\n", - "Querying Semantic Scholar for 500 total papers.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 88%|████████▊ | 440/500 [04:38<00:53, 1.12it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x284c6e0c0> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 100%|██████████| 500/500 [05:09<00:00, 1.61it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 500/500 [00:00<00:00, 4032984.62it/s]\n", - "Overwriting existing file at atlas/publications.pkl.\n", - "Overwriting existing file at atlas/projection.pkl.\n", - "Overwriting existing file at atlas/bad_ids.pkl.\n", - "No history to save, skipping.\n", - "Overwriting existing file at atlas/center.pkl.\n", - "139 publications were filtered due to missing crucial data or incorrect field of study. There are now 379 total ids that will be excluded in the future.\n", - "Found 361 publications not contained in Atlas projection.\n", - "Embedding 361 total documents.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Atlas has 1052 publications and 552 embeddings.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "embedding documents: 384it [01:51, 3.44it/s] \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Atlas has 913 publications and 913 embeddings.\n", - "Tracking atlas...\n", - "Calculating degree of convergence for all publications.\n", - "computing cosine similarity for 913 embeddings with batch size 913.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:00<00:00, 83.34it/s]\n", - "calculating converged kernel size: 100%|██████████| 913/913 [00:00<00:00, 11717.44it/s]\n", - "Overwriting existing file at atlas/publications.pkl.\n", - "Overwriting existing file at atlas/projection.pkl.\n", - "Overwriting existing file at atlas/bad_ids.pkl.\n", - "Overwriting existing file at atlas/history.pkl.\n", - "Overwriting existing file at atlas/center.pkl.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Expansion 4\n", + "Expansion 2\n", "-------------------------------\n", - "Expansion will include 500 new publications.\n", - "Querying Semantic Scholar for 500 total papers.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 6%|▌ | 30/500 [00:37<11:23, 1.46s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x2b6e8b740> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 12%|█▏ | 60/500 [01:15<10:38, 1.45s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x2b6e8a020> 2 times to get a response.\n" + "Expansion will include 4000 new publications.\n", + "Querying Semantic Scholar for 4000 total papers.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "progress using call_size=10: 14%|█▍ | 70/500 [01:35<11:50, 1.65s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x2848f84a0> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 18%|█▊ | 90/500 [02:04<11:00, 1.61s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x284c6c7c0> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 40%|████ | 200/500 [03:34<05:25, 1.09s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x284c6d800> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 58%|█████▊ | 290/500 [04:53<04:03, 1.16s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x28528d440> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 82%|████████▏ | 410/500 [06:37<02:12, 1.47s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x28528e520> 3 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 94%|█████████▍| 470/500 [07:32<00:37, 1.25s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x28528e840> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 96%|█████████▌| 480/500 [07:52<00:29, 1.47s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x28528e980> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "progress using call_size=10: 100%|██████████| 500/500 [08:15<00:00, 1.01it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Had to call .get_papers at 0x28528ea20> 2 times to get a response.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" + "progress using call_size=10: 100%|██████████| 4000/4000 [22:14<00:00, 3.00it/s]\n" ] }, { @@ -651,47 +321,47 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 500/500 [00:00<00:00, 3355443.20it/s]\n", + "100%|██████████| 4000/4000 [00:00<00:00, 4571448.50it/s]\n", "Overwriting existing file at atlas/publications.pkl.\n", "Overwriting existing file at atlas/projection.pkl.\n", "Overwriting existing file at atlas/bad_ids.pkl.\n", "No history to save, skipping.\n", "Overwriting existing file at atlas/center.pkl.\n", - "167 publications were filtered due to missing crucial data or incorrect field of study. There are now 546 total ids that will be excluded in the future.\n", - "Found 333 publications not contained in Atlas projection.\n", - "Embedding 333 total documents.\n" + "1921 publications were filtered due to missing crucial data or incorrect field of study. There are now 1939 total ids that will be excluded in the future.\n", + "Found 2079 publications not contained in Atlas projection.\n", + "Embedding 2079 total documents.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Atlas has 1413 publications and 913 embeddings.\n" + "Atlas has 4274 publications and 274 embeddings.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "embedding documents: 384it [01:34, 4.06it/s] \n" + "embedding documents: 2112it [11:21, 3.10it/s] \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Atlas has 1246 publications and 1246 embeddings.\n", + "Atlas has 2353 publications and 2353 embeddings.\n", "Tracking atlas...\n", "Calculating degree of convergence for all publications.\n", - "computing cosine similarity for 1246 embeddings with batch size 1000.\n" + "computing cosine similarity for 2353 embeddings with batch size 1000.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 2/2 [00:00<00:00, 55.78it/s]\n", - "calculating converged kernel size: 100%|██████████| 1246/1246 [00:00<00:00, 8390.90it/s]\n", + "100%|██████████| 3/3 [00:00<00:00, 37.68it/s]\n", + "calculating converged kernel size: 100%|██████████| 2353/2353 [00:00<00:00, 4612.18it/s]\n", "Overwriting existing file at atlas/publications.pkl.\n", "Overwriting existing file at atlas/projection.pkl.\n", "Overwriting existing file at atlas/bad_ids.pkl.\n", @@ -706,44 +376,35 @@ "Exiting loop.\n", "Tracking atlas...\n", "Calculating degree of convergence for all publications.\n", - "computing cosine similarity for 1246 embeddings with batch size 1000.\n" + "computing cosine similarity for 2353 embeddings with batch size 1000.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 2/2 [00:00<00:00, 111.06it/s]\n", - "calculating converged kernel size: 100%|██████████| 1246/1246 [00:00<00:00, 8386.33it/s]\n", - "Overwriting existing file at atlas/publications.pkl.\n" + "100%|██████████| 3/3 [00:00<00:00, 53.57it/s]\n", + "calculating converged kernel size: 100%|██████████| 2353/2353 [00:00<00:00, 4600.66it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Expansion loop exited with atlas size 1246 after 4 iterations meeting criteria: {'target_size': True, 'max_failed_expansions': False, 'convergence_func': False}.\n" + "Expansion loop exited with atlas size 2353 after 2 iterations meeting criteria: {'target_size': True, 'max_failed_expansions': False, 'convergence_func': False}.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ + "\n", + "Overwriting existing file at atlas/publications.pkl.\n", "Overwriting existing file at atlas/projection.pkl.\n", "Overwriting existing file at atlas/bad_ids.pkl.\n", "Overwriting existing file at atlas/history.pkl.\n", "Overwriting existing file at atlas/center.pkl.\n" ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -763,21 +424,33 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['04da6471743468b6bb1d26dd9a6eac4c03ca73ee']" + "['The Astrophysics Source Code Library (ASCL), founded in 1999, is a free on-line registry for source codes of interest to astronomers and astrophysicists. The library is housed on the discussion forum for Astronomy Picture of the Day (APOD) and can be accessed at this http URL The ASCL has a comprehensive listing that covers a significant number of the astrophysics source codes used to generate results published in or submitted to refereed journals and continues to grow. The ASCL currently has entries for over 500 codes; its records are citable and are indexed by ADS. The editors of the ASCL and members of its Advisory Committee were on hand at a demonstration table in the ADASS poster room to present the ASCL, accept code submissions, show how the ASCL is starting to be used by the astrophysics community, and take questions on and suggestions for improving the resource.',\n", + " 'Observations with the Hubble Space Telescope show that halos of ionized gas are common around star-forming galaxies. The circumgalactic medium (CGM) is fed by galaxy outflows and accretion of intergalactic gas, but its mass, heavy element enrichment, and relation to galaxy properties are poorly constrained by observations. In a survey of the outskirts of 42 galaxies with the Cosmic Origins Spectrograph onboard the Hubble Space Telescope, we detected ubiquitous, large (150-kiloparsec) halos of ionized oxygen surrounding star-forming galaxies; we found much less ionized oxygen around galaxies with little or no star formation. This ionized CGM contains a substantial mass of heavy elements and gas, perhaps far exceeding the reservoirs of gas in the galaxies themselves. Our data indicate that it is a basic component of nearly all star-forming galaxies that is removed or transformed during the quenching of star formation and the transition to passive evolution.',\n", + " 'We present an analysis of the metallicity distribution of the dense circumgalactic medium (CGM) of galaxies at 0.1 ≲ z ≲ 1.1 as probed by partial Lyman limit systems (pLLSs, 16.1 < log < 17.2) and LLSs (17.2 ≤ log < 17.7 in our sample). The new H i-selected sample, drawn from our Hubble Space Telescope COS G140L snapshot survey of 61 QSOs, has 20 pLLSs and 10 LLSs. Combined with our previous survey, we have a total of 44 pLLSs and 11 LLSs. We find that the metallicity distribution of the pLLSs is bimodal at z ≲ 1, with a minimum at [X/H] = −1. The low-metallicity peak comprises (57 ± 8)% of the pLLSs and is centered at [X/H] ≃ −1.87(1.3% solar metallicity), while the high-metallicity peak is centered at [X/H] ≃ −0.32 (48% solar metallicity). Although the sample of LLSs is still small, there is some evidence that the metallicity distributions of the LLSs and pLLSs are different, with a far lower fraction of very metal-poor ([X/H] < −1.4) LLSs than pLLSs. The fraction of LLSs with [X/H] < −1 is similar to that found in pLLSs (∼56%). However, higher H i column density absorbers (log > 19.0) show a much lower fraction of metal-poor gas; therefore, the metallicity distribution of gas in and around galaxies depends sensitively on NH i at z ≲ 1. We interpret the high-metallicity ([X/H] ≥ −1) pLLSs and LLSs as arising in outflows, recycling winds, and tidally stripped gas around galaxies. The low-metallicity pLLSs and LLSs imply that the CGM of z ≲ 1 galaxies hosts a substantial mass of cool, dense, low-metallicity gas that may ultimately accrete onto the galaxies.',\n", + " 'The Feedback In Realistic Environments (FIRE) project explores feedback in cosmological galaxy formation simulations. Previous FIRE simulations used an identical source code (“FIRE-1”) for consistency. Motivated by the development of more accurate numerics – including hydrodynamic solvers, gravitational softening, and supernova coupling algorithms – and exploration of new physics (e.g. magnetic fields), we introduce “FIRE-2”, an updated numerical implementation of FIRE physics for the GIZMO code. We run a suite of simulations and compare against FIRE-1: overall, FIRE-2 improvements do not qualitatively change galaxy-scale properties. We pursue an extensive study of numerics versus physics. Details of the star-formation algorithm, cooling physics, and chemistry have weak effects, provided that we include metal-line cooling and star formation occurs at higher-than-mean densities. We present new resolution criteria for high-resolution galaxy simulations. Most galaxy-scale properties are robust to numerics we test, provided: (1) Toomre masses are resolved; (2) feedback coupling ensures conservation, and (3) individual supernovae are time-resolved. Stellar masses and profiles are most robust to resolution, followed by metal abundances and morphologies, followed by properties of winds and circum-galactic media (CGM). Central (∼kpc) mass concentrations in massive (>L*) galaxies are sensitive to numerics (via trapping/recycling of winds in hot halos). Multiple feedback mechanisms play key roles: supernovae regulate stellar masses/winds; stellar mass-loss fuels late star formation; radiative feedback suppresses accretion onto dwarfs and instantaneous star formation in disks. We provide all initial conditions and numerical algorithms used.',\n", + " '\\n We present a comparison of the physical properties of the ionized gas in the circumgalactic medium and intergalactic medium (IGM) at z ∼ 0 between observations and four cosmological hydrodynamical simulations: Illustris, TNG300 of the IllustrisTNG project, EAGLE, and one of the Magneticum simulations. For the observational data, we use the gas properties that are inferred from cross-correlating the Sunyaev–Zel’dovich effect (SZE) from the Planck CMB maps with haloes and large-scale structure. Both the observational and simulation results indicate that the integrated gas pressure in haloes deviates from the self-similar case, showing that feedback impacts haloes with $M_{500}\\\\sim 10^{12\\\\!-\\\\!13}\\\\, {\\\\rm M_\\\\odot }$. The simulations predict that more than half the baryons are displaced from haloes, while the gas fraction inferred from our observational data roughly equals the cosmic baryon fraction throughout the $M_{500}\\\\sim 10^{12\\\\!-\\\\!14.5}\\\\, {\\\\rm M_\\\\odot }$ halo mass range. All simulations tested here predict that the mean gas temperature in haloes is about the virial temperature, while that inferred from the SZE is up to one order of magnitude lower than that from the simulations (and also from X-ray observations). While a remarkable agreement is found for the average properties of the IGM between the observation and some simulations, we show that their dependence on the large-scale tidal field can break the degeneracy between models that show similar predictions otherwise. Finally, we show that the gas pressure and the electron density profiles from simulations are not well described by a generalized NFW profile. Instead, we present a new model with a mass-dependent shape that fits the profiles accurately.',\n", + " 'Theoretical models of galaxy formation predict that galaxies acquire most of their baryons via cold mode accretion. Observations of high-redshift galaxies, while showing ubiquitous outflows, have so far not revealed convincing traces of the predicted cold streams, which has been interpreted as a challenge for the current models. Using high-resolution, zoom-in smooth particle hydrodynamics simulations of Lyman break galaxy (LBG) haloes combined with ionizing radiative transfer, we quantify the covering factor of the cold streams at z= 2–4. We focus specifically on Lyman limit systems (LLSs) and damped Lyα absorbers (DLAs), which can be probed by absorption spectroscopy using a background galaxy or quasar sightline, and which are closely related to low-ionization metal absorbers. We show that the covering factor of these systems is relatively small and decreases with time. At z= 2, the covering factor of DLAs within the virial radius of the simulated galaxies is ∼3\\xa0per\\xa0cent (∼1\\xa0per\\xa0cent within twice this projected distance), and arises principally from the galaxy itself. The corresponding values for LLSs are ∼10 and 4 per cent. Because of their small covering factor compared to the order unity covering fraction expected for galactic winds, the cold streams are naturally dominated by outflows in stacked spectra. We conclude that the existing observations are consistent with the predictions of cold mode accretion, and outline promising kinematic and chemical diagnostics to separate out the signatures of galactic accretion and feedback.',\n", + " 'We study the hot and cold circum-galactic medium (CGM) of 86 galaxies of the cosmological, hydrodynamical simulation suite, Numerical Investigation of a Hundred Astrophysical Objects (NIHAO). NIHAO allows a study of how the z = 0 CGM varies across five orders of magnitude of stellar mass using O VI and HI as proxies for hot and cold gas. The cool HI covering fraction and column density profiles match observations well, particularly in the inner CGM. O VI shows increasing column densities with mass, a trend seemingly echoed in the observations. As in multiple previous simulations, the O VI column densities in simulations are lower than observed and optically thick HI does not extend as far out as in observations. We take a look at the collisional ionization fraction of O VI as a function of halo mass. We make observable predictions of the bipolarity of outflows and their effect on the general shape of the CGM. Bipolar outflows can be seen out to around 40 kpc in intermediate-and low-mass haloes (M-Halo',\n", + " 'The cosmic star formation rate is observed to drop sharply after redshift z=2. We use a large, cosmological, smoothed particle hydrodynamics simulation to investigate how this decline is related to the evolution of gas accretion and to outflows driven by active galactic nuclei (AGN). We find that the drop in the star formation rate follows a corresponding decline in the global cold-mode accretion rate density onto haloes, but with a delay of order the gas consumption time scale in the interstellar medium. Here we define cold-mode (hot-mode) accretion as gas that is accreted and whose temperature has never exceeded (did exceed) 10^5.5 K. In contrast to cold-mode accretion, which peaks at z~3, the hot mode continues to increase to z~1 and remains roughly constant thereafter. By the present time, the hot mode strongly dominates the global accretion rate onto haloes. Star formation does not track hot-mode halo accretion because most of the hot halo gas never accretes onto galaxies. AGN feedback plays a crucial role by preferentially preventing gas that entered haloes in the hot mode from accreting onto their central galaxies. Consequently, in the absence of AGN feedback, gas accreted in the hot mode would become the dominant source of fuel for star formation and the drop off in the cosmic star formation rate would be much less steep.',\n", + " 'We simulate the formation of a moderately rich cluster in a cold dark matter universe using a technique which allows us to follow both the dark matter and the baryonic gas. Gas is able to shock-heat, and to cool by Compton scattering and collisional radiation. Our initial condition is taken from a large dissipationless simulation carried out by White et al. (1987), and we are able to follow the evolution of the cluster in its proper cosmological context by using a multimass grid technique to represent the gravitational field of external matter. We take 9% of the mass to be in the form of gas. Just over one-third of this gas cools to form dense compact lumps as the cluster evolves',\n", + " '\\n We use a particle tracking analysis to study the origins of the circumgalactic medium (CGM), separating it into (1) accretion from the intergalactic medium (IGM), (2) wind from the central galaxy, and (3) gas ejected from other galaxies. Our sample consists of 21 FIRE-2 simulations, spanning the halo mass range Mh ∼ 1010–1012 M⊙, and we focus on z\\xa0= 0.25 and z\\xa0= 2. Owing to strong stellar feedback, only ∼L⋆ haloes retain a baryon mass $\\\\gtrsim\\\\! 50\\\\hbox{ per cent}$ of their cosmic budget. Metals are more efficiently retained by haloes, with a retention fraction $\\\\gtrsim\\\\! 50\\\\hbox{ per cent}$. Across all masses and redshifts analysed $\\\\gtrsim \\\\!60\\\\hbox{ per cent}$ of the CGM mass originates as IGM accretion (some of which is associated with infalling haloes). Overall, the second most important contribution is wind from the central galaxy, though gas ejected or stripped from satellites can contribute a comparable mass in ∼L⋆ haloes. Gas can persist in the CGM for billions of years, resulting in well mixed-halo gas. Sightlines through the CGM are therefore likely to intersect gas of multiple origins. For low-redshift ∼L⋆ haloes, cool gas (T < 104.7 K) is distributed on average preferentially along the galaxy plane, however with strong halo-to-halo variability. The metallicity of IGM accretion is systematically lower than the metallicity of winds (typically by ≳1 dex), although CGM and IGM metallicities depend significantly on the treatment of subgrid metal diffusion. Our results highlight the multiple physical mechanisms that contribute to the CGM and will inform observational efforts to develop a cohesive picture.']" ] }, - "execution_count": 6, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "# Take a look at 10 publications\n", + "[atl[id].abstract for id in atl.ids[-10:]]" + ] } ], "metadata": {