From d76ea3dc443408be22c97114a98eb16122c7401e Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 23 Jul 2024 20:23:17 +0000 Subject: [PATCH 01/11] First pass at having references in separate directory --- astrodbkit2/astrodb.py | 17 +++++++++++++---- astrodbkit2/tests/test_astrodb.py | 11 ++++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/astrodbkit2/astrodb.py b/astrodbkit2/astrodb.py index c705958..0a4f438 100644 --- a/astrodbkit2/astrodb.py +++ b/astrodbkit2/astrodb.py @@ -736,8 +736,9 @@ def save_json(self, name, directory): with open(os.path.join(directory, filename), "w", encoding="utf-8") as f: f.write(json.dumps(data, indent=4, default=json_serializer)) - def save_reference_table(self, table, directory): + def save_reference_table(self, table: str, directory: str, reference_directory: str="reference"): """ + Save the reference table to disk Parameters ---------- @@ -745,16 +746,22 @@ def save_reference_table(self, table, directory): Name of reference table to output directory : str Name of directory in which to save the output JSON + reference_directory : str + Name of sub-directory to use for reference JSON files (eg, data/reference) """ + # Create directory if not already present + if not os.path.isdir(os.path.join(directory, reference_directory)): + os.makedirs(os.path.join(directory, reference_directory)) + results = self.session.query(self.metadata.tables[table]).all() data = [row._asdict() for row in results] filename = table + ".json" if len(data) > 0: - with open(os.path.join(directory, filename), "w", encoding="utf-8") as f: + with open(os.path.join(directory, reference_directory, filename), "w", encoding="utf-8") as f: f.write(json.dumps(data, indent=4, default=json_serializer)) - def save_database(self, directory, clear_first=True): + def save_database(self, directory: str, clear_first: bool=True, reference_directory: str="reference"): """ Output contents of the database into the specified directory as JSON files. Source objects have individual JSON files with all data for that object. @@ -766,6 +773,8 @@ def save_database(self, directory, clear_first=True): Name of directory in which to save the output JSON clear_first : bool First clear the directory of all existing JSON (useful to capture DB deletions). Default: True + reference_directory : str + Name of sub-directory to use for reference JSON files (eg, data/reference) """ # Clear existing files first from that directory @@ -780,7 +789,7 @@ def save_database(self, directory, clear_first=True): if table not in self.metadata.tables.keys(): continue - self.save_reference_table(table, directory) + self.save_reference_table(table, directory, reference_directory=reference_directory) # Output primary objects for row in tqdm(self.query(self.metadata.tables[self._primary_table])): diff --git a/astrodbkit2/tests/test_astrodb.py b/astrodbkit2/tests/test_astrodb.py index 40cf76a..7d8eeea 100644 --- a/astrodbkit2/tests/test_astrodb.py +++ b/astrodbkit2/tests/test_astrodb.py @@ -413,11 +413,12 @@ def test_views(db): def test_save_reference_table(db, db_dir): # Test saving a reference table - if os.path.exists(os.path.join(db_dir, 'Publications.json')): - os.remove(os.path.join(db_dir, 'Publications.json')) - db.save_reference_table('Publications', db_dir) - assert os.path.exists(os.path.join(db_dir, 'Publications.json')) - os.remove(os.path.join(db_dir, 'Publications.json')) # explicitly removing so that the next step will get verified + ref_dir = "reference" + if os.path.exists(os.path.join(db_dir, ref_dir, 'Publications.json')): + os.remove(os.path.join(db_dir, ref_dir, 'Publications.json')) + db.save_reference_table('Publications', db_dir, reference_directory=ref_dir) + assert os.path.exists(os.path.join(db_dir, ref_dir, 'Publications.json')) + os.remove(os.path.join(db_dir, ref_dir, 'Publications.json')) # explicitly removing so that the next step will get verified def test_save_database(db, db_dir): From 1ef0b63aa93b1e5c26d6d97250700f4de30e80f1 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 23 Jul 2024 20:32:16 +0000 Subject: [PATCH 02/11] Iterating on reference directory use --- astrodbkit2/astrodb.py | 10 ++++++++-- astrodbkit2/tests/test_astrodb.py | 15 +++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/astrodbkit2/astrodb.py b/astrodbkit2/astrodb.py index 0a4f438..6e64396 100644 --- a/astrodbkit2/astrodb.py +++ b/astrodbkit2/astrodb.py @@ -901,7 +901,7 @@ def load_json(self, filename): temp_dict[self._foreign_key] = source conn.execute(self.metadata.tables[key].insert().values(temp_dict)) - def load_database(self, directory, verbose=False): + def load_database(self, directory: str, verbose: bool=False, reference_directory: str="reference"): """ Reload entire database from a directory of JSON files. Note that this will first clear existing tables. @@ -912,6 +912,8 @@ def load_database(self, directory, verbose=False): Name of directory containing the JSON files verbose : bool Flag to enable diagnostic messages + reference_directory : str + Name of sub-directory to use for reference JSON files (eg, data/reference) """ # Clear existing database contents @@ -926,7 +928,11 @@ def load_database(self, directory, verbose=False): for table in self._reference_tables: if verbose: print(f"Loading {table} table") - self.load_table(table, directory, verbose=verbose) + # Check if the reference table is in the sub-directory + if os.path.exists(os.path.join(directory, reference_directory, table+".json")): + self.load_table(table, os.path.join(directory, reference_directory), verbose=verbose) + else: + self.load_table(table, directory, verbose=verbose) # Load object data if verbose: diff --git a/astrodbkit2/tests/test_astrodb.py b/astrodbkit2/tests/test_astrodb.py index 7d8eeea..0345b5e 100644 --- a/astrodbkit2/tests/test_astrodb.py +++ b/astrodbkit2/tests/test_astrodb.py @@ -3,6 +3,7 @@ import io import json import os +import shutil import pandas as pd import pytest @@ -425,15 +426,13 @@ def test_save_database(db, db_dir): # Test saving the database to JSON files # Clear temporary directory first - # if not os.path.exists(DB_DIR): - # os.mkdir(DB_DIR) - for file in os.listdir(db_dir): - os.remove(os.path.join(db_dir, file)) + shutil.rmtree(db_dir) # also removes folder, so have to recreate + os.mkdir(db_dir) db.save_database(db_dir) # Check JSON data - assert os.path.exists(os.path.join(db_dir, 'Publications.json')) + assert os.path.exists(os.path.join(db_dir, "reference", 'Publications.json')) assert os.path.exists(os.path.join(db_dir, '2mass_j13571237+1428398.json')) assert not os.path.exists(os.path.join(db_dir, '2mass_j13571237+1428398 2.json')) @@ -458,7 +457,7 @@ def test_load_database(db, db_dir): # Reload the database and check DB contents assert os.path.exists(db_dir) - assert os.path.exists(os.path.join(db_dir, 'Publications.json')) + assert os.path.exists(os.path.join(db_dir, "reference", 'Publications.json')) db.load_database(db_dir, verbose=True) assert db.query(db.Publications).count() == 2 assert db.query(db.Photometry).count() == 3 @@ -466,8 +465,8 @@ def test_load_database(db, db_dir): assert db.query(db.Sources.c.source).limit(1).all()[0][0] == '2MASS J13571237+1428398' # Clear temporary directory and files - for file in os.listdir(db_dir): - os.remove(os.path.join(db_dir, file)) + shutil.rmtree(db_dir) + os.mkdir(db_dir) def test_copy_database_schema(): From 23e3199f29df3d9194861f2fcd392f034a221d5c Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 23 Jul 2024 20:37:55 +0000 Subject: [PATCH 03/11] Using shutil to fully remove data directory and any sub-directories --- astrodbkit2/astrodb.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/astrodbkit2/astrodb.py b/astrodbkit2/astrodb.py index 6e64396..9613344 100644 --- a/astrodbkit2/astrodb.py +++ b/astrodbkit2/astrodb.py @@ -5,6 +5,7 @@ import json import os import sqlite3 +import shutil import numpy as np import pandas as pd @@ -780,8 +781,8 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct # Clear existing files first from that directory if clear_first: print("Clearing existing JSON files...") - for filename in os.listdir(directory): - os.remove(os.path.join(directory, filename)) + shutil.rmtree(directory) + os.mkdir(directory) # Output reference tables for table in self._reference_tables: From 2d05ef1565680832fe6fc7d9ff1c68ded76de5bd Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 23 Jul 2024 20:42:48 +0000 Subject: [PATCH 04/11] Safer use of shutil for reference directory --- astrodbkit2/astrodb.py | 9 +++++++-- astrodbkit2/tests/test_astrodb.py | 16 ++++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/astrodbkit2/astrodb.py b/astrodbkit2/astrodb.py index 9613344..1d9257f 100644 --- a/astrodbkit2/astrodb.py +++ b/astrodbkit2/astrodb.py @@ -781,8 +781,13 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct # Clear existing files first from that directory if clear_first: print("Clearing existing JSON files...") - shutil.rmtree(directory) - os.mkdir(directory) + for file in os.listdir(directory): + file_path = os.path.join(directory, file) + if os.path.isfile(file_path): + os.remove(file_path) + elif os.path.isdir(file_path): + # This is to handle the reference directory + shutil.rmtree(file_path) # Output reference tables for table in self._reference_tables: diff --git a/astrodbkit2/tests/test_astrodb.py b/astrodbkit2/tests/test_astrodb.py index 0345b5e..ad64ff8 100644 --- a/astrodbkit2/tests/test_astrodb.py +++ b/astrodbkit2/tests/test_astrodb.py @@ -426,8 +426,12 @@ def test_save_database(db, db_dir): # Test saving the database to JSON files # Clear temporary directory first - shutil.rmtree(db_dir) # also removes folder, so have to recreate - os.mkdir(db_dir) + for file in os.listdir(db_dir): + file_path = os.path.join(db_dir, file) + if os.path.isfile(file_path): + os.remove(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) db.save_database(db_dir) @@ -465,8 +469,12 @@ def test_load_database(db, db_dir): assert db.query(db.Sources.c.source).limit(1).all()[0][0] == '2MASS J13571237+1428398' # Clear temporary directory and files - shutil.rmtree(db_dir) - os.mkdir(db_dir) + for file in os.listdir(db_dir): + file_path = os.path.join(db_dir, file) + if os.path.isfile(file_path): + os.remove(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) def test_copy_database_schema(): From 3379822e33d1c3d6e6cd5c59ecde883b51823b2d Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 23 Jul 2024 23:51:32 +0000 Subject: [PATCH 05/11] Minor updates --- astrodbkit2/astrodb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/astrodbkit2/astrodb.py b/astrodbkit2/astrodb.py index 1d9257f..706aa23 100644 --- a/astrodbkit2/astrodb.py +++ b/astrodbkit2/astrodb.py @@ -790,6 +790,7 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct shutil.rmtree(file_path) # Output reference tables + print("Storing reference tables...") for table in self._reference_tables: # Skip reference tables that are not actually in the database if table not in self.metadata.tables.keys(): @@ -798,6 +799,7 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct self.save_reference_table(table, directory, reference_directory=reference_directory) # Output primary objects + print("Storing individual sources...") for row in tqdm(self.query(self.metadata.tables[self._primary_table])): self.save_json(row, directory) From 2705712b743017d223417e246080eea4753e52e9 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Wed, 24 Jul 2024 09:35:02 -0400 Subject: [PATCH 06/11] Updating documentation --- docs/index.rst | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 81037a3..7a46be0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -83,15 +83,18 @@ Loading the Database -------------------- **Astrodbkit2** contains methods to output the full contents of the database as a list of JSON files. -It can likewise read in a directory of these files to populate the database. -This is how SIMPLE is currently version controlled. To load a database of this form, do the following:: +It can likewise read in a directory of these files to populate the database. +By default, reference tables (eg, Publications, Telescopes, etc) are stored in a `reference` sub-directory. +This is how SIMPLE is currently version controlled. + +To load a database of this form, do the following:: from astrodbkit2.astrodb import Database connection_string = 'sqlite:///SIMPLE.db' # SQLite connection string db_dir = 'data' # directory where JSON files are located db = Database(connection_string) - db.load_database(db_dir) + db.load_database(directory=db_dir, reference_directory="reference") .. note:: Database contents are cleared when loading from JSON files to ensure that the database only contains sources from on-disk files. We describe later how to use the :py:meth:`~astrodbkit2.astrodb.Database.save_db` method @@ -406,8 +409,11 @@ Saving the Database =================== If users perform changes to a database, they will want to output this to disk to be version controlled. -**Astrodbkit2** provides methods to save an individual source or reference table as well as the entire data. -We recommend the later to output the entire contents to disk:: +**Astrodbkit2** provides methods to save an individual source or reference table as well as the entire data. +By default, reference tables are stored in a sub-directory called "reference"; this can be overwritten by +supplying a `reference_directory` variable into `save_database` or `save_reference_table`. + +We recommend using `save_database` as that outputs the entire database contents to disk:: # Save single object db.save_json('2MASS J13571237+1428398', 'data') @@ -416,7 +422,7 @@ We recommend the later to output the entire contents to disk:: db.save_reference_table('Publications', 'data') # Save entire database to directory 'data' - db.save_database('data') + db.save_database(directory='data') .. note:: To properly capture database deletes, the contents of the specified directory is first cleared before creating JSON files representing the current state of the database. From f5fe297fd27de215be53a84f907f8490e2de3126 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Fri, 2 Aug 2024 11:58:11 -0400 Subject: [PATCH 07/11] Saving source JSON files to source sub-directory --- astrodbkit2/astrodb.py | 34 +++++++++++++++++++++++-------- astrodbkit2/tests/test_astrodb.py | 5 +++-- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/astrodbkit2/astrodb.py b/astrodbkit2/astrodb.py index 706aa23..e9bf728 100644 --- a/astrodbkit2/astrodb.py +++ b/astrodbkit2/astrodb.py @@ -762,7 +762,7 @@ def save_reference_table(self, table: str, directory: str, reference_directory: with open(os.path.join(directory, reference_directory, filename), "w", encoding="utf-8") as f: f.write(json.dumps(data, indent=4, default=json_serializer)) - def save_database(self, directory: str, clear_first: bool=True, reference_directory: str="reference"): + def save_database(self, directory: str, clear_first: bool=True, reference_directory: str="reference", source_directory: str="source"): """ Output contents of the database into the specified directory as JSON files. Source objects have individual JSON files with all data for that object. @@ -771,11 +771,13 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct Parameters ---------- directory : str - Name of directory in which to save the output JSON + Name of top-level directory in which to save the output JSON clear_first : bool First clear the directory of all existing JSON (useful to capture DB deletions). Default: True reference_directory : str Name of sub-directory to use for reference JSON files (eg, data/reference) + source_directory : str + Name of sub-directory to use for source JSON files (eg, data/source) """ # Clear existing files first from that directory @@ -786,8 +788,14 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct if os.path.isfile(file_path): os.remove(file_path) elif os.path.isdir(file_path): - # This is to handle the reference directory + # This is to handle the reference and source directories shutil.rmtree(file_path) + + # Create sub-directories if not already present + if not os.path.isdir(os.path.join(directory, reference_directory)): + os.makedirs(os.path.join(directory, reference_directory)) + if not os.path.isdir(os.path.join(directory, source_directory)): + os.makedirs(os.path.join(directory, source_directory)) # Output reference tables print("Storing reference tables...") @@ -801,7 +809,7 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct # Output primary objects print("Storing individual sources...") for row in tqdm(self.query(self.metadata.tables[self._primary_table])): - self.save_json(row, directory) + self.save_json(row, os.path.join(directory, source_directory)) # Object input methods def add_table_data(self, data, table, fmt="csv"): @@ -909,7 +917,7 @@ def load_json(self, filename): temp_dict[self._foreign_key] = source conn.execute(self.metadata.tables[key].insert().values(temp_dict)) - def load_database(self, directory: str, verbose: bool=False, reference_directory: str="reference"): + def load_database(self, directory: str, verbose: bool=False, reference_directory: str="reference", source_directory: str="source"): """ Reload entire database from a directory of JSON files. Note that this will first clear existing tables. @@ -917,11 +925,13 @@ def load_database(self, directory: str, verbose: bool=False, reference_directory Parameters ---------- directory : str - Name of directory containing the JSON files + Name of top-level directory containing the JSON files verbose : bool Flag to enable diagnostic messages reference_directory : str Name of sub-directory to use for reference JSON files (eg, data/reference) + source_directory : str + Name of sub-directory to use for source JSON files (eg, data/source) """ # Clear existing database contents @@ -945,7 +955,15 @@ def load_database(self, directory: str, verbose: bool=False, reference_directory # Load object data if verbose: print("Loading object tables") - for file in tqdm(os.listdir(directory)): + + # Check if the sources are in the sub-directory + if os.path.exists(os.path.join(directory, source_directory)): + directory_of_sources = os.path.join(directory, source_directory) + else: + directory_of_sources = directory + + # Scan selected directory for JSON source files + for file in tqdm(os.listdir(directory_of_sources)): # Skip reference tables core_name = file.replace(".json", "") if core_name in self._reference_tables: @@ -955,7 +973,7 @@ def load_database(self, directory: str, verbose: bool=False, reference_directory if not file.endswith(".json") or file.startswith("."): continue - self.load_json(os.path.join(directory, file)) + self.load_json(os.path.join(directory_of_sources, file)) def dump_sqlite(self, database_name): """Output database as a sqlite file""" diff --git a/astrodbkit2/tests/test_astrodb.py b/astrodbkit2/tests/test_astrodb.py index ad64ff8..bfa75ca 100644 --- a/astrodbkit2/tests/test_astrodb.py +++ b/astrodbkit2/tests/test_astrodb.py @@ -437,11 +437,12 @@ def test_save_database(db, db_dir): # Check JSON data assert os.path.exists(os.path.join(db_dir, "reference", 'Publications.json')) - assert os.path.exists(os.path.join(db_dir, '2mass_j13571237+1428398.json')) + assert os.path.exists(os.path.join(db_dir, "source", '2mass_j13571237+1428398.json')) assert not os.path.exists(os.path.join(db_dir, '2mass_j13571237+1428398 2.json')) + assert not os.path.exists(os.path.join(db_dir, "source", '2mass_j13571237+1428398 2.json')) # Load source and confirm it is the same - with open(os.path.join(db_dir, '2mass_j13571237+1428398.json'), 'r') as f: + with open(os.path.join(db_dir, "source", '2mass_j13571237+1428398.json'), 'r') as f: data = json.load(f) assert data == db.inventory('2MASS J13571237+1428398') From 1b8c96271b20f5e82dd457d339aecd15cff89cb1 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Fri, 2 Aug 2024 13:02:39 -0400 Subject: [PATCH 08/11] Updating documentation --- docs/index.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 7a46be0..239673c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -411,7 +411,9 @@ Saving the Database If users perform changes to a database, they will want to output this to disk to be version controlled. **Astrodbkit2** provides methods to save an individual source or reference table as well as the entire data. By default, reference tables are stored in a sub-directory called "reference"; this can be overwritten by -supplying a `reference_directory` variable into `save_database` or `save_reference_table`. +supplying a `reference_directory` variable into `save_database` or `save_reference_table`. +Similarly, source/object tables are stored in a sub-directory called "source" which can be overwritten by supplying +`source_directory`. We recommend using `save_database` as that outputs the entire database contents to disk:: @@ -422,7 +424,7 @@ We recommend using `save_database` as that outputs the entire database contents db.save_reference_table('Publications', 'data') # Save entire database to directory 'data' - db.save_database(directory='data') + db.save_database(directory='data', reference_directory='reference', source_directory='source') .. note:: To properly capture database deletes, the contents of the specified directory is first cleared before creating JSON files representing the current state of the database. From ad235d9f6635a22bb11089b94a800847e6bbc3c2 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Fri, 2 Aug 2024 13:04:01 -0400 Subject: [PATCH 09/11] Further updates --- docs/index.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 239673c..1cd0f44 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -84,7 +84,8 @@ Loading the Database **Astrodbkit2** contains methods to output the full contents of the database as a list of JSON files. It can likewise read in a directory of these files to populate the database. -By default, reference tables (eg, Publications, Telescopes, etc) are stored in a `reference` sub-directory. +By default, reference tables (eg, Publications, Telescopes, etc) are stored in a `reference` sub-directory +and the source tables are in a `source` sub-directory. This is how SIMPLE is currently version controlled. To load a database of this form, do the following:: From fac649ec8d86b8d99d0cf48fa8483d6f4a500f8d Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Fri, 2 Aug 2024 14:05:57 -0400 Subject: [PATCH 10/11] Apply suggestions from code review Co-authored-by: Kelle Cruz --- astrodbkit2/astrodb.py | 4 ++-- docs/index.rst | 12 +++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/astrodbkit2/astrodb.py b/astrodbkit2/astrodb.py index e9bf728..df2ece6 100644 --- a/astrodbkit2/astrodb.py +++ b/astrodbkit2/astrodb.py @@ -929,9 +929,9 @@ def load_database(self, directory: str, verbose: bool=False, reference_directory verbose : bool Flag to enable diagnostic messages reference_directory : str - Name of sub-directory to use for reference JSON files (eg, data/reference) + Relative path to sub-directory to use for reference JSON files (eg, data/reference) source_directory : str - Name of sub-directory to use for source JSON files (eg, data/source) + Relative path to sub-directory to use for source JSON files (eg, data/source) """ # Clear existing database contents diff --git a/docs/index.rst b/docs/index.rst index 1cd0f44..f4981a4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -84,8 +84,7 @@ Loading the Database **Astrodbkit2** contains methods to output the full contents of the database as a list of JSON files. It can likewise read in a directory of these files to populate the database. -By default, reference tables (eg, Publications, Telescopes, etc) are stored in a `reference` sub-directory -and the source tables are in a `source` sub-directory. +By default, reference tables (eg, Publications, Telescopes, etc) and source tables are respectively stored in `reference/` and `source/` sub-directories of `data/`. This is how SIMPLE is currently version controlled. To load a database of this form, do the following:: @@ -410,11 +409,10 @@ Saving the Database =================== If users perform changes to a database, they will want to output this to disk to be version controlled. -**Astrodbkit2** provides methods to save an individual source or reference table as well as the entire data. -By default, reference tables are stored in a sub-directory called "reference"; this can be overwritten by +**Astrodbkit2** provides methods to save an individual source or reference table as well as all of the data stored in the database. +By default, reference tables are stored in a sub-directory of `data/` called "reference"; this can be overwritten by supplying a `reference_directory` variable into `save_database` or `save_reference_table`. -Similarly, source/object tables are stored in a sub-directory called "source" which can be overwritten by supplying -`source_directory`. +Similarly, source/object tables are stored in a sub-directory of `data/` called "source" which can be overwritten by supplying a `source_directory` variable. We recommend using `save_database` as that outputs the entire database contents to disk:: @@ -424,7 +422,7 @@ We recommend using `save_database` as that outputs the entire database contents # Save single reference table db.save_reference_table('Publications', 'data') - # Save entire database to directory 'data' + # Save entire database to directory 'data/' with 'reference/' and 'source/' subdirectories. db.save_database(directory='data', reference_directory='reference', source_directory='source') .. note:: To properly capture database deletes, the contents of the specified directory is first cleared before From 22797c9a43c1ee6b0bb12a93518e3763aeb6ccfa Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Fri, 2 Aug 2024 14:09:29 -0400 Subject: [PATCH 11/11] Print out path when saving source and reference tables --- astrodbkit2/astrodb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrodbkit2/astrodb.py b/astrodbkit2/astrodb.py index df2ece6..ff22024 100644 --- a/astrodbkit2/astrodb.py +++ b/astrodbkit2/astrodb.py @@ -798,7 +798,7 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct os.makedirs(os.path.join(directory, source_directory)) # Output reference tables - print("Storing reference tables...") + print(f"Storing reference tables to {os.path.join(directory, reference_directory)}...") for table in self._reference_tables: # Skip reference tables that are not actually in the database if table not in self.metadata.tables.keys(): @@ -807,7 +807,7 @@ def save_database(self, directory: str, clear_first: bool=True, reference_direct self.save_reference_table(table, directory, reference_directory=reference_directory) # Output primary objects - print("Storing individual sources...") + print(f"Storing individual sources to {os.path.join(directory, source_directory)}...") for row in tqdm(self.query(self.metadata.tables[self._primary_table])): self.save_json(row, os.path.join(directory, source_directory))