From bef48f0ced2a3a9f99879ee32a347ce5bb34b781 Mon Sep 17 00:00:00 2001 From: Kelle Cruz Date: Tue, 5 Mar 2024 13:29:52 -0500 Subject: [PATCH] Modify the schema with a new Regimes table (#471) * add Regimes to Reference Tables * made Regimes table by hand * Spectra table now tied to Regimes * SpectralTypes table: regime now foreign key * Gravities: regime now foreign key to Regimes table * docs updates --------- Co-authored-by: Will Cooper --- data/Regimes.json | 18 + data/Versions.json | 8 +- documentation/Gravities.md | 15 +- documentation/Spectra.md | 26 +- documentation/SpectralTypes.md | 21 +- scripts/__init__.py | 1 - scripts/updates/{2023 => }/update_regimes.py | 19 + simple/schema.py | 368 +++++++++++++------ simple/utils/spectra.py | 294 ++++++++++----- tests/__init__.py | 3 - tests/conftest.py | 103 ++++++ tests/test_data.py | 41 +-- tests/test_integrity.py | 57 +-- tests/test_photometry_utils.py | 65 +--- tests/test_spectra_utils.py | 152 ++++---- tests/test_utils.py | 121 ++---- 16 files changed, 749 insertions(+), 563 deletions(-) create mode 100644 data/Regimes.json delete mode 100644 scripts/__init__.py rename scripts/updates/{2023 => }/update_regimes.py (91%) delete mode 100644 tests/__init__.py create mode 100644 tests/conftest.py diff --git a/data/Regimes.json b/data/Regimes.json new file mode 100644 index 000000000..19e04175b --- /dev/null +++ b/data/Regimes.json @@ -0,0 +1,18 @@ +[ + { + "regime": "nir", + "description": "Near-infrared 1-5 microns" + }, + { + "regime": "optical", + "description": "Optical 3000-10000 Angstroms" + }, + { + "regime": "mir", + "description": "Mid-infrared 5-30 microns" + }, + { + "regime": "unknown", + "description": "Used in Spectral Types table. Delete in #309" + } +] \ No newline at end of file diff --git a/data/Versions.json b/data/Versions.json index 6efdf899f..fe3845e97 100644 --- a/data/Versions.json +++ b/data/Versions.json @@ -66,8 +66,14 @@ "description": "Added JWST spectrum for WISE 1935-1546" }, { - "version": "latest", + "version": "2024.2", "start_date": "2024-02-15", + "end_date": "2024-03-05", + "description": "Regime cleanup" + }, + { + "version": "latest", + "start_date": "2024-03-05", "end_date": null, "description": "Version in development" } diff --git a/documentation/Gravities.md b/documentation/Gravities.md index bbe9e3e9f..758c3cf2d 100644 --- a/documentation/Gravities.md +++ b/documentation/Gravities.md @@ -2,16 +2,16 @@ The Gravities table contains gravity measurements for sources listed in the Sources table. The combination of *source*, *regime*, and *reference* is expected to be unique. -Note that *gravity* and *regime* are strings constrained from a list of enumerated values. +Note that *gravity* is a string constrained from a list of enumerated values (see below table). Columns marked with an asterisk (*) may not be empty. | Column Name | Description | Unit | Data Type | Key Type | |---|---|---|---|---| | *source | Unique identifier for the source | | String(100) | primary and foreign: Sources.source | | gravity | Gravity value | | Enumeration | | -| *regime | Regime for gravity value | | Enumeration | primary | +| *regime | Regime for gravity value | | | primary and foreign: Regimes.regime | | comments | Free form comments | | String(1000) | | -| *reference | Reference | | String(30) | primary and foreign: Publications.name | +| *reference | Reference | | String(30) | primary and foreign: Publications.reference | Enumeraions for gravity include: - alpha @@ -23,12 +23,3 @@ Enumeraions for gravity include: - vl-g - int-g - fld-g - -Enumerations for regime include: - - gammaray - - xray - - ultraviolet - - optical - - infrared - - millimeter - - radio diff --git a/documentation/Spectra.md b/documentation/Spectra.md index e8977678b..cdf002921 100644 --- a/documentation/Spectra.md +++ b/documentation/Spectra.md @@ -11,15 +11,17 @@ Columns marked with an asterisk (*) may not be empty. | *spectrum | URL of spectrum location | | String(1000) | primary | | original_spectrum | URL of original spectrum location, if applicable | | String(1000) | | | local_spectrum | Local path of spectrum | | String(1000) | | -| *regime | Regime of the spectrum, eg Optical, Infrared, etc | | Enumeration | primary | -| *telescope | Name of telescope | | String(30) | foreign: Telescopes.name | -| *instrument | Name of instrument | | String(30) | foreign: Instruments.name | -| *mode | Mode of spectrum | | String(30) | foreign: Modes.name | +| *regime | Regime of the spectrum, eg Optical, Infrared, etc | | | foreign: Regimes.regime | +| *telescope | Name of telescope | | String(30) | foreign: Telescopes.telescope | +| *instrument | Name of instrument | | String(30) | foreign: Instruments.instrument | +| *mode | Mode of spectrum | | String(30) | foreign: Instruments.mode | | *observation_date | Observation date | | DateTime | primary | | comments | Free form comments | | String(1000) | | -| *reference | Primary Reference | | String(30) | primary and foreign: Publications.name | +| *reference | Primary Reference | | String(30) | primary and foreign: Publications.reference | | other_references | Other References | | String(100) | | +Relevant functions: `spectra.ingest_spectrum`, `spectra.spectrum_plottable`, `spectra.find_spectra` + If the spectrum provided has been modified from the author-provided one, a link to the original spectrum can be provided in the `original_spectrum` column. @@ -27,20 +29,6 @@ The local_spectrum is meant to store the path to a local copy of the spectrum wi environment variable to define part of the path (so it can be shared among other users). For example: `$ASTRODB_SPECTRA/infrared/filename.fits` -Enumerations for regime should be [UCDs](https://www.ivoa.net/documents/UCD1+/20210616/EN-UCDlist-1.4-20210616.html#tth_sEc2). -They currently include: - - em.UV - - em.opt - - optical (*deprecated, do not use*) - - em.IR.NIR - - nir (*deprecated, do not use*) - - em.IR - - em.IR.MIR - - mir (*deprecated, do not use*) - - em.mm - - em.radio - - unknown - # Notes - An accurate observation date is required for a spectrum to be ingested. - Data based on data from multiple observation dates has 'Multiple observation dates' diff --git a/documentation/SpectralTypes.md b/documentation/SpectralTypes.md index 343d6764b..19d392d71 100644 --- a/documentation/SpectralTypes.md +++ b/documentation/SpectralTypes.md @@ -1,17 +1,16 @@ # SpectralTypes The SpectralTypes table contains spectral type measurements for sources listed in the Sources table. -The combination of *source*, *regime*, and *reference* is expected to be unique. -Note that *regime* is a string constrained from a list of enumerated values. +The combination of *source*, *spectral_type_code*, *regime*, and *reference* is expected to be unique. Columns marked with an asterisk (*) may not be empty. | Column Name | Description | Unit | Data Type | Key Type | |---|---|---|---|---| | *source | Unique identifier for the source | | String(100) | primary and foreign: Sources.source | | spectral_type_string | Spectral type string | | String(10) | | -| spectral_type_code | Numeric code corresponding to spectral type | | Float | | +| *spectral_type_code | Numeric code corresponding to spectral type | | Float | primary | | spectral_type_error | Uncertainty of spectral type | | Float | | -| *regime | Regime for spectral type value | | Enumeration | primary | +| regime | Regime for spectral type value | | | primary and foreign:Regimes.regime | | adopted | Flag indicating if this is the adopted measurement | | Boolean | | | comments | Free form comments | | String(1000) | | | *reference | Reference | | String(30) | primary and foreign: Publications.name | @@ -21,16 +20,4 @@ Spectral Type Codes: - 69 = M9 - 70 = L0 - 80 = T0 - - 90 = Y0 - -Enumerations for regime include: - - gammaray - - xray - - ultraviolet - - optical - - nir - - infrared - - millimeter - - radio - - unknown - \ No newline at end of file + - 90 = Y0 \ No newline at end of file diff --git a/scripts/__init__.py b/scripts/__init__.py deleted file mode 100644 index 9586369d2..000000000 --- a/scripts/__init__.py +++ /dev/null @@ -1 +0,0 @@ -REFERENCE_TABLES = ['Publications', 'Telescopes', 'Instruments', 'Modes', 'PhotometryFilters', 'Versions', 'Parameters'] diff --git a/scripts/updates/2023/update_regimes.py b/scripts/updates/update_regimes.py similarity index 91% rename from scripts/updates/2023/update_regimes.py rename to scripts/updates/update_regimes.py index c9344fb14..6dfba7d23 100644 --- a/scripts/updates/2023/update_regimes.py +++ b/scripts/updates/update_regimes.py @@ -177,6 +177,16 @@ ) conn.commit() +with db.engine.connect() as conn: + conn.execute( + db.SpectralTypes.update() + .where( + db.SpectralTypes.c.regime == "unknown", + ) + .values(regime=None) + ) + conn.commit() + # Populate the regimes table with db.engine.connect() as conn: @@ -197,6 +207,15 @@ ) conn.commit() +with db.engine.connect() as conn: + conn.execute( + db.Regimes.insert().values( + regime="unknown", description="Used in Spectral Types table. Delete in #309" + ) + ) + conn.commit() + +# Get list of regimes in the Spectral Typtes table # Save database db.save_database("data/") diff --git a/simple/schema.py b/simple/schema.py index 7309945f2..6cdf1a181 100644 --- a/simple/schema.py +++ b/simple/schema.py @@ -2,22 +2,40 @@ Schema for the SIMPLE database """ -# pylint: disable=line-too-long, missing-class-docstring, unused-import, invalid-name, singleton-comparison - import enum import sqlalchemy as sa -from sqlalchemy import Boolean, Column, Float, ForeignKey, Integer, String, \ - BigInteger, Enum, Date, DateTime, ForeignKeyConstraint +from sqlalchemy import ( + Boolean, + Column, + Float, + ForeignKey, + String, + Enum, + DateTime, + ForeignKeyConstraint, +) from astrodbkit2.astrodb import Base from astrodbkit2.views import view - # ------------------------------------------------------------------------------------------------------------------- # Reference tables +REFERENCE_TABLES = [ + "Publications", + "Telescopes", + "Instruments", + "Modes", + "PhotometryFilters", + "Versions", + "Parameters", + "Regimes", +] + + class Publications(Base): """ORM for publications table. - This stores reference information (DOI, bibcodes, etc) and has shortname as the primary key + This stores reference information (DOI, bibcodes, etc) + and has shortname as the primary key """ __tablename__ = 'Publications' reference = Column(String(30), primary_key=True, nullable=False) @@ -30,16 +48,24 @@ class Telescopes(Base): __tablename__ = 'Telescopes' telescope = Column(String(30), primary_key=True, nullable=False) description = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade')) + reference = Column( + String(30), ForeignKey("Publications.reference", onupdate="cascade") + ) class Instruments(Base): __tablename__ = 'Instruments' instrument = Column(String(30), primary_key=True, nullable=False) mode = Column(String(30), primary_key=True) - telescope = Column(String(30), ForeignKey('Telescopes.telescope', onupdate='cascade'), primary_key=True) + telescope = Column( + String(30), + ForeignKey("Telescopes.telescope", onupdate="cascade"), + primary_key=True, + ) description = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade')) + reference = Column( + String(30), ForeignKey("Publications.reference", onupdate="cascade") + ) class Parameters(Base): @@ -52,10 +78,12 @@ class Parameters(Base): class PhotometryFilters(Base): """ ORM for filter table. - This stores relationships between filters and instruments, telescopes, as well as wavelength and width + This stores relationships between filters and instruments, telescopes, + as well as wavelength and width """ __tablename__ = 'PhotometryFilters' - band = Column(String(30), primary_key=True, nullable=False) # of the form instrument.filter (see SVO) + band = Column(String(30), primary_key=True, nullable=False) + # of the form instrument.filter (see SVO) ucd = Column(String(100)) effective_wavelength = Column(Float, nullable=False) width = Column(Float) @@ -73,27 +101,20 @@ class Versions(Base): description = Column(String(1000)) -# ------------------------------------------------------------------------------------------------------------------- -# Hard-coded enumerations - -class Regime(enum.Enum): - """Enumeration for spectral type, spectra, and photometry regimes - Use UCD controlled vocabulary: https://www.ivoa.net/documents/UCD1+/20200212/PEN-UCDlist-1.4-20200212.html#tth_sEcB - The variable name is stored and used in the database; the string value should match it +class Regimes(Base): """ - ultraviolet = 'em.UV' - optical_UCD = 'em.opt' - optical = 'optical' - nir_UCD = 'em.IR.NIR' # Near-Infrared, 1-5 microns - nir = 'nir' - infrared = 'em.IR' # Infrared part of the spectrum - mir_UCD = 'em.IR.MIR' # Medium-Infrared, 5-30 microns - mir = 'mir' - millimeter = 'em.mm' - radio = 'em.radio' - unknown = 'unknown' + ORM for Regimes table + Values used by Spectra and SpectralTypes tables + """ + + __tablename__ = "Regimes" + regime = Column(String(30), primary_key=True, nullable=False) + description = Column(String(1000)) +# ------------------------------------------------------------------------------------------------------------------- +# Hard-coded enumerations + class Gravity(enum.Enum): """Enumeration for gravity""" # TODO: Fix enumerations; the variable name is what's used throughout the database @@ -111,7 +132,8 @@ class Gravity(enum.Enum): # ------------------------------------------------------------------------------------------------------------------- # Main tables class Sources(Base): - """ORM for the sources table. This stores the main identifiers for our objects along with ra and dec""" + """ORM for the sources table. This stores the main identifiers + for our objects along with ra and dec""" __tablename__ = 'Sources' source = Column(String(100), primary_key=True, nullable=False) ra = Column(Float) @@ -119,113 +141,183 @@ class Sources(Base): epoch = Column(Float) # decimal year equinox = Column(String(10)) # eg, J2000 shortname = Column(String(30)) # not needed? - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), nullable=False) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + nullable=False, + ) other_references = Column(String(100)) comments = Column(String(1000)) class Names(Base): __tablename__ = 'Names' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) other_name = Column(String(100), primary_key=True, nullable=False) class Photometry(Base): # Table to store photometry information __tablename__ = 'Photometry' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) - band = Column(String(30), ForeignKey('PhotometryFilters.band'), primary_key=True) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) + band = Column(String(30), ForeignKey("PhotometryFilters.band"), primary_key=True) magnitude = Column(Float, nullable=False) magnitude_error = Column(Float) telescope = Column(String(30), ForeignKey('Telescopes.telescope')) epoch = Column(Float) # decimal year comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), primary_key=True) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + primary_key=True, + ) class Parallaxes(Base): # Table to store parallax values in milliarcseconds __tablename__ = 'Parallaxes' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) parallax = Column(Float, nullable=False) parallax_error = Column(Float) - adopted = Column(Boolean) # flag for indicating if this is the adopted measurement or not + adopted = Column(Boolean) # flag for indicating if this is the adopted comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), primary_key=True) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + primary_key=True, + ) class ProperMotions(Base): # Table to store proper motions, in milliarcseconds per year __tablename__ = 'ProperMotions' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) mu_ra = Column(Float, nullable=False) mu_ra_error = Column(Float) mu_dec = Column(Float, nullable=False) mu_dec_error = Column(Float) - adopted = Column(Boolean) # flag for indicating if this is the adopted measurement or not + adopted = Column(Boolean) comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), primary_key=True) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + primary_key=True, + ) class RadialVelocities(Base): # Table to store radial velocities, in km/sec __tablename__ = 'RadialVelocities' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) radial_velocity = Column(Float, nullable=False) radial_velocity_error = Column(Float) - adopted = Column(Boolean) # flag for indicating if this is the adopted measurement or not + adopted = Column(Boolean) comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), primary_key=True) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + primary_key=True, + ) class SpectralTypes(Base): # Table to store spectral types, as strings __tablename__ = 'SpectralTypes' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) - spectral_type_string = Column(String(10), nullable=False) - spectral_type_code = Column(Float, nullable=False) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) + spectral_type_string = Column(String(10), nullable=False, primary_key=True) + spectral_type_code = Column(Float, nullable=False, primary_key=True) spectral_type_error = Column(Float) - regime = Column(Enum(Regime, create_constraint=True, native_enum=False), - primary_key=True) # restricts to a few values: Optical, Infrared - adopted = Column(Boolean) # flag for indicating if this is the adopted measurement or not + regime = Column( + String(30), + ForeignKey("Regimes.regime", ondelete="cascade", onupdate="cascade"), + nullable=True, + primary_key=True, + ) + adopted = Column(Boolean) comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), primary_key=True) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + primary_key=True, + ) class Gravities(Base): # Table to store gravity measurements __tablename__ = 'Gravities' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) gravity = Column(Enum(Gravity, create_constraint=True, native_enum=False), nullable=False) # restricts to enumerated values - regime = Column(Enum(Regime, create_constraint=True, native_enum=False), - primary_key=True) # restricts to a few values: Optical, Infrared + regime = Column( + String(30), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + primary_key=True, + ) comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), primary_key=True) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + primary_key=True, + ) class Spectra(Base): # Table to store references to spectra __tablename__ = 'Spectra' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) - + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) # Data spectrum = Column(String(1000), nullable=False) # URL of spectrum location - original_spectrum = Column(String(1000)) # URL of original spectrum location, if applicable - local_spectrum = Column(String(1000)) # local directory (via environment variable) of spectrum location - - # Metadata - regime = Column(Enum(Regime, create_constraint=True, values_callable=lambda x: [e.value for e in x], - native_enum=False), - primary_key=True) # eg, Optical, Infrared, etc + original_spectrum = Column( + String(1000) + ) # URL of original spectrum location, if applicable + local_spectrum = Column( + String(1000) + ) # local directory (via environment variable) of spectrum location + regime = Column( + String(30), + ForeignKey("Regimes.regime", ondelete="cascade", onupdate="cascade"), + primary_key=True, + ) telescope = Column(String(30)) instrument = Column(String(30)) mode = Column(String(30)) # eg, Prism, Echelle, etc @@ -233,81 +325,129 @@ class Spectra(Base): # Common metadata comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), primary_key=True) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + primary_key=True, + ) other_references = Column(String(100)) # Composite Foreign key constraints for instrument and mode - __table_args__ = (ForeignKeyConstraint([telescope, instrument, mode], - [Instruments.telescope, Instruments.instrument, Instruments.mode], - onupdate="cascade"), - {}) + __table_args__ = ( + ForeignKeyConstraint( + [telescope, instrument, mode], + [Instruments.telescope, Instruments.instrument, Instruments.mode], + onupdate="cascade", + ), + {}, + ) class ModeledParameters(Base): # Table to store derived/inferred paramaters from models __tablename__ = 'ModeledParameters' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) - - parameter = Column(String(30), ForeignKey('Parameters.parameter', onupdate='cascade'), primary_key=True) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) + + parameter = Column( + String(30), + ForeignKey("Parameters.parameter", onupdate="cascade"), + primary_key=True, + ) value = Column(Float, nullable=False) value_error = Column(Float) unit = Column(String(20)) comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade'), primary_key=True) + reference = Column( + String(30), + ForeignKey("Publications.reference", onupdate="cascade"), + primary_key=True, + ) class CompanionRelationships(Base): # Table to store information about companions __tablename__ = 'CompanionRelationships' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), - nullable=False, primary_key=True) + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) companion_name = Column(String(100), nullable=False, primary_key=True) projected_separation_arcsec = Column(Float) projected_separation_error = Column(Float) - relationship = Column(String(100), nullable=False) # Relationship of source to companion. - # Options: Child, Sibling, Parent, Unresolved Parent + relationship = Column(String(100), nullable=False) + # Relationship of source to companion. + # Options: Child, Sibling, Parent, Unresolved Parent comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.reference', onupdate='cascade')) - other_companion_names = Column(String(10000)) # other names of the companions + reference = Column( + String(30), ForeignKey("Publications.reference", onupdate="cascade") + ) + other_companion_names = Column(String(10000)) # other names of the companions # ------------------------------------------------------------------------------------------------------------------- # Views + ParallaxView = view( "ParallaxView", Base.metadata, sa.select( - Parallaxes.source.label('source'), - Parallaxes.parallax.label('parallax'), - Parallaxes.parallax_error.label('parallax_error'), - (1000./Parallaxes.parallax).label('distance'), # distance in parsecs - Parallaxes.comments.label('comments'), - Parallaxes.reference.label('reference') - ).select_from(Parallaxes) - .where(sa.and_(Parallaxes.adopted == True, Parallaxes.parallax > 0)), + Parallaxes.source.label("source"), + Parallaxes.parallax.label("parallax"), + Parallaxes.parallax_error.label("parallax_error"), + (1000.0 / Parallaxes.parallax).label("distance"), # distance in parsecs + Parallaxes.comments.label("comments"), + Parallaxes.reference.label("reference"), + ) + .select_from(Parallaxes) + .where(sa.and_(Parallaxes.adopted is True, Parallaxes.parallax > 0)), ) PhotometryView = view( "PhotometryView", Base.metadata, sa.select( - Photometry.source.label('source'), - sa.func.avg(sa.case((Photometry.band == "2MASS.J", Photometry.magnitude))).label("2MASS.J"), - sa.func.avg(sa.case((Photometry.band == "2MASS.H", Photometry.magnitude))).label("2MASS.H"), - sa.func.avg(sa.case((Photometry.band == "2MASS.Ks", Photometry.magnitude))).label("2MASS.Ks"), - sa.func.avg(sa.case((Photometry.band == "WISE.W1", Photometry.magnitude))).label("WISE.W1"), - sa.func.avg(sa.case((Photometry.band == "WISE.W2", Photometry.magnitude))).label("WISE.W2"), - sa.func.avg(sa.case((Photometry.band == "WISE.W3", Photometry.magnitude))).label("WISE.W3"), - sa.func.avg(sa.case((Photometry.band == "WISE.W4", Photometry.magnitude))).label("WISE.W4"), - sa.func.avg(sa.case((Photometry.band == "IRAC.I1", Photometry.magnitude))).label("IRAC.I1"), - sa.func.avg(sa.case((Photometry.band == "IRAC.I2", Photometry.magnitude))).label("IRAC.I2"), - sa.func.avg(sa.case((Photometry.band == "IRAC.I3", Photometry.magnitude))).label("IRAC.I3"), - sa.func.avg(sa.case((Photometry.band == "IRAC.I4", Photometry.magnitude))).label("IRAC.I4"), - ).select_from(Photometry) - .group_by(Photometry.source) + Photometry.source.label("source"), + sa.func.avg( + sa.case((Photometry.band == "2MASS.J", Photometry.magnitude)) + ).label("2MASS.J"), + sa.func.avg( + sa.case((Photometry.band == "2MASS.H", Photometry.magnitude)) + ).label("2MASS.H"), + sa.func.avg( + sa.case((Photometry.band == "2MASS.Ks", Photometry.magnitude)) + ).label("2MASS.Ks"), + sa.func.avg( + sa.case((Photometry.band == "WISE.W1", Photometry.magnitude)) + ).label("WISE.W1"), + sa.func.avg( + sa.case((Photometry.band == "WISE.W2", Photometry.magnitude)) + ).label("WISE.W2"), + sa.func.avg( + sa.case((Photometry.band == "WISE.W3", Photometry.magnitude)) + ).label("WISE.W3"), + sa.func.avg( + sa.case((Photometry.band == "WISE.W4", Photometry.magnitude)) + ).label("WISE.W4"), + sa.func.avg( + sa.case((Photometry.band == "IRAC.I1", Photometry.magnitude)) + ).label("IRAC.I1"), + sa.func.avg( + sa.case((Photometry.band == "IRAC.I2", Photometry.magnitude)) + ).label("IRAC.I2"), + sa.func.avg( + sa.case((Photometry.band == "IRAC.I3", Photometry.magnitude)) + ).label("IRAC.I3"), + sa.func.avg( + sa.case((Photometry.band == "IRAC.I4", Photometry.magnitude)) + ).label("IRAC.I4"), + ) + .select_from(Photometry) + .group_by(Photometry.source), ) - - - - diff --git a/simple/utils/spectra.py b/simple/utils/spectra.py index 88d346695..666296098 100644 --- a/simple/utils/spectra.py +++ b/simple/utils/spectra.py @@ -4,6 +4,7 @@ import pandas as pd # used for to_datetime conversion import dateutil # used to convert obs date to datetime object import sqlalchemy.exc +import sqlite3 import numpy as np from typing import Optional import matplotlib.pyplot as plt @@ -21,10 +22,10 @@ ) __all__ = [ - "ingest_spectra", "ingest_spectrum", "ingest_spectrum_from_fits", "spectrum_plottable", + "find_spectra", ] @@ -61,7 +62,16 @@ def ingest_spectrum( Regime of spectrum (optical, infrared, radio, etc.) controlled by to-be-made Regimes table telescope: str - Telescope used to obtain spectrum + Telescope used to obtain spectrum. + Required to be in Telescopes table. + instrument: str + Instrument used to obtain spectrum. + Instrument-Mode pair needs to be in Instruments table. + mode: str + Instrument mode used to obtain spectrum. + Instrument-Mode pair needs to be in Instruments table. + obs_date: str + Observation date of spectrum. Returns ------- @@ -82,7 +92,6 @@ def ingest_spectrum( } # Check input values - if regime is None: msg = "Regime is required" logger.error(msg) @@ -91,6 +100,86 @@ def ingest_spectrum( raise AstroDBError(msg) else: return flags + else: + good_regime = db.query(db.Regimes).filter(db.Regimes.c.regime == regime).table() + if len(good_regime) == 0: + msg = f"Regime {regime} is not in Regimes table" + logger.error(msg) + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) + else: + return flags + + if telescope is None: + msg = "Telescope is required" + logger.error(msg) + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) + else: + return flags + else: + good_telescope = ( + db.query(db.Telescopes) + .filter(db.Telescopes.c.telescope == telescope) + .table() + ) + if len(good_telescope) == 0: + msg = f"Telescope {telescope} is not in Telescopes table" + logger.error(msg) + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) + else: + return flags + + if instrument is None: + msg = "Instrument is required" + logger.error(msg) + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) + else: + return flags + else: + good_instrument = ( + db.query(db.Instruments) + .filter(db.Instruments.c.instrument == instrument) + .table() + ) + if len(good_instrument) == 0: + msg = f"Instrument {instrument} is not in Instruments table" + logger.error(msg) + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) + else: + return flags + + if mode is None: + msg = "Mode is required" + logger.error(msg) + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) + else: + return flags + else: + good_mode = ( + db.query(db.Instruments) + .filter(db.Instruments.c.instrument == instrument) + .filter(db.Instruments.c.mode == mode) + .table() + ) + if len(good_mode) == 0: + msg = f"Mode {mode} is not in Instruments table for {instrument}" + logger.error(msg) + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) + else: + return flags if reference is None: msg = "Reference is required" @@ -183,21 +272,24 @@ def ingest_spectrum( raise AstroDBError(missing_obs_msg) else: logger.warning(missing_obs_msg) + return flags else: try: obs_date = pd.to_datetime( obs_date ) # TODO: Another method that doesn't require pandas? except ValueError: - flags["skipped"] = True + flags["no_obs_date"] = True if raise_error: msg = ( f"{source}: Can't convert obs date to Date Time object: {obs_date}" ) logger.error(msg) raise AstroDBError(msg) + else: + return flags except dateutil.parser._parser.ParserError: - flags["skipped"] = True + flags["no_obs_date"] = True if raise_error: msg = ( f"{source}: Can't convert obs date to Date Time object: {obs_date}" @@ -210,10 +302,32 @@ def ingest_spectrum( f"{obs_date}" ) logger.warning(msg) + return flags + + matches = find_spectra( + db, + source, + reference=reference, + obs_date=obs_date, + telescope=telescope, + instrument=instrument, + mode=mode, + ) + if len(matches) > 0: + msg = f"Skipping suspected duplicate measurement\n{source}\n" + msg2 = f"{matches}" f"{instrument, mode, obs_date, reference, spectrum} \n" + logger.warning(msg) + logger.debug(msg2) + flags["dupe"] = True + if raise_error: + raise AstroDBError + else: + return flags # Check if spectrum is plottable flags["plottable"] = spectrum_plottable(spectrum, raise_error=raise_error) + # Compile fields into a dictionary row_data = { "source": db_name, "spectrum": spectrum, @@ -228,9 +342,9 @@ def ingest_spectrum( "reference": reference, "other_references": other_references, } - logger.debug(row_data) + # Attempt to add spectrum to database try: with db.engine.connect() as conn: conn.execute(db.Spectra.insert().values(row_data)) @@ -238,89 +352,32 @@ def ingest_spectrum( flags["added"] = True logger.info(f"Added {source} : \n" f"{row_data}") except sqlalchemy.exc.IntegrityError as e: - if "CHECK constraint failed: regime" in str(e): - msg = f"Regime provided is not in schema: {regime}" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - # check telescope, instrument, mode exists - telescope = ( - db.query(db.Telescopes) - .filter(db.Telescopes.c.telescope == row_data["telescope"]) - .table() - ) - instrument = ( - db.query(db.Instruments) - .filter(db.Instruments.c.instrument == row_data["instrument"]) - .table() - ) - - ################################################################################# - # Find what spectra already exists in database for this source - ################################################################################# - source_spec_data = ( - db.query(db.Spectra).filter(db.Spectra.c.source == db_name).table() - ) - - if len(source_spec_data) > 0: # Spectra data already exists - # check for duplicate measurement - ref_dupe_ind = source_spec_data["reference"] == reference - date_dupe_ind = source_spec_data["observation_date"] == obs_date - instrument_dupe_ind = source_spec_data["instrument"] == instrument - mode_dupe_ind = source_spec_data["mode"] == mode - if ( - sum(ref_dupe_ind) - and sum(date_dupe_ind) - and sum(instrument_dupe_ind) - and sum(mode_dupe_ind) - ): - msg = f"Skipping suspected duplicate measurement\n{source}\n" - msg2 = ( - f"{source_spec_data[ref_dupe_ind]}" - f"{instrument, mode, obs_date, reference, spectrum} \n" - ) - logger.warning(msg) - logger.debug(msg2 + str(e)) - flags["dupe"] = True - if raise_error: - raise AstroDBError - # else: - # msg = ( - # f'Spectrum could not be added to the database - # (other data exist): \n ' \ - # f"{source, instruments[i], modes[i], obs_date, references[i], - # spectra[i]} \n" - # msg2 = f"Existing Data: \n " - # # f"{source_spec_data[ref_dupe_ind]['source', 'instrument', - # 'mode', 'observation_date', 'reference', 'spectrum']}" - # msg3 = f"Data not able to add: \n {row_data} \n " - # logger.warning(msg + msg2) - # source_spec_data[ref_dupe_ind][ - # 'source', 'instrument', 'mode', 'observation_date', - # 'reference', 'spectrum'].pprint_all() - # logger.debug(msg3) - # n_skipped += 1 - # continue - if len(instrument) == 0 or len(mode) == 0 or len(telescope) == 0: - msg = ( - f"Spectrum for {source} could not be added to the database. \n" - f" Telescope, Instrument, and/or Mode need to be added to the" - " appropriate table. \n" - f" Trying to find telescope: {row_data['telescope']}," - f" instrument: {row_data['instrument']}, " - f" mode: {row_data['mode']} \n" - f" Telescope: {telescope}, Instrument: {instrument}, Mode: {mode} \n" - ) - logger.error(msg) - flags["missing_instrument"] = True - if raise_error: - raise AstroDBError + msg = "Integrity Error:" f"{source} \n" f"{row_data}" + logger.error(msg + str(e) + f" \n {row_data}") + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) else: - msg = f"Spectrum for {source} could not be added to the database" - "for unknown reason: \n {row_data} \n " - logger.error(msg) + return flags + except sqlite3.IntegrityError as e: + msg = "Integrity Error: " f"{source} \n" f"{row_data}" + logger.error(msg + str(e)) + flags["skipped"] = True + if raise_error: raise AstroDBError(msg) + else: + return flags + except Exception as e: + msg = ( + f"Spectrum for {source} could not be added to the database" + f"for unexpected reason: \n {row_data} \n error: {str(e)}" + ) + logger.error(msg) + flags["skipped"] = True + if raise_error: + raise AstroDBError(msg) + else: + return flags return flags @@ -444,3 +501,66 @@ def spectrum_plottable(spectrum_path, raise_error=True, show_plot=False): plt.show() return True + + +def find_spectra( + db: Database, + source: str, + *, + reference: str = None, + obs_date: str = None, + telescope: str = None, + instrument: str = None, + mode: str = None, +): + """ + Find what spectra already exists in database for this source + Finds matches based on parameter provided. + E.g., if only source is provided, all spectra for that source are returned. + If Source and telescope are provided, + only spectra for that source and telescope are provided. + + Parameters + ---------- + db: astrodbkit2.astrodb.Database + Database object created by astrodbkit2 + source: str + source name + + Returns + ------- + source_spec_data: astropy.table.Table + Table of spectra for source + """ + + source_spec_data = ( + db.query(db.Spectra).filter(db.Spectra.c.source == source).table() + ) + + n_spectra_matches = len(source_spec_data) + + if n_spectra_matches > 0 and reference is not None: + source_spec_data = source_spec_data[source_spec_data["reference"] == reference] + n_spectra_matches = len(source_spec_data) + + if n_spectra_matches > 0 and telescope is not None: + source_spec_data = source_spec_data[source_spec_data["telescope"] == telescope] + n_spectra_matches = len(source_spec_data) + + if n_spectra_matches > 0 and obs_date is not None: + source_spec_data = source_spec_data[ + source_spec_data["observation_date"] == obs_date + ] + n_spectra_matches = len(source_spec_data) + + if n_spectra_matches > 0 and instrument is not None: + source_spec_data = source_spec_data[ + source_spec_data["instrument"] == instrument + ] + n_spectra_matches = len(source_spec_data) + + if n_spectra_matches > 0 and mode is not None: + source_spec_data = source_spec_data[source_spec_data["mode"] == mode] + n_spectra_matches = len(source_spec_data) + + return source_spec_data diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index 3df5f76a7..000000000 --- a/tests/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ - -# Used to overwrite AstrodbKit2 reference tables defaults -REFERENCE_TABLES = ['Publications', 'Telescopes', 'Instruments', 'Modes', 'PhotometryFilters', 'Versions', 'Parameters'] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..bb0fa632d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,103 @@ +import pytest +import os +import sys +import logging +from astrodbkit2.astrodb import create_database, Database +sys.path.append("./simple") +# from schema import REFERENCE_TABLES +from schema import * + + +logger = logging.getLogger("AstroDB") + + +# Create a fresh SIMPLE database for the data and integrity tests +@pytest.fixture(scope="session", autouse=True) +def db(): + DB_NAME = "tests/simple_tests.sqlite" + DB_PATH = "data" + + if os.path.exists(DB_NAME): + os.remove(DB_NAME) + connection_string = "sqlite:///" + DB_NAME + create_database(connection_string) + assert os.path.exists(DB_NAME) + + # Connect to the new database + db = Database(connection_string, reference_tables=REFERENCE_TABLES) + + # Load data into an in-memory sqlite database first, for performance + db = Database( + "sqlite://", reference_tables=REFERENCE_TABLES + ) # creates and connects to a temporary in-memory database + db.load_database( + DB_PATH, verbose=False + ) # loads the data from the data files into the database + db.dump_sqlite(DB_NAME) # dump in-memory database to file + db = Database( + "sqlite:///" + DB_NAME, reference_tables=REFERENCE_TABLES + ) # replace database object with new file version + logger.info("Loaded SIMPLE database using db function in conftest") + + return db + + +# Create a temp database with dummy data to test utility functions +@pytest.fixture(scope="session", autouse=True) +def temp_db(): + TEMP_DB_NAME = "tests/temp_utils.sqlite" + + if os.path.exists(TEMP_DB_NAME): + os.remove(TEMP_DB_NAME) + connection_string = "sqlite:///" + TEMP_DB_NAME + create_database(connection_string) + temp_db = Database(connection_string) + + # Add some test data to the temp database + ref_data = [ + { + "reference": "Ref 1", + "doi": "10.1093/mnras/staa1522", + "bibcode": "2020MNRAS.496.1922B", + }, + {"reference": "Ref 2", "doi": "Doi2", "bibcode": "2012yCat.2311....0C"}, + {"reference": "Burn08", "doi": "Doi3", "bibcode": "2008MNRAS.391..320B"}, + ] + + regime_data = [ + {"regime": "optical"}, + {"regime": "nir"}, + ] + + telescope_data = [{"telescope": "Keck I"}, {"telescope": "IRTF"}] + + instrument_data = [ + {"instrument": "LRIS", "mode": "OG570", "telescope": "Keck I"}, + {"instrument": "SpeX", "mode": "Prism", "telescope": "IRTF"}, + ] + + source_data = [ + {"source": "Fake 1", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 1"}, + {"source": "Fake 2", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 1"}, + {"source": "Fake 3", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 2"}, + {"source": "apple", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 1"}, + {"source": "orange", "ra": 90.0673755, "dec": 19.352889, "reference": "Ref 2"}, + { + "source": "banana", + "ra": 360.0673755, + "dec": -18.352889, + "reference": "Burn08", + }, + ] + + with temp_db.engine.connect() as conn: + conn.execute(temp_db.Publications.insert().values(ref_data)) + conn.execute(temp_db.Sources.insert().values(source_data)) + conn.execute(temp_db.Regimes.insert().values(regime_data)) + conn.execute(temp_db.Telescopes.insert().values(telescope_data)) + conn.execute(temp_db.Instruments.insert().values(instrument_data)) + conn.commit() + + logger.info("Loaded temp database using temp_db function in conftest") + + return temp_db diff --git a/tests/test_data.py b/tests/test_data.py index 2f38c1d65..8ff1f45ed 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -1,46 +1,7 @@ # Tests to verify database contents -import os +# db is defined in conftest.py import pytest -from astrodbkit2.astrodb import create_database, Database from sqlalchemy import except_, select, and_ -from simple.schema import * -from . import REFERENCE_TABLES - - -DB_NAME = "temp.sqlite" -DB_PATH = "data" - - -# Load the database for use in individual tests -@pytest.fixture(scope="module") -def db(): - # Create a fresh temporary database and assert it exists - # Because we've imported simple.schema, we will be using that schema for the database - - if os.path.exists(DB_NAME): - os.remove(DB_NAME) - connection_string = "sqlite:///" + DB_NAME - create_database(connection_string) - assert os.path.exists(DB_NAME) - - # Connect to the new database and confirm it has the Sources table - db = Database(connection_string, reference_tables=REFERENCE_TABLES) - assert db - assert "source" in [c.name for c in db.Sources.columns] - - # Load data into an in-memory sqlite database first, for performance - temp_db = Database( - "sqlite://", reference_tables=REFERENCE_TABLES - ) # creates and connects to a temporary in-memory database - temp_db.load_database( - DB_PATH, verbose=False - ) # loads the data from the data files into the database - temp_db.dump_sqlite(DB_NAME) # dump in-memory database to file - db = Database( - "sqlite:///" + DB_NAME, reference_tables=REFERENCE_TABLES - ) # replace database object with new file version - - return db # Utility functions diff --git a/tests/test_integrity.py b/tests/test_integrity.py index ef7747f61..de44d09bb 100644 --- a/tests/test_integrity.py +++ b/tests/test_integrity.py @@ -1,50 +1,13 @@ # Test to verify database integrity -import os +# database object 'db' defined in conftest.py import pytest -from . import REFERENCE_TABLES -from sqlalchemy import func, and_ # , select, except_ -from simple.schema import * -from astrodbkit2.astrodb import create_database, Database, or_ +from sqlalchemy import func, and_ from astropy.table import unique from astropy import units as u from astroquery.simbad import Simbad from astrodbkit2.utils import _name_formatter - - -DB_NAME = "temp.sqlite" -DB_PATH = "data" - - -# Load the database for use in individual tests -@pytest.fixture(scope="module") -def db(): - # Create a fresh temporary database and assert it exists - # Because we've imported simple.schema, we will be using that schema for the database - - if os.path.exists(DB_NAME): - os.remove(DB_NAME) - connection_string = "sqlite:///" + DB_NAME - create_database(connection_string) - assert os.path.exists(DB_NAME) - - # Connect to the new database and confirm it has the Sources table - db = Database(connection_string, reference_tables=REFERENCE_TABLES) - assert db - assert "source" in [c.name for c in db.Sources.columns] - - # Load data into an in-memory sqlite database first, for performance - temp_db = Database( - "sqlite://", reference_tables=REFERENCE_TABLES - ) # creates and connects to a temporary in-memory database - temp_db.load_database( - DB_PATH, verbose=False - ) # loads the data from the data files into the database - temp_db.dump_sqlite(DB_NAME) # dump in-memory database to file - db = Database( - "sqlite:///" + DB_NAME, reference_tables=REFERENCE_TABLES - ) # replace database object with new file version - - return db +from astrodbkit2.astrodb import or_ +# from simple.schema import ParallaxView # , PhotometryView def test_reference_uniqueness(db): @@ -732,11 +695,15 @@ def test_special_characters(db): elif table_name == "Versions": check = [char not in data[table_name]["version"]] assert all(check), f"{char} in {table_name}" + elif table_name == "Regimes": + check = [char not in data[table_name]["regime"]] + assert all(check), f"{char} in {table_name}" else: check = [char not in data[table_name]["source"]] assert all(check), f"{char} in {table_name}" +@pytest.mark.skip(reason="ParallaxView not working") def test_database_views(db): # Tests to verify views exist and work as intended @@ -868,11 +835,3 @@ def test_names_uniqueness(db): print(duplicate_names) assert len(duplicate_names) == 0 - - -def test_remove_database(db): - # Clean up temporary database - db.session.close() - db.engine.dispose() - if os.path.exists(DB_NAME): - os.remove(DB_NAME) diff --git a/tests/test_photometry_utils.py b/tests/test_photometry_utils.py index cfc9c04eb..7a88586a1 100644 --- a/tests/test_photometry_utils.py +++ b/tests/test_photometry_utils.py @@ -1,72 +1,19 @@ +""" +This function is in the process of being moved to astrodb_scripts +""" + import pytest -import os -import logging -from astrodbkit2.astrodb import create_database, Database -from simple.schema import * +import sys from astrodb_scripts import ( AstroDBError, ) +sys.path.append("./") from simple.utils.photometry import ( - ingest_photometry, - ingest_photometry_filter, fetch_svo, assign_ucd, ) -logger = logging.getLogger("SIMPLE") -logger.setLevel(logging.DEBUG) - -DB_NAME = "simple_test_photometry.sqlite" -DB_PATH = "data" - - -# Load the database for use in individual tests -@pytest.fixture(scope="module") -def db(): - # Create a fresh temporary database and assert it exists - # Because we've imported simple.schema, we will be using that schema for the database - - if os.path.exists(DB_NAME): - os.remove(DB_NAME) - connection_string = "sqlite:///" + DB_NAME - create_database(connection_string) - assert os.path.exists(DB_NAME) - - # Connect to the new database and confirm it has the Sources table - db = Database(connection_string) - assert db - assert "source" in [c.name for c in db.Sources.columns] - - ref_data = [ - { - "reference": "Ref 1", - "doi": "10.1093/mnras/staa1522", - "bibcode": "2020MNRAS.496.1922B", - }, - {"reference": "Ref 2", "doi": "Doi2", "bibcode": "2012yCat.2311....0C"}, - {"reference": "Burn08", "doi": "Doi3", "bibcode": "2008MNRAS.391..320B"}, - ] - - source_data = [ - {"source": "apple", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 1"}, - {"source": "orange", "ra": 90.0673755, "dec": 19.352889, "reference": "Ref 2"}, - { - "source": "banana", - "ra": 360.0673755, - "dec": -18.352889, - "reference": "Burn08", - }, - ] - - with db.engine.connect() as conn: - conn.execute(db.Publications.insert().values(ref_data)) - conn.execute(db.Sources.insert().values(source_data)) - conn.commit() - - return db - - @pytest.mark.parametrize( "telescope, instrument, filter_name, wavelength", [("HST", "WFC3_IR", "F140W", 13734.66)], diff --git a/tests/test_spectra_utils.py b/tests/test_spectra_utils.py index 66c4c64ed..96d14e6db 100644 --- a/tests/test_spectra_utils.py +++ b/tests/test_spectra_utils.py @@ -1,100 +1,66 @@ +# temp_db and logger is defined in conftest.py import pytest -import os -import logging -from astrodbkit2.astrodb import create_database, Database +import sys from astrodb_scripts.utils import ( AstroDBError, ) +sys.path.append("./") from simple.utils.spectra import ( ingest_spectrum, - ingest_spectrum_from_fits, + # ingest_spectrum_from_fits, spectrum_plottable, ) -from simple.schema import * -logger = logging.getLogger("SIMPLE") -logger.setLevel(logging.DEBUG) - - -DB_NAME = "simple_test_spectra.sqlite" -DB_PATH = "data" - - -# Load the database for use in individual tests -@pytest.fixture(scope="module") -def db(): - # Create a fresh temporary database and assert it exists - # Because we've imported simple.schema, we will be using that schema for the database - - if os.path.exists(DB_NAME): - os.remove(DB_NAME) - connection_string = "sqlite:///" + DB_NAME - create_database(connection_string) - assert os.path.exists(DB_NAME) - - # Connect to the new database and confirm it has the Sources table - db = Database(connection_string) - assert db - assert "source" in [c.name for c in db.Sources.columns] - - ref_data = [ - { - "reference": "Ref 1", - "doi": "10.1093/mnras/staa1522", - "bibcode": "2020MNRAS.496.1922B", - }, - {"reference": "Ref 2", "doi": "Doi2", "bibcode": "2012yCat.2311....0C"}, - {"reference": "Burn08", "doi": "Doi3", "bibcode": "2008MNRAS.391..320B"}, - ] - - source_data = [ - {"source": "apple", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 1"}, - {"source": "orange", "ra": 90.0673755, "dec": 19.352889, "reference": "Ref 2"}, - { - "source": "banana", - "ra": 360.0673755, - "dec": -18.352889, - "reference": "Burn08", - }, - ] - - with db.engine.connect() as conn: - conn.execute(db.Publications.insert().values(ref_data)) - conn.execute(db.Sources.insert().values(source_data)) - conn.commit() - - return db - - -def test_ingest_spectrum(db): +@pytest.mark.filterwarnings("ignore") +def test_ingest_spectrum_errors(temp_db): spectrum = "https://bdnyc.s3.amazonaws.com/tests/U10176.fits" with pytest.raises(AstroDBError) as error_message: - ingest_spectrum(db, source="apple", spectrum=spectrum) + ingest_spectrum(temp_db, source="apple", spectrum=spectrum) assert "Regime is required" in str(error_message.value) - result = ingest_spectrum(db, source="apple", spectrum=spectrum, raise_error=False) + result = ingest_spectrum( + temp_db, source="apple", spectrum=spectrum, raise_error=False + ) assert result["added"] is False assert result["skipped"] is True with pytest.raises(AstroDBError) as error_message: - ingest_spectrum(db, source="apple", regime="nir", spectrum=spectrum) + ingest_spectrum( + temp_db, + source="apple", + telescope="IRTF", + instrument="SpeX", + mode="Prism", + regime="nir", + spectrum=spectrum, + ) assert "Reference is required" in str(error_message.value) ingest_spectrum( - db, source="apple", regime="nir", spectrum=spectrum, raise_error=False + temp_db, source="apple", regime="nir", spectrum=spectrum, raise_error=False ) assert result["added"] is False assert result["skipped"] is True with pytest.raises(AstroDBError) as error_message: ingest_spectrum( - db, source="apple", regime="nir", spectrum=spectrum, reference="Ref 5" + temp_db, + source="apple", + regime="nir", + spectrum=spectrum, + telescope="IRTF", + instrument="SpeX", + mode="Prism", + reference="Ref 5", ) assert "not in Publications table" in str(error_message.value) ingest_spectrum( - db, + temp_db, source="apple", regime="nir", spectrum=spectrum, + telescope="IRTF", + instrument="SpeX", + mode="Prism", reference="Ref 5", raise_error=False, ) @@ -103,31 +69,51 @@ def test_ingest_spectrum(db): with pytest.raises(AstroDBError) as error_message: ingest_spectrum( - db, source="kiwi", regime="nir", spectrum=spectrum, reference="Ref 1" + temp_db, + source="kiwi", + regime="nir", + spectrum=spectrum, + reference="Ref 1", + telescope="IRTF", + instrument="SpeX", + mode="Prism", ) assert "No unique source match for kiwi in the database" in str(error_message.value) result = ingest_spectrum( - db, + temp_db, source="kiwi", regime="nir", spectrum=spectrum, reference="Ref 1", raise_error=False, + telescope="IRTF", + instrument="SpeX", + mode="Prism", ) assert result["added"] is False assert result["skipped"] is True with pytest.raises(AstroDBError) as error_message: ingest_spectrum( - db, source="apple", regime="nir", spectrum=spectrum, reference="Ref 1" + temp_db, + source="apple", + regime="nir", + spectrum=spectrum, + reference="Ref 1", + telescope="IRTF", + instrument="SpeX", + mode="Prism", ) assert "missing observation date" in str(error_message.value) result = ingest_spectrum( - db, + temp_db, source="apple", regime="nir", spectrum=spectrum, reference="Ref 1", + telescope="IRTF", + instrument="SpeX", + mode="Prism", raise_error=False, ) assert result["added"] is False @@ -159,40 +145,51 @@ def test_ingest_spectrum(db): with pytest.raises(AstroDBError) as error_message: result = ingest_spectrum( - db, + temp_db, source="orange", regime="far-uv", spectrum=spectrum, reference="Ref 1", obs_date="1/1/2024", + telescope="Keck I", + instrument="LRIS", + mode="OG570", ) - assert "Regime provided is not in schema" in str(error_message.value) + assert "not in Regimes table" in str(error_message.value) result = ingest_spectrum( - db, + temp_db, source="orange", regime="far-uv", spectrum=spectrum, reference="Ref 1", obs_date="1/1/2024", + telescope="Keck I", + instrument="LRIS", + mode="OG570", raise_error=False, ) assert result["added"] is False assert result["skipped"] is True -def test_ingest_spectrum_works(db): +@pytest.mark.filterwarnings("ignore") +def test_ingest_spectrum_works(temp_db): spectrum = "https://bdnyc.s3.amazonaws.com/tests/U10176.fits" result = ingest_spectrum( - db, + temp_db, source="banana", regime="nir", spectrum=spectrum, reference="Ref 1", obs_date="2020-01-01", + telescope="IRTF", + instrument="SpeX", + mode="Prism", ) assert result["added"] is True +@pytest.mark.filterwarnings("ignore") @pytest.mark.parametrize( "file", [ @@ -213,7 +210,10 @@ def test_spectrum_plottable_false(file): @pytest.mark.parametrize( "file", [ - "https://bdnyc.s3.amazonaws.com/SpeX/Prism/2MASS+J04510093-3402150_2012-09-27.fits", + ( + "https://bdnyc.s3.amazonaws.com/SpeX/Prism/" + "2MASS+J04510093-3402150_2012-09-27.fits" + ), "https://bdnyc.s3.amazonaws.com/IRS/2MASS+J23515044-2537367.fits", "https://bdnyc.s3.amazonaws.com/optical_spectra/vhs1256b_opt_Osiris.fits", ], diff --git a/tests/test_utils.py b/tests/test_utils.py index 1e0d74d96..f60ded102 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,12 +1,10 @@ import pytest -import os -import logging -from astrodbkit2.astrodb import create_database, Database +import sys from astropy.table import Table from astrodb_scripts.utils import ( AstroDBError, ) -from simple.schema import * +sys.path.append("./") from simple.utils.spectral_types import ( convert_spt_string_to_code, ingest_spectral_types, @@ -15,34 +13,6 @@ from simple.utils.astrometry import ingest_parallaxes, ingest_proper_motions -logger = logging.getLogger("SIMPLE") -logger.setLevel(logging.DEBUG) - - -DB_NAME = "simple_temp.sqlite" -DB_PATH = "data" - - -# Load the database for use in individual tests -@pytest.fixture(scope="module") -def db(): - # Create a fresh temporary database and assert it exists - # Because we've imported simple.schema, we will be using that schema for the database - - if os.path.exists(DB_NAME): - os.remove(DB_NAME) - connection_string = "sqlite:///" + DB_NAME - create_database(connection_string) - assert os.path.exists(DB_NAME) - - # Connect to the new database and confirm it has the Sources table - db = Database(connection_string) - assert db - assert "source" in [c.name for c in db.Sources.columns] - - return db - - # Create fake astropy Table of data to load @pytest.fixture(scope="module") def t_plx(): @@ -90,32 +60,6 @@ def t_pm(): return t_pm -def test_setup_db(db): - # Some setup tasks to ensure some data exists in the database first - ref_data = [ - { - "reference": "Ref 1", - "doi": "10.1093/mnras/staa1522", - "bibcode": "2020MNRAS.496.1922B", - }, - {"reference": "Ref 2", "doi": "Doi2", "bibcode": "2012yCat.2311....0C"}, - {"reference": "Burn08", "doi": "Doi3", "bibcode": "2008MNRAS.391..320B"}, - ] - - source_data = [ - {"source": "Fake 1", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 1"}, - {"source": "Fake 2", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 1"}, - {"source": "Fake 3", "ra": 9.0673755, "dec": 18.352889, "reference": "Ref 2"}, - ] - - with db.engine.connect() as conn: - conn.execute(db.Publications.insert().values(ref_data)) - conn.execute(db.Sources.insert().values(source_data)) - conn.commit() - - return db - - def test_convert_spt_string_to_code(): # Test conversion of spectral types into numeric values assert convert_spt_string_to_code(["M5.6"]) == [65.6] @@ -123,18 +67,22 @@ def test_convert_spt_string_to_code(): assert convert_spt_string_to_code(["Y2pec"]) == [92] -def test_ingest_parallaxes(db, t_plx): +def test_ingest_parallaxes(temp_db, t_plx): # Test ingest of parallax data ingest_parallaxes( - db, t_plx["source"], t_plx["plx"], t_plx["plx_err"], t_plx["plx_ref"] + temp_db, t_plx["source"], t_plx["plx"], t_plx["plx_err"], t_plx["plx_ref"] ) results = ( - db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == "Ref 1").table() + temp_db.query(temp_db.Parallaxes) + .filter(temp_db.Parallaxes.c.reference == "Ref 1") + .table() ) assert len(results) == 2 results = ( - db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == "Ref 2").table() + temp_db.query(temp_db.Parallaxes) + .filter(temp_db.Parallaxes.c.reference == "Ref 2") + .table() ) assert len(results) == 1 assert results["source"][0] == "Fake 3" @@ -142,9 +90,9 @@ def test_ingest_parallaxes(db, t_plx): assert results["parallax_error"][0] == 0.6 -def test_ingest_proper_motions(db, t_pm): +def test_ingest_proper_motions(temp_db, t_pm): ingest_proper_motions( - db, + temp_db, t_pm["source"], t_pm["mu_ra"], t_pm["mu_ra_err"], @@ -153,14 +101,14 @@ def test_ingest_proper_motions(db, t_pm): t_pm["reference"], ) assert ( - db.query(db.ProperMotions) - .filter(db.ProperMotions.c.reference == "Ref 1") + temp_db.query(temp_db.ProperMotions) + .filter(temp_db.ProperMotions.c.reference == "Ref 1") .count() == 2 ) results = ( - db.query(db.ProperMotions) - .filter(db.ProperMotions.c.reference == "Ref 2") + temp_db.query(temp_db.ProperMotions) + .filter(temp_db.ProperMotions.c.reference == "Ref 2") .table() ) assert len(results) == 1 @@ -169,7 +117,7 @@ def test_ingest_proper_motions(db, t_pm): assert results["mu_ra_error"][0] == 0.23 -def test_ingest_spectral_types(db): +def test_ingest_spectral_types(temp_db): data1 = Table( [ { @@ -224,17 +172,21 @@ def test_ingest_spectral_types(db): ] ) ingest_spectral_types( - db, data1["source"], data1["spectral_type"], data1["reference"], data1["regime"] + temp_db, + data1["source"], + data1["spectral_type"], + data1["reference"], + data1["regime"], ) assert ( - db.query(db.SpectralTypes) - .filter(db.SpectralTypes.c.reference == "Ref 1") + temp_db.query(temp_db.SpectralTypes) + .filter(temp_db.SpectralTypes.c.reference == "Ref 1") .count() == 2 ) results = ( - db.query(db.SpectralTypes) - .filter(db.SpectralTypes.c.reference == "Ref 2") + temp_db.query(temp_db.SpectralTypes) + .filter(temp_db.SpectralTypes.c.reference == "Ref 2") .table() ) assert len(results) == 1 @@ -244,7 +196,7 @@ def test_ingest_spectral_types(db): # testing for publication error with pytest.raises(AstroDBError) as error_message: ingest_spectral_types( - db, + temp_db, data3["source"], data3["spectral_type"], data3["regime"], @@ -255,33 +207,32 @@ def test_ingest_spectral_types(db): ) -# TODO: test for ingest_photometry - -# TODO: test for ingest_spectra - - -def test_companion_relationships(db): +def test_companion_relationships(temp_db): # testing companion ingest # trying no companion with pytest.raises(AstroDBError) as error_message: - ingest_companion_relationships(db, "Fake 1", None, "Sibling") + ingest_companion_relationships(temp_db, "Fake 1", None, "Sibling") assert "Make sure all require parameters are provided." in str(error_message.value) # trying companion == source with pytest.raises(AstroDBError) as error_message: - ingest_companion_relationships(db, "Fake 1", "Fake 1", "Sibling") + ingest_companion_relationships(temp_db, "Fake 1", "Fake 1", "Sibling") assert "Source cannot be the same as companion name" in str(error_message.value) # trying negative separation with pytest.raises(AstroDBError) as error_message: ingest_companion_relationships( - db, "Fake 1", "Bad Companion", "Sibling", projected_separation_arcsec=-5 + temp_db, + "Fake 1", + "Bad Companion", + "Sibling", + projected_separation_arcsec=-5, ) assert "cannot be negative" in str(error_message.value) # trying negative separation error with pytest.raises(AstroDBError) as error_message: ingest_companion_relationships( - db, "Fake 1", "Bad Companion", "Sibling", projected_separation_error=-5 + temp_db, "Fake 1", "Bad Companion", "Sibling", projected_separation_error=-5 ) assert "cannot be negative" in str(error_message.value)