Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add test for spectrum urls #404

Merged
merged 9 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions scripts/updates/fix_IRS_spectra_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from scripts.ingests.utils import load_simpledb
from astropy.table import Table


# Run-time switches for this one-off update script.
SAVE_DB = True # save the data files (db.save_database) in addition to modifying the .db file
RECREATE_DB = True # recreates the .db file from the data files (passed to load_simpledb as recreatedb)

# LOAD THE DATABASE
db = load_simpledb('SIMPLE.db', recreatedb=RECREATE_DB)


# Link to the live Google sheet, published as CSV (note output=csv in the query string).
# Judging by the column names applied when the sheet is read below, each row pairs an
# original spectrum URL with its fixed replacement.
link = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQG5cGkI2aHPHD4b6ZZPTU4jjQMirU_z-yhl5ElI3p6nCIufIL64crC-yFalF58OauWHxmYvEKR_isY/pub?gid=0&single=true&output=csv'
kelle marked this conversation as resolved.
Show resolved Hide resolved
columns = ['source', 'original_spectrum', 'fixed_spectrum']
# Read the published sheet as comma-delimited text, naming the columns explicitly.
# data_start=2 begins reading data at the third line of the sheet
# (presumably skipping header rows -- confirm against the sheet layout).
spectra_link_table = Table.read(link, format='ascii', data_start=2, names=columns, guess=False, fast_reader=False, delimiter=',')

# Apply every URL correction on one connection and commit once at the end, so the
# whole set of updates is atomic and we avoid a connect/commit round trip per row.
with db.engine.connect() as conn:
    for row in spectra_link_table:
        # Rows are matched on the original_spectrum column; only the spectrum column is rewritten.
        conn.execute(
            db.Spectra.update()
            .where(db.Spectra.c.original_spectrum == row['original_spectrum'])
            .values(spectrum=row['fixed_spectrum'])
        )
    conn.commit()

# WRITE THE JSON FILES
if SAVE_DB:
    db.save_database(directory='data/')
57 changes: 57 additions & 0 deletions tests/scheduled_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import os
import pytest
import sys
import requests

sys.path.append('.')
from simple.schema import *
from astrodbkit2.astrodb import create_database, Database
from sqlalchemy import except_, select, and_
from . import REFERENCE_TABLES
from scripts.ingests.utils import check_internet_connection

DB_NAME = 'temp.db'
DB_PATH = 'data'


# Load the database for use in individual tests
@pytest.fixture(scope="module")
def db():
    """Module-scoped fixture returning a Database backed by a freshly built DB_NAME file.

    An empty database is first created on disk and sanity-checked, then the data
    files under DB_PATH are loaded into an in-memory sqlite database, dumped to
    DB_NAME, and a new Database connected to that file is returned.
    """
    # Because simple.schema is imported by this module, create_database uses that schema.
    # Remove any stale file left behind by a previous run before creating a new one.
    if os.path.exists(DB_NAME):
        os.remove(DB_NAME)
    file_url = 'sqlite:///' + DB_NAME
    create_database(file_url)
    assert os.path.exists(DB_NAME)

    # Sanity check: the new on-disk database connects and exposes Sources.source.
    empty_db = Database(file_url, reference_tables=REFERENCE_TABLES)
    assert empty_db
    source_column_names = [column.name for column in empty_db.Sources.columns]
    assert 'source' in source_column_names

    # Populate an in-memory sqlite database first (for performance), then write it out to the file.
    memory_db = Database('sqlite://', reference_tables=REFERENCE_TABLES)
    memory_db.load_database(DB_PATH, verbose=False)
    memory_db.dump_sqlite(DB_NAME)

    # Hand back a new connection to the populated file version.
    return Database(file_url, reference_tables=REFERENCE_TABLES)


def test_spectra_urls(db):
    """Send a HEAD request to every spectrum URL in the Spectra table and count the broken ones.

    A URL counts as broken when the response status is neither 200 nor 301, or when
    the request itself fails (timeout, connection error, malformed URL). The test is
    skipped when check_internet_connection() reports no connection, because the
    expected count cannot be evaluated offline.
    """
    if not check_internet_connection():
        pytest.skip('no internet connection; cannot check spectra urls')

    # Seconds to wait on each request; without a timeout one dead host can hang the run forever.
    request_timeout = 30
    # The website is up if the status code is 200
    # cuny academic commons links give 301 status code
    ok_status_codes = (200, 301)

    spectra_urls = db.query(db.Spectra.c.spectrum).astropy()
    broken_urls = []
    codes = []
    for spectrum_url in spectra_urls['spectrum']:
        try:
            status_code = requests.head(spectrum_url, timeout=request_timeout).status_code
        except requests.exceptions.RequestException as err:
            # An unreachable URL is broken too; record the failure type in place of a status code
            # so it shows up in the assertion message instead of aborting the whole check.
            broken_urls.append(spectrum_url)
            codes.append(type(err).__name__)
            continue
        if status_code not in ok_status_codes:
            broken_urls.append(spectrum_url)
            codes.append(status_code)

    # 149 is the number of broken links currently expected; update it as links get fixed.
    assert len(broken_urls) == 149, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}'
kelle marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 3 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import os
import pytest
import sys
import requests

sys.path.append('.')
from simple.schema import *
from astrodbkit2.astrodb import create_database, Database
from sqlalchemy import except_, select, and_
from . import REFERENCE_TABLES
from scripts.ingests.utils import check_internet_connection
kelle marked this conversation as resolved.
Show resolved Hide resolved

DB_NAME = 'temp.db'
DB_PATH = 'data'
Expand Down Expand Up @@ -420,6 +422,7 @@ def test_spectral_types(db):
n_spectral_types = db.query(db.SpectralTypes).count()
assert len(m_dwarfs) + len(l_dwarfs) + len(t_dwarfs) + len(y_dwarfs) == n_spectral_types


# Individual ingest tests
# -----------------------------------------------------------------------------------------
def test_Manj19_data(db):
Expand Down