Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Select Entities from Entities Table: #71

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 91 additions & 32 deletions tests/test_architect.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
from timechop.architect import Architect
from timechop import builders
from tests.utils import create_features_and_labels_schemas
from tests.utils import create_schemas
from tests.utils import create_entity_date_df
from tests.utils import convert_string_column_to_date
from tests.utils import NamedTempFile
from tests.utils import TemporaryDirectory

import testing.postgresql
import csv
import datetime
import pandas as pd
import os
from sqlalchemy import create_engine
from unittest import TestCase
from sqlalchemy import create_engine
from metta import metta_io as metta
from mock import Mock
import pytest


# make some fake features data
Expand Down Expand Up @@ -132,6 +134,7 @@
'features_schema_name': 'features',
'labels_schema_name': 'labels',
'labels_table_name': 'labels',
'entities_table_name': 'staging.entities'
}

def test_build_labels_query():
Expand All @@ -141,11 +144,12 @@ def test_build_labels_query():
# set up labeling config variables
dates = [datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 2, 1, 0, 0)]
entities = [0, 1, 3]

with testing.postgresql.Postgresql() as postgresql:
# create an engine and generate a table with fake feature data
engine = create_engine(postgresql.url())
create_features_and_labels_schemas(engine, features_tables, labels)
create_schemas(engine, features_tables, labels, entities)

# make a dataframe of labels to test against
labels_df = pd.DataFrame(
Expand All @@ -160,11 +164,17 @@ def test_build_labels_query():
]
)
labels_df['as_of_date'] = convert_string_column_to_date(labels_df['as_of_date'])

labels_df = labels_df[labels_df['entity_id'].isin(entities)]

# create an engine and generate a table with fake feature data
with testing.postgresql.Postgresql() as postgresql:
engine = create_engine(postgresql.url())
create_features_and_labels_schemas(engine, features_tables, labels)
create_schemas(
engine=engine,
features_tables=features_tables,
labels=labels,
entities=entities
)
with TemporaryDirectory() as temp_dir:
architect = Architect(
beginning_of_time = datetime.datetime(2010, 1, 1, 0, 0),
Expand Down Expand Up @@ -208,10 +218,16 @@ def test_write_to_csv():
""" Test the write_to_csv function by checking whether the csv contains the
correct number of lines.
"""
entities = [0, 2, 3]
with testing.postgresql.Postgresql() as postgresql:
# create an engine and generate a table with fake feature data
engine = create_engine(postgresql.url())
create_features_and_labels_schemas(engine, features_tables, labels)
create_schemas(
engine=engine,
features_tables=features_tables,
labels=labels,
entities=entities
)

with TemporaryDirectory() as temp_dir:
architect = Architect(
Expand Down Expand Up @@ -239,7 +255,6 @@ def test_write_to_csv():
reader = csv.reader(f)
assert(len([row for row in reader]) == len(table) + 1)


def test_make_entity_date_table():
""" Test that the make_entity_date_table function contains the correct
values.
Expand All @@ -248,11 +263,14 @@ def test_make_entity_date_table():
datetime.datetime(2016, 2, 1, 0, 0),
datetime.datetime(2016, 3, 1, 0, 0)]

entities = [0, 1, 2]

# make a dataframe of entity ids and dates to test against
ids_dates = create_entity_date_df(
dates,
labels,
dates,
entities,
'booking',
'binary',
'1 month'
Expand All @@ -261,7 +279,12 @@ def test_make_entity_date_table():
with testing.postgresql.Postgresql() as postgresql:
# create an engine and generate a table with fake feature data
engine = create_engine(postgresql.url())
create_features_and_labels_schemas(engine, features_tables, labels)
create_schemas(
engine=engine,
features_tables=features_tables,
labels=labels,
entities=entities
)

with TemporaryDirectory() as temp_dir:
architect = Architect(
Expand All @@ -281,15 +304,14 @@ def test_make_entity_date_table():
as_of_times=dates,
label_type='binary',
label_name='booking',
feature_table_names=['features0', 'features1'],
matrix_uuid='my_uuid',
matrix_type='train',
label_window='1 month'
)

# read in the table
result = pd.read_sql(
"select * from features.{} order by entity_id, as_of_date".format(entity_date_table_name),
"select * from {} order by entity_id, as_of_date".format(entity_date_table_name),
engine
)
labels_df = pd.read_sql('select * from labels.labels', engine)
Expand All @@ -305,17 +327,28 @@ def test_make_entity_date_table():
print(test)
assert(test.all().all())

# test that the table disappears after session closes
engine.dispose()
engine2 = create_engine(postgresql.url())
try:
engine2.execute('select * from {}'.format(entity_date_table_name))
except:
programmingerror = True
assert(programmingerror)

def test_build_outer_join_query():
"""
"""
dates = [datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 2, 1, 0, 0)]

entities = [1, 2, 3]
# make dataframe for entity ids and dates
ids_dates = create_entity_date_df(
dates,
labels,
dates,
entities,
'booking',
'binary',
'1 month'
Expand All @@ -342,8 +375,12 @@ def test_build_outer_join_query():
# create an engine and generate a table with fake feature data
with testing.postgresql.Postgresql() as postgresql:
engine = create_engine(postgresql.url())
create_features_and_labels_schemas(engine, features_tables, labels)

create_schemas(
engine=engine,
features_tables=features_tables,
labels=labels,
entities=entities
)
with TemporaryDirectory() as temp_dir:
architect = Architect(
beginning_of_time = datetime.datetime(2010, 1, 1, 0, 0),
Expand All @@ -360,7 +397,6 @@ def test_build_outer_join_query():
as_of_times=dates,
label_type='binary',
label_name='booking',
feature_table_names=['features0', 'features1'],
matrix_type='train',
matrix_uuid='my_uuid',
label_window='1 month'
Expand All @@ -373,7 +409,7 @@ def test_build_outer_join_query():
query = architect.builder.build_outer_join_query(
as_of_times = dates,
right_table_name = 'features.{}'.format(table_name),
entity_date_table_name = 'features.{}'.format(entity_date_table_name),
entity_date_table_name = entity_date_table_name,
right_column_selections = architect.builder._format_imputations(
features[table_number]
)
Expand Down Expand Up @@ -596,14 +632,21 @@ def test_generate_plans():

class TestBuildMatrix(object):
def test_train_matrix(self):
dates = [
datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 2, 1, 0, 0),
datetime.datetime(2016, 3, 1, 0, 0)
]
entities = [0, 1, 2]
with testing.postgresql.Postgresql() as postgresql:
# create an engine and generate a table with fake feature data
engine = create_engine(postgresql.url())
create_features_and_labels_schemas(engine, features_tables, labels)

dates = [datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 2, 1, 0, 0),
datetime.datetime(2016, 3, 1, 0, 0)]
create_schemas(
engine=engine,
features_tables=features_tables,
labels=labels,
entities=entities
)

with TemporaryDirectory() as temp_dir:
architect = Architect(
Expand Down Expand Up @@ -644,17 +687,25 @@ def test_train_matrix(self):
)
with open(matrix_filename, 'r') as f:
reader = csv.reader(f)
assert(len([row for row in reader]) == 12)
assert(len([row for row in reader]) == 9)

def test_test_matrix(self):
dates = [
datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 2, 1, 0, 0),
datetime.datetime(2016, 3, 1, 0, 0)
]
entities = [0, 1, 3]

with testing.postgresql.Postgresql() as postgresql:
# create an engine and generate a table with fake feature data
engine = create_engine(postgresql.url())
create_features_and_labels_schemas(engine, features_tables, labels)

dates = [datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 2, 1, 0, 0),
datetime.datetime(2016, 3, 1, 0, 0)]
create_schemas(
engine=engine,
features_tables=features_tables,
labels=labels,
entities=entities
)

with TemporaryDirectory() as temp_dir:
architect = Architect(
Expand Down Expand Up @@ -702,17 +753,25 @@ def test_test_matrix(self):

with open(matrix_filename, 'r') as f:
reader = csv.reader(f)
assert(len([row for row in reader]) == 13)
assert(len([row for row in reader]) == 10)

def test_replace(self):
dates = [
datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 2, 1, 0, 0),
datetime.datetime(2016, 3, 1, 0, 0)
]
entities = [0, 2, 3]

with testing.postgresql.Postgresql() as postgresql:
# create an engine and generate a table with fake feature data
engine = create_engine(postgresql.url())
create_features_and_labels_schemas(engine, features_tables, labels)

dates = [datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 2, 1, 0, 0),
datetime.datetime(2016, 3, 1, 0, 0)]
create_schemas(
engine=engine,
features_tables=features_tables,
labels=labels,
entities=entities
)

with TemporaryDirectory() as temp_dir:
architect = Architect(
Expand Down Expand Up @@ -761,7 +820,7 @@ def test_replace(self):

with open(matrix_filename, 'r') as f:
reader = csv.reader(f)
assert(len([row for row in reader]) == 13)
assert(len([row for row in reader]) == 10)

# rerun
architect.builder.make_entity_date_table = Mock()
Expand Down
16 changes: 15 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def convert_string_column_to_date(column):
[datetime.datetime.strptime(date, '%Y-%m-%d').date() for date in column]
)

def create_features_and_labels_schemas(engine, features_tables, labels):
def create_schemas(engine, features_tables, labels, entities):
""" This function makes a features schema and populates it with the fake
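data passed in by the caller. It also fills labels.labels and creates a
staging.entities table from the given entity ids.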

Expand Down Expand Up @@ -41,6 +41,18 @@ def create_features_and_labels_schemas(engine, features_tables, labels):
'insert into labels.labels values (%s, %s, %s, %s, %s, %s)',
row
)
# create entities table
engine.execute('drop table if exists staging cascade; create schema staging;')
engine.execute(
"""
create table staging.entities (
entity_id int
)
"""
)
for entity in entities:
engine.execute('insert into staging.entities values (%s)', entity)


def create_features_table(table_number, table, engine):
engine.execute(
Expand All @@ -62,6 +74,7 @@ def create_entity_date_df(
dates,
labels,
as_of_dates,
entities,
label_name,
label_type,
label_window
Expand Down Expand Up @@ -89,6 +102,7 @@ def create_entity_date_df(
'%Y-%m-%d'
).date() for date in ids_dates['as_of_date']]
ids_dates = ids_dates[ids_dates['as_of_date'].isin(dates)]
ids_dates = ids_dates[ids_dates['entity_id'].isin(entities)]
print(ids_dates)
print(dates)

Expand Down
Loading