diff --git a/docs/api.rst b/docs/api.rst index 1f35bbad..aff85d2e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -32,6 +32,7 @@ available quickly with no configuration by accessing methods on the class. DataStore.codebook DataStore.counties DataStore.ejscreen + DataStore.lodes_codebook DataStore.ltdb DataStore.msa_definitions DataStore.msas diff --git a/geosnap/_data.py b/geosnap/_data.py index 4a8e74dd..20921b01 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -79,6 +79,7 @@ def __dir__(self): "codebook", "counties", "ejscreen", + "lodes_codebook", "ltdb", "msa_definitions", "msas", @@ -107,6 +108,18 @@ def show_data_dir(self, verbose=True): print(self.data_dir) return self.data_dir + def lodes_codebook(self): + """Table that describes the variables returned with LODES/LEHD data + + Returns + ------- + pandas.DataFrame + description of variables returned with LODES/LEHD data. + """ + return pd.read_csv( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "io/lodes.csv") + ) + def bea_regions(self): """Table that maps states to their respective BEA regions @@ -119,7 +132,7 @@ def bea_regions(self): os.path.join( os.path.dirname(os.path.abspath(__file__)), "io/bea_regions.csv" ), - converters={'stfips':str} + converters={"stfips": str}, ) def acs(self, year=2018, level="tract", states=None): @@ -208,7 +221,7 @@ def seda( "gcs", "cs", ], "`standardize` argument must be either 'cs' for cohort-standardized or 'gcs' for grade-cohort-standardized" - if pooling=='poolsub': + if pooling == "poolsub": fn = f"seda_{level}_{pooling}_{standardize}_4.1_corrected" else: fn = f"seda_{level}_{pooling}_{standardize}_4.1" diff --git a/geosnap/tests/test_datastore.py b/geosnap/tests/test_datastore.py index ab751fd5..fb22031a 100644 --- a/geosnap/tests/test_datastore.py +++ b/geosnap/tests/test_datastore.py @@ -1,61 +1,81 @@ from geosnap import DataStore -datasets=DataStore() + +datasets = DataStore() def test_data_dir(): loc = datasets.show_data_dir() assert len(loc) > 5 + def test_acs(): df = datasets.acs(year=2012, 
states=["11"]) assert df.shape == (179, 104) + def test_tracts90(): df = datasets.tracts_1990(states=["11"]) assert df.shape == (192, 164) + def test_tracts00(): df = datasets.tracts_2000(states=["11"]) assert df.shape == (188, 192) + def test_tracts10(): df = datasets.tracts_2010(states=["11"]) assert df.shape == (179, 194) + def test_tracts20(): df = datasets.tracts_2020(states=["11"]) assert df.shape == (206, 15) + def test_counties(): assert datasets.counties().shape == (3233, 2) + def test_states(): assert datasets.states().shape == (51, 3) + def test_msas(): df = datasets.msas() assert df.shape == (939, 4) + def test_msa_defs(): df = datasets.msa_definitions() assert df.shape == (1916, 13) + def test_codebook(): df = datasets.codebook() assert df.shape == (194, 12) + def test_bea(): df = datasets.bea_regions() assert df.shape == (51, 4) + def test_blocks_2000(): - df = datasets.blocks_2000(states=['11']) + df = datasets.blocks_2000(states=["11"]) assert df.shape == (5674, 3) + def test_blocks_2010(): - df = datasets.blocks_2010(states=['11']) + df = datasets.blocks_2010(states=["11"]) assert df.shape == (6507, 5) + def test_blocks_2020(): - df = datasets.blocks_2020(states=['11']) + df = datasets.blocks_2020(states=["11"]) assert df.shape == (6012, 7) + + +def test_lodes_codebook(): + df = datasets.lodes_codebook() + assert df.shape == (42, 4)