From c38d87f7bc1d5ad02f903abf9470e9a095cd96c7 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Mon, 2 Oct 2023 09:32:02 -0700 Subject: [PATCH] add 2020 cartographic tracts --- geosnap/_data.py | 44 +++++++++++++++++++++++++++------ geosnap/tests/test_datastore.py | 4 +++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/geosnap/_data.py b/geosnap/_data.py index 2f4863cd..7ae93feb 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -54,8 +54,7 @@ def __delitem__(self, key): class DataStore: """Storage for geosnap data. Currently supports data from several U.S. federal agencies and national research centers.""" - def __init__(self, data_dir="auto"): - self + def __init__(self, data_dir="auto", disclaimer=False): appname = "geosnap" appauthor = "geosnap" @@ -63,11 +62,12 @@ def __init__(self, data_dir="auto"): self.data_dir = user_data_dir(appname, appauthor) else: self.data_dir = data_dir - warn( - "The geosnap data storage class is provided for convenience only. The geosnap developers make no promises " - "regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. " - "The end-user is responsible for any and all analyses or applications created with the package." - ) + if disclaimer: + warn( + "The geosnap data storage class is provided for convenience only. The geosnap developers make no promises " + "regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. " + "The end-user is responsible for any and all analyses or applications created with the package." 
+ ) def __dir__(self): @@ -75,6 +75,7 @@ def __dir__(self): "acs", "blocks_2000", "blocks_2010", + "blocks_2020", "codebook", "counties", "ejscreen", @@ -89,6 +90,7 @@ def __dir__(self): "tracts_1990", "tracts_2000", "tracts_2010", + "tracts_2020" ] return atts @@ -474,6 +476,34 @@ def tracts_2010( t["year"] = 2010 return t + def tracts_2020( + self, + states=None, + ): + """Nationwide Census Tracts as drawn in 2020 (cartographic 500k). + + Parameters + ---------- + states : list-like + list of state fips to subset the national dataframe + + Returns + ------- + pandas.DataFrame or geopandas.GeoDataFrame + 2020 tracts as a geodataframe or as a dataframe with geometry + stored as well-known binary on the 'wkb' column. + + """ + msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance" + local = pathlib.Path(self.data_dir, "tracts_2020_500k.parquet") + remote = "s3://spatial-ucr/census/tracts_cartographic/tracts_2020_500k.parquet" + t = _fetcher(local, remote, msg) + + if states: + t = t[t.geoid.str[:2].isin(states)] + t["year"] = 2020 + return t + def msas(self): """Metropolitan Statistical Areas as drawn in 2020. diff --git a/geosnap/tests/test_datastore.py b/geosnap/tests/test_datastore.py index d9883f1d..eb0ebc35 100644 --- a/geosnap/tests/test_datastore.py +++ b/geosnap/tests/test_datastore.py @@ -26,6 +26,10 @@ def test_tracts10(): df = datasets.tracts_2010(states=["11"]) assert df.shape == (179, 194) +def test_tracts20(): + df = datasets.tracts_2020(states=["11"]) + assert df.shape == (206, 15) + def test_counties(): assert datasets.counties().shape == (3233, 2)