From c38d87f7bc1d5ad02f903abf9470e9a095cd96c7 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Mon, 2 Oct 2023 09:32:02 -0700 Subject: [PATCH 1/2] add 2020 cartographic tracts --- geosnap/_data.py | 44 +++++++++++++++++++++++++++------ geosnap/tests/test_datastore.py | 4 +++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/geosnap/_data.py b/geosnap/_data.py index 2f4863cd..7ae93feb 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -54,8 +54,7 @@ def __delitem__(self, key): class DataStore: """Storage for geosnap data. Currently supports data from several U.S. federal agencies and national research centers.""" - def __init__(self, data_dir="auto"): - self + def __init__(self, data_dir="auto", disclaimer=False): appname = "geosnap" appauthor = "geosnap" @@ -63,11 +62,12 @@ def __init__(self, data_dir="auto"): self.data_dir = user_data_dir(appname, appauthor) else: self.data_dir = data_dir - warn( - "The geosnap data storage class is provided for convenience only. The geosnap developers make no promises " - "regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. " - "The end-user is responsible for any and all analyses or applications created with the package." - ) + if disclaimer: + warn( + "The geosnap data storage class is provided for convenience only. The geosnap developers make no promises " + "regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. " + "The end-user is responsible for any and all analyses or applications created with the package." + ) def __dir__(self): @@ -75,6 +75,7 @@ def __dir__(self): "acs", "blocks_2000", "blocks_2010", + "blocks_2020", "codebook", "counties", "ejscreen", @@ -89,6 +90,7 @@ def __dir__(self): "tracts_1990", "tracts_2000", "tracts_2010", + "tracts_2020" ] return atts @@ -474,6 +476,34 @@ def tracts_2010( t["year"] = 2010 return t + def tracts_2020( + self, + states=None, + ): + """Nationwide Census Tracts as drawn in 2010 (cartographic 500k). + + Parameters + ---------- + states : list-like + list of state fips to subset the national dataframe + + Returns + ------- + pandas.DataFrame or geopandas.GeoDataFrame + 2010 tracts as a geodataframe or as a dataframe with geometry + stored as well-known binary on the 'wkb' column. + + """ + msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance" + local = pathlib.Path(self.data_dir, "tracts_2020_500k.parquet") + remote = "s3://spatial-ucr/census/tracts_cartographic/tracts_2020_500k.parquet" + t = _fetcher(local, remote, msg) + + if states: + t = t[t.geoid.str[:2].isin(states)] + t["year"] = 2020 + return t + def msas(self): """Metropolitan Statistical Areas as drawn in 2020. diff --git a/geosnap/tests/test_datastore.py b/geosnap/tests/test_datastore.py index d9883f1d..eb0ebc35 100644 --- a/geosnap/tests/test_datastore.py +++ b/geosnap/tests/test_datastore.py @@ -26,6 +26,10 @@ def test_tracts10(): df = datasets.tracts_2010(states=["11"]) assert df.shape == (179, 194) +def test_tracts20(): + df = datasets.tracts_2020(states=["11"]) + assert df.shape == (206, 15) + def test_counties(): assert datasets.counties().shape == (3233, 2) From 0c559d1dcb047087f89802717d1ed48b177d9a25 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Mon, 2 Oct 2023 10:02:59 -0700 Subject: [PATCH 2/2] tracts20 to docs --- .github/workflows/unittests.yml | 2 +- docs/api.rst | 2 +- geosnap/_data.py | 35 +++++++++++---------------------- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 521a1386..e3e24c2a 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -46,7 +46,7 @@ jobs: - name: Test geosnap run: | - pytest -v --color yes --cov geosnap --cov-append --cov-report term-missing --cov-report xml . + pytest -v --color yes --cov geosnap/tests --cov-append --cov-report term-missing --cov-report xml . - uses: codecov/codecov-action@v3 diff --git a/docs/api.rst b/docs/api.rst index 0c753c8a..62bbb8e3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -41,7 +41,7 @@ available quickly with no configuration by accessing methods on the class. DataStore.tracts_1990 DataStore.tracts_2000 DataStore.tracts_2010 - + DataStore.tracts_2020 Storing data ''''''''''''''' diff --git a/geosnap/_data.py b/geosnap/_data.py index 7ae93feb..f2d3c627 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -433,9 +433,8 @@ def tracts_2000(self, states=None): Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - 2000 tracts as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + 2000 tracts as a geodataframe """ local = pathlib.Path(self.data_dir, "tracts_2000_500k.parquet") @@ -461,9 +460,8 @@ def tracts_2010( Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - 2010 tracts as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + 2010 tracts as a geodataframe """ msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance" @@ -480,7 +478,7 @@ def tracts_2020( self, states=None, ): - """Nationwide Census Tracts as drawn in 2010 (cartographic 500k). + """Nationwide Census Tracts as drawn in 2020 (cartographic 500k). Parameters ---------- @@ -489,9 +487,8 @@ def tracts_2020( Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - 2010 tracts as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + 2020 tracts as a geodataframe """ msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance" @@ -513,9 +510,8 @@ def msas(self): Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - 2010 MSAs as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + 2010 MSAs as a geodataframe """ local = pathlib.Path(self.data_dir, "msas.parquet") @@ -530,9 +526,8 @@ def states(self): Returns ------- - pandas.DataFrame or geopandas.GeoDataFrame - US States as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + geopandas.GeoDataFrame + US States as a geodataframe """ local = pathlib.Path(self.data_dir, "states.parquet") @@ -545,16 +540,10 @@ def states(self): def counties(self): """Nationwide counties as drawn in 2010. - Parameters - ---------- - convert : bool - if True, return geodataframe, else return dataframe (the default is True). - Returns ------- geopandas.GeoDataFrame - 2010 counties as a geodataframe or as a dataframe with geometry - stored as well-known binary on the 'wkb' column. + 2010 counties as a geodataframe. """ local = pathlib.Path(self.data_dir, "counties.parquet")