From 6a0c6548d35d45089b64cba77a4d8a751e3b5040 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Thu, 18 Jan 2024 08:23:49 -0800 Subject: [PATCH 1/5] fix bea table --- geosnap/io/bea_regions.csv | 163 ++++++++++++------------------------- 1 file changed, 52 insertions(+), 111 deletions(-) diff --git a/geosnap/io/bea_regions.csv b/geosnap/io/bea_regions.csv index 68379433..31ac5c0d 100644 --- a/geosnap/io/bea_regions.csv +++ b/geosnap/io/bea_regions.csv @@ -1,111 +1,52 @@ -,geoid,metro,state,bea_region -0,10420,Akron,OH,Great Lakes -1,10580,Albany-Schenectady-Troy,NY,Mideast -2,10740,Albuquerque,NM,Southwest -3,10900,Allentown-Bethlehem-Easton,PA-NJ,Mideast -4,12060,Atlanta-Sandy Springs-Alpharetta,GA,Southeast -5,12260,Augusta-Richmond County,GA-SC,Southeast -6,12420,Austin-Round Rock-Georgetown,TX,Southwest -7,12540,Bakersfield,CA,Far West -8,12580,Baltimore-Columbia-Towson,MD,Mideast -9,12940,Baton Rouge,LA,Southeast -10,13820,Birmingham-Hoover,AL,Southeast -11,14260,Boise City,ID,Rocky Mountain -12,14460,Boston-Cambridge-Newton,MA-NH,New England -13,14860,Bridgeport-Stamford-Norwalk,CT,New England -14,15380,Buffalo-Cheektowaga,NY,Mideast -15,15980,Cape Coral-Fort Myers,FL,Southeast -16,16700,Charleston-North Charleston,SC,Southeast -17,16740,Charlotte-Concord-Gastonia,NC-SC,Southeast -18,16860,Chattanooga,TN-GA,Southeast -19,16980,Chicago-Naperville-Elgin,IL-IN-WI,Great Lakes -20,17140,Cincinnati,OH-KY-IN,Great Lakes -21,17460,Cleveland-Elyria,OH,Great Lakes -22,17820,Colorado Springs,CO,Rocky Mountain -23,17900,Columbia,SC,Southeast -24,18140,Columbus,OH,Great Lakes -25,19100,Dallas-Fort Worth-Arlington,TX,Southwest -26,19430,Dayton-Kettering,OH,Great Lakes -27,19660,Deltona-Daytona Beach-Ormond Beach,FL,Southeast -28,19740,Denver-Aurora-Lakewood,CO,Rocky Mountain -29,19780,Des Moines-West Des Moines,IA,Plains -30,19820,Detroit-Warren-Dearborn,MI,Great Lakes -31,20500,Durham-Chapel Hill,NC,Southeast -32,21340,El Paso,TX,Southwest -33,22180,Fayetteville,NC,Southeast -34,22220,Fayetteville-Springdale-Rogers,AR,Southeast -35,23420,Fresno,CA,Far West -36,24340,Grand Rapids-Kentwood,MI,Great Lakes -37,24660,Greensboro-High Point,NC,Southeast -38,24860,Greenville-Anderson,SC,Southeast -39,25420,Harrisburg-Carlisle,PA,Mideast -40,25540,Hartford-East Hartford-Middletown,CT,New England -41,26420,Houston-The Woodlands-Sugar Land,TX,Southwest -42,26900,Indianapolis-Carmel-Anderson,IN,Great Lakes -43,27140,Jackson,MS,Southeast -44,27260,Jacksonville,FL,Southeast -45,28140,Kansas City,MO-KS,Plains -46,28940,Knoxville,TN,Southeast -47,29460,Lakeland-Winter Haven,FL,Southeast -48,29540,Lancaster,PA,Mideast -49,29620,Lansing-East Lansing,MI,Great Lakes -50,29820,Las Vegas-Henderson-Paradise,NV,Far West -51,30460,Lexington-Fayette,KY,Southeast -52,30780,Little Rock-North Little Rock-Conway,AR,Southeast -53,31080,Los Angeles-Long Beach-Anaheim,CA,Far West -54,31140,Louisville/Jefferson County,KY-IN,Southeast -55,31540,Madison,WI,Great Lakes -56,32580,McAllen-Edinburg-Mission,TX,Southwest -57,32820,Memphis,TN-MS-AR,Southeast -58,33100,Miami-Fort Lauderdale-Pompano Beach,FL,Southeast -59,33340,Milwaukee-Waukesha,WI,Great Lakes -60,33460,Minneapolis-St. Paul-Bloomington,MN-WI,Plains -61,33700,Modesto,CA,Far West -62,34980,Nashville-Davidson--Murfreesboro--Franklin,TN,Southeast -63,35300,New Haven-Milford,CT,New England -64,35380,New Orleans-Metairie,LA,Southeast -65,35620,New York-Newark-Jersey City,NY-NJ-PA,Mideast -66,35840,North Port-Sarasota-Bradenton,FL,Southeast -67,36260,Ogden-Clearfield,UT,Rocky Mountain -68,36420,Oklahoma City,OK,Southwest -69,36540,Omaha-Council Bluffs,NE-IA,Plains -70,36740,Orlando-Kissimmee-Sanford,FL,Southeast -71,37100,Oxnard-Thousand Oaks-Ventura,CA,Far West -72,37340,Palm Bay-Melbourne-Titusville,FL,Southeast -73,37980,Philadelphia-Camden-Wilmington,PA-NJ-DE-MD,Mideast -74,38060,Phoenix-Mesa-Chandler,AZ,Southwest -75,38300,Pittsburgh,PA,Mideast -76,38860,Portland-South Portland,ME,New England -77,38900,Portland-Vancouver-Hillsboro,OR-WA,Far West -78,39100,Poughkeepsie-Newburgh-Middletown,NY,Mideast -79,39300,Providence-Warwick,RI-MA,New England -80,39340,Provo-Orem,UT,Rocky Mountain -81,39580,Raleigh-Cary,NC,Southeast -82,40060,Richmond,VA,Southeast -83,40140,Riverside-San Bernardino-Ontario,CA,Far West -84,40380,Rochester,NY,Mideast -85,40900,Sacramento-Roseville-Folsom,CA,Far West -86,41620,Salt Lake City,UT,Rocky Mountain -87,41700,San Antonio-New Braunfels,TX,Southwest -88,41740,San Diego-Chula Vista-Carlsbad,CA,Far West -89,41860,San Francisco-Oakland-Berkeley,CA,Far West -90,41940,San Jose-Sunnyvale-Santa Clara,CA,Far West -91,42220,Santa Rosa-Petaluma,CA,Far West -92,42540,Scranton--Wilkes-Barre,PA,Mideast -93,42660,Seattle-Tacoma-Bellevue,WA,Far West -94,44060,Spokane-Spokane Valley,WA,Far West -95,44140,Springfield,MA,New England -96,41180,St. Louis,MO-IL,Plains -97,44700,Stockton,CA,Far West -98,45060,Syracuse,NY,Mideast -99,45300,Tampa-St. Petersburg-Clearwater,FL,Southeast -100,45780,Toledo,OH,Great Lakes -101,46060,Tucson,AZ,Southwest -102,46140,Tulsa,OK,Southwest -103,46520,Urban Honolulu,HI,Far West -104,47260,Virginia Beach-Norfolk-Newport News,VA-NC,Southeast -105,47900,Washington-Arlington-Alexandria,DC-VA-MD-WV,Mideast -106,48620,Wichita,KS,Plains -107,49180,Winston-Salem,NC,Southeast -108,49340,Worcester,MA-CT,New England -109,49660,Youngstown-Warren-Boardman,OH-PA,Great Lakes +geoid,name,stfips,state +91000,"New England",09000,"Connecticut" +91000,"New England",23000,"Maine" +91000,"New England",25000,"Massachusetts" +91000,"New England",33000,"New Hampshire" +91000,"New England",44000,"Rhode Island" +91000,"New England",50000,"Vermont" +92000,"Mideast",10000,"Delaware" +92000,"Mideast",11000,"District of Columbia" +92000,"Mideast",24000,"Maryland" +92000,"Mideast",34000,"New Jersey" +92000,"Mideast",36000,"New York" +92000,"Mideast",42000,"Pennsylvania" +93000,"Great Lakes",17000,"Illinois" +93000,"Great Lakes",18000,"Indiana" +93000,"Great Lakes",26000,"Michigan" +93000,"Great Lakes",39000,"Ohio" +93000,"Great Lakes",55000,"Wisconsin" +94000,"Plains",19000,"Iowa" +94000,"Plains",20000,"Kansas" +94000,"Plains",27000,"Minnesota" +94000,"Plains",29000,"Missouri" +94000,"Plains",31000,"Nebraska" +94000,"Plains",38000,"North Dakota" +94000,"Plains",46000,"South Dakota" +95000,"Southeast",01000,"Alabama" +95000,"Southeast",05000,"Arkansas" +95000,"Southeast",12000,"Florida" +95000,"Southeast",13000,"Georgia" +95000,"Southeast",21000,"Kentucky" +95000,"Southeast",22000,"Louisiana" +95000,"Southeast",28000,"Mississippi" +95000,"Southeast",37000,"North Carolina" +95000,"Southeast",45000,"South Carolina" +95000,"Southeast",47000,"Tennessee" +95000,"Southeast",51000,"Virginia" +95000,"Southeast",54000,"West Virginia" +96000,"Southwest",04000,"Arizona" +96000,"Southwest",35000,"New Mexico" +96000,"Southwest",40000,"Oklahoma" +96000,"Southwest",48000,"Texas" +97000,"Rocky Mountain",08000,"Colorado" +97000,"Rocky Mountain",16000,"Idaho" +97000,"Rocky Mountain",30000,"Montana" +97000,"Rocky Mountain",49000,"Utah" +97000,"Rocky Mountain",56000,"Wyoming" +98000,"Far West",02000,"Alaska" +98000,"Far West",06000,"California" +98000,"Far West",15000,"Hawaii" +98000,"Far West",32000,"Nevada" +98000,"Far West",41000,"Oregon" +98000,"Far West",53000,"Washington" \ No newline at end of file From 62c2c112ad5889e253c616918ae0d96ce5028ed8 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Thu, 18 Jan 2024 08:55:57 -0800 Subject: [PATCH 2/5] update bea table and msa_defs --- geosnap/_data.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/geosnap/_data.py b/geosnap/_data.py index ac499a60..c5f32da4 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -70,7 +70,6 @@ def __init__(self, data_dir="auto", disclaimer=False): ) def __dir__(self): - atts = [ "acs", "blocks_2000", @@ -90,7 +89,7 @@ def __dir__(self): "tracts_1990", "tracts_2000", "tracts_2010", - "tracts_2020" + "tracts_2020", ] return atts @@ -107,6 +106,14 @@ def show_data_dir(self, verbose=True): print(self.data_dir) return self.data_dir + def bea_regions(self): + return pd.read_csv( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), "io/bea_regions.csv" + ), + converters={'stfips':str} + ) + def acs(self, year=2018, level="tract", states=None): """American Community Survey Data (5-year estimates). @@ -187,12 +194,15 @@ def seda( assert pooling in [ "pool", "long", - "poolsub" + "poolsub", ], "`pool` argument must be either 'pool', 'long', or 'poolsub'" - assert standardize in [ - "gcs", - "cs", - ], "`standardize` argument must be either 'cs' for cohort-standardized or 'gcs' for grade-cohort-standardized" + assert ( + standardize + in [ + "gcs", + "cs", + ] + ), "`standardize` argument must be either 'cs' for cohort-standardized or 'gcs' for grade-cohort-standardized" fn = f"seda_{level}_{pooling}_{standardize}_4.1" local_path = pathlib.Path(self.data_dir, "seda", f"{fn}.parquet") remote_path = f"https://stacks.stanford.edu/file/druid:xv742vh9296/{fn}.csv" @@ -565,16 +575,11 @@ def msa_definitions(self): dataframe that stores state/county --> MSA crosswalk definitions. """ - local = pathlib.Path(self.data_dir, "msa_definitions.parquet") - remote = "s3://spatial-ucr/census/administrative/msa_definitions.parquet" - msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance" - try: - t = pd.read_parquet(local) - except FileNotFoundError: - warn(msg) - t = pd.read_parquet(remote, storage_options={"anon": True}) - - return t + return pd.read_csv( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), "io/msa_definitions.csv" + ) + ) def ltdb(self): """Longitudinal Tract Database (LTDB). @@ -622,5 +627,3 @@ def codebook(self): return pd.read_csv( os.path.join(os.path.dirname(os.path.abspath(__file__)), "io/variables.csv") ) - - From b2cdf4e6df99b61eca8ee078063c6b500b13e249 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Thu, 18 Jan 2024 13:01:30 -0800 Subject: [PATCH 3/5] test blocks and nces; add docstring for bea --- .ci/{39.yml => 312.yml} | 2 +- .github/workflows/unittests.yml | 8 ++++---- docs/api.rst | 1 + geosnap/_data.py | 8 ++++++++ geosnap/tests/test_datastore.py | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 46 insertions(+), 5 deletions(-) rename .ci/{39.yml => 312.yml} (97%) diff --git a/.ci/39.yml b/.ci/312.yml similarity index 97% rename from .ci/39.yml rename to .ci/312.yml index a3b1243e..979160f6 100644 --- a/.ci/39.yml +++ b/.ci/312.yml @@ -2,7 +2,7 @@ name: test channels: - conda-forge dependencies: - - python =3.9 + - python =3.12 - pandas - giddy >=2.2.1 - libpysal diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 3604d200..d965747c 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -18,13 +18,13 @@ jobs: matrix: os: [ubuntu-latest] environment-file: - - .ci/39.yml - - .ci/310.yml + - .ci/10.yml - .ci/311.yml + - .ci/312.yml include: - - environment-file: .ci/310.yml + - environment-file: .ci/311.yml os: macos-latest - - environment-file: .ci/310.yml + - environment-file: .ci/311.yml os: windows-latest defaults: run: diff --git a/docs/api.rst b/docs/api.rst index 62bbb8e3..3cd95fa0 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -25,6 +25,7 @@ available quickly with no configuration by accessing methods on the class. DataStore DataStore.acs + DataStore.bea_regions DataStore.blocks_2000 DataStore.blocks_2010 DataStore.blocks_2020 diff --git a/geosnap/_data.py b/geosnap/_data.py index c5f32da4..4d601c59 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -72,6 +72,7 @@ def __init__(self, data_dir="auto", disclaimer=False): def __dir__(self): atts = [ "acs", + "bea_regions", "blocks_2000", "blocks_2010", "blocks_2020", @@ -107,6 +108,13 @@ def show_data_dir(self, verbose=True): return self.data_dir def bea_regions(self): + """Table that maps states to their respective BEA regions + + Returns + ------- + pandas.DataFrame + BEA region table + """ return pd.read_csv( os.path.join( os.path.dirname(os.path.abspath(__file__)), "io/bea_regions.csv" diff --git a/geosnap/tests/test_datastore.py b/geosnap/tests/test_datastore.py index eb0ebc35..b802ed81 100644 --- a/geosnap/tests/test_datastore.py +++ b/geosnap/tests/test_datastore.py @@ -52,3 +52,35 @@ def test_msa_defs(): def test_codebook(): df = datasets.codebook() assert df.shape == (194, 12) + +def test_bea(): + df = datasets.bea_regions() + assert df.shape == (51, 4) + +def test_blocks_2000(): + df = datasets.blocks_2000(states=['11']) + assert df.shape == (5674, 3) + +def test_blocks_2010(): + df = datasets.blocks_2010(states=['11']) + assert df.shape == (6507, 5) + +def test_blocks_2020(): + df = datasets.blocks_2020(states=['11']) + assert df.shape == (6012, 7) + +def test_ejscreen(): + df = datasets.ejscreen(states=['11'], year=2019) + assert df.shape==(450, 368) + +def test_nces_schools(): + d = datasets.nces(dataset='schools', year='1516') + assert d.shape == (102209, 26) + +def test_nces_districts(): + d = datasets.nces(dataset='school_districts') + assert d.shape == (13352, 18) + +def test_nces_sabs(): + df = datasets.nces(dataset='sabs') + assert df.shape == (75128, 15) \ No newline at end of file From f6ee87efad54882abdfe97f6b3893ce7fc0f239b Mon Sep 17 00:00:00 2001 From: eli knaap Date: Thu, 18 Jan 2024 13:21:54 -0800 Subject: [PATCH 4/5] add bg test; dont double test nces --- geosnap/_data.py | 13 +++++-------- geosnap/tests/test_constructors.py | 5 ++++- geosnap/tests/test_datastore.py | 25 ------------------------- 3 files changed, 9 insertions(+), 34 deletions(-) diff --git a/geosnap/_data.py b/geosnap/_data.py index ec46d236..4a8e74dd 100644 --- a/geosnap/_data.py +++ b/geosnap/_data.py @@ -230,8 +230,8 @@ def seda( remote_path, converters={"sedasch": str, "fips": str} ) t.sedasch = t.sedasch.str.rjust(12, "0") - except FileNotFoundError: - raise FileNotFoundError( + except FileNotFoundError as e: + raise FileNotFoundError from e( "Unable to access local or remote SEDA data" ) elif level == "geodist": @@ -240,8 +240,8 @@ def seda( remote_path, converters={"sedalea": str, "fips": str} ) t.sedalea = t.sedalea.str.rjust(7, "0") - except FileNotFoundError: - raise FileNotFoundError( + except FileNotFoundError as e: + raise FileNotFoundError from e( "Unable to access local or remote SEDA data" ) t.fips = t.fips.str.rjust(2, "0") @@ -264,10 +264,7 @@ def nces(self, year=1516, dataset="sabs"): geopandas.GeoDataFrame geodataframe of NCES data """ - if dataset == "school_districts": - selector = "districts" - else: - selector = dataset + selector = "districts" if dataset == "school_districts" else dataset local_path = pathlib.Path(self.data_dir, "nces", f"{dataset}_{year}.parquet") remote_path = f"s3://spatial-ucr/nces/{selector}/{dataset}_{year}.parquet" msg = "Streaming data from S3. Use `geosnap.io.store_nces()` to store the data locally for better performance" diff --git a/geosnap/tests/test_constructors.py b/geosnap/tests/test_constructors.py index 6b6077ef..09b1c890 100644 --- a/geosnap/tests/test_constructors.py +++ b/geosnap/tests/test_constructors.py @@ -35,10 +35,13 @@ def test_nces_sabs(): assert sabs.shape == (75128, 15) -def test_acs(): +def test_acs_tract(): acs = io.get_acs(store, fips="11", years=[2018], level="tract") assert acs.shape == (179, 157) +def test_acs_blockgroup(): + acs = io.get_acs(store, fips="11", years=[2018], level="bg") + assert acs.shape == (450, 38) @pytest.mark.skipif(not LTDB, reason="unable to locate LTDB data") def test_ltdb_from_boundary(): diff --git a/geosnap/tests/test_datastore.py b/geosnap/tests/test_datastore.py index b802ed81..ab751fd5 100644 --- a/geosnap/tests/test_datastore.py +++ b/geosnap/tests/test_datastore.py @@ -6,22 +6,18 @@ def test_data_dir(): loc = datasets.show_data_dir() assert len(loc) > 5 - def test_acs(): df = datasets.acs(year=2012, states=["11"]) assert df.shape == (179, 104) - def test_tracts90(): df = datasets.tracts_1990(states=["11"]) assert df.shape == (192, 164) - def test_tracts00(): df = datasets.tracts_2000(states=["11"]) assert df.shape == (188, 192) - def test_tracts10(): df = datasets.tracts_2010(states=["11"]) assert df.shape == (179, 194) @@ -30,25 +26,20 @@ def test_tracts20(): df = datasets.tracts_2020(states=["11"]) assert df.shape == (206, 15) - def test_counties(): assert datasets.counties().shape == (3233, 2) - def test_states(): assert datasets.states().shape == (51, 3) - def test_msas(): df = datasets.msas() assert df.shape == (939, 4) - def test_msa_defs(): df = datasets.msa_definitions() assert df.shape == (1916, 13) - def test_codebook(): df = datasets.codebook() assert df.shape == (194, 12) @@ -68,19 +59,3 @@ def test_blocks_2010(): def test_blocks_2020(): df = datasets.blocks_2020(states=['11']) assert df.shape == (6012, 7) - -def test_ejscreen(): - df = datasets.ejscreen(states=['11'], year=2019) - assert df.shape==(450, 368) - -def test_nces_schools(): - d = datasets.nces(dataset='schools', year='1516') - assert d.shape == (102209, 26) - -def test_nces_districts(): - d = datasets.nces(dataset='school_districts') - assert d.shape == (13352, 18) - -def test_nces_sabs(): - df = datasets.nces(dataset='sabs') - assert df.shape == (75128, 15) \ No newline at end of file From a37672806b095c0c3f667647c708ae33d359afbf Mon Sep 17 00:00:00 2001 From: eli knaap Date: Thu, 18 Jan 2024 13:23:18 -0800 Subject: [PATCH 5/5] typo in 310 workflow --- .github/workflows/unittests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index d965747c..d8850643 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -18,7 +18,7 @@ jobs: matrix: os: [ubuntu-latest] environment-file: - - .ci/10.yml + - .ci/310.yml - .ci/311.yml - .ci/312.yml include: