diff --git a/.ci/310.yml b/.ci/310.yml
index acb866c9..da8352ca 100644
--- a/.ci/310.yml
+++ b/.ci/310.yml
@@ -14,8 +14,6 @@ dependencies:
   - seaborn
   - pyarrow >=0.14.1
   - platformdirs
-  - dash
-  - dash-bootstrap-components
   - palettable
   - pip
   - tqdm
@@ -26,7 +24,6 @@ dependencies:
   - fsspec
   - s3fs
   - segregation >=2.0
-  - versioneer
   - pyproj >=3
   - pandana
   - pooch
@@ -39,7 +36,6 @@ dependencies:
   - coverage
   - coveralls
   - python-wget
-  - proplot
   - contextily
   - scikit-plot
   - python-graphviz
diff --git a/.ci/38.yml b/.ci/311.yml
similarity index 87%
rename from .ci/38.yml
rename to .ci/311.yml
index 600b123c..8f429479 100644
--- a/.ci/38.yml
+++ b/.ci/311.yml
@@ -2,7 +2,7 @@ name: test
 channels:
   - conda-forge
 dependencies:
-  - python =3.8
+  - python =3.11
   - pandas
   - giddy >=2.2.1
   - libpysal
@@ -14,8 +14,6 @@ dependencies:
   - seaborn
   - pyarrow >=0.14.1
   - platformdirs
-  - dash
-  - dash-bootstrap-components
   - palettable
   - pip
   - tqdm
@@ -26,7 +24,6 @@ dependencies:
   - fsspec
   - s3fs
   - segregation >=2.0
-  - versioneer
   - pyproj >=3
   - pandana
   - pooch
@@ -39,7 +36,6 @@ dependencies:
   - coverage
   - coveralls
   - python-wget
-  - proplot
   - contextily
   - scikit-plot
   - python-graphviz
diff --git a/.ci/39.yml b/.ci/39.yml
index 70edb23b..f7c445d2 100644
--- a/.ci/39.yml
+++ b/.ci/39.yml
@@ -14,8 +14,6 @@ dependencies:
   - hdbscan
   - pyarrow >=0.14.1
   - platformdirs
-  - dash
-  - dash-bootstrap-components
   - palettable
   - pip
   - tqdm
@@ -26,7 +24,6 @@ dependencies:
   - fsspec
   - s3fs
   - segregation >=2.0
-  - versioneer
   - pyproj >=3
   - pandana
   - pooch
@@ -39,7 +36,6 @@ dependencies:
   - coverage
   - coveralls
   - python-wget
-  - proplot
   - contextily
   - scikit-plot
   - python-graphviz
diff --git a/.github/release.yml b/.github/release.yml
new file mode 100644
index 00000000..f5435449
--- /dev/null
+++ b/.github/release.yml
@@ -0,0 +1,16 @@
+changelog:
+  exclude:
+    labels:
+      - ignore-for-release
+    authors:
+      - dependabot
+  categories:
+    - title: Bug Fixes
+      labels:
+        - bug
+    - title: Enhancements
+      labels:
+        - enhancement
+    - title: Other Changes
+      labels:
+        - "*"
diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
index 5dea646e..d9c7e113 100644
--- a/.github/workflows/build_docs.yml
+++ b/.github/workflows/build_docs.yml
@@ -8,33 +8,42 @@ docs:
     name: CI (${{ matrix.os }}-${{ matrix.environment-file }})
     runs-on: ${{ matrix.os }}
+    continue-on-error: ${{ matrix.experimental }}
+    timeout-minutes: 20
     strategy:
       matrix:
         os: ['ubuntu-latest']
         environment-file: [.ci/39.yml]
-
+        experimental: [false]
+    defaults:
+      run:
+        shell: bash -l {0}
     steps:
-      - name: checkout repo
-        uses: actions/checkout@v2
-
-      - name: setup micromamba
-        uses: mamba-org/provision-with-micromamba@main
+      - uses: actions/checkout@v2
+      - uses: actions/cache@v2
+        env:
+          CACHE_NUMBER: 0
         with:
-          environment-file: ${{ matrix.environment-file }}
-          micromamba-version: 'latest'
-          mamba-version: "*"
-          channels: conda-forge
-          channel-priority: true
-
-      - name: install geosnap - bash
-        shell: bash -l {0}
-        run: pip install -e . --no-deps --force-reinstall
-        if: matrix.os != 'windows-latest'
-
-      - name: build docs
-        shell: bash -l {0}
-        run: cd docs; make html
-
+          path: ~/conda_pkgs_dir
+          key: ${{ matrix.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles(matrix.environment-file) }}
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          miniconda-version: 'latest'
+          channels: conda-forge
+          channel-priority: true
+          auto-update-conda: true
+          auto-activate-base: false
+          environment-file: ${{ matrix.environment-file }}
+          activate-environment: test
+          use-only-tar-bz2: true
+      - run: conda info --all
+      - run: conda list
+      - run: conda config --show-sources
+      - run: conda config --show
+      - run: pip install -e . --no-deps --force-reinstall
+      - run: |
+          cd docs
+          make html
       - name: Commit documentation changes
         run: |
           git clone https://github.com/ammaraskar/sphinx-action-test.git --branch gh-pages --single-branch gh-pages
@@ -46,7 +55,6 @@
           git commit -m "Update documentation" -a || true
           # The above command will fail if no changes were present, so we ignore
           # the return code.
-
      - name: Push changes
        uses: ad-m/github-push-action@master
        with:
diff --git a/.github/workflows/upload_package.yml b/.github/workflows/upload_package.yml
index 2d1146a8..b67a74aa 100644
--- a/.github/workflows/upload_package.yml
+++ b/.github/workflows/upload_package.yml
@@ -1,64 +1,50 @@
-
-name: Release Package
+name: Release & Publish

 on:
   push:
     # Sequence of patterns matched against refs/tags
     tags:
-      - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10
+      - "v*" # Push events to matching v*, i.e. v1.0, v20.15.10
+  workflow_dispatch:
+    inputs:
+      version:
+        description: Manual Release
+        default: test
+        required: false
+
 jobs:
   build:
+    name: Create release & publish to PyPI
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v1
-      - name: Set up Python
-        uses: actions/setup-python@v1
-        with:
-          python-version: '3.x'
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install setuptools wheel twine jupyter urllib3 pandas pyyaml
-          python setup.py develop --no-deps
-          python setup.py sdist bdist_wheel
-      - name: Publish distribution 📦 to PyPI
-        uses: pypa/gh-action-pypi-publish@master
-        with:
-          password: ${{ secrets.pypi_password }}
-      - name: Run Changelog
-        run: |
-          jupyter nbconvert --to notebook --execute --inplace --ExecutePreprocessor.timeout=-1 --ExecutePreprocessor.kernel_name=python3 tools/gitcount.ipynb
-      - name: Cat Changelog
-        uses: pCYSl5EDgo/cat@master
-        id: changetxt
-        with:
-          path: ./tools/changelog.md
-        env:
-          TEXT: ${{ steps.changetxt.outputs.text }}
-      - name: Create Release
-        id: create_release
-        uses: actions/create-release@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
-        with:
-          tag_name: ${{ github.ref }}
-          release_name: Release ${{ github.ref }}
-          body: ${{ steps.changetxt.outputs.text }}
-          draft: false
-          prerelease: false
-      - name: Get Asset name
-        run: |
-          export PKG=$(ls dist/)
-          set -- $PKG
-          echo "name=$1" >> $GITHUB_ENV
-      - name: Upload Release Asset
-        id: upload-release-asset
-        uses: actions/upload-release-asset@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ steps.create_release.outputs.upload_url }} # This pulls from the CREATE RELEASE step above, referencing it's ID to get its outputs object, which include a `upload_url`. See this blog post for more info: https://jasonet.co/posts/new-features-of-github-actions/#passing-data-to-future-steps
-          asset_path: dist/${{ env.name }}
-          asset_name: ${{ env.name }}
-          asset_content_type: application/zip
+      - name: Checkout repo
+        uses: actions/checkout@v3
+
+      - name: Set up python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.x"
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade build twine
+          python -m build
+          twine check --strict dist/*
+
+      - name: Create Release Notes
+        uses: actions/github-script@v6
+        with:
+          github-token: ${{secrets.GITHUB_TOKEN}}
+          script: |
+            await github.request(`POST /repos/${{ github.repository }}/releases`, {
+              tag_name: "${{ github.ref }}",
+              generate_release_notes: true
+            });
+
+      - name: Publish distribution 📦 to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_PASSWORD }}
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index c2d42439..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,5 +0,0 @@
-include LICENSE.txt MANIFEST.in requirements_docs.txt requirements_tests.txt requirements.txt environment.yml README.md
-include geosnap/io/*.csv
-include versioneer.py
-include geosnap/_version.py
-include setup.cfg
diff --git a/README.md b/README.md
index 8cf10b85..7e09674f 100644
--- a/README.md
+++ b/README.md
@@ -68,10 +68,10 @@ The package supports social scientists examining questions such as:
 ## Installation

 The recommended method for installing geosnap is with
-[anaconda](https://www.anaconda.com/download/).
+[anaconda](https://www.anaconda.com/download/). In particular, we recommend the [mambaforge distribution](https://github.com/conda-forge/miniforge#mambaforge)

 ```bash
-conda install -c conda-forge geosnap
+mamba install -c conda-forge geosnap
 ```

 `geosnap` is also available on PyPI and can be installed with pip via
@@ -100,7 +100,7 @@ following commands:
 ```bash
 conda env create -f environment.yml
 conda activate geosnap
-python setup.py develop
+pip install -e . --no-deps
 ```

 This will download the appropriate dependencies and install geosnap in its own conda environment.
diff --git a/docs/conf.py b/docs/conf.py
index 28f3c2b8..cf5fdf92 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -38,9 +38,25 @@
     "sphinx.ext.intersphinx",
     "numpydoc",
     "matplotlib.sphinxext.plot_directive",
+    "myst_parser",
 ]
-
+myst_enable_extensions = [
+    "amsmath",
+    "attrs_inline",
+    "colon_fence",
+    "deflist",
+    "dollarmath",
+    "fieldlist",
+    "html_admonition",
+    "html_image",
+    "linkify",
+    "replacements",
+    "smartquotes",
+    "strikethrough",
+    "substitution",
+    "tasklist",
+]

 # sphinx_gallery_conf = {
 #     # path to your examples scripts
diff --git a/environment.yml b/environment.yml
index 215f0d93..62b68643 100644
--- a/environment.yml
+++ b/environment.yml
@@ -25,7 +25,6 @@ dependencies:
   - spopt >=0.3.0
   - s3fs
   - segregation >=2.0
-  - versioneer
   - pyproj >=3
   - pandana
   - pooch
diff --git a/geosnap/_data.py b/geosnap/_data.py
index d6c59ed1..99c7c1d8 100644
--- a/geosnap/_data.py
+++ b/geosnap/_data.py
@@ -26,11 +26,11 @@ def __init__(self, *args, **kwargs):
         super(_Map, self).__init__(*args, **kwargs)
         for arg in args:
             if isinstance(arg, dict):
-                for k, v in arg.iteritems():
+                for k, v in arg.items():
                     self[k] = v

         if kwargs:
-            for k, v in kwargs.iteritems():
+            for k, v in kwargs.items():
                 self[k] = v

     def __getattr__(self, attr):
@@ -184,7 +184,8 @@ def seda(
         assert pooling in [
             "pool",
             "long",
-        ], "`pool` argument must be either 'pool' or 'long'"
+            "poolsub",
+        ], "`pooling` argument must be either 'pool', 'long', or 'poolsub'"
         assert standardize in [
             "gcs",
             "cs",
diff --git a/geosnap/analyze/__init__.py b/geosnap/analyze/__init__.py
index d906f57b..bd6d3bbf 100644
--- a/geosnap/analyze/__init__.py
+++ b/geosnap/analyze/__init__.py
@@ -1,5 +1,5 @@
-from .geodemo import cluster, regionalize, ModelResults, find_k, find_region_k
-from .dynamics import sequence, transition, predict_markov_labels
-from .incs import linc, lincs_from_gdf
 from . import segdyn
-from .network import compute_travel_cost_adjlist, isochrone, isochrones
\ No newline at end of file
+from .dynamics import predict_markov_labels, sequence, transition
+from .geodemo import ModelResults, cluster, find_k, find_region_k, regionalize
+from .incs import linc, lincs_from_gdf
+from .network import compute_travel_cost_adjlist, isochrone, isochrones
diff --git a/geosnap/analyze/_cluster_wrappers.py b/geosnap/analyze/_cluster_wrappers.py
index 80b2d25c..0e4c7c1b 100644
--- a/geosnap/analyze/_cluster_wrappers.py
+++ b/geosnap/analyze/_cluster_wrappers.py
@@ -16,11 +16,11 @@ def _import_tryer(package, func, name):
     try:
         return exec(f"from {package} import {func}", globals(), globals())
-    except ImportError:
+    except ImportError as e:
         raise ImportError(
             f"You must have the {name} package installed to use this clusterer "
             "but it could not be imported."
-        )
+        ) from e


 # Sklearn a-spatial models
diff --git a/geosnap/analyze/_model_results.py b/geosnap/analyze/_model_results.py
index b7d8d861..357f5ab3 100644
--- a/geosnap/analyze/_model_results.py
+++ b/geosnap/analyze/_model_results.py
@@ -476,7 +476,7 @@ def plot_next_best_label(
         ncols=None,
         save_fig=None,
         alpha=0.5,
-        cmap="set1",
+        cmap="Set1",
         title="Next-Best Label",
         dpi=500,
         plot_kwargs=None,
diff --git a/geosnap/analyze/_region_wrappers.py b/geosnap/analyze/_region_wrappers.py
index 6951c7c0..de7ce41b 100644
--- a/geosnap/analyze/_region_wrappers.py
+++ b/geosnap/analyze/_region_wrappers.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 from spopt.region import (
     AZP,
     MaxPHeuristic,
@@ -150,6 +149,7 @@ def skater(
         w=w,
         attrs_name=columns,
         floor=floor,
+        islands=islands,
         spanning_forest_kwds=cluster_args,
     )
     model.solve()
diff --git a/geosnap/analyze/dynamics.py b/geosnap/analyze/dynamics.py
index 8fa32bc3..e690c76b 100644
--- a/geosnap/analyze/dynamics.py
+++ b/geosnap/analyze/dynamics.py
@@ -564,7 +564,7 @@ def _draw_labels_from_probs(classes, probs, seed):


 def _conditional_probs_from_smk(labels, lags, smk, fill_null_probs=True):
-    """Given a set of existing labels and associated lags, return a vetor of
+    """Given a set of existing labels and associated lags, return a vector of
     transition probabilities from a giddy.Spatial_Markov model

     Parameters
diff --git a/geosnap/analyze/geodemo.py b/geosnap/analyze/geodemo.py
index 566d78f6..04c18e13 100644
--- a/geosnap/analyze/geodemo.py
+++ b/geosnap/analyze/geodemo.py
@@ -129,7 +129,7 @@ def cluster(
     if scaler == "std":
         scaler = StandardScaler()

-    if method not in specification.keys():
+    if method not in specification:
         raise ValueError(
             "`method` must be one of ['kmeans', 'ward', 'affinity_propagation', 'spectral', 'gaussian_mixture', 'hdbscan']"
         )
@@ -144,7 +144,7 @@ def cluster(

     if not columns:
         raise ValueError("You must provide a subset of columns as input")
-
+    gdf = gdf.copy()

     times = gdf[temporal_index].unique()
@@ -358,7 +358,7 @@ def regionalize(
         "kmeans_spatial": kmeans_spatial,
     }

-    if method not in specification.keys():
+    if method not in specification:
         raise ValueError(f"`method` must be one of {specification.keys()}")
     if model_colname is None:
         if method in gdf.columns.tolist():
@@ -389,10 +389,7 @@ def regionalize(

     contiguity_weights = {"queen": Queen, "rook": Rook}

-    if spatial_weights in contiguity_weights.keys():
-        W = contiguity_weights[spatial_weights]
-    else:
-        W = spatial_weights
+    W = contiguity_weights.get(spatial_weights, spatial_weights)

     models = _Map()
     clusters = []
@@ -549,17 +546,18 @@
         output[i] = results
     output = pd.DataFrame(output).T
     summary = output.agg(
-            {
-                "silhouette_score": "idxmax",
-                "calinski_harabasz_score": "idxmax",
-                "davies_bouldin_score": "idxmin",  # min score is better here
-            }
-        ).to_frame(name="best_k")
+        {
+            "silhouette_score": "idxmax",
+            "calinski_harabasz_score": "idxmax",
+            "davies_bouldin_score": "idxmin",  # min score is better here
+        }
+    ).to_frame(name="best_k")
     if return_table:
         return summary, output

     return summary

+
 def find_region_k(
     gdf,
     method=None,
@@ -631,7 +629,7 @@
     )
     times = list()

-    for time_period in results.keys():
+    for time_period in results:

         res = pd.Series(
             {
@@ -658,14 +656,14 @@

     output = pd.concat(output)
     summary = output.groupby("time_period").agg(
-            {
-                "silhouette_score": "idxmax",
-                "calinski_harabasz_score": "idxmax",
-                "path_silhouette": "idxmax",
-                "boundary_silhouette": "idxmax",
-                "davies_bouldin_score": "idxmin",  # min score is better here
-            }
-        )
+        {
+            "silhouette_score": "idxmax",
+            "calinski_harabasz_score": "idxmax",
+            "path_silhouette": "idxmax",
+            "boundary_silhouette": "idxmax",
+            "davies_bouldin_score": "idxmin",  # min score is better here
+        }
+    )
     if return_table:
         return summary, output
-    return summary
\ No newline at end of file
+    return summary
diff --git a/geosnap/analyze/incs.py b/geosnap/analyze/incs.py
index 89e2839e..0db9fdbc 100644
--- a/geosnap/analyze/incs.py
+++ b/geosnap/analyze/incs.py
@@ -3,8 +3,9 @@

 """
 from collections import defaultdict
-import numpy as np
+
 import geopandas as gpd
+import numpy as np


 def _labels_to_neighborhoods(labels):
diff --git a/geosnap/analyze/network.py b/geosnap/analyze/network.py
index 5d00437f..41fc09c9 100644
--- a/geosnap/analyze/network.py
+++ b/geosnap/analyze/network.py
@@ -1,7 +1,7 @@
 import geopandas as gpd
 import numpy as np
 import pandas as pd
-from libpysal.cg import alpha_shape_auto
+from libpysal.cg import alpha_shape, alpha_shape_auto
 from tqdm.auto import tqdm


diff --git a/geosnap/io/__init__.py b/geosnap/io/__init__.py
index 0b5c6b58..98c49169 100644
--- a/geosnap/io/__init__.py
+++ b/geosnap/io/__init__.py
@@ -8,11 +8,16 @@
     store_blocks_2000,
     store_blocks_2010,
     store_census,
+    store_ejscreen,
     store_ltdb,
     store_ncdb,
-    store_ejscreen,
     store_nces,
-    store_seda
-
+    store_seda,
+)
+from .util import (
+    adjust_inflation,
+    convert_census_gdb,
+    get_census_gdb,
+    get_lehd,
+    process_acs,
 )
-from .util import adjust_inflation, convert_census_gdb, get_census_gdb, get_lehd, process_acs
diff --git a/geosnap/io/storage.py b/geosnap/io/storage.py
index 4d67a8fd..003528a1 100644
--- a/geosnap/io/storage.py
+++ b/geosnap/io/storage.py
@@ -80,8 +80,8 @@ def store_seda(data_dir="auto", accept_eula=False):
             )
             t.sedasch = t.sedasch.str.rjust(12, "0")
             t.fips = t.fips.str.rjust(2, "0")
-        except FileNotFoundError:
-            raise FileNotFoundError("Unable to access remote SEDA data")
+        except FileNotFoundError as e:
+            raise FileNotFoundError("Unable to access remote SEDA data") from e

         t.to_parquet(pathlib.Path(pth, f"{fn}.parquet"))

@@ -95,8 +95,8 @@ def store_seda(data_dir="auto", accept_eula=False):
             )
             t.sedalea = t.sedalea.str.rjust(7, "0")
             t.fips = t.fips.str.rjust(2, "0")
-        except FileNotFoundError:
-            raise FileNotFoundError("Unable to access remote SEDA data")
+        except FileNotFoundError as e:
+            raise FileNotFoundError("Unable to access remote SEDA data") from e

         t.to_parquet(pathlib.Path(pth, f"{fn}.parquet"))

@@ -204,10 +204,7 @@ def store_nces(years="all", dataset="all", data_dir="auto"):

     """
-    if dataset == "all":
-        datasets = ["sabs", "districts", "schools"]
-    else:
-        datasets = [dataset]
+    datasets = ["sabs", "districts", "schools"] if dataset == "all" else [dataset]

     pth = pathlib.Path(_make_data_dir(data_dir), "nces")
     pathlib.Path(pth).mkdir(parents=True, exist_ok=True)
@@ -242,8 +239,8 @@ def store_acs(years="all", level="tract", data_dir="auto"):

     Parameters
     ----------
     years : list (optional)
-        subset of years to collect. Currently 2012-2018 vintages
-        are available. Pass 'all' (default) to fetch every available vintage.
+        subset of years to collect. Default is 'all' to fetch every available vintage.
+        Currently 2012-2021 vintages are available.
     level : str (optional)
         geography level to fetch. Options: {'tract', 'bg'} for tract or blockgroup
diff --git a/geosnap/io/util.py b/geosnap/io/util.py
index 9b7ea88e..c778bd10 100644
--- a/geosnap/io/util.py
+++ b/geosnap/io/util.py
@@ -9,9 +9,7 @@
 from tqdm.auto import tqdm


-def get_census_gdb(
-    years=None, geom_level="blockgroup", output_dir=".", protocol="http"
-):
+def get_census_gdb(years=None, geom_level="blockgroup", output_dir=".", protocol="ftp"):
     """Fetch geodatabase of ACS demographic profile from the Census bureau server.

     NOTE: Recommended to use `convert_census_gdb` to read/convert files directly from
@@ -26,6 +24,8 @@ def get_census_gdb(
         geographic unit to download (tract or blockgroup), by default "blockgroup"
     output_dir : str, optional
         output directory to write files, by default "."
+    protocol : str, optional
+        whether to download over 'ftp' or 'https'; ftp is generally more reliable

     Returns
     -------
@@ -45,15 +45,13 @@ def get_census_gdb(
         fn = f"{year}_ACS_5YR_{geom_level.capitalize()}.gdb.zip"
         out_fn = f"ACS_{year}_5YR_{levels[geom_level].upper()}.gdb.zip"
         pth = pathlib.PurePath(output_dir, out_fn)
-        ftp_url = f"ftp://ftp2.census.gov/geo/tiger/TIGER_DP/{year}ACS/{fn}"
-        www_url = f"https://www2.census.gov/geo/tiger/TIGER_DP/{year}ACS/{fn}"
-        if protocol == "http":
-            url = www_url
-        elif protocol == "ftp":
-            url = ftp_url
-        else:
+        urls = {
+            "ftp": f"ftp://ftp2.census.gov/geo/tiger/TIGER_DP/{year}ACS/{fn}",
+            "https": f"https://www2.census.gov/geo/tiger/TIGER_DP/{year}ACS/{fn}",
+        }
+        if protocol not in urls:
             raise ValueError("`protocol` must be either 'https' or 'ftp'")
-        pooch.retrieve(url, None, progressbar=True, fname=fn, path=pth)
+        pooch.retrieve(urls[protocol], None, progressbar=True, fname=fn, path=pth)


 def reformat_acs_vars(col):
@@ -86,6 +84,7 @@ def convert_census_gdb(
     overwrite=False,
     combine=True,
     output_dir=".",
+    npartitions=16,
 ):
     """Convert a geodatabase from Census into parquet files with standardized columns.

@@ -130,6 +129,8 @@ def convert_census_gdb(
         raise Exception(
             "This function requires the `pyogrio` package\n" "`conda install pyogrio`"
         ) from e
+    import dask_geopandas as dgpd
+
     if gdb_path is None:
         warn("No `gdb_path` given. Data will be pulled from the Census server")
         gdb_path = f"https://www2.census.gov/geo/tiger/TIGER_DP/{year}ACS/ACS_{year}_5YR_{level.upper()}.gdb.zip"
@@ -162,10 +163,12 @@ def convert_census_gdb(
                 )  # remove prefix for bgs
                 tables.append(df)
         else:
-            df = ogr.read_dataframe(gdb_path, layer=i).set_index("GEOID")
-            if "ACS_" in i:  # only the geoms have the ACS prefix
-                df = gpd.GeoDataFrame(df)
-            else:
+            df = (
+                dgpd.read_file(gdb_path, layer=i, npartitions=npartitions)
+                .compute()
+                .set_index("GEOID")
+            )
+            if "ACS_" not in i:  # only the geoms have the ACS prefix
                 df = df[df.columns[df.columns.str.contains("e")]]
                 df.columns = pd.Series(df.columns).apply(reformat_acs_vars)
             df = df.dropna(axis=1, how="all")
@@ -300,7 +303,7 @@ def process_acs(df):
     """Calculate variables from the geosnap codebook to match the LTDB variable set.

     This function expects a massive input dataframe generated by downloading all
-    necessaryvaribales from the geosnap codebook. The best way to get all these
+    necessary variables from the geosnap codebook. The best way to get all these
     variables is to use the `geosnap.io.process_census_gdb` function. Note that calling
     this function on the full dataset requires *a lot* of memory.
diff --git a/geosnap/tests/test_plots.py b/geosnap/tests/test_plots.py
index ecdfccd5..84690ce8 100644
--- a/geosnap/tests/test_plots.py
+++ b/geosnap/tests/test_plots.py
@@ -2,9 +2,9 @@
 import shutil

 import numpy
-import proplot
+#import proplot
 import pytest
-
+import matplotlib
 from geosnap import DataStore
 from geosnap.analyze import cluster, regionalize, transition
 from geosnap.io import get_census
@@ -39,28 +39,28 @@ def test_cont_timeseries_pooled():
     p = plot_timeseries(dc_df, column='median_household_income', temporal_index='year',
                         time_subset=[2010], dpi=50)
-    assert isinstance(p, proplot.gridspec.SubplotGrid)
+    assert isinstance(p[0], matplotlib.axes.SubplotBase)

 def test_cont_timeseries_unpooled():
     p = plot_timeseries(dc_df, column='median_household_income', temporal_index='year',
                         time_subset=[2010], dpi=50, pooled=False)
-    assert isinstance(p, proplot.gridspec.SubplotGrid)
+    assert isinstance(p[0], matplotlib.axes.SubplotBase)

 def test_cont_timeseries_unpooled_layout():
     p = plot_timeseries(dc_df, column='median_household_income', temporal_index='year',
                         time_subset=[2000,2010], dpi=50, pooled=False)
-    assert isinstance(p, proplot.gridspec.SubplotGrid)
+    assert isinstance(p[0], matplotlib.axes.SubplotBase)

 def test_cat_timeseries():
     p = plot_timeseries(dc_df, column='ward', categorical=True, temporal_index='year',
                         time_subset=[2010], dpi=50)
-    assert isinstance(p, proplot.gridspec.SubplotGrid)
+    assert isinstance(p[0], matplotlib.axes.SubplotBase)

 def test_heatmaps():
     t = transition(dc_df, cluster_col='ward')
     p = plot_transition_matrix(dc_df, cluster_col='ward', figsize=(5,5), transition_model=t)
-    assert isinstance(p, numpy.ndarray)
+    assert isinstance(p[0], matplotlib.axes.SubplotBase)

 def test_heatmaps_no_model():
     p = plot_transition_matrix(dc_df, cluster_col='ward', figsize=(5,5))
-    assert isinstance(p, numpy.ndarray)
+    assert isinstance(p[0], matplotlib.axes.SubplotBase)


 @pytest.mark.skipif(NOGRAPHVIZ, reason="pygraphviz couldn't be imported.")
@@ -83,20 +83,20 @@ def test_violins():

 def test_boundary_silplot():
     p = region_mod[1990].plot_boundary_silhouette(dpi=50,)
-    assert isinstance(p, proplot.gridspec.SubplotGrid
+    assert isinstance(p[0], matplotlib.axes.SubplotBase
     )

 def test_path_silplot():
     p = region_mod[1990].plot_path_silhouette(dpi=50,)
-    assert isinstance(p, proplot.gridspec.SubplotGrid
+    assert isinstance(p[0], matplotlib.axes.SubplotBase
     )

 def test_next_label_plot():
     p = cluster_mod.plot_next_best_label()
-    assert isinstance(p, proplot.gridspec.SubplotGrid
+    assert isinstance(p, numpy.ndarray
     )

 def test_silmap_plot():
     p = cluster_mod.plot_silhouette_map(dpi=50,)
-    assert isinstance(p, proplot.gridspec.SubplotGrid
+    assert isinstance(p, numpy.ndarray
     )
diff --git a/geosnap/visualize/mapping.py b/geosnap/visualize/mapping.py
index 29583359..15a4e8c3 100644
--- a/geosnap/visualize/mapping.py
+++ b/geosnap/visualize/mapping.py
@@ -37,7 +37,7 @@ def gif_from_path(

     Parameters
     ----------
-    path :str, required
+    path : str, required
         path to directory of images
     figsize : tuple, optional
         output figure size passed to matplotlib.pyplot
@@ -160,9 +160,20 @@ def plot_timeseries(
     web_mercator : bool, optional
         whether to reproject the data into web mercator (epsg 3857)
     """
+    try:
+        import proplot as plot
+
+        HAS_PROPLOT = True
+        f, axs = plot.subplots(ncols=ncols, nrows=nrows, figsize=figsize, share=False)
+
+    except ImportError:
+        warn("`proplot` is not installed. Falling back to matplotlib")
+        import matplotlib.pyplot as plot
+
+        HAS_PROPLOT = False
+
     # proplot needs to be used as a function-level import,
     # as it influences all figures when imported at the top of the file
-    import proplot as plot

     if ctxmap == "default":
         ctxmap = ctx.providers.Stamen.TonerLite
@@ -182,7 +193,7 @@
     elif not cmap:
         cmap = "Blues"
     if legend_kwds == "default":
-        legend_kwds = {"ncols": 1, "loc": "b"}
+        legend_kwds = {"ncols": 1, "loc": "b"} if HAS_PROPLOT else None
     if missing_kwds == "default":
         missing_kwds = {
             "color": "lightgrey",
@@ -202,7 +213,12 @@
         sqcols = int(np.ceil(np.sqrt(len(time_subset))))
         ncols = sqcols
         nrows = sqcols
-    f, axs = plot.subplots(ncols=ncols, nrows=nrows, figsize=figsize, share=False)
+
+    if HAS_PROPLOT is True:
+        f, axs = plot.subplots(ncols=ncols, nrows=nrows, figsize=figsize, share=False)
+    else:
+        f, axs = plot.subplots(ncols=ncols, nrows=nrows, figsize=figsize)
+        axs = [axs] if not hasattr(axs, "shape") else axs.flatten()
     for i, time in enumerate(sorted(time_subset)):  # sort to prevent graphing out of order
@@ -222,7 +238,7 @@
             df.query(f"{temporal_index}=={time}").plot(
                 column=column,
                 ax=axs[i],
-                scheme="user_defined",
+                scheme="userdefined",
                 classification_kwds={"bins": classifier.bins},
                 k=k,
                 cmap=cmap,
@@ -245,11 +261,11 @@
             ctx.add_basemap(axs[i], source=ctxmap, crs=df.crs.to_string())
         axs[i].set_title(time)
         axs[i].axis("off")
-
-    if not title:  # only use title when passed
-        axs.format(suptitle=column)
-    else:
-        axs.format(suptitle=title)
+    if HAS_PROPLOT:
+        if not title:  # only use title when passed
+            axs.format(suptitle=column)
+        else:
+            axs.format(suptitle=title)

     if save_fig:
         f.savefig(save_fig, dpi=dpi, bbox_inches="tight")
@@ -284,50 +300,52 @@

     Parameters
     ----------
-    column : str
-        column to be graphed in a time series
-    filename : str, required
-        output file name
-    title : str, optional
-        desired title of figure
-    temporal_index : str, required
-        column on the gdf that stores time periods
-    time_periods: list, optional
-        subset of time periods to include in the animation. If None, then all times will be used
-    scheme : string, optional
-        matplotlib scheme to be used
-        default is 'quantiles'
-    k : int, optional
-        number of bins to graph. k may be ignored
-        or unnecessary for some schemes, like headtailbreaks, maxp, and maximum_breaks
-        Default is 5.
-    legend : bool, optional
-        whether to display a legend on the plot
-    categorical : bool, optional
-        whether the data should be plotted as categorical as opposed to continuous
-    alpha: : float, optional
-        transparency parameter passed to matplotlib
-    dpi : int, optional
-        dpi of the saved image if save_fig=True
-        default is 500
-    figsize : tuple, optional
-        the desired size of the matplotlib figure
-    ctxmap : contextily map provider, optional
-        contextily basemap. Set to False for no basemap.
-    figsize : tuple, optional
-        output figure size passed to matplotlib.pyplot
-    fps : float, optional
-        frames per second, used to speed up or slow down animation
-    interval : int, optional
-        interval between frames in miliseconds, default 500
-    repeat_delay : int, optional
-        time before animation repeats in miliseconds, default 1000
+    column : str
+        column to be graphed in a time series
+    filename : str, required
+        output file name
+    title : str, optional
+        desired title of figure
+    temporal_index : str, required
+        column on the gdf that stores time periods
+    time_periods: list, optional
+        subset of time periods to include in the animation. If None, then all
+        times will be used
+    scheme : string, optional
+        matplotlib scheme to be used
+        default is 'quantiles'
+    k : int, optional
+        number of bins to graph. k may be ignored
+        or unnecessary for some schemes, like headtailbreaks, maxp, and maximum_breaks
+        Default is 5.
+    legend : bool, optional
+        whether to display a legend on the plot
+    categorical : bool, optional
+        whether the data should be plotted as categorical as opposed to continuous
+    alpha : float, optional
+        transparency parameter passed to matplotlib
+    dpi : int, optional
+        dpi of the saved image if save_fig=True
+        default is 500
+    figsize : tuple, optional
+        the desired size of the matplotlib figure
+    ctxmap : contextily map provider, optional
+        contextily basemap. Set to False for no basemap.
+    figsize : tuple, optional
+        output figure size passed to matplotlib.pyplot
+    fps : float, optional
+        frames per second, used to speed up or slow down animation
+    interval : int, optional
+        interval between frames in milliseconds, default 500
+    repeat_delay : int, optional
+        time before animation repeats in milliseconds, default 1000
     plot_kwargs: dict, optional
         additional keyword arguments passed to geopandas.DataFrame.plot
     color_col: str, optional
         A column on the geodataframe holding hex codes used to color each
         observation. I.e. to create a categorical color-mapping manually
     """
+    classification_kwds = {}
     if plot_kwargs is None:
         plot_kwargs = dict()
@@ -338,7 +356,7 @@
         raise ValueError("When passing a color column, use `categorical=False`")

     if color_col is not None and cmap is not None:
-        raise ValueError('Only `color_col` or `cmap` can be used, but not both')
+        raise ValueError("Only `color_col` or `cmap` can be used, but not both")

     gdf = gdf.copy()
     if not gdf.crs.equals(3857):
@@ -373,9 +391,15 @@
             scheme = None
             k = None
         else:
-            classifier = schemes[scheme](gdf[column].dropna().values, k=k)
+            if scheme == "userdefined":
+                classifier = schemes[scheme](
+                    gdf[column].dropna().values,
+                    bins=classification_kwds["bins"],
+                )
+            else:
+                classifier = schemes[scheme](gdf[column].dropna().values, k=k)
             classification_kwds = {"bins": classifier.bins}
-            scheme = "user_defined"
+            scheme = "userdefined"
             temp.plot(
                 column,
                 scheme=scheme,
diff --git a/pyproject.toml b/pyproject.toml
index d99864d1..edc9d32f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,9 +14,9 @@ authors = [
 maintainers = [{ name = "geosnap contributors" }]
 license = { text = "BSD 3-Clause" }
 description = "The Geospatial Neighborhood Analysis Package"
-keywords = ["geodemographics, neighborhood dynamics"]
+keywords = ["geodemographics", "neighborhood dynamics", "urban data science", "spatial analysis"]
 readme = { text = """\
-geosnap provides a suite of tools for exploring, modeling, and visualizing the social context and spatial extent of neighborhoods and regions over time"
+A suite of tools for exploring, modeling, and visualizing the social context and spatial extent of neighborhoods and regions over time
 """, content-type = "text/x-rst" }

 classifiers = [
@@ -38,7 +38,6 @@ dependencies = [
     "mapclassify",
     "giddy>=2.2.1",
     "xlrd",
-    "cenpy",
     "platformdirs",
     "tqdm",
     "quilt3>=3.6",
@@ -50,7 +49,7 @@ dependencies = [
     "fsspec",
     "s3fs",
     "segregation>=2.1",
-    "proplot>=0.9",
+    #"proplot>=0.9",
     "pyproj>=3",
     "pandana",
     "pooch"
 ]

 [project.urls]
@@ -61,7 +60,7 @@ Home = "https://github.com/oturns/geosnap/"
 Repository = "https://github.com/oturns/geosnap"

 [project.optional-dependencies]
-dev = ["pre-commit", "pyogrio"]
+dev = ["pre-commit"]
 docs = [
     "nbsphinx",
     "numpydoc",
@@ -69,6 +68,8 @@
     "sphinx",
     "sphinxcontrib-bibtex",
     "sphinx_bootstrap_theme",
+    "mkdocs-jupyter",
+    "myst-parser"
 ]
 tests = [
     "codecov",