From 0fbaf5cd2b5eb9d61e3ef5c120fef1a56173387e Mon Sep 17 00:00:00 2001
From: eli knaap
Date: Sun, 1 Oct 2023 21:09:04 -0700
Subject: [PATCH 1/8] blocks 2020 and lodes 8

---
 .ci/310.yml                |   1 -
 .ci/311.yml                |   1 -
 .ci/39.yml                 |   1 -
 geosnap/__init__.py        |  11 +-
 geosnap/_data.py           |  38 +++
 geosnap/_version.py        | 623 ------------------------------------
 geosnap/io/constructors.py |  42 +--
 geosnap/io/util.py         |   9 +-
 8 files changed, 60 insertions(+), 666 deletions(-)
 delete mode 100644 geosnap/_version.py

diff --git a/.ci/310.yml b/.ci/310.yml
index 830ae11c..bb5fd5f2 100644
--- a/.ci/310.yml
+++ b/.ci/310.yml
@@ -6,7 +6,6 @@ dependencies:
   - pandas
   - giddy >=2.2.1
   - libpysal
-  - cenpy
   - geopandas >=0.9
   - hdbscan
   - matplotlib
diff --git a/.ci/311.yml b/.ci/311.yml
index 87a1112a..671a370e 100644
--- a/.ci/311.yml
+++ b/.ci/311.yml
@@ -6,7 +6,6 @@ dependencies:
   - pandas
   - giddy >=2.2.1
   - libpysal
-  - cenpy
   - geopandas >=0.9
   - hdbscan
   - matplotlib
diff --git a/.ci/39.yml b/.ci/39.yml
index cc0f3105..a3b1243e 100644
--- a/.ci/39.yml
+++ b/.ci/39.yml
@@ -6,7 +6,6 @@ dependencies:
   - pandas
   - giddy >=2.2.1
   - libpysal
-  - cenpy
   - geopandas >=0.9
   - matplotlib
   - scikit-learn
diff --git a/geosnap/__init__.py b/geosnap/__init__.py
index d9a2ab85..41aa8060 100644
--- a/geosnap/__init__.py
+++ b/geosnap/__init__.py
@@ -1,4 +1,3 @@
-
 r"""
 geosnap: Geospatial Neighborhood Analysis Package.
 
@@ -25,12 +24,8 @@
 import contextlib
 from importlib.metadata import PackageNotFoundError, version
 
-from . import analyze
-from . import io
-from . import util
-from . import visualize
-from . import harmonize
-from ._data import _Map, DataStore
+from . import analyze, harmonize, io, util, visualize
+from ._data import DataStore, _Map
 
 with contextlib.suppress(PackageNotFoundError):
-    __version__ = version("geosnap")
\ No newline at end of file
+    __version__ = version("geosnap")
diff --git a/geosnap/_data.py b/geosnap/_data.py
index 99c7c1d8..2f4863cd 100644
--- a/geosnap/_data.py
+++ b/geosnap/_data.py
@@ -358,6 +358,44 @@ def blocks_2010(self, states=None, fips=None):
 
         return blocks
 
+    def blocks_2020(self, states=None, fips=None):
+        """Census blocks for 2020.
+
+        Parameters
+        ----------
+        states : list-like
+            list of state fips codes to return as a dataframe.
+
+        Returns
+        -------
+        type
+            pandas.DataFrame or geopandas.GeoDataFrame
+            2020 blocks as a geodataframe or as a dataframe with geometry
+            stored as well-known binary on the 'wkb' column.
+
+        """
+        msg = (
+            "Unable to locate local census 2020 block data. Streaming instead.\n"
+            "If you plan to use census data repeatedly you can store it locally "
+            "with the io.store_blocks_2020 function for better performance"
+        )
+        if isinstance(states, (str, int)):
+            states = [states]
+        blks = {}
+        for state in states:
+            local = pathlib.Path(self.data_dir, "blocks_2020", f"{state}.parquet")
+            remote = f"s3://spatial-ucr/census/blocks_2020/{state}.parquet"
+            blks[state] = _fetcher(local, remote, msg)
+
+            if fips:
+                blks[state] = blks[state][blks[state]["geoid"].str.startswith(fips)]
+
+            blks[state]["year"] = 2020
+        blocks = list(blks.values())
+        blocks = gpd.GeoDataFrame(pd.concat(blocks, sort=True))
+
+        return blocks
+
     def tracts_1990(self, states=None):
         """Nationwide Census Tracts as drawn in 1990 (cartographic 500k).
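For orientation, the blocks_2020 method added above follows the same calling convention as the existing blocks_2000 and blocks_2010 methods. A minimal usage sketch, assuming the default DataStore() constructor and either locally stored block data or network access to the s3://spatial-ucr bucket:

    from geosnap import DataStore

    datasets = DataStore()

    # Fetch 2020 blocks for California ("06"), then keep only San Diego
    # County; fips subsets rows whose geoid starts with the given prefix.
    sd_blocks = datasets.blocks_2020(states=["06"], fips="06073")
    assert (sd_blocks["year"] == 2020).all()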
diff --git a/geosnap/_version.py b/geosnap/_version.py deleted file mode 100644 index 15885618..00000000 --- a/geosnap/_version.py +++ /dev/null @@ -1,623 +0,0 @@ - -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.20 (https://github.com/python-versioneer/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: # pylint: disable=too-few-public-methods - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "v" - cfg.parentdir_prefix = "geosnap-" - cfg.versionfile_source = "geosnap/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -# pylint:disable=too-many-arguments,consider-using-with # noqa -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. 
- git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post0.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post0.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} diff --git a/geosnap/io/constructors.py b/geosnap/io/constructors.py index 92dbb2da..949f5aca 100644 --- a/geosnap/io/constructors.py +++ b/geosnap/io/constructors.py @@ -175,7 +175,7 @@ def get_acs( ] if years == "all": - years = list(range(2012, 2020)) + years = list(range(2012, 2022)) elif isinstance(years, (str,)): years = [int(years)] @@ -483,6 +483,7 @@ def get_lodes( boundary=None, years=2015, dataset="wac", + version=8, ): """Extract a subset of data from Census LEHD/LODES . 
@@ -531,29 +532,16 @@ def get_lodes( msa_counties = _msa_to_county(datastore, msa_fips) states, allfips = _fips_to_states(state_fips, county_fips, msa_counties, fips) - if boundary: - if not boundary.crs.equals(4326): - boundary = boundary.copy().to_crs(4326) + if boundary and not boundary.crs.equals(4326): + boundary = boundary.copy().to_crs(4326) - if any(year < 2010 for year in years): - gdf00 = datastore.blocks_2000(states=states, fips=(tuple(allfips))) - gdf00 = gdf00.drop(columns=["year"]) - gdf00 = _fips_filter( - state_fips=state_fips, - county_fips=county_fips, - msa_fips=msa_fips, - fips=fips, - data=gdf00, - ) - if isinstance(boundary, gpd.GeoDataFrame): - if boundary.crs != gdf00.crs: - warn( - "Unable to determine whether boundary CRS is WGS84 " - "if this produces unexpected results, try reprojecting" - ) - gdf00 = gdf00[gdf00.representative_point().intersects(boundary.unary_union)] + if version == 5: + gdf = datastore.blocks_2000(states=states, fips=(tuple(allfips))) + elif version == 7: + gdf = datastore.blocks_2010(states=states, fips=(tuple(allfips))) + elif version == 8: + gdf = datastore.blocks_2020(states=states, fips=(tuple(allfips))) - gdf = datastore.blocks_2010(states=states, fips=(tuple(allfips))) gdf = gdf.drop(columns=["year"]) gdf = _fips_filter( state_fips=state_fips, @@ -586,12 +574,8 @@ def get_lodes( if name == "PR": raise Exception("does not yet include built-in data for Puerto Rico") try: - df = get_lehd(dataset=dataset, year=year, state=name) - if year < 2010: - df = gdf00.merge(df, right_index=True, left_on="geoid", how="left") - else: - df = gdf.merge(df, right_index=True, left_on="geoid", how="left") - + df = get_lehd(dataset=dataset, year=year, state=name, version=version) + df = gdf.merge(df, right_index=True, left_on="geoid", how="left") df["year"] = year merged_year.append(df) except ValueError: @@ -604,7 +588,7 @@ def get_lodes( out = pd.concat(dfs, sort=True) out = out.groupby(["geoid", "year"]).first().reset_index() out.crs = 4326 - return out.reset_index() + return out def _msa_to_county(datastore, msa_fips): diff --git a/geosnap/io/util.py b/geosnap/io/util.py index c778bd10..d41d7baa 100644 --- a/geosnap/io/util.py +++ b/geosnap/io/util.py @@ -188,7 +188,7 @@ def convert_census_gdb( ) -def get_lehd(dataset="wac", state="dc", year=2015): +def get_lehd(dataset="wac", state="dc", year=2015, version=8): """Grab data from the LODES FTP server as a pandas DataFrame. Parameters @@ -202,6 +202,9 @@ def get_lehd(dataset="wac", state="dc", year=2015): year : str which year to collect. First year avaialable for most states is 2002. Consult the LODES documentation for more details. The default is 2015. + version : int + which version of LODES to query. 
Options include 5, 7, and 8, which are keyed
+        to census 2000, 2010, and 2020 blocks, respectively.
 
     Returns
     -------
@@ -216,8 +219,8 @@
     renamer = dict(zip(lodes_vars["variable"].tolist(), lodes_vars["name"].tolist()))
 
     state = state.lower()
-    url = "https://lehd.ces.census.gov/data/lodes/LODES7/{state}/{dataset}/{state}_{dataset}_S000_JT00_{year}.csv.gz".format(
-        dataset=dataset, state=state, year=year
+    url = "https://lehd.ces.census.gov/data/lodes/LODES{version}/{state}/{dataset}/{state}_{dataset}_S000_JT00_{year}.csv.gz".format(
+        dataset=dataset, state=state, year=year, version=version
     )
     try:
         df = pd.read_csv(url, converters={"w_geocode": str, "h_geocode": str})

From 8c3d7bc5e1c183a654c735b3e16a0e2b5326e3c8 Mon Sep 17 00:00:00 2001
From: eli knaap
Date: Sun, 1 Oct 2023 21:27:52 -0700
Subject: [PATCH 2/8] store 2020 blocks;

---
 .github/release.yml         |  3 +++
 docs/api.rst                |  6 ++++--
 docs/data.rst               |  3 +++
 geosnap/analyze/__init__.py |  2 +-
 geosnap/io/storage.py       | 15 +++++++++++++++
 5 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/.github/release.yml b/.github/release.yml
index f5435449..f235a4ba 100644
--- a/.github/release.yml
+++ b/.github/release.yml
@@ -11,6 +11,9 @@ changelog:
     - title: Enhancements
      labels:
         - enhancement
+    - title: Maintenance
+      labels:
+        - maintenance
     - title: Other Changes
       labels:
         - "*"
diff --git a/docs/api.rst b/docs/api.rst
index c585628a..0c753c8a 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -27,6 +27,7 @@ available quickly with no configuration by accessing methods on the class.
    DataStore.acs
    DataStore.blocks_2000
    DataStore.blocks_2010
+   DataStore.blocks_2020
    DataStore.codebook
    DataStore.counties
    DataStore.ejscreen
@@ -59,6 +60,7 @@ files instead of streaming over the web.
    io.store_census
    io.store_blocks_2000
    io.store_blocks_2010
+   io.store_blocks_2020
    io.store_ejscreen
    io.store_ltdb
    io.store_ncdb
@@ -108,9 +110,9 @@ Model neighborhood change using optimal-matching algorithms or spatial discrete
 .. autosummary::
    :toctree: generated/
 
+   analyze.draw_sequence_from_gdf
    analyze.linc
    analyze.lincs_from_gdf
-   analyze.predict_markov_labels
    analyze.sequence
    analyze.transition
 
@@ -141,7 +143,7 @@ Compute shortest path distance along a network using pandana, and visualize trav
 
    analyze.compute_travel_cost_adjlist
    analyze.isochrone
-   analyze.isocrones
+   analyze.isochrones
 
 The ModelResults Class
 '''''''''''''''''''''''''''''''''''''''''''''
diff --git a/docs/data.rst b/docs/data.rst
index 14b3c87f..57ae9a4a 100644
--- a/docs/data.rst
+++ b/docs/data.rst
@@ -41,6 +41,9 @@ Geo Data
 | Blocks  | https://www2.census.gov/geo/tiger/TIGER2018/TABBLOCK/      |
 | 2010    |                                                            |
 +---------+------------------------------------------------------------+
+| Blocks  | https://www2.census.gov/geo/tiger/TIGER2021/TABBLOCK/      |
+| 2020    |                                                            |
++---------+------------------------------------------------------------+
 | Tracts  | https://github.co                                          |
 | 1990    | m/loganpowell/census-geojson/tree/master/GeoJSON/500k/1990 |
 +---------+------------------------------------------------------------+
diff --git a/geosnap/analyze/__init__.py b/geosnap/analyze/__init__.py
index bd6d3bbf..dd305aa0 100644
--- a/geosnap/analyze/__init__.py
+++ b/geosnap/analyze/__init__.py
@@ -1,5 +1,5 @@
 from . import segdyn
-from .dynamics import predict_markov_labels, sequence, transition
+from .dynamics import predict_markov_labels, sequence, transition, draw_sequence_from_gdf
 from .geodemo import ModelResults, cluster, find_k, find_region_k, regionalize
 from .incs import linc, lincs_from_gdf
 from .network import compute_travel_cost_adjlist, isochrone, isochrones
diff --git a/geosnap/io/storage.py b/geosnap/io/storage.py
index 003528a1..cfcb228f 100644
--- a/geosnap/io/storage.py
+++ b/geosnap/io/storage.py
@@ -152,6 +152,21 @@ def store_blocks_2010(data_dir="auto"):
     quilt3.Package.install("census/blocks_2010", "s3://spatial-ucr", dest=pth)
 
 
+def store_blocks_2020(data_dir="auto"):
+    """Save census 2020 block data to the local quilt package storage.
+
+    Returns
+    -------
+    None
+        Data will be available in the geosnap.data.datasets and will be used
+        in place of streaming data for all census queries.
+
+    """
+    pth = pathlib.Path(_make_data_dir(data_dir), "blocks_2020")
+    pathlib.Path(pth).mkdir(parents=True, exist_ok=True)
+    quilt3.Package.install("census/blocks_2020", "s3://spatial-ucr", dest=pth)
+
+
 def store_ejscreen(years="all", data_dir="auto"):
     """Save EPA EJScreen data to the local geosnap storage.
     Each year is about 1GB.

From c8b8abaa714032ca1250d8807f374df135e75b89 Mon Sep 17 00:00:00 2001
From: eli knaap
Date: Sun, 1 Oct 2023 21:34:38 -0700
Subject: [PATCH 3/8] update test workflow

---
 .github/workflows/unittests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 21380d4d..70c73cb2 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -8,7 +8,7 @@
       branches:
         - '*'
     schedule:
-      - cron: '59 23 * * *'
+      - cron: "0 0 * * 1,4"
 
   jobs:
     unittests:

From dbe563e8409cda9dbdc40935acdbc56ee9be20b9 Mon Sep 17 00:00:00 2001
From: eli knaap
Date: Sun, 1 Oct 2023 21:36:18 -0700
Subject: [PATCH 4/8] update test workflow

---
 .github/workflows/unittests.yml | 150 +++++++++++++------------------
 1 file changed, 60 insertions(+), 90 deletions(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 70c73cb2..3f8b892f 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -1,91 +1,61 @@
-  name: Continuous Integration
-
-  on:
-    push:
-      branches:
-        - '*'
-    pull_request:
-      branches:
-        - '*'
-    schedule:
-      - cron: "0 0 * * 1,4"
-
-  jobs:
-    unittests:
-      name: CI (${{ matrix.os }}-${{ matrix.environment-file }})
-      runs-on: ${{ matrix.os }}
-      continue-on-error: true
-      timeout-minutes: 90
-      strategy:
-        matrix:
-          os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
-          environment-file: [.ci/39.yml, .ci/310.yml, .ci/311.yml]
-
-      steps:
-        - name: checkout repo
-          uses: actions/checkout@v2
-
-        - name: setup micromamba
-          uses: mamba-org/provision-with-micromamba@main
-          with:
-            environment-file: ${{ matrix.environment-file }}
-            micromamba-version: 'latest'
-            mamba-version: "*"
-            channels: conda-forge
-            channel-priority: true
-
-
-        - name: install geosnap - bash
-          shell: bash -l {0}
-          run: pip install -e .
--no-deps --force-reinstall - if: matrix.os == 'windows-latest' - - - - name: download data - bash - shell: bash -l {0} - run: python geosnap/tests/_dl_data.py - env: - COMBO_DATA: ${{ secrets.COMBO_DATA }} - if: matrix.os != 'windows-latest' - - - name: download data - powershell - shell: powershell - run: python geosnap/tests/_dl_data.py - env: - COMBO_DATA: ${{ secrets.COMBO_DATA }} - if: matrix.os == 'windows-latest' - - - - name: run tests - bash - shell: bash -l {0} - run: pytest -v geosnap --cov=geosnap --cov-report=xml - env: - LTDB_SAMPLE: ${{ secrets.COMBO_DATA }} # check whether we can pull secrets - LTDB_FULL: ${{ secrets.COMBO_DATA }} - NCDB: ${{ secrets.COMBO_DATA }} - if: matrix.os != 'windows-latest' - - - name: run tests - powershell - shell: powershell - run: pytest -v geosnap - env: - LTDB_SAMPLE: ${{ secrets.COMBO_DATA }} # check whether we can pull secrets - LTDB_FULL: ${{ secrets.COMBO_DATA }} - NCDB: ${{ secrets.COMBO_DATA }} - if: matrix.os == 'windows-latest' - - - - name: codecov - uses: codecov/codecov-action@v2 - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml - flags: unittests # optional - name: geosnap-codecov - verbose: true - if: matrix.os != 'windows-latest' +on: + push: + branches: [main] + pull_request: + branches: + - "*" + schedule: + - cron: "0 0 * * 1,4" + +jobs: + Test: + name: ${{ matrix.os }}, ${{ matrix.environment-file }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + environment-file: + - ci/39.yaml + - ci/310.yaml + - ci/311.yaml + include: + - environment-file: ci/310.yaml + os: macos-latest + - environment-file: ci/310.yaml + os: windows-latest + defaults: + run: + shell: bash -l {0} + + steps: + - uses: actions/checkout@v4 + + - name: setup micromamba + uses: mamba-org/provision-with-micromamba@main + with: + environment-file: ${{ matrix.environment-file }} + micromamba-version: "latest" + + - name: Install geosnap + run: python geosnap/tests/_dl_data.py; + env: + COMBO_DATA: ${{ secrets.COMBO_DATA }} + + - name: Test geosnap + run: | + pytest -v --color yes --cov geosnap --cov-append --cov-report term-missing --cov-report xml . 
+ + - uses: codecov/codecov-action@v3 + + - name: Generate and publish the report + if: | + failure() + && steps.status.outcome == 'failure' + && github.event_name == 'schedule' + && github.repository_owner == 'pysal' + uses: xarray-contrib/issue-from-pytest-log@v1 + with: + log-path: pytest-log.jsonl \ No newline at end of file From 9a90836b165347030a4f68e8d562cca53bc88cd7 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Sun, 1 Oct 2023 21:37:04 -0700 Subject: [PATCH 5/8] update test workflow --- .github/workflows/unittests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 3f8b892f..874f4760 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -18,13 +18,13 @@ jobs: matrix: os: [ubuntu-latest] environment-file: - - ci/39.yaml - - ci/310.yaml - - ci/311.yaml + - .ci/39.yaml + - .ci/310.yaml + - .ci/311.yaml include: - - environment-file: ci/310.yaml + - environment-file: .ci/310.yaml os: macos-latest - - environment-file: ci/310.yaml + - environment-file: .ci/310.yaml os: windows-latest defaults: run: From f0902bd39ce28effc1fd7f9834e2fa550fbd49ca Mon Sep 17 00:00:00 2001 From: eli knaap Date: Sun, 1 Oct 2023 21:37:58 -0700 Subject: [PATCH 6/8] update test workflow yaml-->yml --- .github/workflows/unittests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 874f4760..23a6060e 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -18,13 +18,13 @@ jobs: matrix: os: [ubuntu-latest] environment-file: - - .ci/39.yaml - - .ci/310.yaml - - .ci/311.yaml + - .ci/39.yml + - .ci/310.yml + - .ci/311.yml include: - - environment-file: .ci/310.yaml + - environment-file: .ci/310.yml os: macos-latest - - environment-file: .ci/310.yaml + - environment-file: .ci/310.yml os: windows-latest defaults: run: From 856e0e85381b936fb236da4028c6abcbe6dedb65 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Sun, 1 Oct 2023 21:40:57 -0700 Subject: [PATCH 7/8] install module in tests --- .github/workflows/unittests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 23a6060e..521a1386 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -40,7 +40,7 @@ jobs: micromamba-version: "latest" - name: Install geosnap - run: python geosnap/tests/_dl_data.py; + run: pip install . 
;python geosnap/tests/_dl_data.py; env: COMBO_DATA: ${{ secrets.COMBO_DATA }} From 8d3412a3b0a3cfe0596fde2913771e3988f5a6da Mon Sep 17 00:00:00 2001 From: eli knaap Date: Sun, 1 Oct 2023 22:03:40 -0700 Subject: [PATCH 8/8] fix lehd test --- geosnap/tests/test_add_data.py | 15 +- geosnap/tests/test_get_gadm.py | 2 +- tools/gitcount.ipynb | 708 --------------------------------- 3 files changed, 13 insertions(+), 712 deletions(-) delete mode 100644 tools/gitcount.ipynb diff --git a/geosnap/tests/test_add_data.py b/geosnap/tests/test_add_data.py index c7a4c9ed..8b1952e0 100644 --- a/geosnap/tests/test_add_data.py +++ b/geosnap/tests/test_add_data.py @@ -40,14 +40,23 @@ def test_store_ncdb(): assert datasets.ncdb().shape == (328633, 76) -def test_get_lehd(): +def test_get_lehd_v7(): - wac = io.get_lehd() - rac = io.get_lehd("rac") + wac = io.get_lehd(version=7) + rac = io.get_lehd("rac", version=7) assert wac.shape == (3074, 52) assert rac.shape == (4382, 42) +def test_get_lehd_v8(): + + wac = io.get_lehd(version=8) + rac = io.get_lehd("rac", version=8) + + assert wac.shape == (3269, 52) + assert rac.shape == (4553, 42) + + def test_store_acs(): io.store_acs(2012) diff --git a/geosnap/tests/test_get_gadm.py b/geosnap/tests/test_get_gadm.py index 72439abf..2451e5e4 100644 --- a/geosnap/tests/test_get_gadm.py +++ b/geosnap/tests/test_get_gadm.py @@ -3,7 +3,7 @@ GADM_DOWN=False -@pytest.mark.skipif( GADM_DOWN, reason="GADM is down at the moment") +@pytest.mark.skipif(GADM_DOWN, reason="GADM is down at the moment") def test_get_gadm(): dr = get_gadm(code="DOM") assert dr.shape == (1, 3) diff --git a/tools/gitcount.ipynb b/tools/gitcount.ipynb deleted file mode 100644 index 3fed2766..00000000 --- a/tools/gitcount.ipynb +++ /dev/null @@ -1,708 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## geosnap Change Log Statistics\n", - "\n", - "This notebook generates the summary statistics for a package. \n", - "\n", - "It assumes you are running this under the `tools` directory at the toplevel of the package\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# get date of last tag\n", - "from subprocess import Popen, PIPE\n", - "x, err = Popen('git log -1 --tags --simplify-by-decoration --pretty=\"%ai\"| cat', stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True).communicate()\n", - "start_date = x.split()[0].decode('utf-8')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "start_date" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# today's date\n", - "import datetime\n", - "release_date = str(datetime.datetime.today()).split()[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "release_date" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "package_name = 'geosnap'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook will generate a file in the current directory with the name \"changelog_VERSION.md\". You can edit and append this on front of the CHANGELOG file for the package release." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "import os\n", - "import json\n", - "import re\n", - "import sys\n", - "import pandas\n", - "\n", - "from datetime import datetime, timedelta\n", - "from time import sleep\n", - "from subprocess import check_output\n", - "try:\n", - " from urllib import urlopen\n", - "except:\n", - " from urllib.request import urlopen\n", - "\n", - "import ssl\n", - "import yaml\n", - "\n", - "context = ssl._create_unverified_context()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "CWD = os.path.abspath(os.path.curdir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "CWD" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "since_date = '--since=\"{start}\"'.format(start=start_date)\n", - "since_date\n", - "since = datetime.strptime(start_date+\" 0:0:0\", \"%Y-%m-%d %H:%M:%S\")\n", - "since" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "cd ../" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import versioneer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "f = versioneer.get_version()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "f" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Total commits by subpackage" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "cmd = ['git', 'log', '--oneline', since_date]\n", - "ncommits = len(check_output(cmd).splitlines())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ncommits" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## List Contributors" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Some of our contributors have many aliases for the same identity. So, we've added a mapping to make sure that individuals are listed once (and only once). 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),\n", - " 'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),\n", - " 'Wei Kang': ('Wei Kang', 'weikang9009'),\n", - " 'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas'),\n", - " 'Eli Knaap': ('Eli Knaap', 'eli knaap', 'knaaptime'),\n", - " 'Andrew Warfield': ('Andrew Warfield', 'Angwar26', 'Angwar-26')\n", - "}\n", - "\n", - "def regularize_identity(string):\n", - " string = string.decode()\n", - " for name, aliases in identities.items():\n", - " for alias in aliases:\n", - " if alias in string:\n", - " string = string.replace(alias, name)\n", - " if len(string.split(' '))>1:\n", - " string = string.title()\n", - " return string.lstrip('* ')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "author_cmd = ['git', 'log', '--format=* %aN', since_date]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "\n", - "ncommits = len(check_output(cmd).splitlines())\n", - "all_authors = check_output(author_cmd).splitlines()\n", - "counter = Counter([regularize_identity(author) for author in all_authors])\n", - "# global_counter += counter\n", - "# counters.update({'.'.join((package,subpackage)): counter})\n", - "unique_authors = sorted(set(all_authors))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "unique_authors = counter.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "unique_authors" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Disaggregate by PR, Issue" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from datetime import datetime, timedelta\n", - "ISO8601 = \"%Y-%m-%dT%H:%M:%SZ\"\n", - "PER_PAGE = 100\n", - "element_pat = re.compile(r'<(.+?)>')\n", - "rel_pat = re.compile(r'rel=[\\'\"](\\w+)[\\'\"]')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "\n", - "def parse_link_header(headers):\n", - " link_s = headers.get('link', '')\n", - " urls = element_pat.findall(link_s)\n", - " rels = rel_pat.findall(link_s)\n", - " d = {}\n", - " for rel,url in zip(rels, urls):\n", - " d[rel] = url\n", - " return d\n", - "\n", - "def get_paged_request(url):\n", - " \"\"\"get a full list, handling APIv3's paging\"\"\"\n", - " results = []\n", - " while url:\n", - " #print(\"fetching %s\" % url, file=sys.stderr)\n", - " f = urlopen(url)\n", - " results.extend(json.load(f))\n", - " links = parse_link_header(f.headers)\n", - " url = links.get('next')\n", - " return results\n", - "\n", - "def get_issues(project=\"oturns/geosnap\", state=\"closed\", pulls=False):\n", - " \"\"\"Get a list of the issues from the Github API.\"\"\"\n", - " which = 'pulls' if pulls else 'issues'\n", - " url = \"https://api.github.com/repos/%s/%s?state=%s&per_page=%i\" % (project, which, state, PER_PAGE)\n", - " return get_paged_request(url)\n", - 
"\n", - "\n", - "def _parse_datetime(s):\n", - " \"\"\"Parse dates in the format returned by the Github API.\"\"\"\n", - " if s:\n", - " return datetime.strptime(s, ISO8601)\n", - " else:\n", - " return datetime.fromtimestamp(0)\n", - "\n", - "\n", - "def issues2dict(issues):\n", - " \"\"\"Convert a list of issues to a dict, keyed by issue number.\"\"\"\n", - " idict = {}\n", - " for i in issues:\n", - " idict[i['number']] = i\n", - " return idict\n", - "\n", - "\n", - "def is_pull_request(issue):\n", - " \"\"\"Return True if the given issue is a pull request.\"\"\"\n", - " return 'pull_request_url' in issue\n", - "\n", - "\n", - "def issues_closed_since(period=timedelta(days=365), project=\"oturns/geosnap\", pulls=False):\n", - " \"\"\"Get all issues closed since a particular point in time. period\n", - "can either be a datetime object, or a timedelta object. In the\n", - "latter case, it is used as a time before the present.\"\"\"\n", - "\n", - " which = 'pulls' if pulls else 'issues'\n", - "\n", - " if isinstance(period, timedelta):\n", - " period = datetime.now() - period\n", - " url = \"https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i\" % (project, which, period.strftime(ISO8601), PER_PAGE)\n", - " allclosed = get_paged_request(url)\n", - " # allclosed = get_issues(project=project, state='closed', pulls=pulls, since=period)\n", - " filtered = [i for i in allclosed if _parse_datetime(i['closed_at']) > period]\n", - "\n", - " # exclude rejected PRs\n", - " if pulls:\n", - " filtered = [ pr for pr in filtered if pr['merged_at'] ]\n", - "\n", - " return filtered\n", - "\n", - "\n", - "def sorted_by_field(issues, field='closed_at', reverse=False):\n", - " \"\"\"Return a list of issues sorted by closing date date.\"\"\"\n", - " return sorted(issues, key = lambda i:i[field], reverse=reverse)\n", - "\n", - "\n", - "def report(issues, show_urls=False):\n", - " \"\"\"Summary report about a list of issues, printing number and title.\n", - " \"\"\"\n", - " # titles may have unicode in them, so we must encode everything below\n", - " if show_urls:\n", - " for i in issues:\n", - " role = 'ghpull' if 'merged_at' in i else 'ghissue'\n", - " print('* :%s:`%d`: %s' % (role, i['number'],\n", - " i['title'].encode('utf-8')))\n", - " else:\n", - " for i in issues:\n", - " print('* %d: %s' % (i['number'], i['title'].encode('utf-8')))\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "\n", - "all_issues = {}\n", - "all_pulls = {}\n", - "total_commits = 0\n", - "#prj='pysal/libpysal'\n", - "prj = 'oturns/{package}'.format(package=package_name)\n", - "issues = issues_closed_since(since, project=prj,pulls=False)\n", - "pulls = issues_closed_since(since, project=prj,pulls=True)\n", - "issues = sorted_by_field(issues, reverse=True)\n", - "pulls = sorted_by_field(pulls, reverse=True)\n", - "n_issues, n_pulls = map(len, (issues, pulls))\n", - "n_total = n_issues + n_pulls\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "issue_listing = []\n", - "for issue in issues:\n", - " entry = \"{title} (#{number})\".format(title=issue['title'],number=issue['number'])\n", - " issue_listing.append(entry)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "pull_listing = []\n", - "for pull in pulls:\n", - " entry = \"{title} 
(#{number})\".format(title=pull['title'],number=pull['number'])\n", - " pull_listing.append(entry)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "pull_listing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "message = \"We closed a total of {total} issues (enhancements and bug fixes) through {pr} pull requests\".format(total=n_total, pr=n_pulls)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "message = \"{msg}, since our last release on {previous}.\".format(msg=message, previous=str(start_date))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "message" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "message += \"\\n\\n## Issues Closed\\n\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "print(message)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "issues = \"\\n\".join([\" - \"+issue for issue in issue_listing])\n", - "message += issues\n", - "message += \"\\n\\n## Pull Requests\\n\"\n", - "pulls = \"\\n\".join([\" - \"+pull for pull in pull_listing])\n", - "message += pulls" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "print(message)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "people = \"\\n\".join([\" - \"+person for person in unique_authors])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "print(people)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "message +=\"\\n\\nThe following individuals contributed to this release:\\n\\n{people}\".format(people=people)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "print(message)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "head = \"# Changes\\n\\nVersion {version} ({release_date})\\n\\n\".format(version=f, release_date=release_date)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "print(head+message)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "outfile = 'changelog.md'\n", - "with open(outfile, 'w') as of:\n", - " of.write(head+message)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:geosnap]", - "language": "python", - "name": 
"conda-env-geosnap-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}