From 40eee64ef690dca10c810191ae5a3536df4a0e6c Mon Sep 17 00:00:00 2001 From: Vit Zikmund Date: Wed, 24 Apr 2024 14:17:31 +0200 Subject: [PATCH 1/5] chore(github): use GitHub login in Identity.id The Identity.id field is used in the JWTAuthenticator._generate_token_for_action as one of the fields in the preauth JWT. The human-readable GitHub login is better suitable for this place than an opaque number internally used by GitHub (which they also happen to call an 'id'). Additionally, add some comments to the code I managed to forget how it works :) --- giftless/auth/github.py | 49 +++++++++++++++++++++++++-------------- tests/auth/test_github.py | 4 ++-- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/giftless/auth/github.py b/giftless/auth/github.py index 07e1b06..cd515a0 100644 --- a/giftless/auth/github.py +++ b/giftless/auth/github.py @@ -243,11 +243,17 @@ class GithubIdentity(Identity): """ def __init__( - self, login: str, id_: str, name: str, email: str, *, cc: CacheConfig + self, + login: str, + github_id: str, + name: str, + email: str, + *, + cc: CacheConfig, ) -> None: super().__init__() - self.login = login - self.id = id_ + self.id = login + self.github_id = github_id self.name = name self.email = email @@ -274,30 +280,36 @@ def expiration(_key: Any, value: set[Permission], now: float) -> float: def __repr__(self) -> str: return ( - f"<{self.__class__.__name__}" - f"login:{self.login} id:{self.id} name:{self.name}>" + f"<{self.__class__.__name__} " + f"id:{self.id} github_id:{self.github_id} name:{self.name}>" ) def __eq__(self, other: object) -> bool: - return isinstance(other, self.__class__) and (self.login, self.id) == ( - other.login, - other.id, - ) + return isinstance(other, self.__class__) and ( + self.id, + self.github_id, + ) == (other.id, other.github_id) def __hash__(self) -> int: - return hash((self.login, self.id)) + return hash((self.id, self.github_id)) def permissions( self, org: str, repo: str, *, authoritative: bool = False ) -> set[Permission] | None: + """Return user's permission set for an org/repo.""" key = cachetools.keys.hashkey(org, repo) with self._auth_cache_lock: + # first check if the permissions are in the proxy cache if authoritative: + # pop the entry from the proxy cache to be stored properly permission = self._auth_cache_read_proxy.pop(key, None) else: + # just get it when only peeking permission = self._auth_cache_read_proxy.get(key) + # if not found in the proxy, check the regular auth cache if permission is None: return self._auth_cache.get(key) + # try moving proxy permissions to the regular cache if authoritative: with suppress(ValueError): self._auth_cache[key] = permission @@ -306,7 +318,10 @@ def permissions( def authorize( self, org: str, repo: str, permissions: set[Permission] | None ) -> None: + """Save user's permission set for an org/repo.""" key = cachetools.keys.hashkey(org, repo) + # put the discovered permissions into the proxy cache + # to ensure at least one successful 'authoritative' read with self._auth_cache_lock: self._auth_cache_read_proxy[key] = ( permissions if permissions is not None else set() @@ -439,21 +454,19 @@ def _authorize(self, ctx: CallContext, user: GithubIdentity) -> None: if (permissions := user.permissions(org, repo)) is not None: perm_list = self._perm_list(permissions) _logger.debug( - f"{user.login} is already temporarily authorized for " + f"{user.id} is already temporarily authorized for " f"{org_repo}: {perm_list}" ) else: - _logger.debug( - f"Checking {user.login}'s permissions for {org_repo}" - ) + _logger.debug(f"Checking {user.id}'s permissions for {org_repo}") try: repo_data = self._api_get( - f"/repos/{org_repo}/collaborators/{user.login}/permission", + f"/repos/{org_repo}/collaborators/{user.id}/permission", ctx, ) except requests.exceptions.RequestException as e: msg = ( - f"Failed to find {user.login}'s permissions for " + f"Failed to find {user.id}'s permissions for " f"{org_repo}: {e}" ) _logger.warning(msg) @@ -461,7 +474,7 @@ def _authorize(self, ctx: CallContext, user: GithubIdentity) -> None: gh_permission = repo_data.get("permission") _logger.debug( - f"User {user.login} has '{gh_permission}' GitHub permission " + f"User {user.id} has '{gh_permission}' GitHub permission " f"for {org_repo}" ) permissions = set() @@ -472,7 +485,7 @@ def _authorize(self, ctx: CallContext, user: GithubIdentity) -> None: perm_list = self._perm_list(permissions) ttl = user.cache_ttl(permissions) _logger.debug( - f"Authorizing {user.login} (for {ttl}s) for " + f"Authorizing {user.id} (for {ttl}s) for " f"{org_repo}: {perm_list}" ) user.authorize(org, repo, permissions) diff --git a/tests/auth/test_github.py b/tests/auth/test_github.py index 74a4066..89c2d0b 100644 --- a/tests/auth/test_github.py +++ b/tests/auth/test_github.py @@ -197,9 +197,9 @@ def test_github_identity_core() -> None: user_dict = DEFAULT_USER_DICT | {"other_field": "other_value"} cache_cfg = DEFAULT_CONFIG.cache user = gh.GithubIdentity.from_dict(user_dict, cc=cache_cfg) - assert (user.login, user.id, user.name, user.email) == DEFAULT_USER_ARGS + assert (user.id, user.github_id, user.name, user.email) == DEFAULT_USER_ARGS assert all(arg in repr(user) for arg in DEFAULT_USER_ARGS[:3]) - assert hash(user) == hash((user.login, user.id)) + assert hash(user) == hash((user.id, user.github_id)) args2 = (*DEFAULT_USER_ARGS[:2], "spammer", "spam@camelot.gov.uk") user2 = gh.GithubIdentity(*args2, cc=cache_cfg) From 3016c4b8b32d7b9c79524bb70a160d97d48780b0 Mon Sep 17 00:00:00 2001 From: Vit Zikmund Date: Wed, 24 Apr 2024 19:21:31 +0200 Subject: [PATCH 2/5] fix(preauth): Improve action's 'authenticated' flag The LFS batch response always included authenticated = true, even when the default "PRE_AUTHORIZED_ACTION_PROVIDER" was disabled (= null). This led the git client to forever retry the download/upload attempts without realizing it should try the credentials from its creds store. This comes in handy with the GitHub auth provider which can handle both the usual authentication and the same for the streaming transfer adapter. --- giftless/auth/__init__.py | 2 +- giftless/transfer/__init__.py | 44 +++++++++++++++------------- giftless/transfer/basic_external.py | 4 +-- giftless/transfer/basic_streaming.py | 4 +-- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/giftless/auth/__init__.py b/giftless/auth/__init__.py index f8a336e..213082b 100644 --- a/giftless/auth/__init__.py +++ b/giftless/auth/__init__.py @@ -53,7 +53,7 @@ def get_authz_query_params( oid: str | None = None, lifetime: int | None = None, ) -> dict[str, str]: - """Authorize an action by adding credientaisl to the query string.""" + """Authorize an action by adding credentials to the query string.""" @abc.abstractmethod def get_authz_header( diff --git a/giftless/transfer/__init__.py b/giftless/transfer/__init__.py index 95726df..bde540f 100644 --- a/giftless/transfer/__init__.py +++ b/giftless/transfer/__init__.py @@ -16,6 +16,7 @@ PreAuthorizedActionAuthenticator, authentication, ) +from giftless.auth.identity import Identity from giftless.util import add_query_params, get_callable from giftless.view import ViewProvider @@ -82,6 +83,25 @@ def __init__(self) -> None: def set_auth_module(self, auth_module: Authentication) -> None: self._auth_module = auth_module + @property + def _preauth_handler_and_identity( + self, + ) -> tuple[PreAuthorizedActionAuthenticator | None, Identity | None]: + if ( + self._auth_module is None + or self._auth_module.preauth_handler is None + ): + return None, None + handler = cast( + PreAuthorizedActionAuthenticator, self._auth_module.preauth_handler + ) + identity = self._auth_module.get_identity() + return handler, identity + + @property + def _provides_preauth(self) -> bool: + return None not in self._preauth_handler_and_identity + def _preauth_url( self, original_url: str, @@ -91,16 +111,8 @@ def _preauth_url( oid: str | None = None, lifetime: int | None = None, ) -> str: - if self._auth_module is None: - return original_url - if self._auth_module.preauth_handler is None: - return original_url - - handler = cast( - PreAuthorizedActionAuthenticator, self._auth_module.preauth_handler - ) - identity = self._auth_module.get_identity() - if identity is None: + handler, identity = self._preauth_handler_and_identity + if handler is None or identity is None: return original_url params = handler.get_authz_query_params( @@ -117,18 +129,10 @@ def _preauth_headers( oid: str | None = None, lifetime: int | None = None, ) -> dict[str, str]: - if self._auth_module is None: + handler, identity = self._preauth_handler_and_identity + if handler is None or identity is None: return {} - if self._auth_module.preauth_handler is None: - return {} - - handler = cast( - PreAuthorizedActionAuthenticator, self._auth_module.preauth_handler - ) - identity = self._auth_module.get_identity() - if identity is None: - return {} return handler.get_authz_header( identity, org, repo, actions, oid, lifetime=lifetime ) diff --git a/giftless/transfer/basic_external.py b/giftless/transfer/basic_external.py index 01e7130..ddb8158 100644 --- a/giftless/transfer/basic_external.py +++ b/giftless/transfer/basic_external.py @@ -60,7 +60,7 @@ def upload( ) ) if response.get("actions", {}).get("upload"): # type:ignore[attr-defined] - response["authenticated"] = True + response["authenticated"] = self._provides_preauth headers = self._preauth_headers( organization, repo, @@ -98,7 +98,7 @@ def download( response["error"] = e.as_dict() if response.get("actions", {}).get("download"): # type:ignore[attr-defined] - response["authenticated"] = True + response["authenticated"] = self._provides_preauth return response diff --git a/giftless/transfer/basic_streaming.py b/giftless/transfer/basic_streaming.py index 9893a31..26709b7 100644 --- a/giftless/transfer/basic_streaming.py +++ b/giftless/transfer/basic_streaming.py @@ -204,7 +204,7 @@ def upload( "expires_in": self.VERIFY_LIFETIME, }, } - response["authenticated"] = True + response["authenticated"] = self._provides_preauth return response @@ -250,7 +250,7 @@ def download( "expires_in": self.action_lifetime, } } - response["authenticated"] = True + response["authenticated"] = self._provides_preauth return response From 55751473a7d1a9e2192a02beb1b9925aab07d593 Mon Sep 17 00:00:00 2001 From: Vit Zikmund Date: Wed, 24 Apr 2024 19:23:46 +0200 Subject: [PATCH 3/5] chore(build): fix setuptools package discovery The only package that's being packaged here is 'giftless', any other "rogue" directory confuses the packager. --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 0182cbc..6532e7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,9 @@ build-backend = "setuptools.build_meta" # Use single-quoted strings so TOML treats the string like a Python r-string # Multi-line strings are implicitly treated by black as regular expressions +[tool.setuptools.packages.find] +include = ["giftless"] + [tool.coverage.run] parallel = true branch = true From c0a7963abd56b41d309f248ba79fed4eb5a37fb7 Mon Sep 17 00:00:00 2001 From: Vit Zikmund Date: Wed, 24 Apr 2024 19:57:49 +0200 Subject: [PATCH 4/5] chore(lint): squeeze stray code into 80 columns --- tests/auth/test_github.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/auth/test_github.py b/tests/auth/test_github.py index 89c2d0b..6dd406b 100644 --- a/tests/auth/test_github.py +++ b/tests/auth/test_github.py @@ -197,7 +197,12 @@ def test_github_identity_core() -> None: user_dict = DEFAULT_USER_DICT | {"other_field": "other_value"} cache_cfg = DEFAULT_CONFIG.cache user = gh.GithubIdentity.from_dict(user_dict, cc=cache_cfg) - assert (user.id, user.github_id, user.name, user.email) == DEFAULT_USER_ARGS + assert ( + user.id, + user.github_id, + user.name, + user.email, + ) == DEFAULT_USER_ARGS assert all(arg in repr(user) for arg in DEFAULT_USER_ARGS[:3]) assert hash(user) == hash((user.id, user.github_id)) From fd4812ab5614cd258044d6401bd32abf08a759eb Mon Sep 17 00:00:00 2001 From: Vit Zikmund Date: Thu, 25 Apr 2024 09:38:25 +0200 Subject: [PATCH 5/5] chore(test): fix expected responses w/authenticated: false --- tests/transfer/test_basic_external_adapter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/transfer/test_basic_external_adapter.py b/tests/transfer/test_basic_external_adapter.py index cdcc206..c595eec 100644 --- a/tests/transfer/test_basic_external_adapter.py +++ b/tests/transfer/test_basic_external_adapter.py @@ -43,7 +43,7 @@ def test_upload_action_new_file(app: flask.Flask) -> None: assert response == { "oid": "abcdef123456", "size": 1234, - "authenticated": True, + "authenticated": False, "actions": { "upload": { "href": "https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900", @@ -75,7 +75,7 @@ def test_upload_action_extras_are_passed(app: flask.Flask) -> None: assert response == { "oid": "abcdef123456", "size": 1234, - "authenticated": True, + "authenticated": False, "actions": { "upload": { "href": "https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900&filename=foo.csv", @@ -120,7 +120,7 @@ def test_download_action_existing_file() -> None: assert response == { "oid": "abcdef123456", "size": 1234, - "authenticated": True, + "authenticated": False, "actions": { "download": { "href": "https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900", @@ -177,7 +177,7 @@ def test_download_action_extras_are_passed() -> None: assert response == { "oid": "abcdef123456", "size": 1234, - "authenticated": True, + "authenticated": False, "actions": { "download": { "href": "https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900&filename=foo.csv",