diff --git a/paasta_tools/api/api.py b/paasta_tools/api/api.py index f01a48dac1..268474c78f 100644 --- a/paasta_tools/api/api.py +++ b/paasta_tools/api/api.py @@ -31,6 +31,7 @@ import paasta_tools.api from paasta_tools import kubernetes_tools from paasta_tools.api import settings +from paasta_tools.api.tweens import auth from paasta_tools.api.tweens import profiling from paasta_tools.api.tweens import request_logger from paasta_tools.utils import load_system_paasta_config @@ -79,6 +80,18 @@ def parse_paasta_api_args(): default=4, help="Number of gunicorn workers to run", ) + parser.add_argument( + "--auth-endpoint", + type=str, + default="", + help="External API authorization endpoint", + ) + parser.add_argument( + "--auth-enforce", + action="store_true", + default=False, + help="Enforce API authorization", + ) args = parser.parse_args() return args @@ -105,6 +118,7 @@ def make_app(global_config=None): config.include("pyramid_swagger") config.include(request_logger) + config.include(auth) config.add_route( "flink.service.instance.jobs", "/v1/flink/{service}/{instance}/jobs" @@ -148,6 +162,18 @@ def make_app(global_config=None): "service.instance.tasks.task", "/v1/services/{service}/{instance}/tasks/{task_id}", ) + config.add_route( + "remote_run.start", + "/v1/remote_run/{service}/{instance}/start", + ) + config.add_route( + "remote_run.stop", + "/v1/remote_run/{service}/{instance}/stop", + ) + config.add_route( + "remote_run.token", + "/v1/remote_run/{service}/{instance}/token", + ) config.add_route("service.list", "/v1/services/{service}") config.add_route("services", "/v1/services") config.add_route( @@ -257,6 +283,11 @@ def main(argv=None): if args.cluster: os.environ["PAASTA_API_CLUSTER"] = args.cluster + if args.auth_endpoint: + os.environ["PAASTA_API_AUTH_ENDPOINT"] = args.auth_endpoint + if args.auth_enforce: + os.environ["PAASTA_API_AUTH_ENFORCE"] = "1" + gunicorn_args = [ "gunicorn", "-w", diff --git a/paasta_tools/api/api_docs/oapi.yaml b/paasta_tools/api/api_docs/oapi.yaml index 1518b6b65d..f3aefc7fec 100644 --- a/paasta_tools/api/api_docs/oapi.yaml +++ b/paasta_tools/api/api_docs/oapi.yaml @@ -1627,6 +1627,129 @@ paths: summary: Get mesos task of service_name.instance_name by task_id tags: - service + /remote_run/{service}/{instance}/start: + post: + operationId: remote_run_start + requestBody: + content: + application/json: + schema: + type: object + properties: + interactive: + type: boolean + user: + type: string + image: + type: string + required: + - interactive + - user + required: true + parameters: + - description: Service name + in: path + name: service + required: true + schema: + type: string + - description: Instance name + in: path + name: instance + required: true + schema: + type: string + responses: + "200": + content: + application/json: + schema: + type: string + description: The service is delayed for these possible reasons + "404": + description: Deployment key not found + "500": + description: Failure + summary: Launch a remote-run pod + tags: + - remote_run + /remote_run/{service}/{instance}/stop: + post: + operationId: remote_run_stop + requestBody: + content: + application/json: + schema: + type: object + properties: + user: + type: string + required: + - user + required: true + parameters: + - description: Service name + in: path + name: service + required: true + schema: + type: string + - description: Instance name + in: path + name: instance + required: true + schema: + type: string + responses: + "200": + content: + application/json: + schema: + type: string + description: Remote run pod stopped + "404": + description: Deployment key not found + "500": + description: Failure + summary: Stop a remote-run pod + tags: + - remote_run + /remote_run/{service}/{instance}/token: + get: + operationId: remote_run_token + parameters: + - description: Service name + in: path + name: service + required: true + schema: + type: string + - description: Instance name + in: path + name: instance + required: true + schema: + type: string + - description: User name + in: query + name: user + schema: + type: string + required: true + responses: + "200": + content: + application/json: + schema: + type: string + description: Token generated successfully + "404": + description: Deployment key not found + "500": + description: Failure + summary: Get a short lived token for exec into remote-run pod + tags: + - remote_run /version: get: operationId: showVersion diff --git a/paasta_tools/api/api_docs/swagger.json b/paasta_tools/api/api_docs/swagger.json index 9caa3c8cac..5fe2063367 100644 --- a/paasta_tools/api/api_docs/swagger.json +++ b/paasta_tools/api/api_docs/swagger.json @@ -846,6 +846,171 @@ } ] } + }, + "/remote_run/{service}/{instance}/start": { + "post": { + "responses": { + "200": { + "description": "It worked!", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Deployment key not found" + }, + "500": { + "description": "Failure" + } + }, + "summary": "Do a remote run", + "operationId": "remote_run_start", + "tags": [ + "remote_run" + ], + "parameters": [ + { + "in": "path", + "description": "Service name", + "name": "service", + "required": true, + "type": "string" + }, + { + "in": "path", + "description": "Instance name", + "name": "instance", + "required": true, + "type": "string" + }, + { + "in": "body", + "description": "Username", + "name": "json_body", + "required": true, + "schema": { + "type": "object", + "properties": { + "interactive": { + "type": "boolean" + }, + "user": { + "type": "string" + }, + "image": { + "type": "string" + } + }, + "required": [ + "interactive", + "user" + ] + } + } + ] + } + }, + "/remote_run/{service}/{instance}/stop": { + "post": { + "responses": { + "200": { + "description": "It worked!", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Deployment key not found" + }, + "500": { + "description": "Failure" + } + }, + "summary": "Stop a remote run", + "operationId": "remote_run_stop", + "tags": [ + "service" + ], + "parameters": [ + { + "in": "path", + "description": "Service name", + "name": "service", + "required": true, + "type": "string" + }, + { + "in": "path", + "description": "Instance name", + "name": "instance", + "required": true, + "type": "string" + }, + { + "in": "body", + "description": "Username", + "name": "json_body", + "required": true, + "schema": { + "type": "object", + "properties": { + "user": { + "type": "string" + } + }, + "required": [ + "user" + ] + } + } + ] + } + }, + "/remote_run/{service}/{instance}/token": { + "get": { + "responses": { + "200": { + "description": "It worked!", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Deployment key not found" + }, + "500": { + "description": "Failure" + } + }, + "summary": "Get a remote run token", + "operationId": "remote_run_token", + "tags": [ + "remote_run" + ], + "parameters": [ + { + "in": "path", + "description": "Service name", + "name": "service", + "required": true, + "type": "string" + }, + { + "in": "path", + "description": "Instance name", + "name": "instance", + "required": true, + "type": "string" + }, + { + "in": "query", + "description": "Username", + "name": "user", + "required": true, + "type": "string" + } + ] + } } }, "definitions": { diff --git a/paasta_tools/api/client.py b/paasta_tools/api/client.py index fc82dcc15c..2a1ce3de13 100644 --- a/paasta_tools/api/client.py +++ b/paasta_tools/api/client.py @@ -37,6 +37,7 @@ class PaastaOApiClient: default: paastaapis.DefaultApi resources: paastaapis.ResourcesApi service: paastaapis.ServiceApi + remote_run: paastaapis.RemoteRunApi api_error: Type[paastaapi.ApiException] connection_error: Type[paastaapi.ApiException] timeout_error: Type[paastaapi.ApiException] @@ -48,6 +49,7 @@ def get_paasta_oapi_client_by_url( cert_file: Optional[str] = None, key_file: Optional[str] = None, ssl_ca_cert: Optional[str] = None, + auth_token: str = "", ) -> PaastaOApiClient: server_variables = dict(scheme=parsed_url.scheme, host=parsed_url.netloc) config = paastaapi.Configuration( @@ -63,11 +65,15 @@ def get_paasta_oapi_client_by_url( client.rest_client.pool_manager.connection_pool_kw[ "timeout" ] = load_system_paasta_config().get_api_client_timeout() + # SEC-19555: support auth in PaaSTA APIs + if auth_token: + client.set_default_header("Authorization", f"Bearer {auth_token}") return PaastaOApiClient( autoscaler=paastaapis.AutoscalerApi(client), default=paastaapis.DefaultApi(client), resources=paastaapis.ResourcesApi(client), service=paastaapis.ServiceApi(client), + remote_run=paastaapis.RemoteRunApi(client), api_error=paastaapi.ApiException, connection_error=paastaapi.ApiException, timeout_error=paastaapi.ApiException, @@ -79,6 +85,7 @@ def get_paasta_oapi_client( cluster: str = None, system_paasta_config: SystemPaastaConfig = None, http_res: bool = False, + auth_token: str = "", ) -> Optional[PaastaOApiClient]: if not system_paasta_config: system_paasta_config = load_system_paasta_config() @@ -94,4 +101,6 @@ def get_paasta_oapi_client( parsed = urlparse(api_endpoints[cluster]) cert_file = key_file = ssl_ca_cert = None - return get_paasta_oapi_client_by_url(parsed, cert_file, key_file, ssl_ca_cert) + return get_paasta_oapi_client_by_url( + parsed, cert_file, key_file, ssl_ca_cert, auth_token + ) diff --git a/paasta_tools/api/tweens/__init__.py b/paasta_tools/api/tweens/__init__.py index e69de29bb2..e8d08dd58d 100644 --- a/paasta_tools/api/tweens/__init__.py +++ b/paasta_tools/api/tweens/__init__.py @@ -0,0 +1,6 @@ +from typing import Callable + +from pyramid.request import Request +from pyramid.response import Response + +Handler = Callable[[Request], Response] diff --git a/paasta_tools/api/tweens/auth.py b/paasta_tools/api/tweens/auth.py new file mode 100644 index 0000000000..2e2ff7ef3d --- /dev/null +++ b/paasta_tools/api/tweens/auth.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +# Copyright 2015-2016 Yelp Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import logging +import os +from typing import NamedTuple + +import cachetools.func +import pyramid +import requests +from pyramid.config import Configurator +from pyramid.httpexceptions import HTTPForbidden +from pyramid.registry import Registry +from pyramid.request import Request +from pyramid.response import Response + +from paasta_tools.api.tweens import Handler + + +logger = logging.getLogger(__name__) +AUTH_CACHE_SIZE = 50000 +AUTH_CACHE_TTL = 30 * 60 + + +class AuthorizationOutcome(NamedTuple): + authorized: bool + reason: str + + +class AuthTweenFactory: + def __init__(self, handler: Handler, registry: Registry) -> None: + self.handler = handler + self.registry = registry + self.enforce = bool(os.getenv("PAASTA_API_AUTH_ENFORCE", "")) + self.endpoint = os.getenv("PAASTA_API_AUTH_ENDPOINT") + self.session = requests.Session() + + def __call__(self, request: Request) -> Response: + """ + Extracts relevant metadata from request, and checks if it is authorized + """ + token = request.headers.get("Authorization", "").strip() + token = token.split()[-1] if token else "" # removes "Bearer" prefix + auth_outcome = self.is_request_authorized(request.path, token, request.method) + if self.enforce and not auth_outcome.authorized: + return HTTPForbidden( + body=json.dumps({"reason": auth_outcome.reason}), + headers={"X-Auth-Failure-Reason": auth_outcome.reason}, + content_type="application/json", + charset="utf-8", + ) + return self.handler(request) + + @cachetools.func.ttl_cache(maxsize=AUTH_CACHE_SIZE, ttl=AUTH_CACHE_TTL) + def is_request_authorized( + self, path: str, token: str, method: str + ) -> AuthorizationOutcome: + """Check if API request is authorized + + :param str path: API path + :param str token: authentication token + :param str method: http method + :return: auth outcome + """ + try: + response = self.session.post( + url=self.endpoint, + json={ + "input": { + "path": path, + "backend": "paasta", + "token": token, + "method": method, + }, + }, + timeout=2, + ).json() + except Exception as e: + logger.exception(f"Issue communicating with auth endpoint: {e}") + return AuthorizationOutcome(False, "Auth backend error") + + if "result" not in response or "allowed" not in response["result"]: + return AuthorizationOutcome(False, "Malformed auth response") + + if not response["result"]["allowed"]: + reason = response["result"].get("reason", "Denied") + return AuthorizationOutcome(False, reason) + + reason = response["result"].get("reason", "Ok") + return AuthorizationOutcome(True, reason) + + +def includeme(config: Configurator): + if os.getenv("PAASTA_API_AUTH_ENDPOINT"): + config.add_tween( + "paasta_tools.api.tweens.auth.AuthTweenFactory", + under=( + pyramid.tweens.INGRESS, + "paasta_tools.api.tweens.request_logger.request_logger_tween_factory", + ), + ) diff --git a/paasta_tools/api/views/instance.py b/paasta_tools/api/views/instance.py index 945a1d5187..c2a87ce794 100644 --- a/paasta_tools/api/views/instance.py +++ b/paasta_tools/api/views/instance.py @@ -16,6 +16,7 @@ PaaSTA service instance status/start/stop etc. """ import asyncio +import json import logging import re import traceback @@ -31,6 +32,7 @@ import paasta_tools.mesos.exceptions as mesos_exceptions from paasta_tools import paasta_remote_run +from paasta_tools import paasta_remote_run_2 from paasta_tools import tron_tools from paasta_tools.api import settings from paasta_tools.api.views.exception import ApiFailure @@ -385,3 +387,71 @@ def instance_mesh_status(request): raise ApiFailure(error_message, 500) return instance_mesh + + +@view_config(route_name="remote_run.stop", request_method="POST", renderer="json") +def remote_run_stop(request): + service = request.swagger_data.get("service") + instance = request.swagger_data.get("instance") + user = request.swagger_data["json_body"].get("user") + is_eks = is_instance_eks(service, instance) + + try: + response = paasta_remote_run_2.remote_run_stop( + service, instance, user, settings.cluster, is_eks + ) + except: + error_message = traceback.format_exc() + raise ApiFailure(error_message, 500) + return response + + +@view_config(route_name="remote_run.token", request_method="GET", renderer="json") +def remote_run_token(request): + service = request.swagger_data.get("service") + instance = request.swagger_data.get("instance") + user = request.swagger_data.get("user") + is_eks = is_instance_eks(service, instance) + try: + token = paasta_remote_run_2.create_exec_token( + service, instance, user, settings.cluster, is_eks + ) + except: + error_message = traceback.format_exc() + raise ApiFailure(error_message, 500) + return json.dumps({"token": token}) + + +def is_instance_eks(service, instance): + try: + instance_type = validate_service_instance( + service, instance, settings.cluster, settings.soa_dir + ) + except NoConfigurationForServiceError: + error_message = no_configuration_for_service_message( + settings.cluster, + service, + instance, + ) + raise Exception(error_message, 404) + return instance_type == "eks" + + +@view_config(route_name="remote_run.start", request_method="POST", renderer="json") +def remote_run_start(request): + service = request.swagger_data.get("service") + instance = request.swagger_data.get("instance") + user = request.swagger_data["json_body"].get("user") + interactive = request.swagger_data["json_body"].get("interactive", True) + recreate = request.swagger_data["json_body"].get("recreate", True) + + is_eks = is_instance_eks(service, instance) + try: + response = paasta_remote_run_2.remote_run_start( + service, instance, user, settings.cluster, interactive, recreate, is_eks + ) + except Exception: + error_message = traceback.format_exc() + raise ApiFailure(error_message, 500) + + return response diff --git a/paasta_tools/cli/cli.py b/paasta_tools/cli/cli.py index 92f9dd222b..22af8a138d 100755 --- a/paasta_tools/cli/cli.py +++ b/paasta_tools/cli/cli.py @@ -118,6 +118,7 @@ def add_subparser(command, subparsers): "pause_service_autoscaler": "pause_service_autoscaler", "push-to-registry": "push_to_registry", "remote-run": "remote_run", + "remote-run-2": "remote_run_2", "rollback": "rollback", "secret": "secret", "security-check": "security_check", diff --git a/paasta_tools/cli/cmds/remote_run_2.py b/paasta_tools/cli/cmds/remote_run_2.py new file mode 100644 index 0000000000..67cac4aa20 --- /dev/null +++ b/paasta_tools/cli/cmds/remote_run_2.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python +# Copyright 2015-2016 Yelp Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import os +import pty +import sys +import time + +from paasta_tools.api.client import get_paasta_oapi_client +from paasta_tools.cli.cmds.check import makefile_responds_to +from paasta_tools.cli.cmds.cook_image import paasta_cook_image +from paasta_tools.cli.utils import get_paasta_oapi_api_clustername +from paasta_tools.cli.utils import lazy_choices_completer +from paasta_tools.utils import DEFAULT_SOA_DIR +from paasta_tools.utils import get_username +from paasta_tools.utils import list_services +from paasta_tools.utils import load_system_paasta_config +from paasta_tools.utils import PaastaColors +from paasta_tools.utils import SystemPaastaConfig + + +def add_common_args_to_parser(parser): + parser.add_argument( + "-s", + "--service", + help="The name of the service you wish to inspect. Required.", + required=True, + ).completer = lazy_choices_completer(list_services) + parser.add_argument( + "-i", + "--instance", + help=( + "Simulate a docker run for a particular instance of the " + "service, like 'main' or 'canary'. Required." + ), + required=True, + ) + parser.add_argument( + "-c", + "--cluster", + help=( + "The name of the cluster you wish to run your task on. " + "If omitted, uses the default cluster defined in the paasta " + f"remote-run configs." + ), + ) + + +def add_subparser( + subparsers, +) -> None: + remote_run_parser = subparsers.add_parser( + "remote-run-2", + help="Run stuff remotely.", + description=("'paasta remote-run' runs stuff remotely "), + ) + subparsers = remote_run_parser.add_subparsers(dest="remote_run_command") + start_parser = subparsers.add_parser( + "start", + help="Start or connect to a remote-run job", + description=("Starts or connects to a remote-run-job"), + ) + start_parser.add_argument( + "-b", + "--build", + dest="build", + help="Build the image from current directory", + action="store_true", + ) + start_parser.add_argument( + "-y", + "--yelpsoa-config-root", + dest="yelpsoa_config_root", + help="A directory from which yelpsoa-configs should be read from", + default=DEFAULT_SOA_DIR, + ) + start_parser.add_argument( + "-I", + "--interactive", + help=( + 'Run container in interactive mode. If interactive is set the default command will be "bash" ' + 'unless otherwise set by the "--cmd" flag' + ), + action="store_true", + required=False, + default=False, + ) + stop_parser = subparsers.add_parser( + "stop", + help="Stop! In the name of Paasta", + description="Stop your remote-run job if it exists", + ) + add_common_args_to_parser(start_parser) + add_common_args_to_parser(stop_parser) + remote_run_parser.set_defaults(command=remote_run) + + +def paasta_remote_run_start( + args, + system_paasta_config: SystemPaastaConfig, + verbose: int = 0, + is_eks: bool = True, +) -> int: + + cluster = args.cluster + service = args.service + instance = args.instance + build = args.build + output = [] + ret_code = 0 + + # TODO: Build + if build and not makefile_responds_to("cook-image"): + print( + "A local Makefile with a 'cook-image' target is required for --build", + file=sys.stderr, + ) + default_tag = "paasta-remote-run-{}-{}".format(service, get_username()) + os.environ["DOCKER_TAG"] = default_tag + paasta_cook_image(args=None, service=service, soa_dir=soa_dir) + # TODO Actually push the image + + client = get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks), + system_paasta_config=system_paasta_config, + ) + if not client: + print("Cannot get a paasta-api client") + exit(1) + + # TODO add image argument if build + response = client.remote_run.remote_run_start( + service, + instance, + {"user": get_username(), "interactive": True}, + ) + try: + response = json.loads(response) + except client.api_error as exc: + print(exc, file=sys.stderr) + output.append(PaastaColors.red(exc.reason)) + ret_code = exc.status + except (client.connection_error, client.timeout_error) as exc: + output.append( + PaastaColors.red(f"Could not connect to API: {exc.__class__.__name__}") + ) + ret_code = 1 + except Exception as e: + output.append(PaastaColors.red(f"Exception when talking to the API:")) + output.append(str(e)) + ret_code = 1 + + if ret_code: + print("\n".join(output)) + return ret_code + + if response["status"] == 409: + print( + "A remote-run container already exists. Run remote-run stop first if you'd like a new one." + ) + attach = input("Would you like to attach to it? y/n ") + if attach == "n": + return 0 + print(response) + pod_name, namespace = response["pod_name"], response["namespace"] + + try: + token = client.remote_run.remote_run_token( + service=service, instance=instance, user="qlo" + ) + token = json.loads(token)["token"] + except: + raise + + exec_command_tmpl = "kubectl{eks}-{cluster} --token {token} exec -it -n {namespace} {pod} -- /bin/bash" + exec_command = exec_command_tmpl.format( + eks="-eks" if is_eks else "", + cluster=cluster, + namespace=namespace, + pod=pod_name, + token=token, + ) + cmd = pty.spawn(exec_command.split(" ")) + + return ret_code + + +def paasta_remote_run_stop( + args, + system_paasta_config: SystemPaastaConfig, + verbose: int = 0, + is_eks: bool = True, +) -> int: + + cluster = args.cluster + service = args.service + instance = args.instance + + ret_code = 0 + + client = get_paasta_oapi_client( + cluster=get_paasta_oapi_api_clustername(cluster=cluster, is_eks=is_eks), + system_paasta_config=system_paasta_config, + ) + if not client: + print("Cannot get a paasta-api client") + exit(1) + response = client.remote_run.remote_run_stop( + service, + instance, + {"user": get_username()}, + ) + print(response) + return ret_code + + +def remote_run(args) -> int: + """Run stuff, but remotely!""" + system_paasta_config = load_system_paasta_config( + "/nail/home/qlo/paasta_config/paasta/" + ) + if args.remote_run_command == "start": + return paasta_remote_run_start(args, system_paasta_config) + elif args.remote_run_command == "stop": + return paasta_remote_run_stop(args, system_paasta_config) diff --git a/paasta_tools/kubernetes/application/controller_wrappers.py b/paasta_tools/kubernetes/application/controller_wrappers.py index 21026120f8..09d7d90ec4 100644 --- a/paasta_tools/kubernetes/application/controller_wrappers.py +++ b/paasta_tools/kubernetes/application/controller_wrappers.py @@ -7,12 +7,14 @@ from kubernetes.client import V1beta1PodDisruptionBudget from kubernetes.client import V1DeleteOptions from kubernetes.client import V1Deployment +from kubernetes.client import V1Job from kubernetes.client import V1StatefulSet from kubernetes.client.rest import ApiException from paasta_tools.autoscaling.autoscaling_service_lib import autoscaling_is_paused from paasta_tools.eks_tools import load_eks_service_config_no_cache from paasta_tools.kubernetes_tools import create_deployment +from paasta_tools.kubernetes_tools import create_job from paasta_tools.kubernetes_tools import create_pod_disruption_budget from paasta_tools.kubernetes_tools import create_stateful_set from paasta_tools.kubernetes_tools import ensure_service_account @@ -51,7 +53,6 @@ def __init__( "config_sha", ] } - replicas = ( item.spec.replicas if item.metadata.labels.get(paasta_prefixed("autoscaled"), "false") @@ -411,14 +412,63 @@ def update(self, kube_client: KubeClient): ) +class JobWrapper(Application): + def __init__( + self, + item: V1Job, + logging=logging.getLogger(__name__), + ): + item.spec.replicas = None + super().__init__(item, logging) + + def deep_delete(self, kube_client: KubeClient) -> None: + """ + Remove all controllers, pods, and pod disruption budgets related to this application + :param kube_client: + """ + delete_options = V1DeleteOptions(propagation_policy="Foreground") + try: + kube_client.batches.delete_namespaced_job( + self.item.metadata.name, + self.item.metadata.namespace, + body=delete_options, + ) + except ApiException as e: + if e.status == 404: + # Job does not exist, nothing to delete but + # we can consider this a success. + self.logging.debug( + "not deleting nonexistent job/{} from namespace/{}".format( + self.item.metadata.name, self.item.metadata.namespace + ) + ) + else: + raise + else: + self.logging.info( + "deleted job/{} from namespace/{}".format( + self.item.metadata.name, self.item.metadata.namespace + ) + ) + + def create(self, kube_client: KubeClient): + create_job( + kube_client=kube_client, + formatted_job=self.item, + namespace=self.soa_config.get_namespace(), + ) + + def get_application_wrapper( - formatted_application: Union[V1Deployment, V1StatefulSet] + formatted_application: Union[V1Deployment, V1StatefulSet, V1Job] ) -> Application: app: Application if isinstance(formatted_application, V1Deployment): app = DeploymentWrapper(formatted_application) elif isinstance(formatted_application, V1StatefulSet): app = StatefulSetWrapper(formatted_application) + elif isinstance(formatted_application, V1Job): + app = JobWrapper(formatted_application) else: raise Exception("Unknown kubernetes object to update") diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index a4a6f43392..cef7b239b0 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -20,6 +20,7 @@ import os import re from datetime import datetime +from datetime import timedelta from datetime import timezone from enum import Enum from functools import lru_cache @@ -48,6 +49,7 @@ from humanfriendly import parse_size from kubernetes import client as kube_client from kubernetes import config as kube_config +from kubernetes.client import AuthenticationV1TokenRequest from kubernetes.client import CoreV1Event from kubernetes.client import models from kubernetes.client import V1Affinity @@ -71,6 +73,8 @@ from kubernetes.client import V1ExecAction from kubernetes.client import V1HostPathVolumeSource from kubernetes.client import V1HTTPGetAction +from kubernetes.client import V1Job +from kubernetes.client import V1JobSpec from kubernetes.client import V1KeyToPath from kubernetes.client import V1LabelSelector from kubernetes.client import V1Lifecycle @@ -95,11 +99,13 @@ from kubernetes.client import V1PodSecurityContext from kubernetes.client import V1PodSpec from kubernetes.client import V1PodTemplateSpec +from kubernetes.client import V1PolicyRule from kubernetes.client import V1PreferredSchedulingTerm from kubernetes.client import V1Probe from kubernetes.client import V1ProjectedVolumeSource from kubernetes.client import V1ReplicaSet from kubernetes.client import V1ResourceRequirements +from kubernetes.client import V1Role from kubernetes.client import V1RoleBinding from kubernetes.client import V1RoleRef from kubernetes.client import V1RollingUpdateDeployment @@ -113,6 +119,7 @@ from kubernetes.client import V1StatefulSetSpec from kubernetes.client import V1Subject from kubernetes.client import V1TCPSocketAction +from kubernetes.client import V1TokenRequestSpec from kubernetes.client import V1TopologySpreadConstraint from kubernetes.client import V1Volume from kubernetes.client import V1VolumeMount @@ -605,6 +612,7 @@ def __init__( self.core = kube_client.CoreV1Api(self.api_client) self.policy = kube_client.PolicyV1beta1Api(self.api_client) self.apiextensions = kube_client.ApiextensionsV1Api(self.api_client) + self.batches = kube_client.BatchV1Api(self.api_client) self.custom = kube_client.CustomObjectsApi(self.api_client) self.autoscaling = kube_client.AutoscalingV2beta2Api(self.api_client) @@ -2029,6 +2037,46 @@ def get_pod_management_policy(self) -> str: """Get sts pod_management_policy from config, default to 'OrderedReady'""" return self.config_dict.get("pod_management_policy", "OrderedReady") + def format_as_kubernetes_job(self) -> V1Job: + """Create the config for launching the deployment as a Job""" + try: + docker_url = self.get_docker_url() + git_sha = get_git_sha_from_dockerurl(docker_url, long=True) + system_paasta_config = load_system_paasta_config() + complete_config = V1Job( + api_version="batch/v1", + kind="Job", + metadata=self.get_kubernetes_metadata(git_sha), + spec=V1JobSpec( + active_deadline_seconds=3600, + template=self.get_pod_template_spec( + git_sha=git_sha, + system_paasta_config=system_paasta_config, + restart=False, + ), + ), + ) + + prometheus_shard = self.get_prometheus_shard() + if prometheus_shard: + complete_config.metadata.labels[ + "paasta.yelp.com/prometheus_shard" + ] = prometheus_shard + + image_version = self.get_image_version() + if image_version is not None: + complete_config.metadata.labels[ + "paasta.yelp.com/image_version" + ] = image_version + + complete_config.metadata.labels["paasta.yelp.com/remote_run"] = "true" + except Exception as e: + raise InvalidKubernetesConfig(e, self.get_service(), self.get_instance()) + log.debug( + "Complete configuration for remote-run instance is: %s", complete_config + ) + return complete_config + def format_kubernetes_app(self) -> Union[V1Deployment, V1StatefulSet]: """Create the configuration that will be passed to the Kubernetes REST API.""" @@ -2139,7 +2187,10 @@ def has_routable_ip( return "false" def get_pod_template_spec( - self, git_sha: str, system_paasta_config: SystemPaastaConfig + self, + git_sha: str, + system_paasta_config: SystemPaastaConfig, + restart: bool = True, ) -> V1PodTemplateSpec: service_namespace_config = load_service_namespace_config( service=self.service, namespace=self.get_nerve_namespace() @@ -2178,7 +2229,7 @@ def get_pod_template_spec( ), share_process_namespace=True, node_selector=self.get_node_selector(), - restart_policy="Always", + restart_policy="Always" if restart else "Never", volumes=self.get_pod_volumes( docker_volumes=docker_volumes + hacheck_sidecar_volumes, aws_ebs_volumes=self.get_aws_ebs_volumes(), @@ -2517,7 +2568,7 @@ def sanitize_for_config_hash( # remove data we dont want used to hash configs # replica count - if ahash["spec"] is not None: + if ahash["spec"] is not None and "replicas" in ahash["spec"]: del ahash["spec"]["replicas"] if ahash["metadata"] is not None: @@ -2788,6 +2839,7 @@ def ensure_namespace(kube_client: KubeClient, namespace: str) -> None: ensure_paasta_api_rolebinding(kube_client, namespace) ensure_paasta_namespace_limits(kube_client, namespace) + remove_remote_run_accounts_and_roles(kube_client, namespace) def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> None: @@ -3631,6 +3683,16 @@ def create_stateful_set( ) +def create_job( + kube_client: KubeClient, + formatted_job: V1Job, + namespace: str, +) -> None: + return kube_client.batches.create_namespaced_job( + namespace=namespace, body=formatted_job + ) + + def update_stateful_set( kube_client: KubeClient, formatted_stateful_set: V1StatefulSet, @@ -4354,6 +4416,20 @@ def get_kubernetes_secret_env_variables( return decrypted_secrets +def create_temp_exec_token( + kube_client: KubeClient, namespace: str, service_account: str +): + """Create a short lived token for exec""" + token_spec = V1TokenRequestSpec( + expiration_seconds=600, audiences=[] # minimum allowed by k8s + ) + request = AuthenticationV1TokenRequest(spec=token_spec) + response = kube_client.core.create_namespaced_service_account_token( + service_account, namespace, request + ) + return response + + def get_kubernetes_secret_volumes( kube_client: KubeClient, secret_volumes_config: Sequence[SecretVolume], @@ -4449,3 +4525,109 @@ def add_volumes_for_authenticating_services( ): config_volumes = [token_config, *config_volumes] return config_volumes + + +def create_pod_scoped_role(kube_client, namespace, user, pod_name): + pod_name_hash = hashlib.sha1(pod_name.encode("utf-8")).hexdigest()[:12] + policy = V1PolicyRule( + verbs=["create", "get"], + resources=["pods", "pods/exec"], + resource_names=[pod_name], + api_groups=[""], + ) + role_name = f"remote-run-role-{pod_name_hash}" + role = V1Role( + rules=[policy], + metadata=V1ObjectMeta( + name=role_name, + labels={"PodOwner": user}, + ), + ) + + kube_client.core.create_namespaced_role(namespace=namespace, body=role) + return role_name + + +def bind_role_to_service_account(kube_client, namespace, service_account, role): + role_binding = V1RoleBinding( + metadata=V1ObjectMeta( + name=f"binding-{role}", + namespace=namespace, + ), + role_ref=V1RoleRef( + api_group="rbac.authorization.k8s.io", + kind="Role", + name=role, + ), + subjects=[ + V1Subject( + kind="ServiceAccount", + name=service_account, + ), + ], + ) + kube_client.rbac.create_namespaced_role_binding( + namespace=namespace, body=role_binding + ) + + +def create_paasta_remote_run_service_account( + kube_client: KubeClient, namespace: str, username: str, pod_name: str +) -> None: + pod_name_hash = hashlib.sha1(pod_name.encode("utf-8")).hexdigest()[:12] + service_account_name = f"remote-run-{username}-{pod_name_hash}" + service_accounts = get_all_service_accounts(kube_client, namespace=namespace) + service_account_names = [item.metadata.name for item in service_accounts] + if service_account_name in service_account_names: + return service_account_name + + service_account = V1ServiceAccount( + metadata=V1ObjectMeta( + name=service_account_name, + namespace=namespace, + labels={"PodOwner": username}, + ) + ) + kube_client.core.create_namespaced_service_account( + namespace=namespace, body=service_account + ) + return service_account_name + + +def get_namespaced_roles(kube_client, namespace): + return kube_client.rbac.list_namespaced_role(namespace).items + + +def remove_remote_run_accounts_and_roles( + kube_client: KubeClient, + namespace: str, +) -> None: + service_accounts = get_all_service_accounts(kube_client, namespace) + roles = get_namespaced_roles(kube_client, namespace) + + current_time = datetime.utcnow().replace(tzinfo=timezone.utc) + age_limit = timedelta(seconds=60) + + for delete_func, entity_list in ( + (delete_namespaced_service_account, service_accounts), + (delete_namespaced_role, roles), + ): + for entity in entity_list: + if not entity.metadata.name.startswith("remote-run-"): + continue + entity_age = current_time - entity.metadata.creation_timestamp + if entity_age < age_limit: + continue + delete_func(kube_client, namespace, entity) + + +def delete_namespaced_service_account( + kube_client: KubeClient, namespace: str, service_account: V1ServiceAccount +): + kube_client.core.delete_namespaced_service_account( + service_account.metadata.name, namespace + ) + + +def delete_namespaced_role(kube_client: KubeClient, namespace: str, role: V1Role): + kube_client.rbac.delete_namespaced_role(role.metadata.name, namespace) diff --git a/paasta_tools/paasta_remote_run_2.py b/paasta_tools/paasta_remote_run_2.py new file mode 100755 index 0000000000..75cedb2d63 --- /dev/null +++ b/paasta_tools/paasta_remote_run_2.py @@ -0,0 +1,147 @@ +import json +from time import sleep + +from kubernetes.client.exceptions import ApiException + +from paasta_tools.eks_tools import load_eks_service_config_no_cache +from paasta_tools.kubernetes.application.controller_wrappers import ( + get_application_wrapper, +) +from paasta_tools.kubernetes_tools import create_temp_exec_token +from paasta_tools.kubernetes_tools import KubeClient +from paasta_tools.kubernetes_tools import load_kubernetes_service_config_no_cache +from paasta_tools.utils import DEFAULT_SOA_DIR + + +def create_exec_token(service, instance, user, cluster, is_eks): + """Creates a short lived token for execing into a pod""" + kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") + + # Load the service deployment settings + if is_eks: + deployment = load_eks_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + else: + deployment = load_kubernetes_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + namespace = deployment.get_namespace() + formatted_job = deployment.format_as_kubernetes_job() + job_name = f"remote-run-{user}-{formatted_job.metadata.name}" + + try: + pod = find_pod(kube_client, namespace, job_name) + pod_name = pod.metadata.name + service_account = create_paasta_remote_run_service_account( + kubeclient, namespace, user, pod_name + ) + role = create_pod_scoped_role(kube_client, namespace, pod_name, user) + bind_role_service_account(kube_client, namespace, service_account, role) + token = create_temp_exec_token(kube_client, namespace, service_account) + except ApiException as E: + raise + return token.status.token + + +def remote_run_start(service, instance, user, cluster, interactive, recreate, is_eks): + # TODO Overriding the kube client config for now as the api has limited permissions + kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") + + # Load the service deployment settings + if is_eks: + deployment = load_eks_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + else: + deployment = load_kubernetes_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + namespace = deployment.get_namespace() + + # Set to interactive mode + if interactive: + deployment.config_dict["cmd"] = "sleep 604800" # One week + + # Create the app with a new name + formatted_job = deployment.format_as_kubernetes_job() + formatted_job.metadata.name = f"remote-run-{user}-{formatted_job.metadata.name}" + job_name = formatted_job.metadata.name + app_wrapper = get_application_wrapper(formatted_job) + app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks) + + # Launch pod + status = 200 + try: + app_wrapper.create(kube_client) + except ApiException as e: + if e.status == 409: + # Job already running + status = 409 + raise + + pod = wait_until_pod_running(kube_client, namespace, job_name) + + return json.dumps( + {"status": status, "pod_name": pod.metadata.name, "namespace": namespace} + ) + + +def wait_until_deployment_gone(kube_client, namespace, job_name): + for retry in range(10): + pod = find_pod(kube_client, namespace, job_name, 1) + if not pod: + return + sleep(5) + raise Exception("Pod still exists!") + + +def find_pod(kube_client, namespace, job_name, retries=5): + # Get pod status and name + for retry in range(retries): + pod_list = kube_client.core.list_namespaced_pod(namespace) + matching_pod = None + for pod in pod_list.items: + if pod.metadata.name.startswith(job_name): + matching_pod = pod + break + + if not matching_pod: + sleep(2) + continue + return matching_pod + return None + + +def wait_until_pod_running(kube_client, namespace, job_name): + for retry in range(5): + pod = find_pod(kube_client, namespace, job_name) + if not pod: + raise Exception("No matching pod!") + if pod.status.phase == "Running": + break + elif pod.status.phase not in ("Initializing", "Pending"): + raise Exception(f"Pod state is {pod.status.phase}") + return pod + + +def remote_run_stop(service, instance, user, cluster, is_eks): + # TODO Overriding the kube client config for now as the api has limited permissions + kube_client = KubeClient(config_file="/etc/kubernetes/admin.conf") + if is_eks: + deployment = load_eks_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + else: + deployment = load_kubernetes_service_config_no_cache( + service, instance, cluster, DEFAULT_SOA_DIR + ) + namespace = deployment.get_namespace() + formatted_job = deployment.format_as_kubernetes_job() + job_name = f"remote-run-{user}-{formatted_job.metadata.name}" + formatted_job.metadata.name = job_name + + app_wrapper = get_application_wrapper(formatted_job) + app_wrapper.load_local_config(DEFAULT_SOA_DIR, cluster, is_eks) + app_wrapper.deep_delete(kube_client) + return json.dumps({"status": 200, "message": "Job successfully removed"}) diff --git a/paasta_tools/paastaapi/apis/__init__.py b/paasta_tools/paastaapi/apis/__init__.py index a93cebf52c..ab815d8c70 100644 --- a/paasta_tools/paastaapi/apis/__init__.py +++ b/paasta_tools/paastaapi/apis/__init__.py @@ -17,5 +17,6 @@ # Import APIs into API package: from paasta_tools.paastaapi.api.autoscaler_api import AutoscalerApi from paasta_tools.paastaapi.api.default_api import DefaultApi +from paasta_tools.paastaapi.api.remote_run_api import RemoteRunApi from paasta_tools.paastaapi.api.resources_api import ResourcesApi from paasta_tools.paastaapi.api.service_api import ServiceApi diff --git a/paasta_tools/paastaapi/models/__init__.py b/paasta_tools/paastaapi/models/__init__.py index 0d1278ee23..2a8cbf44b2 100644 --- a/paasta_tools/paastaapi/models/__init__.py +++ b/paasta_tools/paastaapi/models/__init__.py @@ -26,6 +26,8 @@ from paasta_tools.paastaapi.model.float_and_error import FloatAndError from paasta_tools.paastaapi.model.hpa_metric import HPAMetric from paasta_tools.paastaapi.model.inline_object import InlineObject +from paasta_tools.paastaapi.model.inline_object1 import InlineObject1 +from paasta_tools.paastaapi.model.inline_object2 import InlineObject2 from paasta_tools.paastaapi.model.inline_response200 import InlineResponse200 from paasta_tools.paastaapi.model.inline_response2001 import InlineResponse2001 from paasta_tools.paastaapi.model.instance_bounce_status import InstanceBounceStatus diff --git a/requirements.txt b/requirements.txt index 9887bf9df6..f8c1a05e93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ boto3-type-annotations==0.3.1 botocore==1.34.22 bravado==10.4.1 bravado-core==5.12.1 -cachetools==2.0.1 +cachetools==5.5.0 certifi==2017.11.5 chardet==3.0.4 choice==0.1 diff --git a/tests/api/tweens/test_auth.py b/tests/api/tweens/test_auth.py new file mode 100644 index 0000000000..21ad97bafd --- /dev/null +++ b/tests/api/tweens/test_auth.py @@ -0,0 +1,95 @@ +# Copyright 2015-2016 Yelp Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +from pyramid.httpexceptions import HTTPForbidden + +from paasta_tools.api.tweens import auth + + +@pytest.fixture +def mock_auth_tween(): + with patch.dict( + os.environ, + { + "PAASTA_API_AUTH_ENFORCE": "1", + "PAASTA_API_AUTH_ENDPOINT": "http://localhost:31337", + }, + ): + with patch("paasta_tools.api.tweens.auth.requests"): + yield auth.AuthTweenFactory(MagicMock(), MagicMock()) + + +def test_call(mock_auth_tween): + mock_request = MagicMock( + path="/something", + method="post", + headers={"Authorization": "Bearer aaa.bbb.ccc"}, + ) + with patch.object(mock_auth_tween, "is_request_authorized") as mock_is_authorized: + mock_is_authorized.return_value = auth.AuthorizationOutcome(True, "Ok") + mock_auth_tween(mock_request) + mock_is_authorized.assert_called_once_with("/something", "aaa.bbb.ccc", "post") + mock_auth_tween.handler.assert_called_once_with(mock_request) + + +def test_call_deny(mock_auth_tween): + mock_request = MagicMock( + path="/something", + method="post", + headers={"Authorization": "Bearer aaa.bbb.ccc"}, + ) + with patch.object(mock_auth_tween, "is_request_authorized") as mock_is_authorized: + mock_is_authorized.return_value = auth.AuthorizationOutcome(False, "Denied") + response = mock_auth_tween(mock_request) + assert isinstance(response, HTTPForbidden) + assert response.headers.get("X-Auth-Failure-Reason") == "Denied" + + +def test_is_request_authorized(mock_auth_tween): + mock_auth_tween.session.post.return_value.json.return_value = { + "result": {"allowed": True, "reason": "User allowed"} + } + assert mock_auth_tween.is_request_authorized( + "/allowed", "aaa.bbb.ccc", "get" + ) == auth.AuthorizationOutcome(True, "User allowed") + mock_auth_tween.session.post.assert_called_once_with( + url="http://localhost:31337", + json={ + "input": { + "path": "/allowed", + "backend": "paasta", + "token": "aaa.bbb.ccc", + "method": "get", + } + }, + timeout=2, + ) + + +def test_is_request_authorized_fail(mock_auth_tween): + mock_auth_tween.session.post.side_effect = Exception + assert mock_auth_tween.is_request_authorized( + "/allowed", "eee.ddd.fff", "get" + ) == auth.AuthorizationOutcome(False, "Auth backend error") + + +def test_is_request_authorized_malformed(mock_auth_tween): + mock_auth_tween.session.post.return_value.json.return_value = {"foo": "bar"} + assert mock_auth_tween.is_request_authorized( + "/allowed", "eee.ddd.fff", "post" + ) == auth.AuthorizationOutcome(False, "Malformed auth response")