feature/add-a-health-endpoint #35

Open
wants to merge 2 commits into base: main
docker/manage (16 changes: 8 additions & 8 deletions)
@@ -1,7 +1,7 @@
#!/bin/bash
export MSYS_NO_PATHCONV=1
# getDockerHost; for details refer to https://github.com/bcgov/DITP-DevOps/tree/main/code/snippets#getdockerhost
-. /dev/stdin <<<"$(cat <(curl -s --raw https://raw.githubusercontent.com/bcgov/DITP-DevOps/main/code/snippets/getDockerHost))"
+. /dev/stdin <<<"$(cat <(curl -s --raw https://raw.githubusercontent.com/bcgov/DITP-DevOps/main/code/snippets/getDockerHost))"
export DOCKERHOST=$(getDockerHost)
set -e

@@ -57,7 +57,7 @@ function logs() {
  while getopts ":f-:" FLAG; do
    case $FLAG in
      f ) local _force=1 ;;
-      - )
+      - )
        case ${OPTARG} in
          "no-tail"*) no_tail=1
            ;;
@@ -88,15 +88,15 @@ build)
  ;;
start|up)
  exportEnvironment "$@"
-  docker-compose up -d ngrok-tails-server tails-server
+  docker-compose up --build --force-recreate -d ngrok-tails-server tails-server
  logs
-  echo "Run './manage logs' for logs"
+  echo "Run './manage logs' for logs"
  ;;
test)
  exportEnvironment "$@"
-  docker-compose up -d ngrok-tails-server tails-server
+  docker-compose up --build --force-recreate -d ngrok-tails-server tails-server
  docker-compose run tester --genesis-url $GENESIS_URL --tails-server-url $TAILS_SERVER_URL
-  # docker-compose down
+  # docker-compose down --volumes --remove-orphans
  ;;
logs)
  docker-compose logs -f
@@ -105,11 +105,11 @@ stop)
  docker-compose stop
  ;;
down|rm)
-  docker-compose down
+  docker-compose down --volumes --remove-orphans
  ;;
*)
  usage
  ;;
esac

-popd >/dev/null
+popd >/dev/null
tails_server/health.py (248 changes: 248 additions & 0 deletions)
@@ -0,0 +1,248 @@
import asyncio
import imp
import json
import os
import logging
import socket
import sys
import time
import traceback
from aiohttp import web


try:
    from functools import reduce
except Exception:
    pass


def basic_exception_handler(_, e):
    return False, str(e)


def json_success_handler(results):
    data = {
        'hostname': socket.gethostname(),
        'status': 'success',
        'timestamp': time.time(),
        'results': results,
    }

    return json.dumps(data)


def json_failed_handler(results):
    data = {
        'hostname': socket.gethostname(),
        'status': 'failure',
        'timestamp': time.time(),
        'results': results,
    }

    return json.dumps(data)


def check_reduce(passed, result):
    return passed and result.get('passed')


class Check(object):
    def __init__(self, success_status=200, success_headers=None,
                 success_handler=json_success_handler, success_ttl=None,
                 failed_status=500, failed_headers=None,
                 failed_handler=json_failed_handler, failed_ttl=None,
                 exception_handler=basic_exception_handler, checkers=None,
                 logger=None, **options):
        self.cache = dict()

        self.success_status = success_status
        self.success_headers = success_headers or {'Content-Type': 'application/json'}
        self.success_handler = success_handler
        self.success_ttl = float(success_ttl or 0)

        self.failed_status = failed_status
        self.failed_headers = failed_headers or {'Content-Type': 'application/json'}
        self.failed_handler = failed_handler
        self.failed_ttl = float(failed_ttl or 0)

        self.exception_handler = exception_handler

        self.options = options
        self.checkers = checkers or []

        self.logger = logger
        if not self.logger:
            self.logger = logging.getLogger('HealthCheck')

    @asyncio.coroutine
    def __call__(self, request):
        message, status, headers = yield from self.check()
        return web.Response(text=message, status=status, headers=headers)

    def add_check(self, func):
        if not asyncio.iscoroutinefunction(func):
            func = asyncio.coroutine(func)

        self.checkers.append(func)

    @asyncio.coroutine
    def run_check(self, checker):
        try:
            passed, output = yield from checker()
        except Exception:
            traceback.print_exc()
            e = sys.exc_info()[0]
            self.logger.exception(e)
            passed, output = self.exception_handler(checker, e)

        if not passed:
            msg = 'Health check "{}" failed with output "{}"'.format(checker.__name__, output)
            self.logger.error(msg)

        timestamp = time.time()
        if passed:
            expires = timestamp + self.success_ttl
        else:
            expires = timestamp + self.failed_ttl

        result = {'checker': checker.__name__,
                  'output': output,
                  'passed': passed,
                  'timestamp': timestamp,
                  'expires': expires}
        return result

    @asyncio.coroutine
    def check(self):
        results = []
        for checker in self.checkers:
            if checker in self.cache and self.cache[checker].get('expires') >= time.time():
                result = self.cache[checker]
            else:
                result = yield from self.run_check(checker)
                self.cache[checker] = result
            results.append(result)

        passed = reduce(check_reduce, results, True)

        if passed:
            message = "OK"
            if self.success_handler:
                message = self.success_handler(results)

            return message, self.success_status, self.success_headers
        else:
            message = "NOT OK"
            if self.failed_handler:
                message = self.failed_handler(results)

            return message, self.failed_status, self.failed_headers


class EnvDump(object):
    def __init__(self,
                 include_os=False,
                 include_python=False,
                 include_process=False):

        self.functions = {}

        if include_os:
            self.functions['os'] = self.get_os
        if include_python:
            self.functions['python'] = self.get_python
        if include_process:
            self.functions['process'] = self.get_process

    @asyncio.coroutine
    def __call__(self, request):
        data = yield from self.dump_environment(request)
        return web.json_response(data)

    @asyncio.coroutine
    def dump_environment(self, request):
        data = {}
        data['storage'] = yield from self.get_storage_info(request)

        for name, func in self.functions.items():
            data[name] = yield from func()

        return data

    @asyncio.coroutine
    def get_os(self):
        return {'platform': sys.platform,
                'name': os.name,
                'uname': os.uname()}

    @asyncio.coroutine
    def get_python(self):
        result = {'version': sys.version,
                  'executable': sys.executable,
                  'pythonpath': sys.path,
                  'version_info': {'major': sys.version_info.major,
                                   'minor': sys.version_info.minor,
                                   'micro': sys.version_info.micro,
                                   'releaselevel': sys.version_info.releaselevel,
                                   'serial': sys.version_info.serial}}
        if imp.find_module('pkg_resources'):
            import pkg_resources
            packages = dict([(p.project_name, p.version) for p in pkg_resources.working_set])
            result['packages'] = packages

        return result

    @asyncio.coroutine
    def get_login(self):
        # Based on https://github.com/gitpython-developers/GitPython/pull/43/
        # Fix for 'Inappropriate ioctl for device' on posix systems.
        if os.name == "posix":
            import pwd
            username = pwd.getpwuid(os.geteuid()).pw_name
        else:
            username = os.environ.get('USER', os.environ.get('USERNAME', 'UNKNOWN'))
            if username == 'UNKNOWN' and hasattr(os, 'getlogin'):
                username = os.getlogin()
        return username

    @asyncio.coroutine
    def get_process(self):
        return {'argv': sys.argv,
                'cwd': os.getcwd(),
                'user': (yield from self.get_login()),
                'pid': os.getpid(),
                'environ': self.safe_dump(os.environ)}

    @asyncio.coroutine
    def get_storage_info(self, request):
        storage_path = request.app["settings"]["storage_path"]

        # NOTE: get_dir_size is not defined anywhere else in this diff; a minimal
        # local helper is assumed here so the directory branch below has something to call.
        def get_dir_size(path):
            size = 0
            for root, _, files in os.walk(path):
                for name in files:
                    size += os.path.getsize(os.path.join(root, name))
            return size

        dir_count = 0
        file_count = 0
        total = 0
        with os.scandir(storage_path) as it:
            for entry in it:
                if entry.is_file():
                    file_count += 1
                    total += entry.stat().st_size
                elif entry.is_dir():
                    dir_count += 1
                    total += get_dir_size(entry.path)

        return {'number_of_files': file_count,
                'number_of_directories': dir_count,
                'used_space': total}

    @staticmethod
    def safe_dump(dictionary):
        result = {}
        for key in dictionary.keys():
            if 'key' in key.lower() or 'token' in key.lower() or 'pass' in key.lower():
                # Try to avoid listing passwords and access tokens or keys in the output
                result[key] = "********"
            else:
                try:
                    json.dumps(dictionary[key])
                    result[key] = dictionary[key]
                except TypeError:
                    pass
        return result
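Reviewer note: a rough sketch of how the Check class above can be exercised on its own, outside aiohttp. It assumes the tails_server package is importable and a Python version where asyncio.coroutine still exists (the module relies on it); the checker name always_ok and the TTL values are illustrative only, not part of this diff.

import asyncio
from tails_server.health import Check

def always_ok():
    # A checker returns (passed, output), like custom_check in web.py below.
    return True, "It works!"

check = Check(success_ttl=30, failed_ttl=10)
check.add_check(always_ok)

loop = asyncio.new_event_loop()
body, status, headers = loop.run_until_complete(check.check())
loop.close()
print(status)  # 200 when every checker passes, 500 otherwise
print(body)    # JSON with hostname, status, timestamp, and per-checker results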
tails_server/web.py (22 changes: 21 additions & 1 deletion)
@@ -14,6 +14,7 @@
    BadGenesisError,
    BadRevocationRegistryIdError,
)
+from .health import Check, EnvDump

LOGGER = logging.getLogger(__name__)

@@ -107,7 +108,7 @@ async def put_file(request):
text="Second field in multipart request must have name 'tails'"
)

# Process the file in chunks so we don't explode on large files.
# Process the file in chunks, so we don't explode on large files.
# Construct hash and write file in chunks.
sha256 = hashlib.sha256()
try:
@@ -146,13 +147,32 @@ async def put_file(request):
    return web.Response(text=tails_hash)


+def custom_check():  # An example of a custom check run as part of "/health/check"
+    if 1 + 1 == 2:
+        return True, "It works!"
+    else:
+        return False, "It doesn't work!!! :("
+
+
def start(settings):
    app = web.Application()
    app["settings"] = settings

    # Add routes
    app.add_routes(routes)

+    # To avoid putting too much strain on backend services, health check results can be cached in process memory.
+    # The TTLs default to None, so set them to specific intervals for the cache to take effect.
+    check = Check(success_ttl=30, failed_ttl=10)
+    # EnvDump always returns storage statistics; OS/Python/process details can be toggled on or off.
+    env_dump = EnvDump(include_os=True, include_python=True, include_process=True)
+
+    app.router.add_get("/health/check", check)
+    app.router.add_get("/health/env", env_dump)
+
+    # For extensibility, use add_check to register a custom check method (see custom_check above).
+    check.add_check(custom_check)
+
    web.run_app(
        app,
        host=settings.get("host") or DEFAULT_WEB_HOST,
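Reviewer note: once the stack is up (e.g. via ./manage start), the new endpoints can be spot-checked with a short script like the one below. The base URL is an assumption (substitute the web host and port from settings), and a failing /health/check responds with HTTP 500, which urlopen surfaces as an HTTPError.

import json
import urllib.request

BASE = "http://localhost:6543"  # assumed host/port; substitute the configured web host/port

for path in ("/health/check", "/health/env"):
    # A failing /health/check returns HTTP 500, which urlopen raises as HTTPError.
    with urllib.request.urlopen(BASE + path) as resp:
        payload = json.loads(resp.read().decode())
        print(path, resp.status, sorted(payload))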