Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat) Add masking to python middleware #955

Merged
merged 15 commits into from
Sep 18, 2024
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ test-webhooks-php-laravel: ## Run webhooks tests against the PHP SDK + Laravel

test-metrics-python-django: ## Run Metrics tests against the Python SDK + Django
docker compose up --build --detach integration_metrics_python_django
npm run test:integration-metrics || make cleanup-failure
SUPPORTS_HASHING=true npm run test:integration-metrics || make cleanup-failure
@make cleanup

test-metrics-python-flask: ## Run Metrics tests against the Python SDK + Flask
docker compose up --build --detach integration_python_flask_metrics
npm run test:integration-metrics || make cleanup-failure
SUPPORTS_HASHING=true npm run test:integration-metrics || make cleanup-failure
@make cleanup

test-webhooks-python-flask: ## Run webhooks tests against the Python SDK + Flask
docker compose up --build --detach integration_python_flask_webhooks
npm run test:integration-webhooks || make cleanup-failure
SUPPORTS_HASHING=true npm run test:integration-webhooks || make cleanup-failure
@make cleanup

##
Expand Down
1 change: 1 addition & 0 deletions packages/python/readme_metrics/Metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def process(self, request, response: ResponseInfoWrapper) -> None:
)
if self.config.IS_DEVELOPMENT_MODE:
print(traceback.format_exc())
return

self.queue.put(payload)
if self.queue.qsize() >= self.config.BUFFER_LENGTH:
Expand Down
24 changes: 15 additions & 9 deletions packages/python/readme_metrics/PayloadBuilder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from collections.abc import Mapping
import importlib
import json
from json import JSONDecodeError
Expand All @@ -8,7 +7,8 @@

from typing import List, Optional
from urllib import parse
from readme_metrics import ResponseInfoWrapper
from .ResponseInfoWrapper import ResponseInfoWrapper
from .util import mask


class QueryNotFound(Exception):
Expand Down Expand Up @@ -135,6 +135,9 @@ def _validate_group(self, group: Optional[dict]):
)
return None

# Mask the id / api_key
group["id"] = mask(group["id"])

for field in ["email", "label"]:
if field not in group:
self.logger.warning(
Expand Down Expand Up @@ -167,6 +170,7 @@ def _build_request_payload(self, request) -> dict:
dict: Wrapped request payload
"""
headers = self.redact_dict(request.headers)

queryString = parse.parse_qsl(self._get_query_string(request))

content_type = self._get_content_type(headers)
Expand Down Expand Up @@ -216,6 +220,11 @@ def _build_request_payload(self, request) -> dict:
"bodySize": -1,
}

if "Authorization" in headers:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at this point, if there is an Authorization header, won't there already be an unmasked Authorization header in payload["headers"]?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be my expectation too - does .append overwrite the existing header here, or do we need to forcefully delete it?

payload["headers"].append(
{"name": "Authorization", "value": mask(headers["Authorization"])}
)

if not post_data is False:
payload["postData"] = post_data

Expand Down Expand Up @@ -357,15 +366,11 @@ def _process_body(self, content_type, body):

return {"mimeType": content_type, "text": body}

def redact_dict(self, mapping: Mapping):
def redact_dict(self, mapping: dict) -> dict:
def _redact_value(val):
if isinstance(val, str):
return f"[REDACTED {len(val)}]"

return "[REDACTED]"
return f"[REDACTED {len(val)}]" if isinstance(val, str) else "[REDACTED]"

# Short-circuit this function if there's no allowlist or denylist
if not (self.allowlist or self.denylist):
if not self.allowlist and not self.denylist:
return mapping

result = {}
Expand All @@ -376,4 +381,5 @@ def _redact_value(val):
result[key] = _redact_value(value)
else:
result[key] = value

return result
2 changes: 1 addition & 1 deletion packages/python/readme_metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from readme_metrics.MetricsApiConfig import MetricsApiConfig
from readme_metrics.MetricsMiddleware import MetricsMiddleware

__version__ = "3.1.0"
__version__ = "3.2.0"
71 changes: 68 additions & 3 deletions packages/python/readme_metrics/tests/PayloadBuilder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from readme_metrics import MetricsApiConfig
from readme_metrics import MetricsMiddleware
from readme_metrics.PayloadBuilder import PayloadBuilder
from readme_metrics.util import mask
from .fixtures import Environ


Expand All @@ -27,6 +28,15 @@ def mockStartResponse(self, status, headers):
self.headers = headers


class TestUtils:
def testMask(self):
# pylint: disable=line-too-long
assert (
mask("deadbeef")
== "sha512-ETo7x4PYUfwDcyFLGep76fo95UHsuf4CbVLGA+jqGcF0zA6XBfi5DTEiEsDDpthFPd+z4xQUCc9L7cjvAzWQtA==?beef"
)


class TestPayloadBuilder:
"""
Unit Tests for Payload Builder to make sure the created payloads are created
Expand Down Expand Up @@ -183,7 +193,37 @@ def test_grouping_function(self):
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))
group = data["group"]

assert group["id"] == "spam"
assert group["id"] == mask("spam")
assert group["email"] == "[email protected]"
assert group["label"] == "Spam Musubi"

def test_mask_matches_node(self):
config = self.mockMiddlewareConfig(
grouping_function=lambda req: {
"api_key": "Bearer: a-random-api-key",
"email": "[email protected]",
"label": "Spam Musubi",
}
)

environ = Environ.MockEnviron().getEnvironForRequest(b"", "POST")
app = MockApplication("{ 'responseObject': 'value' }")

metrics = MetricsCoreMock()
middleware = MetricsMiddleware(app, config)
middleware.metrics_core = metrics

next(middleware(environ, app.mockStartResponse))

payload = self.createPayload(config)
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))
group = data["group"]

assert (
group["id"]
== "sha512-7S+L0vUE8Fn6HI3836rtz4b6fVf6H4JFur6SGkOnL3b"
+ "FpC856+OSZkpIHphZ0ipNO+kUw1ePb5df2iYrNQCpXw==?-key"
)
assert group["email"] == "[email protected]"
assert group["label"] == "Spam Musubi"

Expand Down Expand Up @@ -228,7 +268,7 @@ def test_deprecated_id_grouping(self):
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))
group = data["group"]

assert group["id"] == "spam"
assert group["id"] == mask("spam")
assert group["email"] == "[email protected]"
assert group["label"] == "Spam Musubi"

Expand Down Expand Up @@ -282,7 +322,7 @@ def test_grouping_function_validation_when_extra_fields_present(self):
assert isinstance(data, dict)
assert "group" in data
assert data["group"] == {
"id": "spam",
"id": mask("spam"),
"email": "[email protected]",
"label": "Spam Musubi",
}
Expand Down Expand Up @@ -311,6 +351,8 @@ def test_uuid_generation(self):
payload = self.createPayload(config)
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))

print(data["_id"])

assert data["_id"] == str(uuid.UUID(data["_id"], version=4))

def test_har_creator(self):
Expand All @@ -337,3 +379,26 @@ def test_har_creator(self):

assert post_data["text"] == json.dumps({"ok": 123, "password": 456})
assert post_data["mimeType"] == "text/plain"

def test_auth_header_masked(self):
config = self.mockMiddlewareConfig(development_mode=True)
environ = Environ.MockEnviron().getEnvironForRequest(b"", "GET")
app = MockApplication("{ 'responseObject': 'value' }")

metrics = MetricsCoreMock()
middleware = MetricsMiddleware(app, config)
middleware.metrics_core = metrics

next(middleware(environ, app.mockStartResponse))

payload = self.createPayload(config)
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))

auth_header = None

# Pull out the auth header
for header in data["request"]["log"]["entries"][0]["request"]["headers"]:
if header["name"] == "Authorization":
auth_header = header["value"]

assert auth_header and auth_header == mask(environ["HTTP_AUTHORIZATION"])
1 change: 1 addition & 0 deletions packages/python/readme_metrics/tests/fixtures/Environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ def getEnvironForRequest(self, jsonByteString, httpRequestMethod):
stream = io.BytesIO(jsonByteString)
environ["wsgi.input"] = wsgi.LimitedStream(stream, contentLength)
environ["CONTENT_LENGTH"] = str(contentLength)
environ["HTTP_AUTHORIZATION"] = "Bearer: a-random-api-key"
return environ
10 changes: 10 additions & 0 deletions packages/python/readme_metrics/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import logging
import hashlib
import base64
from logging import Formatter, StreamHandler


Expand All @@ -10,3 +12,11 @@ def util_build_logger():
handler.setFormatter(formatter)
logger.addHandler(handler)
return logger


def mask(data):
m = hashlib.sha512()
m.update(data.encode("utf8"))
digest = base64.b64encode(m.digest()).decode("utf8")
opts = data[-4:]
return f"sha512-{digest}?{opts}"