Skip to content

Commit

Permalink
(feat) Add masking to python middleware (#955)
Browse files Browse the repository at this point in the history
| 🚥 Resolves RM-8742 |
| :------------------- |

## 🧰 Changes

Adds sensitive data masking to the Python SDK

## 🧬 QA & Testing

Updated and added tests as applicable

---------

Co-authored-by: Kenny Hoxworth <[email protected]>
Co-authored-by: Bill Mill <[email protected]>
Co-authored-by: Andrii Andreiev <[email protected]>
  • Loading branch information
4 people authored Sep 18, 2024
1 parent 69bc17f commit b24b2d8
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 16 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ test-webhooks-php-laravel: ## Run webhooks tests against the PHP SDK + Laravel

test-metrics-python-django: ## Run Metrics tests against the Python SDK + Django
docker compose up --build --detach integration_metrics_python_django
npm run test:integration-metrics || make cleanup-failure
SUPPORTS_HASHING=true npm run test:integration-metrics || make cleanup-failure
@make cleanup

test-metrics-python-flask: ## Run Metrics tests against the Python SDK + Flask
docker compose up --build --detach integration_python_flask_metrics
npm run test:integration-metrics || make cleanup-failure
SUPPORTS_HASHING=true npm run test:integration-metrics || make cleanup-failure
@make cleanup

test-webhooks-python-flask: ## Run webhooks tests against the Python SDK + Flask
docker compose up --build --detach integration_python_flask_webhooks
npm run test:integration-webhooks || make cleanup-failure
SUPPORTS_HASHING=true npm run test:integration-webhooks || make cleanup-failure
@make cleanup

##
Expand Down
1 change: 1 addition & 0 deletions packages/python/readme_metrics/Metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def process(self, request, response: ResponseInfoWrapper) -> None:
)
if self.config.IS_DEVELOPMENT_MODE:
print(traceback.format_exc())
return

self.queue.put(payload)
if self.queue.qsize() >= self.config.BUFFER_LENGTH:
Expand Down
24 changes: 15 additions & 9 deletions packages/python/readme_metrics/PayloadBuilder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from collections.abc import Mapping
import importlib
import json
from json import JSONDecodeError
Expand All @@ -8,7 +7,8 @@

from typing import List, Optional
from urllib import parse
from readme_metrics import ResponseInfoWrapper
from .ResponseInfoWrapper import ResponseInfoWrapper
from .util import mask


class QueryNotFound(Exception):
Expand Down Expand Up @@ -135,6 +135,9 @@ def _validate_group(self, group: Optional[dict]):
)
return None

# Mask the id / api_key
group["id"] = mask(group["id"])

for field in ["email", "label"]:
if field not in group:
self.logger.warning(
Expand Down Expand Up @@ -167,6 +170,7 @@ def _build_request_payload(self, request) -> dict:
dict: Wrapped request payload
"""
headers = self.redact_dict(request.headers)

queryString = parse.parse_qsl(self._get_query_string(request))

content_type = self._get_content_type(headers)
Expand Down Expand Up @@ -205,6 +209,11 @@ def _build_request_payload(self, request) -> dict:
else:
post_data = self._process_body(content_type, request.rm_body)

headers = dict(headers)

if "Authorization" in headers:
headers["Authorization"] = mask(headers["Authorization"])

payload = {
"method": request.method,
"url": self._build_base_url(request),
Expand Down Expand Up @@ -357,15 +366,11 @@ def _process_body(self, content_type, body):

return {"mimeType": content_type, "text": body}

def redact_dict(self, mapping: Mapping):
def redact_dict(self, mapping: dict) -> dict:
def _redact_value(val):
if isinstance(val, str):
return f"[REDACTED {len(val)}]"

return "[REDACTED]"
return f"[REDACTED {len(val)}]" if isinstance(val, str) else "[REDACTED]"

# Short-circuit this function if there's no allowlist or denylist
if not (self.allowlist or self.denylist):
if not self.allowlist and not self.denylist:
return mapping

result = {}
Expand All @@ -376,4 +381,5 @@ def _redact_value(val):
result[key] = _redact_value(value)
else:
result[key] = value

return result
2 changes: 1 addition & 1 deletion packages/python/readme_metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from readme_metrics.MetricsApiConfig import MetricsApiConfig
from readme_metrics.MetricsMiddleware import MetricsMiddleware

__version__ = "3.1.0"
__version__ = "3.2.0"
71 changes: 68 additions & 3 deletions packages/python/readme_metrics/tests/PayloadBuilder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from readme_metrics import MetricsApiConfig
from readme_metrics import MetricsMiddleware
from readme_metrics.PayloadBuilder import PayloadBuilder
from readme_metrics.util import mask
from .fixtures import Environ


Expand All @@ -27,6 +28,15 @@ def mockStartResponse(self, status, headers):
self.headers = headers


class TestUtils:
def testMask(self):
# pylint: disable=line-too-long
assert (
mask("deadbeef")
== "sha512-ETo7x4PYUfwDcyFLGep76fo95UHsuf4CbVLGA+jqGcF0zA6XBfi5DTEiEsDDpthFPd+z4xQUCc9L7cjvAzWQtA==?beef"
)


class TestPayloadBuilder:
"""
Unit Tests for Payload Builder to make sure the created payloads are created
Expand Down Expand Up @@ -183,7 +193,37 @@ def test_grouping_function(self):
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))
group = data["group"]

assert group["id"] == "spam"
assert group["id"] == mask("spam")
assert group["email"] == "[email protected]"
assert group["label"] == "Spam Musubi"

def test_mask_matches_node(self):
config = self.mockMiddlewareConfig(
grouping_function=lambda req: {
"api_key": "Bearer: a-random-api-key",
"email": "[email protected]",
"label": "Spam Musubi",
}
)

environ = Environ.MockEnviron().getEnvironForRequest(b"", "POST")
app = MockApplication("{ 'responseObject': 'value' }")

metrics = MetricsCoreMock()
middleware = MetricsMiddleware(app, config)
middleware.metrics_core = metrics

next(middleware(environ, app.mockStartResponse))

payload = self.createPayload(config)
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))
group = data["group"]

assert (
group["id"]
== "sha512-7S+L0vUE8Fn6HI3836rtz4b6fVf6H4JFur6SGkOnL3b"
+ "FpC856+OSZkpIHphZ0ipNO+kUw1ePb5df2iYrNQCpXw==?-key"
)
assert group["email"] == "[email protected]"
assert group["label"] == "Spam Musubi"

Expand Down Expand Up @@ -228,7 +268,7 @@ def test_deprecated_id_grouping(self):
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))
group = data["group"]

assert group["id"] == "spam"
assert group["id"] == mask("spam")
assert group["email"] == "[email protected]"
assert group["label"] == "Spam Musubi"

Expand Down Expand Up @@ -282,7 +322,7 @@ def test_grouping_function_validation_when_extra_fields_present(self):
assert isinstance(data, dict)
assert "group" in data
assert data["group"] == {
"id": "spam",
"id": mask("spam"),
"email": "[email protected]",
"label": "Spam Musubi",
}
Expand Down Expand Up @@ -311,6 +351,8 @@ def test_uuid_generation(self):
payload = self.createPayload(config)
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))

print(data["_id"])

assert data["_id"] == str(uuid.UUID(data["_id"], version=4))

def test_har_creator(self):
Expand All @@ -337,3 +379,26 @@ def test_har_creator(self):

assert post_data["text"] == json.dumps({"ok": 123, "password": 456})
assert post_data["mimeType"] == "text/plain"

def test_auth_header_masked(self):
config = self.mockMiddlewareConfig(development_mode=True)
environ = Environ.MockEnviron().getEnvironForRequest(b"", "GET")
app = MockApplication("{ 'responseObject': 'value' }")

metrics = MetricsCoreMock()
middleware = MetricsMiddleware(app, config)
middleware.metrics_core = metrics

next(middleware(environ, app.mockStartResponse))

payload = self.createPayload(config)
data = payload(metrics.req, metrics.res, str(uuid.uuid4()))

auth_header = None

# Pull out the auth header
for header in data["request"]["log"]["entries"][0]["request"]["headers"]:
if header["name"] == "Authorization":
auth_header = header["value"]

assert auth_header and auth_header == mask(environ["HTTP_AUTHORIZATION"])
1 change: 1 addition & 0 deletions packages/python/readme_metrics/tests/fixtures/Environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ def getEnvironForRequest(self, jsonByteString, httpRequestMethod):
stream = io.BytesIO(jsonByteString)
environ["wsgi.input"] = wsgi.LimitedStream(stream, contentLength)
environ["CONTENT_LENGTH"] = str(contentLength)
environ["HTTP_AUTHORIZATION"] = "Bearer: a-random-api-key"
return environ
10 changes: 10 additions & 0 deletions packages/python/readme_metrics/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import logging
import hashlib
import base64
from logging import Formatter, StreamHandler


Expand All @@ -10,3 +12,11 @@ def util_build_logger():
handler.setFormatter(formatter)
logger.addHandler(handler)
return logger


def mask(data):
m = hashlib.sha512()
m.update(data.encode("utf8"))
digest = base64.b64encode(m.digest()).decode("utf8")
opts = data[-4:]
return f"sha512-{digest}?{opts}"

0 comments on commit b24b2d8

Please sign in to comment.