diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8a601f0..74d6e57 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,6 +14,12 @@ Change Log Unreleased ~~~~~~~~~~ +[3.7.0] - 2024-08-02 +~~~~~~~~~~~~~~~~~~~~ +Added +----- +* ``DatadogDiagnosticMiddleware`` can now detect and log anomalous traces, enabled by Waffle flag ``datadog.diagnostics.detect_anomalous_trace`` + [3.6.0] - 2024-07-24 ~~~~~~~~~~~~~~~~~~~~ Added diff --git a/edx_arch_experiments/__init__.py b/edx_arch_experiments/__init__.py index ebdb6b8..6ab9526 100644 --- a/edx_arch_experiments/__init__.py +++ b/edx_arch_experiments/__init__.py @@ -2,4 +2,4 @@ A plugin to include applications under development by the architecture team at 2U. """ -__version__ = '3.6.0' +__version__ = '3.7.0' diff --git a/edx_arch_experiments/datadog_diagnostics/README.rst b/edx_arch_experiments/datadog_diagnostics/README.rst index c51dd6d..364ad6d 100644 --- a/edx_arch_experiments/datadog_diagnostics/README.rst +++ b/edx_arch_experiments/datadog_diagnostics/README.rst @@ -4,10 +4,3 @@ Datadog Diagnostics When installed in the LMS as a plugin app, the ``datadog_diagnostics`` app adds additional logging for debugging our Datadog integration. This is intended as a temporary situation while we debug the `trace concatenation issue `_. - -Usage -***** - -In LMS: - -- Install ``edx-arch-experiments`` as a dependency diff --git a/edx_arch_experiments/datadog_diagnostics/middleware.py b/edx_arch_experiments/datadog_diagnostics/middleware.py index ae3ffed..844695c 100644 --- a/edx_arch_experiments/datadog_diagnostics/middleware.py +++ b/edx_arch_experiments/datadog_diagnostics/middleware.py @@ -1,14 +1,31 @@ """ Diagnostic middleware for Datadog. + +To use, install edx-arch-experiments and add +``edx_arch_experiments.datadog_diagnostics.middleware.DatadogDiagnosticMiddleware`` +to ``MIDDLEWARE``, then set the below settings as needed. """ import logging +import time from django.core.exceptions import MiddlewareNotUsed from edx_toggles.toggles import WaffleFlag log = logging.getLogger(__name__) +# .. toggle_name: datadog.diagnostics.detect_anomalous_trace +# .. toggle_implementation: WaffleFlag +# .. toggle_default: False +# .. toggle_description: Enables logging of anomalous Datadog traces for web requests. +# .. toggle_warning: This is a noisy feature and so it should only be enabled +# for a percentage of requests. +# .. toggle_use_cases: temporary +# .. toggle_creation_date: 2024-08-01 +# .. toggle_target_removal_date: 2024-11-01 +# .. toggle_tickets: https://github.com/edx/edx-arch-experiments/issues/692 +DETECT_ANOMALOUS_TRACE = WaffleFlag('datadog.diagnostics.detect_anomalous_trace', module_name=__name__) + # .. toggle_name: datadog.diagnostics.log_root_span # .. toggle_implementation: WaffleFlag # .. toggle_default: False @@ -43,6 +60,8 @@ def __init__(self, get_response): # If import fails, don't even load this middleware. raise MiddlewareNotUsed # pylint: disable=raise-missing-from + self.worker_start_epoch = time.time() + def __call__(self, request): return self.get_response(request) @@ -63,9 +82,21 @@ def log_diagnostics(self, request): """ Contains all the actual logging logic. """ + local_root_span = self.dd_tracer.current_root_span() + + if DETECT_ANOMALOUS_TRACE.is_enabled(): + root_duration_s = local_root_span.duration + if root_duration_s is not None: + worker_run_time_s = time.time() - self.worker_start_epoch + log.warning( + f"Anomalous Datadog local root span (duration already set): " + f"id = {local_root_span.trace_id:x}; " + f"duration = {root_duration_s:0.3f} sec; " + f"worker age = {worker_run_time_s:0.3f} sec" + ) + if LOG_ROOT_SPAN.is_enabled(): route_pattern = getattr(request.resolver_match, 'route', None) - local_root_span = self.dd_tracer.current_root_span() current_span = self.dd_tracer.current_span() # pylint: disable=protected-access log.info(