spandex-project · reachfh · Aug 9, 2022 · Aug 9, 2022 · Aug 9, 2022 · Aug 9, 2022
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -5,8 +5,9 @@ version: 2
 jobs:
   build:
     docker:
-      # specify the version here
-      - image: circleci/elixir:1.7
+      - image: cimg/elixir:1.11
+        environment:
+          MIX_ENV: test
 
       # Specify service dependencies here if necessary
       # CircleCI maintains a library of pre-built images
@@ -16,12 +17,12 @@ jobs:
     working_directory: ~/spandex
     steps:
       - checkout
-
-      # specify any bash command here prefixed with `run: `
-      - run: mix format --check-formatted
       - run: mix local.hex --force
       - run: mix local.rebar --force
       - run: mix deps.get
+      - run: mix format --check-formatted
       - run: mix compile --warnings-as-errors
-      - run: mix coveralls.circle
+      # (ExCoveralls.ReportUploadError) Failed to upload the report to 'https://coveralls.io' (reason: status_code = 422, body = {"message":"Couldn't find a repository matching this job.","error":true}).
+      # - run: mix coveralls.circle
+      - run: mix coveralls
       - run: mix inch.report
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,65 @@
+---
+name: CI
+on: push
+
+jobs:
+  elixir:
+    name: Elixir Tests
+    runs-on: ubuntu-20.04
+    env:
+      MIX_ENV: test
+    strategy:
+      matrix:
+        elixir: ['1.11.4', '1.13.4']
+        otp: ['24.3']
+    steps:
+      - name: Cancel previous runs
+        uses: styfle/[email protected]
+        with:
+          access_token: ${{ github.token }}
+
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Setup Elixir
+        uses: erlef/setup-beam@v1
+        with:
+          otp-version: ${{ matrix.otp }}
+          elixir-version: ${{ matrix.elixir }}
+
+      - name: Get deps cache
+        uses: actions/cache@v2
+        with:
+          path: deps/
+          key: deps-${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('**/mix.lock') }}
+
+      - name: Get build cache
+        uses: actions/cache@v2
+        with:
+          path: _build/test/
+          key: build-${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('**/mix.lock') }}
+
+      - name: Install deps
+        run: |
+          mix local.rebar --force
+          mix local.hex --force
+          mix deps.get
+
+      - name: Compile code
+        run: |
+          # Use of @deprecated causes compile warnings
+          # mix compile --warnings-as-errors
+          mix compile
+
+      - name: Run tests
+        run: |
+          mix test
+          mix format --check-formatted
+          mix coveralls
+          mix inch.report
+
+      # - name: Publish unit test results to GitHub
+      #   uses: EnricoMi/publish-unit-test-result-action@v2
+      #   if: always() # always run even if tests fail
+      #   with:
+      #     junit_files: _build/test/lib/*/test-junit-report.xml
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,18 @@ See [Conventional Commits](Https://conventionalcommits.org) for commit guideline
 
 <!-- changelog -->
 
+### Breaking Changes:
+
+* Don't set a default trace priority (it is now `nil` instead of `1`), in order to support distributed traces and sampling
+* Use new `Config` instead of `Mix.Config` in `config` files
+* Update libraries
+
+### Bug Fixes:
+* Apply fix for interpolated string span and trace names from PR #136
+
+### Features:
+* Add functions to get and set trace priority
+
 ## [3.1.0](https://github.com/spandex-project/spandex/compare/3.0.3...3.1.0) (2021-10-23)
 
 * Encode logger metadata as string. by @aselder in https://github.com/spandex-project/spandex/pull/127

diff --git a/README.md b/README.md
@@ -15,7 +15,7 @@ monitoring tool that allows you get extremely granular information about the
 runtime of your system. Using distributed tracing, you can also get a view of
 how requests make their way through your entire ecosystem of microservices or
 applications. Currently, Spandex only supports integrating with
-[datadog](https://www.datadoghq.com/), but it is built to be agnostic to what
+[Datadog](https://www.datadoghq.com/), but it is built to be agnostic to what
 platform you choose to view your trace data. Eventually it should support Open
 Zipkin, Stackdriver, and any other trace viewer/aggregation tool you'd like to
 integrate with. We are still under active development, working on moving to a
@@ -38,7 +38,7 @@ This is Datadog-specific since that's currently the only adapter.
 ## Adapters
 
 * [Datadog](https://github.com/spandex-project/spandex_datadog)
-* Thats it so far! If you want another adapter, it should be relatively easy to
+* That's it so far! If you want another adapter, it should be relatively easy to
   write! This library is in charge of handling the state management of spans,
   and the adapter is just in charge of generating certain values and ultimately
   sending the values to the service.
@@ -282,3 +282,54 @@ Check out [spandex_ecto](https://github.com/spandex-project/spandex_ecto).
 ## Phoenix Tracing
 
 Check out [spandex_phoenix](https://github.com/spandex-project/spandex_phoenix).
+
+## Sampling and Rate Limiting
+
+When the load or cost from tracing increases, it is useful to use rate limiting
+or sampling to reduce tracing. When many traces are the same, it's enough to
+trace only e.g. 10% of them, reducing the bill by 90% while still preserving
+the ability to troubleshoot the system. The tracing still happens, but it
+may not be sent to the monitoring service, or the service may drop it or not
+retain detailed information.
+
+Spandex stores the `priority` as an integer in the top level `Trace`.
+
+In Datadog, there are four values:
+* `MANUAL_KEEP` (2) indicates that the application wants to ensure that a trace is
+  sampled, e.g. if there is an error
+* `AUTO_KEEP` (1) indicates that a trace has been selected for sampling
+* `AUTO_REJECT` (0) indicates that the trace has not been selected for sampling
+* `MANUAL_REJECT` (-1) indicates that the application wants a trace to be dropped
+
+Similarly, OpenTracing uses 0 and 1 to indicate whether a trace is sampled.
+
+In distributed tracing, multiple processes contribute to the same trace.  When
+sampling, the process that starts the trace can make a decision about whether
+it should be sampled. It then passes that information to downstream processes
+via an HTTP header.
+
+A trace may be sampled out, i.e. priority of 0, but the application can
+override the priority manually. This is usually done for requests with errors,
+as they are the ones that need troubleshooting. You can also enable tracing
+dynamically with a feature flag to debug a feature in production.
+
+Spandex has functions to read and set the priority
+(`Spandex.Tracer.current_priority/1` and `Spandex.Tracer.update_priority/2`).
+
+The following code overrides the `span_error` function on the tracer to set the
+priority to `MANUAL_KEEP` when there is an error on a trace:
+
+```elixir
+defmodule Foo.Tracer do
+  use Spandex.Tracer, otp_app: :foo
+
+  @impl Spandex.Tracer
+  def span_error(error, stacktrace, opts) do
+    super(error, stacktrace, opts)
+    __MODULE__.update_priority(2)
+  end
+end
+```
+
+The details of priority, sampling, and rate limiting are specific to the
+observability back end, so see e.g.
+[spandex_datadog](https://github.com/spandex-project/spandex_datadog) for details.
diff --git a/config/config.exs b/config/config.exs
@@ -1,5 +1,12 @@
 # This file is responsible for configuring your application
-# and its dependencies with the aid of the Mix.Config module.
-use Mix.Config
+# and its dependencies with the aid of the Config module.
+#
+# This configuration file is loaded before any dependency and
+# is restricted to this project.
 
-import_config "#{Mix.env()}.exs"
+# General application configuration
+import Config
+
+# Import environment specific config. This must remain at the bottom
+# of this file so it overrides the configuration defined above.
+import_config "#{config_env()}.exs"
diff --git a/config/dev.exs b/config/dev.exs
@@ -1,6 +1,4 @@
-# This file is responsible for configuring your application
-# and its dependencies with the aid of the Mix.Config module.
-use Mix.Config
+import Config
 
 config :git_ops,
   mix_project: Spandex.Mixfile,

diff --git a/config/test.exs b/config/test.exs
@@ -1,10 +1,10 @@
-use Mix.Config
+import Config
 
 config :logger, :console,
   level: :debug,
   colors: [enabled: false],
   format: "$time $metadata[$level] $message\n",
-  metadata: [:trace_id, :span_id]
+  metadata: [:trace_id, :span_id, :file, :line]
 
 config :spandex, :decorators, tracer: Spandex.Test.Support.Tracer
 

diff --git a/lib/span_context.ex b/lib/span_context.ex
@@ -12,12 +12,12 @@ defmodule Spandex.SpanContext do
   @type t :: %__MODULE__{
           trace_id: Spandex.id(),
           parent_id: Spandex.id(),
-          priority: integer(),
+          priority: integer() | nil,
           baggage: Keyword.t()
         }
 
   defstruct trace_id: nil,
             parent_id: nil,
-            priority: 1,
+            priority: nil,
             baggage: []
 end
diff --git a/lib/spandex.ex b/lib/spandex.ex
@@ -31,9 +31,9 @@ defmodule Spandex do
           | {:error, :disabled}
           | {:error, :trace_running}
           | {:error, [Optimal.error()]}
-  def start_trace(_, :disabled), do: {:error, :disabled}
+  def start_trace(name, :disabled) when is_binary(name), do: {:error, :disabled}
 
-  def start_trace(name, opts) do
+  def start_trace(name, opts) when is_binary(name) do
     strategy = opts[:strategy]
 
     if strategy.trace_active?(opts[:trace_key]) do
@@ -55,9 +55,9 @@ defmodule Spandex do
           {:ok, Span.t()}
           | {:error, :disabled}
           | {:error, :no_trace_context}
-  def start_span(_, :disabled), do: {:error, :disabled}
+  def start_span(name, :disabled) when is_binary(name), do: {:error, :disabled}
 
-  def start_span(name, opts) do
+  def start_span(name, opts) when is_binary(name) do
     strategy = opts[:strategy]
 
     case strategy.get_trace(opts[:trace_key]) do
@@ -104,6 +104,24 @@ defmodule Spandex do
     end
   end
 
+  @doc """
+  Sets the priority of the currently running trace.
+
+  Returns `{:error, :disabled}` when `opts` is `:disabled`. Otherwise the trace
+  stored under `opts[:trace_key]` is fetched through `opts[:strategy]`, its
+  `priority` field is replaced, and the result of storing it back is returned.
+  Anything other than `{:ok, trace}` from the lookup is returned unchanged.
+  """
+  @spec update_priority(integer(), Tracer.opts()) ::
+          {:ok, Trace.t()}
+          | {:error, :disabled}
+          | {:error, :no_trace_context}
+          | {:error, [Optimal.error()]}
+  def update_priority(_, :disabled), do: {:error, :disabled}
+
+  def update_priority(priority, opts) do
+    trace_key = opts[:trace_key]
+    strategy = opts[:strategy]
+
+    case strategy.get_trace(trace_key) do
+      {:ok, trace} -> strategy.put_trace(trace_key, %{trace | priority: priority})
+      other -> other
+    end
+  end
+
   @doc """
   Updates the top-most parent span.
 
@@ -247,6 +265,26 @@ defmodule Spandex do
     update_span(Keyword.put_new(opts, :error, updates))
   end
 
+  @doc """
+  Returns the priority of the currently running trace.
+
+  Returns `nil` when `opts` is `:disabled` or when the trace lookup returns an
+  error tuple.
+  """
+  @spec current_priority(Tracer.opts()) :: integer() | nil
+  def current_priority(:disabled), do: nil
+
+  def current_priority(opts) do
+    trace_key = opts[:trace_key]
+
+    case opts[:strategy].get_trace(trace_key) do
+      {:error, _} ->
+        # TODO: Alter the return type of this interface to allow for returning
+        # errors from fetching the trace.
+        nil
+
+      {:ok, %Trace{priority: priority}} ->
+        priority
+    end
+  end
+
   @doc """
   Returns the id of the currently-running trace.
   """

diff --git a/lib/trace.ex b/lib/trace.ex
@@ -10,15 +10,15 @@ defmodule Spandex.Trace do
   """
   defstruct baggage: [],
             id: nil,
-            priority: 1,
+            priority: nil,
             spans: [],
             stack: []
 
   @typedoc @moduledoc
   @type t :: %__MODULE__{
           baggage: Keyword.t(),
           id: Spandex.id(),
-          priority: integer(),
+          priority: integer() | nil,
           spans: [Spandex.Span.t()],
           stack: [Spandex.Span.t()]
         }