Skip to content

Commit

Permalink
Merge pull request #18 from darklang/paul/otel2
Browse files Browse the repository at this point in the history
OpenTelemetry sampling
  • Loading branch information
pbiggar authored Apr 6, 2023
2 parents f22584c + 38120bf commit 7037f0d
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 37 deletions.
43 changes: 43 additions & 0 deletions backend/src/LibService/LaunchDarkly.fs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,42 @@ module Internal =
stringSetTestDefault name testDefault
fun canvasName -> stringVar name $"canvas-{canvasName}" default_

// -------------
// per-service values
// -------------
// Builds a per-service boolean flag reader.
//
// First calls `boolSetTestDefault name testDefault` (presumably registering the
// value to use under test - confirm against its definition), then returns a
// function that, given a service name, calls `boolVar` with the flag `name`, the
// key "service-<serviceName>" and `default_`.
let serviceBool
  (name : string)
  (default_ : bool)
  (testDefault : bool)
  : string -> bool =
  boolSetTestDefault name testDefault
  // The set-test-default call above runs once, when the three arguments are
  // applied; only this inner function runs on each lookup.
  let lookup (serviceName : string) : bool =
    boolVar name $"service-{serviceName}" default_
  lookup

// Builds a per-service integer flag reader.
//
// First calls `intSetTestDefault name testDefault` (presumably registering the
// value to use under test - confirm against its definition), then returns a
// function that, given a service name, calls `intVar` with the flag `name`, the
// key "service-<serviceName>" and `default_`.
let serviceInt
  (name : string)
  (default_ : int)
  (testDefault : int)
  : string -> int =
  intSetTestDefault name testDefault
  // The set-test-default call above runs once, when the three arguments are
  // applied; only this inner function runs on each lookup.
  let lookup (serviceName : string) : int =
    intVar name $"service-{serviceName}" default_
  lookup

// Builds a per-service float flag reader.
//
// First calls `floatSetTestDefault name testDefault` (presumably registering the
// value to use under test - confirm against its definition), then returns a
// function that, given a service name, calls `floatVar` with the flag `name`, the
// key "service-<serviceName>" and `default_`.
let serviceFloat
  (name : string)
  (default_ : float)
  (testDefault : float)
  : string -> float =
  floatSetTestDefault name testDefault
  // The set-test-default call above runs once, when the three arguments are
  // applied; only this inner function runs on each lookup.
  let lookup (serviceName : string) : float =
    floatVar name $"service-{serviceName}" default_
  lookup

// Builds a per-service string flag reader.
//
// First calls `stringSetTestDefault name testDefault` (presumably registering the
// value to use under test - confirm against its definition), then returns a
// function that, given a service name, calls `stringVar` with the flag `name`, the
// key "service-<serviceName>" and `default_`.
let serviceString
  (name : string)
  (default_ : string)
  (testDefault : string)
  : string -> string =
  stringSetTestDefault name testDefault
  // The set-test-default call above runs once, when the three arguments are
  // applied; only this inner function runs on each lookup.
  let lookup (serviceName : string) : string =
    stringVar name $"service-{serviceName}" default_
  lookup




/// Forces `Internal.client` (creating the client if it has not been created
/// yet) and disposes the resulting object.
/// NOTE(review): presumably disposing is what flushes pending events - confirm
/// against the client library.
let flush () : unit =
  let client = Internal.client.Force()
  client.Dispose()
Expand All @@ -214,6 +250,13 @@ let traceSamplingRule =
// Canvas Flags - these are per-canvas settings
// --------------

// --------------
// Service Flags - may be different for each service
// --------------
// Percentage of traces to keep for a service (the telemetry sampler divides the
// value by 100.0, so it is expected to be in the range 0.0-100.0).
// Built by partially applying Internal.serviceFloat to the flag name, with 100.0
// as both the default and the test default; the result is a function from
// service name to float. Keep this as a partial application so the
// serviceFloat setup runs once rather than on every lookup.
let telemetrySamplePercentage =
Internal.serviceFloat "telemetry-sample-percentage" 100.0 100.0


// --------------
// System flags - this allows us to change the run-time values of system
Expand Down
2 changes: 1 addition & 1 deletion backend/src/LibService/LibService.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
<Compile Include="Exception.fs" />
<Compile Include="ConfigDsl.fs" />
<Compile Include="Config.fs" />
<Compile Include="LaunchDarkly.fs" />
<Compile Include="Telemetry.fs" />
<Compile Include="Logging.fs" />
<Compile Include="LaunchDarkly.fs" />
<Compile Include="Rollbar.fs" />
<Compile Include="FireAndForget.fs" />
<Compile Include="DBConnection.fs" />
Expand Down
62 changes: 26 additions & 36 deletions backend/src/LibService/Telemetry.fs
Original file line number Diff line number Diff line change
Expand Up @@ -289,47 +289,37 @@ let configureAspNetCore
options.Enrich <- enrich
options.RecordException <- true

#nowarn "9"

/// A sampler is used to reduce the number of events, to not overwhelm the results.
/// In our case, we want to control costs too - we only have 1.5B honeycomb events
/// per month, and it's easy to use them very quickly in a loop
type DarkSampler() =
type Sampler(serviceName : string) =
inherit OpenTelemetry.Trace.Sampler()

let keep = SamplingResult(SamplingDecision.RecordAndSample)
let _drop = SamplingResult(SamplingDecision.Drop)

// Looks up `name` in `map` and returns the value only when it is a boxed int.
// Returns None when the key is missing, the value has any other runtime type
// (including null), or the lookup raises.
let getInt (name : string) (map : Map<string, obj>) : Option<int> =
  try
    match Map.get name map with
    | Some(:? int as value) -> Some value
    | Some _
    | None -> None
  with
  | _ -> None

// Looks up `name` in `map` and returns the value only when it is a boxed float.
// Returns None when the key is missing, the value has any other runtime type
// (including null), or the lookup raises.
let getFloat (name : string) (map : Map<string, obj>) : Option<float> =
  try
    match Map.get name map with
    | Some(:? float as value) -> Some value
    | Some _
    | None -> None
  with
  | _ -> None

// Looks up `name` in `map` and returns the value only when it is a string.
// Returns None when the key is missing, the value has any other runtime type
// (including null), or the lookup raises.
let getString (name : string) (map : Map<string, obj>) : Option<string> =
  try
    match Map.get name map with
    | Some(:? string as value) -> Some value
    | Some _
    | None -> None
  with
  | _ -> None

// Sampling decision for every span: the parameters are ignored and the `keep`
// result is always returned, i.e. nothing is ever dropped.
override this.ShouldSample(_p : SamplingParameters inref) : SamplingResult =
// A custom decision here turned out to be useless for the initial need
// (trimming short DB queries), so this sampler is effectively a no-op.
keep

let sampler = DarkSampler()
let drop = SamplingResult(SamplingDecision.Drop)


/// Decides whether a span is kept or dropped, based on the per-service sample
/// percentage read from `LaunchDarkly.telemetrySamplePercentage serviceName`.
///
/// - percentage >= 100.0: every span is kept.
/// - percentage <= 0.0 (or NaN): every span is dropped.
/// - otherwise: the trace ID's hash code is compared against the percentage
///   scaled onto the non-negative int range, so every span of a trace gets the
///   same decision.
override this.ShouldSample(ps : SamplingParameters inref) : SamplingResult =
  // Sampling means that we lose a lot of precision and might miss something. By
  // adding a feature flag, we can dynamically turn up precision when we need it
  // (eg if we can't find something or there's an outage). Ideally, we'd keep error
  // traces all the time, but that's not something that's possible with
  // OpenTelemetry right now.

  // Note we tweak sampling by service, so we can have 100% of one service and 10%
  // of another
  let percentage = LaunchDarkly.telemetrySamplePercentage serviceName
  if percentage >= 100.0 then
    // Fast path: nothing to compute when everything is kept. This also keeps
    // out-of-range values (> 100) away from the float -> int conversion below.
    keep
  elif not (percentage > 0.0) then
    // Covers 0, negative values and NaN (every comparison with NaN is false),
    // none of which can be scaled meaningfully.
    drop
  else
    // percentage is strictly between 0 and 100 here, so the product is strictly
    // between 0 and Int32.MaxValue and the conversion cannot overflow.
    let scaled = int ((percentage / 100.0) * float System.Int32.MaxValue)
    // Deterministic sampler, will produce the same result for every span in a trace
    // Originally based on https://github.com/open-telemetry/opentelemetry-dotnet/blob/b2fb873fcd9ceca2552b152a60bf192e2ea12b99/src/OpenTelemetry/Trace/TraceIdRatioBasedSampler.cs#LL76
    let hash = ps.TraceId.GetHashCode()
    // Math.Abs throws OverflowException for Int32.MinValue, so map that single
    // value explicitly; every other hash keeps the same mapping as before.
    let traceIDAsInt =
      if hash = System.Int32.MinValue then
        System.Int32.MaxValue
      else
        System.Math.Abs hash
    if traceIDAsInt < scaled then keep else drop


type TraceDBQueries =
| TraceDBQueries
Expand All @@ -349,6 +339,7 @@ let addTelemetry
(builder : TracerProviderBuilder)
: TracerProviderBuilder =
builder
|> fun b -> b.SetSampler(Sampler(serviceName))
|> fun b ->
List.fold
b
Expand All @@ -372,7 +363,6 @@ let addTelemetry
| TraceDBQueries -> b.AddNpgsql()
| DontTraceDBQueries -> b
|> fun b -> b.AddSource("Dark")
|> fun b -> b.SetSampler(sampler)


module Console =
Expand Down

0 comments on commit 7037f0d

Please sign in to comment.