From b8b362c444d8c7f8925f4bf0a0eff5653f07ce6e Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:23:59 -0400 Subject: [PATCH 01/17] fix: refactor OTel tracing to group POST and EXEC spans in query and asyncquery --- src/controllers/async/asyncquery.ts | 9 ++++++++- src/controllers/opentelemetry.ts | 11 ++++++----- src/controllers/threading/taskHandler.ts | 17 +++++++++-------- src/controllers/threading/threadHandler.ts | 14 +++++++++----- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/controllers/async/asyncquery.ts b/src/controllers/async/asyncquery.ts index f8703cd..4bfdc26 100644 --- a/src/controllers/async/asyncquery.ts +++ b/src/controllers/async/asyncquery.ts @@ -1,4 +1,5 @@ import axios, { AxiosError, AxiosResponse } from "axios"; +import { context, propagation } from "@opentelemetry/api"; import { customAlphabet } from "nanoid"; import * as utils from "../../utils/common"; import { redisClient } from "@biothings-explorer/utils"; @@ -42,8 +43,14 @@ export async function asyncquery( } const url = `${req.protocol}://${req.header("host")}/v1/asyncquery_status/${jobId}`; + // add OTel trace context + const otelData: Partial<{ traceparent: string; tracestate: string }> = {}; + propagation.inject(context.active(), otelData); + const { traceparent, tracestate } = otelData; + queueData = { ...queueData, traceparent, tracestate }; + const job = await queryQueue.add( - { ...queueData, url: url.replace("status", "response") }, + { ...queueData, traceparent: traceparent, tracestate: tracestate, url: url.replace("status", "response") }, { jobId: jobId, timeout: parseInt(process.env.JOB_TIMEOUT ?? (1000 * 60 * 5).toString()), diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index 2acf1e9..e5fad6e 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -2,18 +2,19 @@ import { NodeSDK } from "@opentelemetry/sdk-node"; import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node"; import { Resource } from "@opentelemetry/resources"; import Debug from "debug"; +import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; const debug = Debug("bte:biothings-explorer:otel-init"); -import { JaegerExporter } from "@opentelemetry/exporter-jaeger"; +const { SEMRESATTRS_SERVICE_NAME } = require('@opentelemetry/semantic-conventions'); debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ - traceExporter: new JaegerExporter({ - host: process.env.JAEGER_HOST ?? "jaeger-otel-agent.sri", - port: parseInt(process.env.JAEGER_PORT ?? "6832"), + // metrics, if needed, shall be exported on a different endpoint + traceExporter: new OTLPTraceExporter({ + url: `${process.env.JAEGER_HOST}:${process.env.JAEGER_PORT}/v1/traces` }), instrumentations: [getNodeAutoInstrumentations()], resource: new Resource({ - "service.name": "biothings-explorer", + [SEMRESATTRS_SERVICE_NAME]: "biothings-explorer", }), }); sdk.start(); diff --git a/src/controllers/threading/taskHandler.ts b/src/controllers/threading/taskHandler.ts index 9fcc41b..e082243 100644 --- a/src/controllers/threading/taskHandler.ts +++ b/src/controllers/threading/taskHandler.ts @@ -14,7 +14,7 @@ import { tasks } from "../../routes/index"; import { getQueryQueue } from "../async/asyncquery_queue"; import * as Sentry from "@sentry/node"; import { ProfilingIntegration } from "@sentry/profiling-node"; -import OpenTelemetry, { Span } from "@opentelemetry/api"; +import { Span, trace, context, propagation, Context } from "@opentelemetry/api"; import { Telemetry } from "@biothings-explorer/utils"; import { InnerTaskData } from "@biothings-explorer/types"; @@ -90,13 +90,14 @@ async function runTask({ scope.setSpan(transaction); }); - span = OpenTelemetry.trace - .getTracer("biothings-explorer-thread") - .startSpan( - routeNames[route], - undefined, - OpenTelemetry.propagation.extract(OpenTelemetry.context.active(), { traceparent, tracestate }), - ); + let activeContext: Context = propagation.extract(context.active(), { traceparent, tracestate }); + let tracer = trace.getTracer("biothings-explorer-thread") + span = tracer.startSpan( + routeNames[route], + {kind: 1}, // specifies internal span + activeContext, + ); + span.setAttribute("bte.requestData", JSON.stringify(req.data.queryGraph)); Telemetry.setOtelSpan(span); } catch (error) { diff --git a/src/controllers/threading/threadHandler.ts b/src/controllers/threading/threadHandler.ts index 7f91bb3..2ee675e 100644 --- a/src/controllers/threading/threadHandler.ts +++ b/src/controllers/threading/threadHandler.ts @@ -104,11 +104,8 @@ async function queueTaskToWorkers(pool: Piscina, taskInfo: TaskInfo, route: stri const abortController = new AbortController(); const { port1: toWorker, port2: fromWorker } = new MessageChannel(); - // get otel context - - const otelData: Partial<{ traceparent: string; tracestate: string }> = {}; - propagation.inject(context.active(), otelData); - const { traceparent, tracestate } = otelData; + const traceparent: string = taskInfo.data.traceparent; + const tracestate: string = taskInfo.data.tracestate; const taskData: InnerTaskData = { req: taskInfo, route, traceparent, tracestate, port: toWorker }; taskData.req.data.options = {...taskData.req.data.options, metakg: global.metakg?.ops, smartapi: global.smartapi} as QueryHandlerOptions; @@ -219,6 +216,11 @@ async function queueTaskToWorkers(pool: Piscina, taskInfo: TaskInfo, route: stri export async function runTask(req: Request, res: Response, route: string, useBullSync = true): Promise { const queryQueue: Queue = global.queryQueue.bte_sync_query_queue; + + const otelData: Partial<{ traceparent: string; tracestate: string }> = {}; + propagation.inject(context.active(), otelData); + const { traceparent, tracestate } = otelData; + const taskInfo: TaskInfo = { data: { route, @@ -233,6 +235,8 @@ export async function runTask(req: Request, res: Response, route: string, useBul }, params: req.params, endpoint: req.originalUrl, + traceparent: traceparent, + tracestate: tracestate, }, }; From edb3387f3c9f18add11536f79115420aa4d7e2b1 Mon Sep 17 00:00:00 2001 From: Colleen Xu Date: Fri, 27 Sep 2024 02:16:58 -0700 Subject: [PATCH 02/17] chore: add override for semmeddb has adjusted x-bte annotation to use the new semmeddb-edge-attribute feature --- config/smartapi_overrides.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/smartapi_overrides.yaml b/config/smartapi_overrides.yaml index bf7bf47..feaeca5 100644 --- a/config/smartapi_overrides.yaml +++ b/config/smartapi_overrides.yaml @@ -2,6 +2,7 @@ config: only_overrides: false apis: { + "1d288b3a3caf75d541ffaae3aab386c8": "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/refs/heads/semmeddb_publication_refactor/semmeddb/smartapi.yaml" # "8f08d1446e0bb9c2b323713ce83e2bd3": "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/refs/heads/remove-clinical-trials/mychem.info/openapi_full.yml", # "1138c3297e8e403b6ac10cff5609b319": "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/refs/heads/remove-clinical-trials/repodb/smartapi.yaml" } From 555e7ee868cb55a6d51df7130959507b77d860af Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:05:59 -0400 Subject: [PATCH 03/17] fix: add default host & fix service name --- package.json | 1 + src/controllers/opentelemetry.ts | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 6c10459..d24b3de 100644 --- a/package.json +++ b/package.json @@ -89,6 +89,7 @@ "@opentelemetry/sdk-metrics": "^1.18.1", "@opentelemetry/sdk-node": "^0.50.0", "@opentelemetry/sdk-trace-node": "^1.18.1", + "@opentelemetry/semantic-conventions": "^1.27.0", "@sentry/node": "^7.74.1", "@sentry/profiling-node": "^1.2.1", "axios": "^0.27.2", diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index e5fad6e..2563881 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -4,17 +4,18 @@ import { Resource } from "@opentelemetry/resources"; import Debug from "debug"; import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; const debug = Debug("bte:biothings-explorer:otel-init"); -const { SEMRESATTRS_SERVICE_NAME } = require('@opentelemetry/semantic-conventions'); +import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; + debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ // metrics, if needed, shall be exported on a different endpoint traceExporter: new OTLPTraceExporter({ - url: `${process.env.JAEGER_HOST}:${process.env.JAEGER_PORT}/v1/traces` + url: `${process.env.JAEGER_HOST ?? 'jaeger-otel-collector'}:${process.env.JAEGER_PORT ?? 4318}/v1/traces` }), instrumentations: [getNodeAutoInstrumentations()], resource: new Resource({ - [SEMRESATTRS_SERVICE_NAME]: "biothings-explorer", + [ATTR_SERVICE_NAME]: "biothings-explorer", }), }); sdk.start(); From 5790cb40b80782df14cb6bae4c0ead5d88b1e3bc Mon Sep 17 00:00:00 2001 From: Colleen Xu Date: Sat, 28 Sep 2024 00:14:50 -0700 Subject: [PATCH 04/17] feat: enable dryrun_pathfinder parameter for ARA endpoints --- docs/smartapi.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/smartapi.yaml b/docs/smartapi.yaml index 7028c43..15c2aef 100644 --- a/docs/smartapi.yaml +++ b/docs/smartapi.yaml @@ -149,6 +149,12 @@ paths: required: false schema: type: boolean + - description: Dryrun a pathfinder query (logs the tempalted-queries BTE would make without running them) + in: query + name: dryrun_pathfinder + required: false + schema: + type: boolean requestBody: description: Query information to be submitted required: true @@ -454,6 +460,12 @@ paths: required: false schema: type: boolean + - description: Dryrun a pathfinder query (logs the tempalted-queries BTE would make without running them) + in: query + name: dryrun_pathfinder + required: false + schema: + type: boolean requestBody: description: Query information to be submitted required: true From aa83d8c0562deb76f101075f596a893417887474 Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 3 Oct 2024 11:38:37 -0400 Subject: [PATCH 05/17] chore: add debug statements for OTel --- src/controllers/opentelemetry.ts | 1 + src/controllers/threading/taskHandler.ts | 2 ++ src/controllers/threading/threadHandler.ts | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index 2563881..1c2a60b 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -18,5 +18,6 @@ const sdk = new NodeSDK({ [ATTR_SERVICE_NAME]: "biothings-explorer", }), }); +debug(`OTel URL ${process.env.JAEGER_HOST ?? 'jaeger-otel-collector'}:${process.env.JAEGER_PORT ?? 4318}/v1/traces`); sdk.start(); debug("Opentelemetry instrumentation initialized."); diff --git a/src/controllers/threading/taskHandler.ts b/src/controllers/threading/taskHandler.ts index e082243..78b2629 100644 --- a/src/controllers/threading/taskHandler.ts +++ b/src/controllers/threading/taskHandler.ts @@ -91,12 +91,14 @@ async function runTask({ }); let activeContext: Context = propagation.extract(context.active(), { traceparent, tracestate }); + debug(`OTel task ${traceparent} and ${tracestate}`); let tracer = trace.getTracer("biothings-explorer-thread") span = tracer.startSpan( routeNames[route], {kind: 1}, // specifies internal span activeContext, ); + debug(`OTel span created ${span}`); span.setAttribute("bte.requestData", JSON.stringify(req.data.queryGraph)); Telemetry.setOtelSpan(span); diff --git a/src/controllers/threading/threadHandler.ts b/src/controllers/threading/threadHandler.ts index eb52333..e9a595c 100644 --- a/src/controllers/threading/threadHandler.ts +++ b/src/controllers/threading/threadHandler.ts @@ -244,12 +244,15 @@ export async function runTask(req: Request, res: Response, route: string, useBul taskInfo.data.options.caching = false; } + debug(`OTel ${traceparent} and ${tracestate}`); if (process.env.USE_THREADING === "false") { // Threading disabled, just use the provided function in main event loop + debug("OTel enter no threading") const response = (await tasks[route](taskInfo)) as TrapiResponse; return response; } else if (!(queryQueue && useBullSync)) { // Redis unavailable or query not to sync queue such as asyncquery_status + debug("OTel enter queueTaskToWorkers") const response = await queueTaskToWorkers( useBullSync ? global.threadpool.sync : global.threadpool.misc, taskInfo, @@ -298,6 +301,7 @@ export async function runTask(req: Request, res: Response, route: string, useBul throw new ServerOverloadedError(message, expectedWaitTime); } + debug("OTel enter queryQueue.add") const job = await queryQueue.add(taskInfo.data, jobOpts); try { const response: TrapiResponse = await (job.finished() as Promise); From 4d5181b89df353234a3c0ec5932a2b1e4d0cdfe0 Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:03:26 -0400 Subject: [PATCH 06/17] chore: refactor Jaeger URL init --- src/controllers/opentelemetry.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index 1c2a60b..e7c46ba 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -6,18 +6,21 @@ import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; const debug = Debug("bte:biothings-explorer:otel-init"); import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; +const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector'; +const jaegerPort = process.env.JAEGER_PORT ?? 4318; +const jaegerResName = process.env.JAEGER_RES_NAME ?? ''; debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ // metrics, if needed, shall be exported on a different endpoint traceExporter: new OTLPTraceExporter({ - url: `${process.env.JAEGER_HOST ?? 'jaeger-otel-collector'}:${process.env.JAEGER_PORT ?? 4318}/v1/traces` + url: `${jaegerHost}:${jaegerPort}${jaegerResName}` }), instrumentations: [getNodeAutoInstrumentations()], resource: new Resource({ [ATTR_SERVICE_NAME]: "biothings-explorer", }), }); -debug(`OTel URL ${process.env.JAEGER_HOST ?? 'jaeger-otel-collector'}:${process.env.JAEGER_PORT ?? 4318}/v1/traces`); +debug(`OTel URL ${jaegerHost}:${jaegerPort}${jaegerResName}`); sdk.start(); debug("Opentelemetry instrumentation initialized."); From 45493561024f0b4f8559fc59ed85504d2713efba Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:25:52 -0400 Subject: [PATCH 07/17] fix: modify default jaeger endpoint --- src/controllers/opentelemetry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index e7c46ba..ced174f 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -6,7 +6,7 @@ import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; const debug = Debug("bte:biothings-explorer:otel-init"); import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; -const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector'; +const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-agent.sri'; const jaegerPort = process.env.JAEGER_PORT ?? 4318; const jaegerResName = process.env.JAEGER_RES_NAME ?? ''; From dd2292d073c47e23ff5cea730165f32ef77f04c3 Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:41:14 -0400 Subject: [PATCH 08/17] fix: jaeger res name --- src/controllers/opentelemetry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index ced174f..491029a 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -8,7 +8,7 @@ import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-agent.sri'; const jaegerPort = process.env.JAEGER_PORT ?? 4318; -const jaegerResName = process.env.JAEGER_RES_NAME ?? ''; +const jaegerResName = process.env.JAEGER_RES_NAME ?? '/v1/traces'; debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ From 53a8bf9f16c7074f674c785df824aabe3324e379 Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:26:26 -0400 Subject: [PATCH 09/17] fix: update otel config --- src/controllers/opentelemetry.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index 491029a..c7beb8a 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -6,21 +6,21 @@ import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; const debug = Debug("bte:biothings-explorer:otel-init"); import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; -const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-agent.sri'; +const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector'; const jaegerPort = process.env.JAEGER_PORT ?? 4318; -const jaegerResName = process.env.JAEGER_RES_NAME ?? '/v1/traces'; +const jaegerResName = process.env.JAEGER_RES_NAME ?? ''; debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ // metrics, if needed, shall be exported on a different endpoint traceExporter: new OTLPTraceExporter({ - url: `${jaegerHost}:${jaegerPort}${jaegerResName}` + url: `http://${jaegerHost}:${jaegerPort}${jaegerResName}` }), instrumentations: [getNodeAutoInstrumentations()], resource: new Resource({ [ATTR_SERVICE_NAME]: "biothings-explorer", }), }); -debug(`OTel URL ${jaegerHost}:${jaegerPort}${jaegerResName}`); +debug(`OTel URL http://${jaegerHost}:${jaegerPort}${jaegerResName}`); sdk.start(); debug("Opentelemetry instrumentation initialized."); From ae59681ff2265d621ea56886efe8e23f4173caae Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:41:46 -0400 Subject: [PATCH 10/17] fix: update otel config --- src/controllers/opentelemetry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index c7beb8a..1058f86 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -6,7 +6,7 @@ import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; const debug = Debug("bte:biothings-explorer:otel-init"); import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; -const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector'; +const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector.sri'; const jaegerPort = process.env.JAEGER_PORT ?? 4318; const jaegerResName = process.env.JAEGER_RES_NAME ?? ''; From 979c8fbb86a8542fb83a15f82e1d5217e1482e9a Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:58:05 -0400 Subject: [PATCH 11/17] fix: update otel config --- src/controllers/opentelemetry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index 1058f86..e05793a 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -8,7 +8,7 @@ import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector.sri'; const jaegerPort = process.env.JAEGER_PORT ?? 4318; -const jaegerResName = process.env.JAEGER_RES_NAME ?? ''; +const jaegerResName = process.env.JAEGER_RES_NAME ?? '/v1/traces'; debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ From b8c3648231912b43bcf8a814b92cc24796a297e7 Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:14:49 -0400 Subject: [PATCH 12/17] fix: make otel tracing leaner and synchronous --- package.json | 2 +- src/controllers/opentelemetry.ts | 14 +++++++++----- src/controllers/threading/taskHandler.ts | 3 +-- src/controllers/threading/threadHandler.ts | 7 ++++++- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/package.json b/package.json index d24b3de..2a7c03e 100644 --- a/package.json +++ b/package.json @@ -81,10 +81,10 @@ "@bull-board/api": "^5.9.1", "@bull-board/express": "^5.9.1", "@opentelemetry/api": "^1.7.0", - "@opentelemetry/auto-instrumentations-node": "^0.44.0", "@opentelemetry/exporter-jaeger": "^1.19.0", "@opentelemetry/exporter-metrics-otlp-proto": "^0.50.0", "@opentelemetry/exporter-trace-otlp-proto": "^0.50.0", + "@opentelemetry/instrumentation-http": "^0.53.0", "@opentelemetry/resources": "^1.18.1", "@opentelemetry/sdk-metrics": "^1.18.1", "@opentelemetry/sdk-node": "^0.50.0", diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index e05793a..4e5dfa1 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -1,5 +1,6 @@ import { NodeSDK } from "@opentelemetry/sdk-node"; -import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node"; +import { SimpleSpanProcessor } from "@opentelemetry/sdk-trace-node"; +import { HttpInstrumentation } from "@opentelemetry/instrumentation-http"; import { Resource } from "@opentelemetry/resources"; import Debug from "debug"; import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; @@ -9,17 +10,20 @@ import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector.sri'; const jaegerPort = process.env.JAEGER_PORT ?? 4318; const jaegerResName = process.env.JAEGER_RES_NAME ?? '/v1/traces'; +const traceExporter = new OTLPTraceExporter({ + url: `http://${jaegerHost}:${jaegerPort}${jaegerResName}` +}); debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ // metrics, if needed, shall be exported on a different endpoint - traceExporter: new OTLPTraceExporter({ - url: `http://${jaegerHost}:${jaegerPort}${jaegerResName}` - }), - instrumentations: [getNodeAutoInstrumentations()], + // trace a subset of instrumentations to avoid performance overhead + instrumentations: [new HttpInstrumentation()], resource: new Resource({ [ATTR_SERVICE_NAME]: "biothings-explorer", }), + // use simple span processor to avoid losing data when the forked process exits (taskHandler) + spanProcessors: [new SimpleSpanProcessor(traceExporter)], }); debug(`OTel URL http://${jaegerHost}:${jaegerPort}${jaegerResName}`); sdk.start(); diff --git a/src/controllers/threading/taskHandler.ts b/src/controllers/threading/taskHandler.ts index 78b2629..4e9705a 100644 --- a/src/controllers/threading/taskHandler.ts +++ b/src/controllers/threading/taskHandler.ts @@ -91,14 +91,13 @@ async function runTask({ }); let activeContext: Context = propagation.extract(context.active(), { traceparent, tracestate }); - debug(`OTel task ${traceparent} and ${tracestate}`); + debug(`OTel task context: ${traceparent} and ${tracestate}`); let tracer = trace.getTracer("biothings-explorer-thread") span = tracer.startSpan( routeNames[route], {kind: 1}, // specifies internal span activeContext, ); - debug(`OTel span created ${span}`); span.setAttribute("bte.requestData", JSON.stringify(req.data.queryGraph)); Telemetry.setOtelSpan(span); diff --git a/src/controllers/threading/threadHandler.ts b/src/controllers/threading/threadHandler.ts index e9a595c..894caa8 100644 --- a/src/controllers/threading/threadHandler.ts +++ b/src/controllers/threading/threadHandler.ts @@ -1,6 +1,6 @@ import { MessageChannel, threadId } from "worker_threads"; import Debug from "debug"; -import { context, propagation } from "@opentelemetry/api"; +import { context, propagation, trace } from "@opentelemetry/api"; const debug = Debug("bte:biothings-explorer-trapi:threading"); import path from "path"; import { redisClient } from "@biothings-explorer/utils"; @@ -221,6 +221,11 @@ export async function runTask(req: Request, res: Response, route: string, useBul propagation.inject(context.active(), otelData); const { traceparent, tracestate } = otelData; + // add req dest to root span name as HTTP instrumentation doesn't do it automatically + const activeContext = context.active(); + const rootSpan = trace.getSpan(activeContext); + rootSpan.updateName(`${req.method} ${req.originalUrl}`); + const taskInfo: TaskInfo = { data: { route, From 677886ccd47a3b455484c448a5dfb362ea442307 Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:44:44 -0400 Subject: [PATCH 13/17] chore: ignore ts lint error --- src/controllers/opentelemetry.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index 4e5dfa1..252b286 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -23,6 +23,7 @@ const sdk = new NodeSDK({ [ATTR_SERVICE_NAME]: "biothings-explorer", }), // use simple span processor to avoid losing data when the forked process exits (taskHandler) + // @ts-ignore - fix from MetinSeylan/Nestjs-OpenTelemetry#63 spanProcessors: [new SimpleSpanProcessor(traceExporter)], }); debug(`OTel URL http://${jaegerHost}:${jaegerPort}${jaegerResName}`); From daac52bc50358bba542810f0538ae5779ce12ccd Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:57:56 -0400 Subject: [PATCH 14/17] fix: update root span only if it exists --- src/controllers/threading/threadHandler.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/controllers/threading/threadHandler.ts b/src/controllers/threading/threadHandler.ts index 894caa8..5a8a771 100644 --- a/src/controllers/threading/threadHandler.ts +++ b/src/controllers/threading/threadHandler.ts @@ -224,7 +224,7 @@ export async function runTask(req: Request, res: Response, route: string, useBul // add req dest to root span name as HTTP instrumentation doesn't do it automatically const activeContext = context.active(); const rootSpan = trace.getSpan(activeContext); - rootSpan.updateName(`${req.method} ${req.originalUrl}`); + if (rootSpan != undefined) rootSpan.updateName(`${req.method} ${req.originalUrl}`); const taskInfo: TaskInfo = { data: { From 08aefaabf08440bf1b34dd7d7959cd258a0f2edc Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:01:10 -0400 Subject: [PATCH 15/17] chore: add type hints to otel vars --- src/controllers/threading/taskHandler.ts | 4 ++-- src/controllers/threading/threadHandler.ts | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/controllers/threading/taskHandler.ts b/src/controllers/threading/taskHandler.ts index 4e9705a..1a42dbd 100644 --- a/src/controllers/threading/taskHandler.ts +++ b/src/controllers/threading/taskHandler.ts @@ -14,7 +14,7 @@ import { tasks } from "../../routes/index"; import { getQueryQueue } from "../async/asyncquery_queue"; import * as Sentry from "@sentry/node"; import { ProfilingIntegration } from "@sentry/profiling-node"; -import { Span, trace, context, propagation, Context } from "@opentelemetry/api"; +import { Span, trace, context, propagation, Context, Tracer } from "@opentelemetry/api"; import { Telemetry } from "@biothings-explorer/utils"; import { InnerTaskData } from "@biothings-explorer/types"; @@ -92,7 +92,7 @@ async function runTask({ let activeContext: Context = propagation.extract(context.active(), { traceparent, tracestate }); debug(`OTel task context: ${traceparent} and ${tracestate}`); - let tracer = trace.getTracer("biothings-explorer-thread") + let tracer: Tracer = trace.getTracer("biothings-explorer-thread") span = tracer.startSpan( routeNames[route], {kind: 1}, // specifies internal span diff --git a/src/controllers/threading/threadHandler.ts b/src/controllers/threading/threadHandler.ts index 5a8a771..c23e69c 100644 --- a/src/controllers/threading/threadHandler.ts +++ b/src/controllers/threading/threadHandler.ts @@ -1,6 +1,6 @@ import { MessageChannel, threadId } from "worker_threads"; import Debug from "debug"; -import { context, propagation, trace } from "@opentelemetry/api"; +import { context, propagation, trace, Context, Span } from "@opentelemetry/api"; const debug = Debug("bte:biothings-explorer-trapi:threading"); import path from "path"; import { redisClient } from "@biothings-explorer/utils"; @@ -222,8 +222,8 @@ export async function runTask(req: Request, res: Response, route: string, useBul const { traceparent, tracestate } = otelData; // add req dest to root span name as HTTP instrumentation doesn't do it automatically - const activeContext = context.active(); - const rootSpan = trace.getSpan(activeContext); + const activeContext: Context = context.active(); + const rootSpan: Span = trace.getSpan(activeContext); if (rootSpan != undefined) rootSpan.updateName(`${req.method} ${req.originalUrl}`); const taskInfo: TaskInfo = { From 0d30d9b56d6188223874edf93e48d3401c6e0d5f Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Thu, 10 Oct 2024 17:50:59 -0400 Subject: [PATCH 16/17] fix: switch to BatchSpanProcessor and flush spans when thread exits --- src/controllers/opentelemetry.ts | 15 +++++++++++---- src/controllers/threading/taskHandler.ts | 2 ++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index 252b286..77b81e3 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -1,5 +1,5 @@ import { NodeSDK } from "@opentelemetry/sdk-node"; -import { SimpleSpanProcessor } from "@opentelemetry/sdk-trace-node"; +import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-node"; import { HttpInstrumentation } from "@opentelemetry/instrumentation-http"; import { Resource } from "@opentelemetry/resources"; import Debug from "debug"; @@ -10,9 +10,11 @@ import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions'; const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector.sri'; const jaegerPort = process.env.JAEGER_PORT ?? 4318; const jaegerResName = process.env.JAEGER_RES_NAME ?? '/v1/traces'; + const traceExporter = new OTLPTraceExporter({ url: `http://${jaegerHost}:${jaegerPort}${jaegerResName}` }); +const spanProcessor = new BatchSpanProcessor(traceExporter); debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ @@ -22,10 +24,15 @@ const sdk = new NodeSDK({ resource: new Resource({ [ATTR_SERVICE_NAME]: "biothings-explorer", }), - // use simple span processor to avoid losing data when the forked process exits (taskHandler) - // @ts-ignore - fix from MetinSeylan/Nestjs-OpenTelemetry#63 - spanProcessors: [new SimpleSpanProcessor(traceExporter)], + // @ts-ignore because MetinSeylan/Nestjs-OpenTelemetry#63 + spanProcessors: [spanProcessor], }); debug(`OTel URL http://${jaegerHost}:${jaegerPort}${jaegerResName}`); sdk.start(); debug("Opentelemetry instrumentation initialized."); + +export async function flushRemainingSpans(): Promise { + // avoid losing any spans in the buffer when taskHandler exits + debug("Flushing remaining spans..."); + await spanProcessor.forceFlush(); +} diff --git a/src/controllers/threading/taskHandler.ts b/src/controllers/threading/taskHandler.ts index 1a42dbd..b22ae41 100644 --- a/src/controllers/threading/taskHandler.ts +++ b/src/controllers/threading/taskHandler.ts @@ -17,6 +17,7 @@ import { ProfilingIntegration } from "@sentry/profiling-node"; import { Span, trace, context, propagation, Context, Tracer } from "@opentelemetry/api"; import { Telemetry } from "@biothings-explorer/utils"; import { InnerTaskData } from "@biothings-explorer/types"; +import { flushRemainingSpans } from "../opentelemetry"; // use SENTRY_DSN environment variable try { @@ -113,6 +114,7 @@ async function runTask({ transaction.finish(); span.end(); Telemetry.removeOtelSpan(); + await flushRemainingSpans(); } catch (error) { debug("Sentry/OpenTelemetry transaction finish error. This does not affect execution."); debug(error); From f05cb73d7ab916fc4cb6be148cbbc958e3589f02 Mon Sep 17 00:00:00 2001 From: NeuralFlux <40491005+NeuralFlux@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:07:20 -0400 Subject: [PATCH 17/17] fix: add express instrumentation for async traces to show up --- package.json | 1 + src/controllers/opentelemetry.ts | 3 ++- src/controllers/threading/threadHandler.ts | 5 ----- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index 2a7c03e..3d50019 100644 --- a/package.json +++ b/package.json @@ -84,6 +84,7 @@ "@opentelemetry/exporter-jaeger": "^1.19.0", "@opentelemetry/exporter-metrics-otlp-proto": "^0.50.0", "@opentelemetry/exporter-trace-otlp-proto": "^0.50.0", + "@opentelemetry/instrumentation-express": "^0.43.0", "@opentelemetry/instrumentation-http": "^0.53.0", "@opentelemetry/resources": "^1.18.1", "@opentelemetry/sdk-metrics": "^1.18.1", diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index 77b81e3..557a5c0 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -1,6 +1,7 @@ import { NodeSDK } from "@opentelemetry/sdk-node"; import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-node"; import { HttpInstrumentation } from "@opentelemetry/instrumentation-http"; +import { ExpressInstrumentation } from "@opentelemetry/instrumentation-express"; import { Resource } from "@opentelemetry/resources"; import Debug from "debug"; import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; @@ -20,7 +21,7 @@ debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ // metrics, if needed, shall be exported on a different endpoint // trace a subset of instrumentations to avoid performance overhead - instrumentations: [new HttpInstrumentation()], + instrumentations: [new HttpInstrumentation(), new ExpressInstrumentation()], resource: new Resource({ [ATTR_SERVICE_NAME]: "biothings-explorer", }), diff --git a/src/controllers/threading/threadHandler.ts b/src/controllers/threading/threadHandler.ts index c23e69c..6a6916a 100644 --- a/src/controllers/threading/threadHandler.ts +++ b/src/controllers/threading/threadHandler.ts @@ -221,11 +221,6 @@ export async function runTask(req: Request, res: Response, route: string, useBul propagation.inject(context.active(), otelData); const { traceparent, tracestate } = otelData; - // add req dest to root span name as HTTP instrumentation doesn't do it automatically - const activeContext: Context = context.active(); - const rootSpan: Span = trace.getSpan(activeContext); - if (rootSpan != undefined) rootSpan.updateName(`${req.method} ${req.originalUrl}`); - const taskInfo: TaskInfo = { data: { route,