Skip to content

Commit

Permalink
Use cached envelope data from dead process for native crash (#1760)
Browse files Browse the repository at this point in the history
## Goal

For native crashes, try to use the envelope resource/metadata from the associated session to reflect the app/SDK state when the crash happened. If a session is not found, use the cached crash envelope that is written when there are no sessions to get the envelope data. Failing that, use the current SDK's envelope, but log an internal error.

This is done by using a new component, `CachedLogEnvelopeStore`, that will be used by the `PayloadResurrectionService` to create and cache the appropriate envelope for a native crash, and then by `LogEnvelopeSource` to consume the envelope.

The workflow goes thusly:

1. Session resurrection: create and cache envelope for native crash associated with a resurrected session
2. Native crash log creation: get envelope for the given crash's `StoredTelemetryMetadata`
3. Leftover native crashes (only expect 1) will use the resource and metadata from the cached crash envelope to produce the envelope for the crash.
4. Send session-less crash
5. Clean up any left over native crashes and envelopes

## Testing

Unit and integration tests added to verify the cases where a session is found, where a session isn't found but the cached crash envelope exists, and where neither exists.
  • Loading branch information
bidetofevil authored Dec 17, 2024
2 parents f51105b + 95d4d33 commit 5be222f
Show file tree
Hide file tree
Showing 39 changed files with 923 additions and 143 deletions.
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
package io.embrace.android.embracesdk.internal.envelope.log

import io.embrace.android.embracesdk.internal.delivery.PayloadType
import io.embrace.android.embracesdk.internal.delivery.storage.CachedLogEnvelopeStore
import io.embrace.android.embracesdk.internal.delivery.storage.CachedLogEnvelopeStore.Companion.createNativeCrashEnvelopeMetadata
import io.embrace.android.embracesdk.internal.envelope.metadata.EnvelopeMetadataSource
import io.embrace.android.embracesdk.internal.envelope.resource.EnvelopeResourceSource
import io.embrace.android.embracesdk.internal.logs.LogRequest
import io.embrace.android.embracesdk.internal.opentelemetry.embProcessIdentifier
import io.embrace.android.embracesdk.internal.payload.Envelope
import io.embrace.android.embracesdk.internal.payload.Envelope.Companion.createLogEnvelope
import io.embrace.android.embracesdk.internal.payload.LogPayload
import io.embrace.android.embracesdk.internal.spans.findAttributeValue
import io.opentelemetry.semconv.incubating.SessionIncubatingAttributes

internal class LogEnvelopeSourceImpl(
private val metadataSource: EnvelopeMetadataSource,
private val resourceSource: EnvelopeResourceSource,
private val logPayloadSource: LogPayloadSource,
private val cachedLogEnvelopeStore: CachedLogEnvelopeStore?,
) : LogEnvelopeSource {

override fun getBatchedLogEnvelope(): Envelope<LogPayload> = getLogEnvelope(logPayloadSource.getBatchedLogPayload())

override fun getSingleLogEnvelopes(): List<LogRequest<Envelope<LogPayload>>> {
val payloads = logPayloadSource.getSingleLogPayloads()
return if (payloads.isNotEmpty()) {
payloads.map { LogRequest(payload = getLogEnvelope(it.payload), defer = it.defer) }
val requests = logPayloadSource.getSingleLogPayloads()
return if (requests.isNotEmpty()) {
requests.map { LogRequest(payload = getLogEnvelope(it.payload), defer = it.defer) }
} else {
emptyList()
}
Expand All @@ -27,11 +35,27 @@ internal class LogEnvelopeSourceImpl(
return getLogEnvelope(LogPayload(logs = emptyList()))
}

private fun getLogEnvelope(payload: LogPayload) = Envelope(
resourceSource.getEnvelopeResource(),
metadataSource.getEnvelopeMetadata(),
"0.1.0",
"logs",
payload
)
/**
 * Wraps [payload] in an [Envelope].
 *
 * When a [cachedLogEnvelopeStore] is available and the first log in [payload] is typed as a
 * native crash, the store is queried using the session ID and process identifier attributes of
 * that log. If an envelope is found, it is reused with its data replaced by [payload].
 *
 * In every other case the envelope is built from the current [resourceSource] and
 * [metadataSource] values.
 */
private fun getLogEnvelope(payload: LogPayload): Envelope<LogPayload> {
    val cachedEnvelope = cachedLogEnvelopeStore
        ?.takeIf { payload.findType() == PayloadType.NATIVE_CRASH }
        ?.let { store ->
            // Lookup keys come from the attributes of the first (native crash) log, if any.
            val crashAttributes = payload.logs?.firstOrNull()?.attributes
            store.get(
                createNativeCrashEnvelopeMetadata(
                    sessionId = crashAttributes?.findAttributeValue(SessionIncubatingAttributes.SESSION_ID.key),
                    processIdentifier = crashAttributes?.findAttributeValue(embProcessIdentifier.name)
                )
            )
        }

    return cachedEnvelope?.copy(data = payload)
        ?: payload.createLogEnvelope(
            resource = resourceSource.getEnvelopeResource(),
            metadata = metadataSource.getEnvelopeMetadata()
        )
}

/**
 * Resolves the [PayloadType] from the "emb.type" attribute of the first log in this payload.
 * A missing log, attribute list or attribute results in `null` being passed to
 * [PayloadType.fromValue].
 */
private fun LogPayload.findType(): PayloadType {
    val firstLogAttributes = logs?.firstOrNull()?.attributes
    return PayloadType.fromValue(firstLogAttributes?.findAttributeValue("emb.type"))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import io.embrace.android.embracesdk.internal.delivery.debug.DeliveryTracer
import io.embrace.android.embracesdk.internal.delivery.execution.RequestExecutionService
import io.embrace.android.embracesdk.internal.delivery.intake.IntakeService
import io.embrace.android.embracesdk.internal.delivery.scheduling.SchedulingService
import io.embrace.android.embracesdk.internal.delivery.storage.CachedLogEnvelopeStore
import io.embrace.android.embracesdk.internal.delivery.storage.PayloadStorageService
import io.embrace.android.embracesdk.internal.session.orchestrator.PayloadStore

Expand All @@ -22,6 +23,7 @@ interface DeliveryModule {
val payloadCachingService: PayloadCachingService?
val payloadStorageService: PayloadStorageService?
val cacheStorageService: PayloadStorageService?
val cachedLogEnvelopeStore: CachedLogEnvelopeStore?
val requestExecutionService: RequestExecutionService?
val schedulingService: SchedulingService?
val deliveryTracer: DeliveryTracer?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import io.embrace.android.embracesdk.internal.delivery.intake.IntakeService
import io.embrace.android.embracesdk.internal.delivery.intake.IntakeServiceImpl
import io.embrace.android.embracesdk.internal.delivery.scheduling.SchedulingService
import io.embrace.android.embracesdk.internal.delivery.scheduling.SchedulingServiceImpl
import io.embrace.android.embracesdk.internal.delivery.storage.CachedLogEnvelopeStore
import io.embrace.android.embracesdk.internal.delivery.storage.CachedLogEnvelopeStoreImpl
import io.embrace.android.embracesdk.internal.delivery.storage.PayloadStorageService
import io.embrace.android.embracesdk.internal.delivery.storage.PayloadStorageServiceImpl
import io.embrace.android.embracesdk.internal.delivery.storage.StorageLocation
Expand Down Expand Up @@ -146,6 +148,20 @@ internal class DeliveryModuleImpl(
}
}

/**
 * Store for cached log envelopes, backed by the [StorageLocation.ENVELOPE] directory.
 * Resolves to `null` when the SDK is configured to only use OTel exporters.
 */
override val cachedLogEnvelopeStore: CachedLogEnvelopeStore? by singleton {
    if (!configModule.configService.isOnlyUsingOtelExporters()) {
        CachedLogEnvelopeStoreImpl(
            outputDir = StorageLocation.ENVELOPE.asFile(coreModule.context, initModule.logger),
            worker = dataPersistenceWorker,
            logger = initModule.logger,
            serializer = initModule.jsonSerializer
        )
    } else {
        null
    }
}

override val requestExecutionService: RequestExecutionService? by singleton {
requestExecutionServiceProvider?.invoke() ?: if (configModule.configService.isOnlyUsingOtelExporters()) {
null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,6 @@ interface InitModule {
val jsonSerializer: PlatformSerializer

val instrumentedConfig: InstrumentedConfig

val processIdentifierProvider: () -> String
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.embrace.android.embracesdk.internal.injection

import io.embrace.android.embracesdk.internal.IdGenerator
import io.embrace.android.embracesdk.internal.SystemInfo
import io.embrace.android.embracesdk.internal.clock.Clock
import io.embrace.android.embracesdk.internal.clock.NormalizedIntervalClock
Expand All @@ -18,6 +19,7 @@ internal class InitModuleImpl(
override val clock: Clock = NormalizedIntervalClock(systemClock = SystemClock()),
override val logger: EmbLogger = EmbLoggerImpl(),
override val systemInfo: SystemInfo = SystemInfo(),
override val processIdentifierProvider: () -> String = IdGenerator.Companion::generateLaunchInstanceId,
) : InitModule {

override val telemetryService: TelemetryService by singleton {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ internal class OpenTelemetryModuleImpl(
}

override val openTelemetryConfiguration: OpenTelemetryConfiguration by lazy {
OpenTelemetryConfiguration(spanSink, logSink, initModule.systemInfo)
OpenTelemetryConfiguration(
spanSink = spanSink,
logSink = logSink,
systemInfo = initModule.systemInfo,
processIdentifierProvider = initModule.processIdentifierProvider
)
}

private val openTelemetrySdk: OpenTelemetrySdk by lazy {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ internal class PayloadSourceModuleImpl(
}

override val logEnvelopeSource: LogEnvelopeSource by singleton {
LogEnvelopeSourceImpl(metadataSource, resourceSource, logPayloadSource)
LogEnvelopeSourceImpl(metadataSource, resourceSource, logPayloadSource, deliveryModule.cachedLogEnvelopeStore)
}

override val deviceArchitecture: DeviceArchitecture by singleton {
Expand Down Expand Up @@ -130,16 +130,20 @@ internal class PayloadSourceModuleImpl(
}
}

@Suppress("ComplexCondition")
override val payloadResurrectionService: PayloadResurrectionService? by singleton {
val intakeService = deliveryModule.intakeService
val cacheStorageService = deliveryModule.cacheStorageService
val cachedLogEnvelopeStore = deliveryModule.cachedLogEnvelopeStore
if (configModule.configService.autoDataCaptureBehavior.isV2StorageEnabled() &&
intakeService != null &&
cacheStorageService != null
cacheStorageService != null &&
cachedLogEnvelopeStore != null
) {
PayloadResurrectionServiceImpl(
intakeService = intakeService,
cacheStorageService = cacheStorageService,
cachedLogEnvelopeStore = cachedLogEnvelopeStore,
logger = initModule.logger,
serializer = initModule.jsonSerializer
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class OpenTelemetryConfiguration(
spanSink: SpanSink,
logSink: LogSink,
systemInfo: SystemInfo,
private val processIdentifierProvider: () -> String = IdGenerator.Companion::generateLaunchInstanceId
) {
val embraceSdkName: String = BuildConfig.LIBRARY_PACKAGE_NAME
val embraceSdkVersion: String = BuildConfig.VERSION_NAME
Expand All @@ -49,7 +50,7 @@ class OpenTelemetryConfiguration(
* this out by proximity for stitched sessions.
*/
val processIdentifier: String by lazy {
Systrace.traceSynchronous("process-identifier-init", IdGenerator.Companion::generateLaunchInstanceId)
Systrace.traceSynchronous("process-identifier-init", processIdentifierProvider)
}

private val externalSpanExporters = mutableListOf<SpanExporter>()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,22 @@ import io.embrace.android.embracesdk.internal.clock.nanosToMillis
import io.embrace.android.embracesdk.internal.delivery.StoredTelemetryMetadata
import io.embrace.android.embracesdk.internal.delivery.SupportedEnvelopeType
import io.embrace.android.embracesdk.internal.delivery.intake.IntakeService
import io.embrace.android.embracesdk.internal.delivery.storage.CachedLogEnvelopeStore
import io.embrace.android.embracesdk.internal.delivery.storage.CachedLogEnvelopeStore.Companion.createNativeCrashEnvelopeMetadata
import io.embrace.android.embracesdk.internal.delivery.storage.PayloadStorageService
import io.embrace.android.embracesdk.internal.logging.EmbLogger
import io.embrace.android.embracesdk.internal.logging.InternalErrorType
import io.embrace.android.embracesdk.internal.ndk.NativeCrashService
import io.embrace.android.embracesdk.internal.opentelemetry.embCrashId
import io.embrace.android.embracesdk.internal.opentelemetry.embHeartbeatTimeUnixNano
import io.embrace.android.embracesdk.internal.opentelemetry.embProcessIdentifier
import io.embrace.android.embracesdk.internal.opentelemetry.embState
import io.embrace.android.embracesdk.internal.payload.ApplicationState
import io.embrace.android.embracesdk.internal.payload.Attribute
import io.embrace.android.embracesdk.internal.payload.Envelope
import io.embrace.android.embracesdk.internal.payload.EnvelopeMetadata
import io.embrace.android.embracesdk.internal.payload.EnvelopeResource
import io.embrace.android.embracesdk.internal.payload.LogPayload
import io.embrace.android.embracesdk.internal.payload.NativeCrashData
import io.embrace.android.embracesdk.internal.payload.SessionPayload
import io.embrace.android.embracesdk.internal.payload.Span
Expand All @@ -26,28 +33,31 @@ import io.embrace.android.embracesdk.internal.spans.findAttributeValue
import io.embrace.android.embracesdk.internal.spans.hasFixedAttribute
import io.embrace.android.embracesdk.internal.utils.Provider
import io.opentelemetry.semconv.incubating.SessionIncubatingAttributes
import java.util.Locale
import java.util.zip.GZIPInputStream
import kotlin.math.max

internal class PayloadResurrectionServiceImpl(
private val intakeService: IntakeService,
private val cacheStorageService: PayloadStorageService,
private val cachedLogEnvelopeStore: CachedLogEnvelopeStore,
private val logger: EmbLogger,
private val serializer: PlatformSerializer,
) : PayloadResurrectionService {

override fun resurrectOldPayloads(nativeCrashServiceProvider: Provider<NativeCrashService?>) {
val nativeCrashService = nativeCrashServiceProvider()
val undeliveredPayloads = cacheStorageService.getUndeliveredPayloads()
val payloadsToResurrect = undeliveredPayloads.filterNot { it.isCrashEnvelope() }
val nativeCrashes = nativeCrashService?.getNativeCrashes()?.associateBy { it.sessionId } ?: emptyMap()
val processedCrashes = mutableSetOf<NativeCrashData>()

undeliveredPayloads.forEach { payload ->
payloadsToResurrect.forEach { payload ->
val result = runCatching {
payload.processUndeliveredPayload(
nativeCrashService = nativeCrashService,
nativeCrashProvider = nativeCrashes::get,
postNativeCrashCallback = processedCrashes::add,
postNativeCrashProcessingCallback = processedCrashes::add,
)
}

Expand All @@ -67,21 +77,81 @@ internal class PayloadResurrectionServiceImpl(
}

if (nativeCrashService != null) {
nativeCrashes.values.filterNot { processedCrashes.contains(it) }.forEach { nativeCrash ->
nativeCrashService.sendNativeCrash(
nativeCrash = nativeCrash,
sessionProperties = emptyMap(),
metadata = emptyMap()
)
// We assume that there can only ever be one cached crash envelope and one sessionless native crash.
// Internal errors will be logged if that assumption does not hold, as we currently don't store enough
// metadata in the native crash to determine which app instance it came from if it isn't associated with
// a session.
//
// This assumption would be incorrect if a native crash happens during startup, before a session is created,
// and before the payload resurrection phase of the SDK startup has completed. This seems pretty rare.
//
// Solving this requires the persistence of the processIdentifier, and we will only do this if this
// proves to be a problem in production.

val sessionlessNativeCrashes = nativeCrashes.values.filterNot { processedCrashes.contains(it) }
if (sessionlessNativeCrashes.isNotEmpty()) {
val cachedCrashEnvelopeMetadata = undeliveredPayloads.firstOrNull { it.isCrashEnvelope() }
val cachedCrashEnvelope = if (cachedCrashEnvelopeMetadata != null) {
runCatching {
serializer.fromJson<Envelope<LogPayload>>(
inputStream = GZIPInputStream(
cacheStorageService.loadPayloadAsStream(cachedCrashEnvelopeMetadata)
),
type = SupportedEnvelopeType.CRASH.serializedType
).also {
cacheStorageService.delete(cachedCrashEnvelopeMetadata)
}
}.getOrNull()
} else {
null
}
val resource = cachedCrashEnvelope?.resource
val metadata = cachedCrashEnvelope?.metadata
sessionlessNativeCrashes.forEach { nativeCrash ->
if (resource != null && metadata != null) {
cachedLogEnvelopeStore.create(
storedTelemetryMetadata = createNativeCrashEnvelopeMetadata(
sessionId = nativeCrash.sessionId
),
resource = resource,
metadata = metadata
)
} else {
logger.trackInternalError(
type = InternalErrorType.NATIVE_CRASH_RESURRECTION_ERROR,
throwable = IllegalStateException("Cached native crash envelope data not found")
)
}
nativeCrashService.sendNativeCrash(
nativeCrash = nativeCrash,
sessionProperties = emptyMap(),
metadata = mapOf(
embState.attributeKey to ApplicationState.BACKGROUND.name.lowercase(Locale.ENGLISH)
),
)
}
if (sessionlessNativeCrashes.size > 1) {
logger.trackInternalError(
type = InternalErrorType.NATIVE_CRASH_RESURRECTION_ERROR,
throwable = IllegalStateException("Multiple sessionless native crashes found.")
)
}
}
nativeCrashService.deleteAllNativeCrashes()
}

undeliveredPayloads.filter { it.isCrashEnvelope() }.forEach { crashEnvelopeMetadata ->
cacheStorageService.delete(crashEnvelopeMetadata)
}
cachedLogEnvelopeStore.clear()
}

/** Returns true when this stored payload's envelope type is [SupportedEnvelopeType.CRASH]. */
private fun StoredTelemetryMetadata.isCrashEnvelope(): Boolean = SupportedEnvelopeType.CRASH == envelopeType

private fun StoredTelemetryMetadata.processUndeliveredPayload(
nativeCrashService: NativeCrashService?,
nativeCrashProvider: (String) -> NativeCrashData?,
postNativeCrashCallback: (NativeCrashData) -> Unit,
postNativeCrashProcessingCallback: (NativeCrashData) -> Unit,
) {
val resurrectedPayload = when (envelopeType) {
SupportedEnvelopeType.SESSION -> {
Expand All @@ -92,18 +162,33 @@ internal class PayloadResurrectionServiceImpl(

val sessionId = deadSession.getSessionId()
val appState = deadSession.getSessionSpan()?.attributes?.findAttributeValue(embState.name)
val nativeCrash = if (sessionId != null) {
val nativeCrash = if (nativeCrashService != null && sessionId != null) {
nativeCrashProvider(sessionId)?.apply {
postNativeCrashCallback(this)
nativeCrashService?.sendNativeCrash(
val nativeCrashEnvelopeMetadata = createNativeCrashEnvelopeMetadata(
sessionId = sessionId,
processIdentifier = processId
)

cachedLogEnvelopeStore.create(
storedTelemetryMetadata = nativeCrashEnvelopeMetadata,
resource = deadSession.resource ?: EnvelopeResource(),
metadata = deadSession.metadata ?: EnvelopeMetadata()
)

nativeCrashService.sendNativeCrash(
nativeCrash = this,
sessionProperties = deadSession.getSessionProperties(),
metadata = if (appState != null) {
mapOf(embState.attributeKey to appState)
mapOf(
embState.attributeKey to appState,
embProcessIdentifier.attributeKey to processId
)
} else {
emptyMap()
}
},
)

postNativeCrashProcessingCallback(this)
}
} else {
null
Expand Down
Loading

0 comments on commit 5be222f

Please sign in to comment.