Skip to content

Commit 289871a

Browse files
committed
Merge branch 'main' into enable-self-instrumentation-tracing
2 parents 25e7663 + 6c70484 commit 289871a

File tree

8 files changed

+97
-20
lines changed

8 files changed

+97
-20
lines changed

CHANGELOG.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// tag::list[]
2+
* <<apm-release-notes-8.16>>
23
* <<apm-release-notes-8.15>>
34
* <<apm-release-notes-8.14>>
45
* <<apm-release-notes-8.13>>
@@ -19,6 +20,7 @@
1920
2021
// tag::includes[]
2122
include::./changelogs/head.asciidoc[]
23+
include::./changelogs/8.16.asciidoc[]
2224
include::./changelogs/8.15.asciidoc[]
2325
include::./changelogs/8.14.asciidoc[]
2426
include::./changelogs/8.13.asciidoc[]

changelogs/8.16.asciidoc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
[[apm-release-notes-8.16]]
2+
== APM version 8.16
3+
* <<apm-release-notes-8.16.0>>
4+
5+
[float]
6+
[[apm-release-notes-8.16.0]]
7+
=== APM version 8.16.0
8+
9+
https://github.com/elastic/apm-server/compare/v8.15.2\...v8.16.0[View commits]
10+
11+
[float]
12+
==== Bug fixes
13+
14+
- Track all bulk request response status codes {pull}13574[13574]
15+
- Fix a concurrent map write panic in monitoring middleware {pull}14335[14335]
16+
- Apply shutdown timeout to http server {pull}14339[14339]
17+
- Tail-based sampling: Fix a rare GC thread failure after an Elastic Agent hot reload that caused storage not to be reclaimed, leaving the server stuck with "storage limit reached" {pull}13574[13574]
18+
19+
[float]
20+
==== Breaking Changes
21+
22+
[float]
23+
==== Deprecations
24+
- Support for Jaeger is now deprecated, and will be removed in a future release {pull}13809[13809]
25+
26+
[float]
27+
==== Intake API Changes
28+
29+
[float]
30+
==== Added
31+
32+
- APM Server no longer retries an HTTP request that returned a 502, 503, or 504 status code. It only retries requests that returned a 429. {pull}13523[13523]
33+
- APM Server now supports emitting distributed tracing for its own operation when running under Elastic Agent, and adds support for configuring a sampling rate {pull}14231[14231]

changelogs/head.asciidoc

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,16 @@
11
[[release-notes-head]]
22
== APM version HEAD
33

4-
https://github.com/elastic/apm-server/compare/8.15\...main[View commits]
5-
6-
[float]
7-
==== Bug fixes
8-
9-
- Track all bulk request response status codes {pull}13574[13574]
10-
- Fix a concurrent map write panic in monitoring middleware {pull}14335[14335]
11-
- Apply shutdown timeout to http server {pull}14339[14339]
4+
https://github.com/elastic/apm-server/compare/8.16\...8.x[View commits]
125

136
[float]
147
==== Breaking Changes
158

169
[float]
1710
==== Deprecations
18-
- Support for Jaeger is now deprecated, and will be removed in a future release {pull}13809[13809]
1911

2012
[float]
2113
==== Intake API Changes
2214

2315
[float]
2416
==== Added
25-
26-
- APM Server will no longer retry an HTTP request that returned 502s, 503s, 504s. It will only retry 429s. {pull}13523[13523]
27-
- APM Server now supports emitting distributed tracing for its own operation when running under Elastic Agent, and adds support for configuring a sampling rate {pull}14231[14231]

systemtest/go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ require (
1414
github.com/hashicorp/go-multierror v1.1.1
1515
github.com/jaegertracing/jaeger v1.62.0
1616
github.com/stretchr/testify v1.9.0
17-
github.com/testcontainers/testcontainers-go v0.33.0
17+
github.com/testcontainers/testcontainers-go v0.34.0
1818
github.com/tidwall/gjson v1.18.0
1919
go.elastic.co/apm/v2 v2.6.2
2020
go.elastic.co/fastjson v1.4.0
@@ -47,7 +47,7 @@ require (
4747
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
4848
github.com/containerd/log v0.1.0 // indirect
4949
github.com/containerd/platforms v0.2.1 // indirect
50-
github.com/cpuguy83/dockercfg v0.3.1 // indirect
50+
github.com/cpuguy83/dockercfg v0.3.2 // indirect
5151
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
5252
github.com/distribution/reference v0.6.0 // indirect
5353
github.com/docker/go-units v0.5.0 // indirect

systemtest/go.sum

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
1717
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
1818
github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
1919
github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
20-
github.com/cpuguy83/dockercfg v0.3.1 h1:/FpZ+JaygUR/lZP2NlFI2DVfrOEMAIKP5wWEJdoYe9E=
21-
github.com/cpuguy83/dockercfg v0.3.1/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
20+
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
21+
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
2222
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
2323
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
2424
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -143,6 +143,7 @@ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVs
143143
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
144144
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
145145
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
146+
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
146147
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
147148
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
148149
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -151,8 +152,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
151152
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
152153
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
153154
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
154-
github.com/testcontainers/testcontainers-go v0.33.0 h1:zJS9PfXYT5O0ZFXM2xxXfk4J5UMw/kRiISng037Gxdw=
155-
github.com/testcontainers/testcontainers-go v0.33.0/go.mod h1:W80YpTa8D5C3Yy16icheD01UTDu+LmXIA2Keo+jWtT8=
155+
github.com/testcontainers/testcontainers-go v0.34.0 h1:5fbgF0vIN5u+nD3IWabQwRybuB4GY8G2HHgCkbMzMHo=
156+
github.com/testcontainers/testcontainers-go v0.34.0/go.mod h1:6P/kMkQe8yqPHfPWNulFGdFHTD8HB2vLq/231xY2iPQ=
156157
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
157158
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
158159
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=

testing/apmsoak/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ terraform.tfvars:
1414

1515
.PHONY: use-production
1616
use-production:
17-
@terraform workspace select production || terraform worksapce new production
17+
@terraform workspace select production || terraform workspace new production
1818

1919
.PHONY: apmsoak
2020
apmsoak:

x-pack/apm-server/sampling/processor.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ const (
4040
shutdownGracePeriod = 5 * time.Second
4141
)
4242

43+
var (
44+
// gcCh works like a global mutex to protect gc from running concurrently when 2 TBS processors are active during a hot reload
45+
gcCh = make(chan struct{}, 1)
46+
)
47+
4348
// Processor is a tail-sampling event processor.
4449
type Processor struct {
4550
config Config
@@ -386,6 +391,16 @@ func (p *Processor) Run() error {
386391
}
387392
})
388393
g.Go(func() error {
394+
// Protect this goroutine from running concurrently when 2 TBS processors are active
395+
// as badger GC is not safe for concurrent use.
396+
select {
397+
case <-p.stopping:
398+
return nil
399+
case gcCh <- struct{}{}:
400+
}
401+
defer func() {
402+
<-gcCh
403+
}()
389404
// This goroutine is responsible for periodically garbage
390405
// collecting the Badger value log, using the recommended
391406
// discard ratio of 0.5.
@@ -411,7 +426,9 @@ func (p *Processor) Run() error {
411426
})
412427
g.Go(func() error {
413428
// Subscribe to remotely sampled trace IDs. This is cancelled immediately when
414-
// Stop is called. The next subscriber will pick up from the previous position.
429+
// Stop is called. But it is possible that both old and new subscriber goroutines
430+
// run concurrently, before the old one eventually receives the Stop call.
431+
// The next subscriber will pick up from the previous position.
415432
defer close(remoteSampledTraceIDs)
416433
defer close(subscriberPositions)
417434
ctx, cancel := context.WithCancel(context.Background())
@@ -558,7 +575,13 @@ func (p *Processor) Run() error {
558575
return nil
559576
}
560577

578+
// subscriberPositionFileMutex protects the subscriber position file from concurrent reads and writes, e.g. during a hot reload.
579+
var subscriberPositionFileMutex sync.Mutex
580+
561581
func readSubscriberPosition(logger *logp.Logger, storageDir string) (pubsub.SubscriberPosition, error) {
582+
subscriberPositionFileMutex.Lock()
583+
defer subscriberPositionFileMutex.Unlock()
584+
562585
var pos pubsub.SubscriberPosition
563586
data, err := os.ReadFile(filepath.Join(storageDir, subscriberPositionFile))
564587
if errors.Is(err, os.ErrNotExist) {
@@ -579,6 +602,9 @@ func writeSubscriberPosition(storageDir string, pos pubsub.SubscriberPosition) e
579602
if err != nil {
580603
return err
581604
}
605+
606+
subscriberPositionFileMutex.Lock()
607+
defer subscriberPositionFileMutex.Unlock()
582608
return os.WriteFile(filepath.Join(storageDir, subscriberPositionFile), data, 0644)
583609
}
584610

x-pack/apm-server/sampling/processor_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/pkg/errors"
2323
"github.com/stretchr/testify/assert"
2424
"github.com/stretchr/testify/require"
25+
"golang.org/x/sync/errgroup"
2526
"google.golang.org/protobuf/testing/protocmp"
2627

2728
"github.com/elastic/apm-data/model/modelpb"
@@ -668,6 +669,31 @@ func TestStorageGC(t *testing.T) {
668669
t.Fatal("timed out waiting for value log garbage collection")
669670
}
670671

672+
func TestStorageGCConcurrency(t *testing.T) {
673+
// This test ensures that TBS processor does not return an error
674+
// even when run concurrently, e.g. during a hot reload.
675+
if testing.Short() {
676+
t.Skip("skipping slow test")
677+
}
678+
679+
config := newTempdirConfig(t)
680+
config.TTL = 10 * time.Millisecond
681+
config.FlushInterval = 10 * time.Millisecond
682+
config.StorageGCInterval = 10 * time.Millisecond
683+
684+
g := errgroup.Group{}
685+
for i := 0; i < 2; i++ {
686+
processor, err := sampling.NewProcessor(config)
687+
require.NoError(t, err)
688+
g.Go(processor.Run)
689+
go func() {
690+
time.Sleep(time.Second)
691+
assert.NoError(t, processor.Stop(context.Background()))
692+
}()
693+
}
694+
assert.NoError(t, g.Wait())
695+
}
696+
671697
func TestStorageLimit(t *testing.T) {
672698
// This test ensures that when tail sampling is configured with a hard
673699
// storage limit, the limit is respected once the size is available.

0 commit comments

Comments
 (0)