@@ -35,7 +35,9 @@ import (
3535 "github.com/cockroachdb/cockroach/pkg/testutils"
3636 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
3737 "github.com/cockroachdb/cockroach/pkg/ts/tspb"
38+ "github.com/cockroachdb/cockroach/pkg/util/randutil"
3839 "github.com/cockroachdb/cockroach/pkg/util/retry"
40+ "github.com/cockroachdb/cockroach/pkg/util/syncutil"
3941 "github.com/cockroachdb/cockroach/pkg/util/timeutil"
4042 "github.com/cockroachdb/errors"
4143 "github.com/jackc/pgtype"
@@ -76,8 +78,33 @@ func registerFollowerReads(r registry.Registry) {
7678 survival : survival ,
7779 deadPrimaryRegion : insufficientQuorum ,
7880 }
79- data := initFollowerReadsDB (ctx , t , c , topology )
80- runFollowerReadsTest (ctx , t , c , topology , rc , data )
81+ conns := struct {
82+ mu syncutil.Mutex
83+ mapping map [int ]* gosql.DB
84+ }{
85+ mapping : make (map [int ]* gosql.DB ),
86+ }
87+ connFunc := func (node int ) * gosql.DB {
88+ conns .mu .Lock ()
89+ defer conns .mu .Unlock ()
90+
91+ if _ , ok := conns .mapping [node ]; ! ok {
92+ conn := c .Conn (ctx , t .L (), node )
93+ conns .mapping [node ] = conn
94+ }
95+
96+ return conns .mapping [node ]
97+ }
98+
99+ defer func () {
100+ for _ , c := range conns .mapping {
101+ c .Close ()
102+ }
103+ }()
104+
105+ rng , _ := randutil .NewPseudoRand ()
106+ data := initFollowerReadsDB (ctx , t , t .L (), c , connFunc , connFunc , rng , topology )
107+ runFollowerReadsTest (ctx , t , t .L (), c , rng , topology , rc , data )
81108 },
82109 })
83110 }
@@ -185,7 +212,9 @@ type topologySpec struct {
185212func runFollowerReadsTest (
186213 ctx context.Context ,
187214 t test.Test ,
215+ l * logger.Logger ,
188216 c cluster.Cluster ,
217+ rng * rand.Rand ,
189218 topology topologySpec ,
190219 rc readConsistency ,
191220 data map [int ]int64 ,
@@ -196,7 +225,7 @@ func runFollowerReadsTest(
196225 // levels in the mixed-version variant of this test than they are on master.
197226 isoLevels := []string {"read committed" , "snapshot" , "serializable" }
198227 require .NoError (t , func () error {
199- db := c .Conn (ctx , t . L () , 1 )
228+ db := c .Conn (ctx , l , 1 )
200229 defer db .Close ()
201230 err := enableIsolationLevels (ctx , t , db )
202231 if err != nil && strings .Contains (err .Error (), "unknown cluster setting" ) {
@@ -209,9 +238,8 @@ func runFollowerReadsTest(
209238
210239 var conns []* gosql.DB
211240 for i := 0 ; i < c .Spec ().NodeCount ; i ++ {
212- isoLevel := isoLevels [rand .Intn (len (isoLevels ))]
213- isoLevelOpt := option .ConnectionOption ("default_transaction_isolation" , isoLevel )
214- conn := c .Conn (ctx , t .L (), i + 1 , isoLevelOpt )
241+ isoLevel := isoLevels [rng .Intn (len (isoLevels ))]
242+ conn := c .Conn (ctx , l , i + 1 , option .ConnectionOption ("default_transaction_isolation" , isoLevel ))
215243 defer conn .Close ()
216244 conns = append (conns , conn )
217245 }
@@ -299,10 +327,7 @@ func runFollowerReadsTest(
299327 ),
300328 )
301329 if err != nil {
302- // 20.2 doesn't have this setting.
303- if ! strings .Contains (err .Error (), "unknown cluster setting" ) {
304- t .Fatal (err )
305- }
330+ t .Fatal (err )
306331 }
307332
308333 // Read the follower read counts before issuing the follower reads to observe
@@ -317,7 +342,7 @@ func runFollowerReadsTest(
317342 // Perform reads on each node and ensure we get the expected value. Do so for
318343 // 15 seconds to give closed timestamps a chance to propagate and caches time
319344 // to warm up.
320- t . L () .Printf ("warming up reads" )
345+ l .Printf ("warming up reads" )
321346 g , gCtx := errgroup .WithContext (ctx )
322347 k , v := chooseKV ()
323348 until := timeutil .Now ().Add (15 * time .Second )
@@ -361,19 +386,19 @@ func runFollowerReadsTest(
361386 if topology .deadPrimaryRegion && i <= 3 {
362387 stopOpts := option .DefaultStopOpts ()
363388 stopOpts .RoachprodOpts .Sig = 9
364- c .Stop (ctx , t . L () , stopOpts , c .Node (i ))
389+ c .Stop (ctx , l , stopOpts , c .Node (i ))
365390 deadNodes [i ] = struct {}{}
366391 } else {
367392 liveNodes [i ] = struct {}{}
368393 }
369394 }
370395
371- t . L () .Printf ("starting read load" )
396+ l .Printf ("starting read load" )
372397 const loadDuration = 4 * time .Minute
373398 timeoutCtx , cancel := context .WithCancel (ctx )
374399 defer cancel ()
375400 time .AfterFunc (loadDuration , func () {
376- t . L () .Printf ("stopping load" )
401+ l .Printf ("stopping load" )
377402 cancel ()
378403 })
379404 g , gCtx = errgroup .WithContext (timeoutCtx )
@@ -392,7 +417,7 @@ func runFollowerReadsTest(
392417 t .Fatalf ("error reading data: %v" , err )
393418 }
394419 end := timeutil .Now ()
395- t . L () .Printf ("load stopped" )
420+ l .Printf ("load stopped" )
396421
397422 // Depending on the test's topology, we expect a different set of nodes to
398423 // perform follower reads.
@@ -422,39 +447,52 @@ func runFollowerReadsTest(
422447 expectedLowRatioNodes = 1
423448 }
424449 }
425- verifyHighFollowerReadRatios (ctx , t , c , liveNodes , start , end , expectedLowRatioNodes )
450+ verifyHighFollowerReadRatios (ctx , t , l , c , liveNodes , start , end , expectedLowRatioNodes )
426451
427452 if topology .multiRegion {
428453 // Perform a ts query to verify that the SQL latencies were well below the
429454 // WAN latencies which should be at least 50ms.
430455 //
431456 // We don't do this for singleRegion since, in a single region, there's no
432457 // low latency and high-latency regimes.
433- verifySQLLatency (ctx , t , c , liveNodes , start , end , maxLatencyThreshold )
458+ verifySQLLatency (ctx , t , l , c , liveNodes , start , end , maxLatencyThreshold )
434459 }
435460
436461 // Restart dead nodes, if necessary.
437462 for i := range deadNodes {
438- c .Start (ctx , t . L () , option .DefaultStartOpts (), install .MakeClusterSettings (), c .Node (i ))
463+ c .Start (ctx , l , option .DefaultStartOpts (), install .MakeClusterSettings (), c .Node (i ))
439464 }
440465}
441466
442467// initFollowerReadsDB initializes a database for the follower reads test.
443468// Returns the data inserted into the test table.
469+ //
470+ // The `connFunc` (and `systemConnFunc`) parameters capture how to
471+ // connect to the database in a test run. This allows us to use the
472+ // mixedersion helpers in mixed-version tests. We need to connect to
473+ // the system tenant to change relevant SystemOnly cluster settings,
474+ // and some mixed-version tests run in a multi-tenant deployment.
444475func initFollowerReadsDB (
445- ctx context.Context , t test.Test , c cluster.Cluster , topology topologySpec ,
476+ ctx context.Context ,
477+ t test.Test ,
478+ l * logger.Logger ,
479+ c cluster.Cluster ,
480+ connectFunc , systemConnectFunc func (int ) * gosql.DB ,
481+ rng * rand.Rand ,
482+ topology topologySpec ,
446483) (data map [int ]int64 ) {
447- db := c .Conn (ctx , t .L (), 1 )
448- defer db .Close ()
484+ systemDB := systemConnectFunc (1 )
485+ db := connectFunc (1 )
486+
449487 // Disable load based splitting and range merging because splits and merges
450488 // interfere with follower reads. This test's workload regularly triggers load
451489 // based splitting in the first phase creating small ranges which later
452490 // in the test are merged. The merging tends to coincide with the final phase
453491 // of the test which attempts to observe low latency reads leading to
454492 // flakiness.
455- _ , err := db .ExecContext (ctx , "SET CLUSTER SETTING kv.range_split.by_load_enabled = 'false'" )
493+ _ , err := systemDB .ExecContext (ctx , "SET CLUSTER SETTING kv.range_split.by_load_enabled = 'false'" )
456494 require .NoError (t , err )
457- _ , err = db .ExecContext (ctx , "SET CLUSTER SETTING kv.range_merge.queue_enabled = 'false'" )
495+ _ , err = systemDB .ExecContext (ctx , "SET CLUSTER SETTING kv.range_merge.queue_enabled = 'false'" )
458496 require .NoError (t , err )
459497
460498 // Check the cluster regions.
@@ -502,7 +540,7 @@ func initFollowerReadsDB(
502540 }
503541
504542 // Wait until the table has completed up-replication.
505- t . L () .Printf ("waiting for up-replication..." )
543+ l .Printf ("waiting for up-replication..." )
506544 retryOpts := retry.Options {MaxBackoff : 15 * time .Second }
507545 for r := retry .StartWithCtx (ctx , retryOpts ); r .Next (); {
508546 // Check that the table has the expected number and location of voting and
@@ -550,7 +588,7 @@ func initFollowerReadsDB(
550588 ctx , q1 , votersCount , nonVotersCount , pq .Array (votersSet ), pq .Array (nonVotersSet ),
551589 ).Scan (& ok , & voters , & nonVoters )
552590 if errors .Is (err , gosql .ErrNoRows ) {
553- t . L () .Printf ("up-replication not complete, missing range" )
591+ l .Printf ("up-replication not complete, missing range" )
554592 continue
555593 }
556594 require .NoError (t , err )
@@ -559,9 +597,9 @@ func initFollowerReadsDB(
559597 break
560598 }
561599
562- t . L () .Printf ("up-replication not complete, " +
563- "found voters = %v (want %d in set %v) and non_voters = %s (want %d in set %v)" ,
564- voters , votersCount , votersSet , nonVoters , votersCount , votersSet )
600+ l .Printf ("up-replication not complete, " +
601+ "found voters = %v (want %d in set %v) and non_voters = %v (want %d in set %v)" ,
602+ voters , votersCount , votersSet , nonVoters , nonVotersCount , nonVotersSet )
565603 }
566604
567605 if topology .multiRegion {
@@ -618,7 +656,7 @@ func initFollowerReadsDB(
618656 break
619657 }
620658
621- t . L () .Printf ("rebalancing not complete, expected %d at risk ranges, " +
659+ l .Printf ("rebalancing not complete, expected %d at risk ranges, " +
622660 "found %d" , expAtRisk , atRisk )
623661 }
624662 }
@@ -629,7 +667,7 @@ func initFollowerReadsDB(
629667 sem := make (chan struct {}, concurrency )
630668 data = make (map [int ]int64 )
631669 insert := func (ctx context.Context , k int ) func () error {
632- v := rand .Int63 ()
670+ v := rng .Int63 ()
633671 data [k ] = v
634672 return func () error {
635673 sem <- struct {}{}
@@ -671,6 +709,7 @@ func computeFollowerReadDuration(ctx context.Context, db *gosql.DB) (time.Durati
671709func verifySQLLatency (
672710 ctx context.Context ,
673711 t test.Test ,
712+ l * logger.Logger ,
674713 c cluster.Cluster ,
675714 liveNodes map [int ]struct {},
676715 start , end time.Time ,
@@ -682,13 +721,11 @@ func verifySQLLatency(
682721 adminNode = i
683722 break
684723 }
685- adminURLs , err := c .ExternalAdminUIAddr (ctx , t . L () , c .Node (adminNode ))
724+ adminURLs , err := c .ExternalAdminUIAddr (ctx , l , c .Node (adminNode ))
686725 if err != nil {
687726 t .Fatal (err )
688727 }
689- // follower-reads/mixed-version runs on insecure mode, so we need http.
690- // Tests that do run on secure mode will redirect to https.
691- url := "http://" + adminURLs [0 ] + "/ts/query"
728+ url := "https://" + adminURLs [0 ] + "/ts/query"
692729 var sources []string
693730 for i := range liveNodes {
694731 sources = append (sources , strconv .Itoa (i ))
@@ -704,7 +741,7 @@ func verifySQLLatency(
704741 SourceAggregator : tspb .TimeSeriesQueryAggregator_MAX .Enum (),
705742 }},
706743 }
707- client := roachtestutil .DefaultHTTPClient (c , t . L () )
744+ client := roachtestutil .DefaultHTTPClient (c , l )
708745 var response tspb.TimeSeriesQueryResponse
709746 if err := client .PostProtobuf (ctx , url , & request , & response ); err != nil {
710747 t .Fatal (err )
@@ -736,6 +773,7 @@ func verifySQLLatency(
736773func verifyHighFollowerReadRatios (
737774 ctx context.Context ,
738775 t test.Test ,
776+ l * logger.Logger ,
739777 c cluster.Cluster ,
740778 liveNodes map [int ]struct {},
741779 start , end time.Time ,
@@ -753,11 +791,10 @@ func verifyHighFollowerReadRatios(
753791 adminNode = i
754792 break
755793 }
756- adminURLs , err := c .ExternalAdminUIAddr (ctx , t . L () , c .Node (adminNode ))
794+ adminURLs , err := c .ExternalAdminUIAddr (ctx , l , c .Node (adminNode ))
757795 require .NoError (t , err )
758- // follower-reads/mixed-version runs on insecure mode, so we need http.
759- // Tests that do run on secure mode will redirect to https.
760- url := "http://" + adminURLs [0 ] + "/ts/query"
796+
797+ url := "https://" + adminURLs [0 ] + "/ts/query"
761798 request := tspb.TimeSeriesQueryRequest {
762799 StartNanos : start .UnixNano (),
763800 EndNanos : end .UnixNano (),
@@ -777,7 +814,11 @@ func verifyHighFollowerReadRatios(
777814 Derivative : tspb .TimeSeriesQueryDerivative_NON_NEGATIVE_DERIVATIVE .Enum (),
778815 })
779816 }
780- client := roachtestutil .DefaultHTTPClient (c , t .L ())
817+ // Make sure to connect to the system tenant in case this test
818+ // is running on a multitenant deployment.
819+ client := roachtestutil .DefaultHTTPClient (
820+ c , l , roachtestutil .VirtualCluster (install .SystemInterfaceName ),
821+ )
781822 var response tspb.TimeSeriesQueryResponse
782823 if err := client .PostProtobuf (ctx , url , & request , & response ); err != nil {
783824 t .Fatal (err )
@@ -810,7 +851,7 @@ func verifyHighFollowerReadRatios(
810851 }
811852 }
812853
813- t . L () .Printf ("interval stats: %s" , intervalsToString (stats ))
854+ l .Printf ("interval stats: %s" , intervalsToString (stats ))
814855
815856 // Now count how many intervals have more than the tolerated number of nodes
816857 // with low follower read ratios.
@@ -865,10 +906,12 @@ func getFollowerReadCounts(ctx context.Context, t test.Test, c cluster.Cluster)
865906 if err != nil {
866907 return err
867908 }
868- // follower-reads/mixed-version runs on insecure mode, so we need http.
869- // Tests that do run on secure mode will redirect to https.
870- url := "http://" + adminUIAddrs [0 ] + "/_status/vars"
871- client := roachtestutil .DefaultHTTPClient (c , t .L ())
909+ url := "https://" + adminUIAddrs [0 ] + "/_status/vars"
910+ // Make sure to connect to the system tenant in case this test
911+ // is running on a multitenant deployment.
912+ client := roachtestutil .DefaultHTTPClient (
913+ c , t .L (), roachtestutil .VirtualCluster (install .SystemInterfaceName ),
914+ )
872915 resp , err := client .Get (ctx , url )
873916 if err != nil {
874917 return err
@@ -970,21 +1013,25 @@ func runFollowerReadsMixedVersionTest(
9701013 rc readConsistency ,
9711014 opts ... mixedversion.CustomOption ,
9721015) {
973- mvt := mixedversion .NewTest (ctx , t , t .L (), c , c .All (),
974- append ([]mixedversion.CustomOption {
975- // Multi-tenant deployments are currently unsupported. See #127378.
976- mixedversion .EnabledDeploymentModes (mixedversion .SystemOnlyDeployment ),
977- }, opts ... )... ,
978- )
1016+ mvt := mixedversion .NewTest (ctx , t , t .L (), c , c .All (), opts ... )
9791017
9801018 var data map [int ]int64
9811019 runInit := func (ctx context.Context , l * logger.Logger , r * rand.Rand , h * mixedversion.Helper ) error {
982- data = initFollowerReadsDB (ctx , t , c , topology )
1020+ if topology .multiRegion &&
1021+ h .IsMultitenant () &&
1022+ ! h .Context ().FromVersion .AtLeast (mixedversion .TenantsAndSystemAlignedSettingsVersion ) {
1023+ const setting = "sql.multi_region.allow_abstractions_for_secondary_tenants.enabled"
1024+ if err := setTenantSetting (l , r , h , setting , true ); err != nil {
1025+ return errors .Wrapf (err , "setting %s" , setting )
1026+ }
1027+ }
1028+
1029+ data = initFollowerReadsDB (ctx , t , l , c , h .Connect , h .System .Connect , r , topology )
9831030 return nil
9841031 }
9851032
9861033 runFollowerReads := func (ctx context.Context , l * logger.Logger , r * rand.Rand , h * mixedversion.Helper ) error {
987- runFollowerReadsTest (ctx , t , c , topology , rc , data )
1034+ runFollowerReadsTest (ctx , t , l , c , r , topology , rc , data )
9881035 return nil
9891036 }
9901037
0 commit comments