diff --git a/.gitignore b/.gitignore index 0413d89e7..91c25a7c5 100644 --- a/.gitignore +++ b/.gitignore @@ -56,5 +56,3 @@ __init__.pyc .cache/ .ipynb_checkpoints/ .vscode/ - - diff --git a/dialogue-core/build.gradle b/dialogue-core/build.gradle index 59d8e9aa8..27d4dfb57 100644 --- a/dialogue-core/build.gradle +++ b/dialogue-core/build.gradle @@ -16,6 +16,7 @@ dependencies { testCompile 'org.assertj:assertj-core' testCompile 'org.assertj:assertj-guava' testCompile 'org.mockito:mockito-core' + testRuntimeOnly 'org.slf4j:slf4j-simple' annotationProcessor 'org.immutables:value' testAnnotationProcessor 'org.immutables:value' diff --git a/dialogue-core/src/main/java/com/palantir/dialogue/core/ConcurrencyLimitedChannel.java b/dialogue-core/src/main/java/com/palantir/dialogue/core/ConcurrencyLimitedChannel.java index 97eb25a27..e381125c3 100644 --- a/dialogue-core/src/main/java/com/palantir/dialogue/core/ConcurrencyLimitedChannel.java +++ b/dialogue-core/src/main/java/com/palantir/dialogue/core/ConcurrencyLimitedChannel.java @@ -18,6 +18,7 @@ import com.github.benmanes.caffeine.cache.Caffeine; import com.github.benmanes.caffeine.cache.LoadingCache; +import com.github.benmanes.caffeine.cache.Ticker; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.FutureCallback; @@ -42,6 +43,7 @@ */ final class ConcurrencyLimitedChannel implements LimitedChannel { private static final Void NO_CONTEXT = null; + private static final Ticker SYSTEM_NANOTIME = System::nanoTime; private final Channel delegate; private final LoadingCache> limiters; @@ -54,10 +56,11 @@ final class ConcurrencyLimitedChannel implements LimitedChannel { } static ConcurrencyLimitedChannel create(Channel delegate) { - return new ConcurrencyLimitedChannel(delegate, ConcurrencyLimitedChannel::createLimiter); + return new ConcurrencyLimitedChannel(delegate, () -> ConcurrencyLimitedChannel.createLimiter(SYSTEM_NANOTIME)); } - private static Limiter createLimiter() { + @VisibleForTesting + static Limiter createLimiter(Ticker nanoTimeClock) { AIMDLimit aimdLimit = AIMDLimit.newBuilder() // Explicitly set values to prevent library changes from breaking us .initialLimit(20) @@ -68,13 +71,15 @@ private static Limiter createLimiter() { .timeout(Long.MAX_VALUE, TimeUnit.DAYS) .build(); return SimpleLimiter.newBuilder() + .clock(nanoTimeClock::read) .limit(WindowedLimit.newBuilder().build(aimdLimit)) .build(); } @Override public Optional> maybeExecute(Endpoint endpoint, Request request) { - return limiters.get(endpoint).acquire(NO_CONTEXT).map(listener -> + Limiter limiter = limiters.get(endpoint); + return limiter.acquire(NO_CONTEXT).map(listener -> DialogueFutures.addDirectCallback(delegate.execute(endpoint, request), new LimiterCallback(listener))); } diff --git a/dialogue-core/src/main/java/com/palantir/dialogue/core/QueuedChannel.java b/dialogue-core/src/main/java/com/palantir/dialogue/core/QueuedChannel.java index 281a89e2a..c328e6dc5 100644 --- a/dialogue-core/src/main/java/com/palantir/dialogue/core/QueuedChannel.java +++ b/dialogue-core/src/main/java/com/palantir/dialogue/core/QueuedChannel.java @@ -27,6 +27,7 @@ import com.palantir.dialogue.Endpoint; import com.palantir.dialogue.Request; import com.palantir.dialogue.Response; +import com.palantir.logsafe.SafeArg; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.util.List; @@ -37,6 +38,8 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.atomic.AtomicInteger; import org.immutables.value.Value; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A {@link Channel} that queues requests while the underlying {@link LimitedChannel} is unable to accept any new @@ -57,7 +60,7 @@ * TODO(jellis): record metrics for queue sizes, num requests in flight, time spent in queue, etc. */ final class QueuedChannel implements Channel { - + private static final Logger log = LoggerFactory.getLogger(QueuedChannel.class); private static final Executor DIRECT = MoreExecutors.directExecutor(); private final BlockingDeque queuedCalls; @@ -114,8 +117,13 @@ private void onCompletion() { * Try to schedule as many tasks as possible. Called when requests are submitted and when they complete. */ private void schedule() { + int numScheduled = 0; while (scheduleNextTask()) { - // Do nothing + numScheduled++; + } + + if (log.isDebugEnabled()) { + log.debug("Scheduled {} requests", SafeArg.of("numScheduled", numScheduled)); } } diff --git a/dialogue-core/src/test/java/com/palantir/dialogue/core/ConcurrencyLimitedChannelTest.java b/dialogue-core/src/test/java/com/palantir/dialogue/core/ConcurrencyLimitedChannelTest.java index 6fe155515..08eeb772b 100644 --- a/dialogue-core/src/test/java/com/palantir/dialogue/core/ConcurrencyLimitedChannelTest.java +++ b/dialogue-core/src/test/java/com/palantir/dialogue/core/ConcurrencyLimitedChannelTest.java @@ -23,6 +23,7 @@ import com.google.common.util.concurrent.SettableFuture; import com.netflix.concurrency.limits.Limiter; +import com.netflix.concurrency.limits.limiter.SimpleLimiter; import com.palantir.dialogue.Channel; import com.palantir.dialogue.Endpoint; import com.palantir.dialogue.Request; @@ -47,7 +48,7 @@ public class ConcurrencyLimitedChannelTest { private Channel delegate; @Mock - private Limiter limiter; + private SimpleLimiter limiter; @Mock private Limiter.Listener listener; diff --git a/settings.gradle b/settings.gradle index ce02a0370..21e1c271f 100644 --- a/settings.gradle +++ b/settings.gradle @@ -6,3 +6,4 @@ include 'dialogue-java-client' include 'dialogue-okhttp-client' include 'dialogue-serde' include 'dialogue-target' +include 'simulation' diff --git a/simulation/build.gradle b/simulation/build.gradle new file mode 100644 index 000000000..9a5589350 --- /dev/null +++ b/simulation/build.gradle @@ -0,0 +1,24 @@ +versionsLock { + testProject() +} + +dependencies { + implementation project(':dialogue-core') + implementation project(':dialogue-target') + implementation 'org.jmock:jmock' + implementation 'org.knowm.xchart:xchart' + + testImplementation 'com.palantir.safe-logging:preconditions-assertj' + testImplementation 'junit:junit' + testImplementation 'org.assertj:assertj-core' + testImplementation 'org.mockito:mockito-core' + + testRuntimeOnly 'org.slf4j:slf4j-simple' + + annotationProcessor 'org.immutables:value' + compile 'org.immutables:value::annotations' +} + +tasks.withType(JavaCompile) { + options.errorprone.errorproneArgs += '-Xep:Slf4jLogsafeArgs:OFF' +} diff --git a/simulation/src/main/java/com/palantir/dialogue/core/Benchmark.java b/simulation/src/main/java/com/palantir/dialogue/core/Benchmark.java new file mode 100644 index 000000000..53228d5f6 --- /dev/null +++ b/simulation/src/main/java/com/palantir/dialogue/core/Benchmark.java @@ -0,0 +1,263 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.codahale.metrics.Snapshot; +import com.google.common.util.concurrent.FutureCallback; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.MoreExecutors; +import com.google.common.util.concurrent.SettableFuture; +import com.palantir.dialogue.Channel; +import com.palantir.dialogue.Endpoint; +import com.palantir.dialogue.Request; +import com.palantir.dialogue.Response; +import com.palantir.logsafe.Preconditions; +import java.time.Duration; +import java.util.Map; +import java.util.Random; +import java.util.TreeMap; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.stream.LongStream; +import java.util.stream.Stream; +import org.immutables.value.Value; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Exercises the given {@link Channel} using a pre-defined number of requests, all scheduled on the + * {@link Simulation} executor. + */ +public final class Benchmark { + private static final Logger log = LoggerFactory.getLogger(Benchmark.class); + static final String REQUEST_ID_HEADER = "simulation-req-id"; + + private Simulation simulation; + private Channel channelUnderTest; + private Duration delayBetweenRequests; + private Stream requestStream; + private Function requestSupplier = Benchmark::constructRequest; + private ShouldStopPredicate benchmarkFinished; + + private Benchmark() {} + + static Benchmark builder() { + return new Benchmark(); + } + + public Benchmark requestsPerSecond(int rps) { + delayBetweenRequests = Duration.ofSeconds(1).dividedBy(rps); + return this; + } + + public Benchmark numRequests(long numRequests) { + Preconditions.checkState(requestStream == null, "Already set up requests"); + requestStream = infiniteRequests(delayBetweenRequests).limit(numRequests); + stopWhenNumReceived(numRequests); + return this; + } + + public Benchmark sendUntil(Duration cutoff) { + Preconditions.checkState(requestStream == null, "Already set up requests"); + long num = cutoff.toNanos() / delayBetweenRequests.toNanos(); + return numRequests(num); + } + + public Benchmark randomEndpoints(Endpoint... endpoints) { + return endpoints(true, endpoints); + } + + public Benchmark endpoints(boolean randomize, Endpoint... endpoints) { + Preconditions.checkNotNull(requestStream, "Must call sendUntil or numRequests first"); + Random pseudoRandom = new Random(21876781263L); + requestStream = requestStream.map(req -> { + int index = randomize ? pseudoRandom.nextInt(endpoints.length) : (int) (req.number() % endpoints.length); + return ImmutableScheduledRequest.builder() + .from(req) + .endpoint(endpoints[index]) + .build(); + }); + return this; + } + + public Benchmark simulation(Simulation sim) { + simulation = sim; + return this; + } + + public Benchmark channel(Channel value) { + channelUnderTest = value; + return this; + } + + private Benchmark stopWhenNumReceived(long numReceived) { + SettableFuture future = SettableFuture.create(); + benchmarkFinished = new ShouldStopPredicate() { + @Override + public SettableFuture getFuture() { + return future; + } + + @Override + public void update(Duration _time, long _requestsStarted, long responsesReceived) { + if (responsesReceived >= numReceived) { + future.set(null); + } + } + }; + return this; + } + + @SuppressWarnings("FutureReturnValueIgnored") + public Benchmark abortAfter(Duration cutoff) { + simulation.schedule( + () -> { + benchmarkFinished.getFuture().set(null); + }, + cutoff.toNanos(), + TimeUnit.NANOSECONDS); + return this; + } + + public BenchmarkResult run() { + ListenableFuture result = schedule(); + simulation.runClockToInfinity(); + return Futures.getUnchecked(result); + } + + @SuppressWarnings("FutureReturnValueIgnored") + public ListenableFuture schedule() { + HistogramChannel histogramChannel = new HistogramChannel(simulation, channelUnderTest); + + long[] requestsStarted = {0}; + long[] responsesReceived = {0}; + Map statusCodes = new TreeMap<>(); + + requestStream.forEach(req -> { + FutureCallback accumulateStatusCodes = new FutureCallback() { + @Override + public void onSuccess(Response response) { + statusCodes.compute(Integer.toString(response.code()), (c, num) -> num == null ? 1 : num + 1); + } + + @Override + public void onFailure(Throwable throwable) { + log.warn("Benchmark onFailure requestNum={}", req.number(), throwable); + statusCodes.compute(throwable.getMessage(), (c, num) -> num == null ? 1 : num + 1); + } + }; + + simulation.schedule( + () -> { + log.debug( + "time={} starting num={} {}", simulation.clock().read(), req.number(), req); + ListenableFuture future = histogramChannel.execute(req.endpoint(), req.request()); + requestsStarted[0] += 1; + + Futures.addCallback(future, accumulateStatusCodes, MoreExecutors.directExecutor()); + future.addListener( + () -> { + responsesReceived[0] += 1; + benchmarkFinished.update( + Duration.ofNanos(simulation.clock().read()), + requestsStarted[0], + responsesReceived[0]); + }, + MoreExecutors.directExecutor()); + }, + req.sendTime().toNanos(), + TimeUnit.NANOSECONDS); + }); + + benchmarkFinished.getFuture().addListener(simulation.metrics()::report, MoreExecutors.directExecutor()); + + return Futures.transform( + benchmarkFinished.getFuture(), + v -> ImmutableBenchmarkResult.builder() + .clientHistogram(histogramChannel.getHistogram().getSnapshot()) + .endTime(Duration.ofNanos(simulation.clock().read())) + .statusCodes(statusCodes) + .successPercentage( + Math.round(statusCodes.getOrDefault("200", 0) * 1000d / requestsStarted[0]) / 10d) + .numSent(requestsStarted[0]) + .numReceived(responsesReceived[0]) + .build(), + MoreExecutors.directExecutor()); + } + + private Stream infiniteRequests(Duration interval) { + return LongStream.iterate(0, current -> current + 1).mapToObj(number -> { + return ImmutableScheduledRequest.builder() + .number(number) + .request(requestSupplier.apply(number)) + .sendTime(interval.multipliedBy(number)) + .build(); + }); + } + + @Value.Immutable + interface BenchmarkResult { + Snapshot clientHistogram(); + + Duration endTime(); + + Map statusCodes(); + + double successPercentage(); + + long numSent(); + + long numReceived(); + } + + /** + * Determines when the benchmark terminates - useful when a server is behaving like a black hole (not returning). + */ + interface ShouldStopPredicate { + /** Called once to set up a future - when this resolves, the benchmark will stop. */ + SettableFuture getFuture(); + + /** + * Called after every request to give this predicate the opportunity to terminate the benchmark by + * resolving the SettableFuture. + */ + void update(Duration time, long requestsStarted, long responsesReceived); + } + + @Value.Immutable + interface ScheduledRequest { + Endpoint ENDPOINT = SimulationUtils.endpoint("endpoint"); + + long number(); + + Duration sendTime(); + + Request request(); + + @Value.Default + default Endpoint endpoint() { + return ENDPOINT; + } + } + + private static Request constructRequest(long number) { + return Request.builder() + .putHeaderParams(REQUEST_ID_HEADER, Long.toString(number)) + .build(); + } +} diff --git a/simulation/src/main/java/com/palantir/dialogue/core/CodahaleClock.java b/simulation/src/main/java/com/palantir/dialogue/core/CodahaleClock.java new file mode 100644 index 000000000..9a39f2d34 --- /dev/null +++ b/simulation/src/main/java/com/palantir/dialogue/core/CodahaleClock.java @@ -0,0 +1,41 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.codahale.metrics.Clock; +import com.github.benmanes.caffeine.cache.Ticker; +import com.google.common.annotations.VisibleForTesting; +import java.util.concurrent.TimeUnit; + +@VisibleForTesting +final class CodahaleClock extends Clock { + private final Ticker caffeineTicker; + + CodahaleClock(Ticker caffeineTicker) { + this.caffeineTicker = caffeineTicker; + } + + @Override + public long getTick() { + return caffeineTicker.read(); // effectively System.nanoTime() + } + + @Override + public long getTime() { + return TimeUnit.MILLISECONDS.convert(getTick(), TimeUnit.NANOSECONDS); + } +} diff --git a/simulation/src/main/java/com/palantir/dialogue/core/EventMarkers.java b/simulation/src/main/java/com/palantir/dialogue/core/EventMarkers.java new file mode 100644 index 000000000..67470da74 --- /dev/null +++ b/simulation/src/main/java/com/palantir/dialogue/core/EventMarkers.java @@ -0,0 +1,40 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.github.benmanes.caffeine.cache.Ticker; +import java.util.Collections; +import java.util.Map; +import java.util.TreeMap; + +/** Purely cosmetic - to make our graphs easier to understand. */ +public final class EventMarkers { + private final Ticker clock; + private final Map events = new TreeMap<>(); + + public EventMarkers(Ticker clock) { + this.clock = clock; + } + + void event(String string) { + events.put(clock.read(), string); + } + + public Map getEvents() { + return Collections.unmodifiableMap(events); + } +} diff --git a/simulation/src/main/java/com/palantir/dialogue/core/HistogramChannel.java b/simulation/src/main/java/com/palantir/dialogue/core/HistogramChannel.java new file mode 100644 index 000000000..e5e24ddb7 --- /dev/null +++ b/simulation/src/main/java/com/palantir/dialogue/core/HistogramChannel.java @@ -0,0 +1,52 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.codahale.metrics.Histogram; +import com.codahale.metrics.SlidingTimeWindowArrayReservoir; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.MoreExecutors; +import com.palantir.dialogue.Channel; +import com.palantir.dialogue.Endpoint; +import com.palantir.dialogue.Request; +import com.palantir.dialogue.Response; +import java.util.concurrent.TimeUnit; + +final class HistogramChannel implements Channel { + private final Simulation simulation; + private final Histogram histogram; + private final Channel channel; + + HistogramChannel(Simulation simulation, Channel channel) { + this.simulation = simulation; + this.channel = channel; + histogram = new Histogram(new SlidingTimeWindowArrayReservoir(1, TimeUnit.DAYS, simulation.codahaleClock())); + } + + /** Unit is nanos. */ + public Histogram getHistogram() { + return histogram; + } + + @Override + public ListenableFuture execute(Endpoint endpoint, Request request) { + long start = simulation.clock().read(); + ListenableFuture future = channel.execute(endpoint, request); + future.addListener(() -> histogram.update(simulation.clock().read() - start), MoreExecutors.directExecutor()); + return future; + } +} diff --git a/simulation/src/main/java/com/palantir/dialogue/core/Simulation.java b/simulation/src/main/java/com/palantir/dialogue/core/Simulation.java new file mode 100644 index 000000000..de7395221 --- /dev/null +++ b/simulation/src/main/java/com/palantir/dialogue/core/Simulation.java @@ -0,0 +1,114 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.github.benmanes.caffeine.cache.Ticker; +import com.google.common.util.concurrent.ListenableScheduledFuture; +import com.google.common.util.concurrent.ListeningScheduledExecutorService; +import com.google.common.util.concurrent.MoreExecutors; +import java.time.Duration; +import java.util.concurrent.Callable; +import java.util.concurrent.TimeUnit; +import org.jmock.lib.concurrent.DeterministicScheduler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Combined ScheduledExecutorService and Clock. All tasks get executed on the main thread. */ +final class Simulation { + private static final Logger log = LoggerFactory.getLogger(Simulation.class); + + private final DeterministicScheduler deterministicExecutor = new DeterministicScheduler(); + private final ListeningScheduledExecutorService listenableExecutor = + MoreExecutors.listeningDecorator(deterministicExecutor); + private final TestCaffeineTicker ticker = new TestCaffeineTicker(); + private final SimulationMetrics metrics = new SimulationMetrics(this); + private final CodahaleClock codahaleClock = new CodahaleClock(ticker); + private final EventMarkers eventMarkers = new EventMarkers(ticker); + + Simulation() { + Thread.currentThread().setUncaughtExceptionHandler((t, e) -> log.error("Uncaught throwable", e)); + } + + public ListenableScheduledFuture schedule(Callable command, long delay, TimeUnit unit) { + long scheduleTime = ticker.read(); + long delayNanos = unit.toNanos(delay); + + RuntimeException exceptionForStackTrace = new RuntimeException(); + + return listenableExecutor.schedule( + () -> { + try { + ticker.advanceTo(Duration.ofNanos(scheduleTime + delayNanos)); + return command.call(); + } catch (Throwable e) { + e.addSuppressed(exceptionForStackTrace); + throw e; + } + }, + delay, + unit); + } + + public ListenableScheduledFuture schedule(Runnable command, long delay, TimeUnit unit) { + return schedule( + () -> { + command.run(); + return null; + }, + delay, + unit); + } + + public Ticker clock() { + return ticker; // read only! + } + + public CodahaleClock codahaleClock() { + return codahaleClock; + } + + public SimulationMetrics metrics() { + return metrics; + } + + public EventMarkers events() { + return eventMarkers; + } + + public void runClockToInfinity() { + deterministicExecutor.tick(Duration.ofDays(1).toNanos(), TimeUnit.NANOSECONDS); + } + + private static final class TestCaffeineTicker implements Ticker { + private long nanos = 0; + + @Override + public long read() { + return nanos; + } + + void advanceTo(Duration duration) { + long newNanos = duration.toNanos(); + if (newNanos < nanos) { + log.error("TestTicker time may not go backwards current={} new={}", nanos, newNanos); + return; + } + + nanos = newNanos; + } + } +} diff --git a/simulation/src/main/java/com/palantir/dialogue/core/SimulationMetrics.java b/simulation/src/main/java/com/palantir/dialogue/core/SimulationMetrics.java new file mode 100644 index 000000000..b2f6fd8e9 --- /dev/null +++ b/simulation/src/main/java/com/palantir/dialogue/core/SimulationMetrics.java @@ -0,0 +1,234 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Meter; +import com.codahale.metrics.Metric; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.LoadingCache; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import com.palantir.logsafe.Preconditions; +import com.palantir.logsafe.SafeArg; +import java.io.BufferedWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.knowm.xchart.BitmapEncoder; +import org.knowm.xchart.XYChart; +import org.knowm.xchart.XYChartBuilder; +import org.knowm.xchart.XYSeries; +import org.knowm.xchart.style.Styler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This is a combination metric registry, reporter, logger and renderer, all hooked up to + * {@link Simulation#clock()}. Capable of reporting PNGs (although this is slow). + */ +final class SimulationMetrics { + private static final Logger log = LoggerFactory.getLogger(SimulationMetrics.class); + + private final Simulation simulation; + private Map metrics = new HashMap<>(); + + // each of these is a named column + private final LoadingCache> measurements = + Caffeine.newBuilder().build(name -> new ArrayList<>(Collections.nCopies(numMeasurements(), 0d))); + + private static final String X_AXIS = "time_sec"; + + SimulationMetrics(Simulation simulation) { + this.simulation = simulation; + } + + private int numMeasurements() { + List xAxis = measurements.getIfPresent(X_AXIS); + return (xAxis != null) ? xAxis.size() : 0; + } + + public Meter meter(String name) { + if (!metrics.containsKey(name)) { + Meter freshMeter = new Meter(simulation.codahaleClock()); + metrics.put(name, freshMeter); + return freshMeter; + } else { + // have to support 'get existing' because multiple servers inside a composite might be named 'node1' + Metric metric = metrics.get(name); + Preconditions.checkState( + metric instanceof Meter, "Existing metric wasn't a meter", SafeArg.of("name", name)); + return (Meter) metric; + } + } + + public Counter counter(String name) { + if (!metrics.containsKey(name)) { + Counter fresh = new Counter(); + metrics.put(name, fresh); + return fresh; + } else { + Metric metric = metrics.get(name); + Preconditions.checkState( + metric instanceof Counter, "Existing metric wasn't a Counter", SafeArg.of("name", name)); + return (Counter) metric; + } + } + + // @CheckReturnValue + // public Runnable startReporting(Duration interval) { + // metrics = ImmutableMap.copyOf(metrics); // just to make sure nobody tries to create any more after we start! + // AtomicBoolean keepRunning = new AtomicBoolean(true); + // reportInfinitely(keepRunning, interval); + // return () -> keepRunning.set(false); + // } + // + // @SuppressWarnings("FutureReturnValueIgnored") + // private void reportInfinitely(AtomicBoolean keepRunning, Duration interval) { + // report(); + // + // if (keepRunning.get()) { + // simulation.schedule( + // () -> { + // reportInfinitely(keepRunning, interval); + // return null; + // }, + // interval.toNanos(), + // TimeUnit.NANOSECONDS); + // } + // } + + public void report() { + long nanos = simulation.clock().read(); + double seconds = TimeUnit.MILLISECONDS.convert(nanos, TimeUnit.NANOSECONDS) / 1000d; + + metrics.forEach((name, metric) -> { + if (metric instanceof Meter) { + measurements.get(name + ".1m").add(((Meter) metric).getOneMinuteRate()); + measurements.get(name + ".count").add((double) ((Meter) metric).getCount()); + } else if (metric instanceof Counter) { + measurements.get(name + ".count").add((double) ((Counter) metric).getCount()); + } else { + log.error("Unknown metric type {} {}", name, metric); + } + }); + + measurements.get(X_AXIS).add(seconds); + + // Set differentSizes = + // measurements.asMap().values().stream().map(List::size).collect(Collectors.toSet()); + // if (differentSizes.size() > 1) { + // String info = measurements.asMap().entrySet().stream() + // .map(e -> e.getKey() + ":" + e.getValue().size()) + // .collect(Collectors.joining(" ")); + // System.out.println(info); + // } + } + + public void dumpCsv(Path file) { + ConcurrentMap> map = measurements.asMap(); + List xAxis = map.get(X_AXIS); + List columns = ImmutableList.copyOf(Sets.difference(map.keySet(), ImmutableSet.of(X_AXIS))); + + try (BufferedWriter writer = Files.newBufferedWriter( + file, StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) { + + writer.write(X_AXIS); + for (String column : columns) { + writer.write(','); + writer.write(column); + } + writer.newLine(); + + for (int i = 0; i < xAxis.size(); i++) { + writer.write(Double.toString(xAxis.get(i))); + for (String column : columns) { + writer.write(','); + writer.write(Double.toString(map.get(column).get(i))); + } + writer.newLine(); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public XYChart chart(Pattern metricNameRegex) { + XYChart chart = + new XYChartBuilder().width(800).height(600).xAxisTitle(X_AXIS).build(); + + chart.getStyler().setLegendPosition(Styler.LegendPosition.InsideNW); + chart.getStyler().setDefaultSeriesRenderStyle(XYSeries.XYSeriesRenderStyle.Line); + chart.getStyler().setMarkerSize(3); + chart.getStyler().setYAxisLabelAlignment(Styler.TextAlignment.Right); + chart.getStyler().setPlotMargin(0); + chart.getStyler().setPlotContentSize(.95); + chart.getStyler().setToolTipsEnabled(true); + chart.getStyler().setToolTipsAlwaysVisible(true); + + Map> map = measurements.asMap(); + double[] xAxis = map.get(X_AXIS).stream().mapToDouble(d -> d).toArray(); + List columns = map.keySet().stream() + .filter(name -> !name.equals(X_AXIS)) + .filter(metricNameRegex.asPredicate()) + .sorted() + .collect(Collectors.toList()); + String[] nullToolTips = Collections.nCopies(xAxis.length, null).toArray(new String[] {}); + + for (String column : columns) { + double[] series = map.get(column).stream().mapToDouble(d -> d).toArray(); + Preconditions.checkState( + series.length == xAxis.length, + "Series must all be same length", + SafeArg.of("column", column), + SafeArg.of("xaxis", xAxis.length), + SafeArg.of("length", series.length)); + chart.addSeries(column, xAxis, series).setToolTips(nullToolTips); + } + + if (!simulation.events().getEvents().isEmpty()) { + double[] eventXs = simulation.events().getEvents().keySet().stream() + .mapToDouble(nanos -> TimeUnit.MILLISECONDS.convert(nanos, TimeUnit.NANOSECONDS) / 1000d) + .toArray(); + double[] eventYs = new double[eventXs.length]; + String[] strings = simulation.events().getEvents().values().stream().toArray(String[]::new); + XYSeries what = chart.addSeries(" ", eventXs, eventYs); + what.setToolTips(strings); + what.setXYSeriesRenderStyle(XYSeries.XYSeriesRenderStyle.Scatter); + } + + return chart; + } + + public static void png(String file, XYChart... charts) throws IOException { + int rows = charts.length; + int cols = 1; + BitmapEncoder.saveBitmap(ImmutableList.copyOf(charts), rows, cols, file, BitmapEncoder.BitmapFormat.PNG); + } +} diff --git a/simulation/src/main/java/com/palantir/dialogue/core/SimulationServer.java b/simulation/src/main/java/com/palantir/dialogue/core/SimulationServer.java new file mode 100644 index 000000000..da5661b7f --- /dev/null +++ b/simulation/src/main/java/com/palantir/dialogue/core/SimulationServer.java @@ -0,0 +1,278 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Meter; +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.FutureCallback; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.MoreExecutors; +import com.palantir.dialogue.Channel; +import com.palantir.dialogue.Endpoint; +import com.palantir.dialogue.Request; +import com.palantir.dialogue.Response; +import com.palantir.logsafe.Preconditions; +import com.palantir.logsafe.exceptions.SafeRuntimeException; +import java.time.Duration; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Predicate; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A fake 'server' which just schedules responses on the {@link Simulation} executor. + * Responses are produced by the first {@link ServerHandler} that returns a future - these can be used to change + * behaviour at different times points (e.g. a fast handler until 3 seconds, then a slow handler after). + */ +final class SimulationServer implements Channel { + private static final Logger log = LoggerFactory.getLogger(SimulationServer.class); + + private final Simulation simulation; + private final String metricName; + private final Counter globalActiveRequests; + private final ImmutableList handlers; + private long cumulativeServerTimeNanos = 0; + + private SimulationServer(Builder builder) { + this.metricName = Preconditions.checkNotNull(builder.metricName, "metricName"); + this.simulation = Preconditions.checkNotNull(builder.simulation, "simulation"); + this.globalActiveRequests = simulation.metrics().counter(String.format("[%s] activeRequests", metricName)); + Preconditions.checkState(!builder.handlers.isEmpty(), "Handlers can't be empty"); + this.handlers = ImmutableList.copyOf(builder.handlers); + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public ListenableFuture execute(Endpoint endpoint, Request request) { + Meter perEndpointRequests = + simulation.metrics().meter(String.format("[%s] [%s] request", metricName, endpoint.endpointName())); + + globalActiveRequests.inc(); + perEndpointRequests.mark(); + simulation.metrics().report(); + + for (ServerHandler handler : handlers) { + long beforeNanos = simulation.clock().read(); + Optional> maybeResp = handler.maybeExecute(this, endpoint, request); + if (!maybeResp.isPresent()) { + continue; + } + + ListenableFuture resp = maybeResp.get(); + resp.addListener( + () -> { + globalActiveRequests.dec(); + cumulativeServerTimeNanos += simulation.clock().read() - beforeNanos; + }, + MoreExecutors.directExecutor()); + Futures.addCallback( + resp, + new FutureCallback() { + @Override + public void onSuccess(Response result) { + log.debug( + "time={} server={} status={} id={}", + Duration.ofNanos(simulation.clock().read()), + metricName, + result.code(), + request != null ? request.headerParams().get(Benchmark.REQUEST_ID_HEADER) : null); + } + + @Override + public void onFailure(Throwable _throwable) {} + }, + MoreExecutors.directExecutor()); + + return resp; + } + + log.error("No handler available for request {}", request); + globalActiveRequests.dec(); + return Futures.immediateFailedFuture(new SafeRuntimeException("No handler")); + } + + @Override + public String toString() { + return metricName; + } + + // note this is misleading for the black_hole case, because it only increases when a task _returns_ + public Duration getCumulativeServerTime() { + return Duration.ofNanos(cumulativeServerTimeNanos); + } + + public static class Builder { + private String metricName; + private Simulation simulation; + private ImmutableList handlers = ImmutableList.of(); + + Builder metricName(String value) { + metricName = value; + return this; + } + + Builder simulation(Simulation value) { + simulation = value; + return this; + } + + Builder handler(Function configureFunc) { + handlers = ImmutableList.builder() + .addAll(handlers) + .add(configureFunc.apply(new ServerHandler())) + .build(); + return this; + } + + Builder handler(Endpoint endpoint, Function configureFunc) { + return handler(h -> { + HandlerBuilder0 builder = h.endpoint(endpoint); + return configureFunc.apply(builder); + }); + } + + Builder until(Duration cutover) { + return until(cutover, null); + } + + Builder until(Duration cutover, String message) { + long cutoverNanos = cutover.toNanos(); + + for (ServerHandler handler : handlers) { + Predicate existingPredicate = handler.predicate; + boolean[] switched = {false}; + handler.predicate = endpoint -> { + // we just add in this sneaky little precondition to all the existing handlers! + if (simulation.clock().read() >= cutoverNanos) { + if (message != null && !switched[0]) { + simulation.events().event(message); + switched[0] = true; + } + return false; + } + + return existingPredicate.test(endpoint); + }; + } + + return this; + } + + SimulationServer build() { + return new SimulationServer(this); + } + } + + /** Declarative server handler, built using a staged-builder. */ + public static class ServerHandler implements HandlerBuilder0, HandlerBuilder1 { + + private Predicate predicate = endpoint -> true; + private Function responseFunction; + private ResponseTimeFunction responseTimeFunction; + + public Optional> maybeExecute( + SimulationServer server, Endpoint endpoint, Request _request) { + if (predicate != null && !predicate.test(endpoint)) { + return Optional.empty(); + } + + Duration responseTime = responseTimeFunction.getResponseTime(server); + return Optional.of(server.simulation.schedule( + () -> responseFunction.apply(server), responseTime.toNanos(), TimeUnit.NANOSECONDS)); + } + + @Override + public HandlerBuilder0 endpoint(Endpoint endpoint) { + predicate = e -> e.equals(endpoint); + return this; + } + + @Override + public HandlerBuilder1 response(Function func) { + this.responseFunction = func; + return this; + } + + @Override + public ServerHandler responseTime(ResponseTimeFunction func) { + this.responseTimeFunction = func; + return this; + } + } + + public interface HandlerBuilder0 { + HandlerBuilder0 endpoint(Endpoint endpoint); + + HandlerBuilder1 response(Function func); + + default HandlerBuilder1 response(Response resp) { + return response(unused -> resp); + } + + default HandlerBuilder1 response(int status) { + return response(SimulationUtils.response(status, "1.0.0")); + } + + default HandlerBuilder1 respond200UntilCapacity(int errorStatus, int capacity) { + return response(server -> { + if (server.globalActiveRequests.getCount() > capacity) { + return SimulationUtils.response(errorStatus, "1.0.0"); + } else { + return SimulationUtils.response(200, "1.0.0"); + } + }); + } + } + + public interface HandlerBuilder1 { + ServerHandler responseTime(ResponseTimeFunction func); + + /** BEWARE: servers don't actually behave like this. */ + default ServerHandler responseTime(Duration duration) { + return responseTime(server -> duration); + } + + /** + * This heuristic delivers the goal 'responseTime' only when the server is under zero load. At a certain + * number of concurrent requests (the 'capacity'), the response time will double. Above this, the server + * returns 5x response time to simulate overloading. + */ + default ServerHandler linearResponseTime(Duration bestCase, int capacity) { + return responseTime(server -> { + long expected = bestCase.toNanos(); + long inflight = server.globalActiveRequests.getCount(); + + if (inflight > capacity) { + return Duration.ofNanos(5 * expected); // above stated 'capacity', server dies a brutal death + } + + return Duration.ofNanos(expected + (expected * inflight) / capacity); + }); + } + } + + interface ResponseTimeFunction { + Duration getResponseTime(SimulationServer server); + } +} diff --git a/simulation/src/main/java/com/palantir/dialogue/core/SimulationUtils.java b/simulation/src/main/java/com/palantir/dialogue/core/SimulationUtils.java new file mode 100644 index 000000000..a3302f477 --- /dev/null +++ b/simulation/src/main/java/com/palantir/dialogue/core/SimulationUtils.java @@ -0,0 +1,87 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.palantir.dialogue.Endpoint; +import com.palantir.dialogue.HttpMethod; +import com.palantir.dialogue.Response; +import com.palantir.dialogue.UrlBuilder; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.util.List; +import java.util.Map; + +final class SimulationUtils { + + public static Response response(int status, String version) { + return new Response() { + @Override + public InputStream body() { + return new ByteArrayInputStream(new byte[0]); + } + + @Override + public int code() { + return status; + } + + @Override + public Map> headers() { + if (version == null) { + return ImmutableMap.of(); + } + return ImmutableMap.of("server", ImmutableList.of("foundry-catalog/" + version)); + } + }; + } + + public static Endpoint endpoint(String name) { + return new Endpoint() { + @Override + public void renderPath(Map _params, UrlBuilder _url) {} + + @Override + public HttpMethod httpMethod() { + return HttpMethod.GET; + } + + @Override + public String serviceName() { + return "service"; + } + + @Override + public String endpointName() { + return name; + } + + @Override + public String version() { + return "1.0.0"; + } + + @Override + public String toString() { + return endpointName(); + } + }; + } + + private SimulationUtils() {} +} diff --git a/simulation/src/test/java/com/palantir/dialogue/core/SimulationTest.java b/simulation/src/test/java/com/palantir/dialogue/core/SimulationTest.java new file mode 100644 index 000000000..cd4653114 --- /dev/null +++ b/simulation/src/test/java/com/palantir/dialogue/core/SimulationTest.java @@ -0,0 +1,461 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.google.common.base.Stopwatch; +import com.google.common.base.Suppliers; +import com.palantir.dialogue.Channel; +import com.palantir.dialogue.Endpoint; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.knowm.xchart.XYChart; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Simulates client heuristics defined in {@link Strategy} against {@link SimulationServer} nodes. These don't + * actually bind to ports, they just schedule responses to return at some point. All scheduling happens on the + * deterministic scheduler in {@link Simulation} (on the main thread), so hours of requests can be simulated instantly. + * + * These simulations only reveal characteristics and emergent behaviour of the clients - they can't be used to + * compare how efficient (in terms of CPU or allocations) clients are - a dedicated microbenchmarking harness should + * be used for this instead. + * + * We have the following goals. + *
    + *
  1. Minimize user-perceived failures + *
  2. Minimize user-perceived mean response time + *
  3. Minimize total server CPU time spent + *
+ * + * Heuristics should work sensibly for a variety of server response times (incl 1ms, 10ms, 100ms and 1s). + * We usually have O(10) upstream nodes. Live-reloading node list shouldn't go crazy. + * + * The following scenarios are important for clients to handle. + *
    + *
  1. Normal operation: some node node is maybe 10-20% slower (e.g. maybe it's further away) + *
  2. Fast failures (500/503/429) with revert: upgrading one node means everything gets insta 500'd (also 503 / + * 429) + *
  3. Slow failures (500/503/429) with revert: upgrading one node means all requests get slow and also return + * bad errors + *
  4. Drastic slowdown with revert: One node suddenly starts taking 10 seconds to return (but not throwing errors) + *
  5. All nodes return 500s briefly (ideally clients could queue up if they're willing to wait) + *
  6. Black hole: one node just starts accepting requests but never returning responses + *
+ */ +@RunWith(Parameterized.class) +public class SimulationTest { + private static final Logger log = LoggerFactory.getLogger(SimulationTest.class); + + @Parameterized.Parameters(name = "{0}") + public static Strategy[] data() { + return Strategy.values(); + } + + @Parameterized.Parameter + public Strategy strategy; + + @Rule + public final TestName testName = new TestName(); + + private final Simulation simulation = new Simulation(); + private Supplier> servers; + private Benchmark.BenchmarkResult result; + + @Test + public void simplest_possible_case() { + // real servers don't scale like this - see later tests + servers = servers( + SimulationServer.builder() + .metricName("fast") + .simulation(simulation) + .handler(h -> h.response(200).responseTime(Duration.ofMillis(600))) + .build(), + SimulationServer.builder() + .metricName("medium") + .simulation(simulation) + .handler(h -> h.response(200).responseTime(Duration.ofMillis(800))) + .build(), + SimulationServer.builder() + .metricName("slightly_slow") + .simulation(simulation) + .handler(h -> h.response(200).responseTime(Duration.ofMillis(1000))) + .build()); + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .requestsPerSecond(50) + .sendUntil(Duration.ofSeconds(20)) + .channel(channel) + .simulation(simulation) + .run(); + } + + @Test + public void slowdown_and_error_thresholds() { + int errorThreshold = 40; + int slowdownThreshold = 30; + servers = servers( + SimulationServer.builder() + .metricName("fast") + .simulation(simulation) + .handler(h -> h.respond200UntilCapacity(500, errorThreshold) + .linearResponseTime(Duration.ofMillis(600), slowdownThreshold)) + .build(), + SimulationServer.builder() + .metricName("medium") + .simulation(simulation) + .handler(h -> h.respond200UntilCapacity(500, errorThreshold) + .linearResponseTime(Duration.ofMillis(800), slowdownThreshold)) + .build(), + SimulationServer.builder() + .metricName("slightly_slow") + .simulation(simulation) + .handler(h -> h.respond200UntilCapacity(500, errorThreshold) + .linearResponseTime(Duration.ofMillis(1000), slowdownThreshold)) + .build()); + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .requestsPerSecond(50) + .sendUntil(Duration.ofSeconds(20)) + .channel(channel) + .simulation(simulation) + .run(); + } + + @Test + public void slow_503s_then_revert() { + int capacity = 60; + servers = servers( + SimulationServer.builder() + .metricName("fast") + .simulation(simulation) + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .build(), + SimulationServer.builder() + .metricName("slow_failures_then_revert") + .simulation(simulation) + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .until(Duration.ofSeconds(3), "slow 503s") + .handler(h -> h.response(503).linearResponseTime(Duration.ofSeconds(1), capacity)) + .until(Duration.ofSeconds(10), "revert") + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .build()); + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .requestsPerSecond(200) + .sendUntil(Duration.ofSeconds(15)) // something weird happens at 1811... bug in DeterministicScheduler? + .channel(channel) + .simulation(simulation) + .run(); + } + + @Test + public void fast_500s_then_revert() { + int capacity = 60; + servers = servers( + SimulationServer.builder() + .metricName("fast") + .simulation(simulation) + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .build(), + SimulationServer.builder() + .metricName("fast_500s_then_revert") + .simulation(simulation) + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .until(Duration.ofSeconds(3), "fast 500s") + .handler(h -> h.response(500).linearResponseTime(Duration.ofMillis(10), capacity)) + .until(Duration.ofSeconds(10), "revert") + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .build()); + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .requestsPerSecond(250) + .sendUntil(Duration.ofSeconds(15)) + .channel(channel) + .simulation(simulation) + .run(); + } + + @Test + public void drastic_slowdown() { + int capacity = 60; + servers = servers( + SimulationServer.builder() + .metricName("fast") + .simulation(simulation) + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .build(), + SimulationServer.builder() + .metricName("fast_then_slow_then_fast") + .simulation(simulation) + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .until(Duration.ofSeconds(3), "slow 200s") + .handler(h -> h.response(200).linearResponseTime(Duration.ofSeconds(10), capacity)) + .until(Duration.ofSeconds(10), "revert") + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(60), capacity)) + .build()); + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .requestsPerSecond(200) + .sendUntil(Duration.ofSeconds(20)) + .channel(channel) + .simulation(simulation) + .run(); + } + + @Test + public void all_nodes_500() { + servers = servers( + SimulationServer.builder() + .metricName("node1") + .simulation(simulation) + .handler(h -> h.response(500).responseTime(Duration.ofMillis(600))) + .until(Duration.ofSeconds(10), "revert badness") + .handler(h -> h.response(200).responseTime(Duration.ofMillis(600))) + .build(), + SimulationServer.builder() + .metricName("node2") + .simulation(simulation) + .handler(h -> h.response(500).responseTime(Duration.ofMillis(600))) + .until(Duration.ofSeconds(10), "revert badness") + .handler(h -> h.response(200).responseTime(Duration.ofMillis(600))) + .build()); + + // TODO(dfox): seems like traceid=req-1-attempt-1 happens 31 times??? + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .requestsPerSecond(10) + .sendUntil(Duration.ofSeconds(20)) + .channel(channel) + .simulation(simulation) + .run(); + } + + @Test + public void black_hole() { + servers = servers( + SimulationServer.builder() + .metricName("node1") + .simulation(simulation) + .handler(h -> h.response(200).responseTime(Duration.ofMillis(600))) + .build(), + SimulationServer.builder() + .metricName("node2_black_hole") + .simulation(simulation) + .handler(h -> h.response(200).responseTime(Duration.ofMillis(600))) + .until(Duration.ofSeconds(3), "black hole") + .handler(h -> h.response(200).responseTime(Duration.ofDays(1))) + .build()); + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .simulation(simulation) + .requestsPerSecond(20) + .sendUntil(Duration.ofSeconds(10)) + .abortAfter(Duration.ofSeconds(30)) // otherwise the test never terminates! + .channel(channel) + .run(); + } + + @Test + public void one_endpoint_dies_on_each_server() { + Endpoint endpoint1 = SimulationUtils.endpoint("e1"); + Endpoint endpoint2 = SimulationUtils.endpoint("e2"); + + servers = servers( + SimulationServer.builder() + .metricName("server_where_e1_breaks") + .simulation(simulation) + .handler(endpoint1, h -> h.response(200).responseTime(Duration.ofMillis(600))) + .handler(endpoint2, h -> h.response(200).responseTime(Duration.ofMillis(600))) + .until(Duration.ofSeconds(3), "e1 breaks") + .handler(endpoint1, h -> h.response(500).responseTime(Duration.ofMillis(600))) + .handler(endpoint2, h -> h.response(200).responseTime(Duration.ofMillis(600))) + .build(), + SimulationServer.builder() + .metricName("server_where_e2_breaks") + .simulation(simulation) + .handler(endpoint1, h -> h.response(200).responseTime(Duration.ofMillis(600))) + .handler(endpoint2, h -> h.response(200).responseTime(Duration.ofMillis(600))) + .until(Duration.ofSeconds(3), "e2 breaks") + .handler(endpoint1, h -> h.response(200).responseTime(Duration.ofMillis(600))) + .handler(endpoint2, h -> h.response(500).responseTime(Duration.ofMillis(600))) + .build()); + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .simulation(simulation) + .requestsPerSecond(51) + .sendUntil(Duration.ofSeconds(10)) + .randomEndpoints(endpoint1, endpoint2) + .abortAfter(Duration.ofMinutes(1)) + .channel(channel) + .run(); + } + + @Test + public void live_reloading() { + int capacity = 60; + servers = liveReloadingServers( + beginAt( + Duration.ZERO, + SimulationServer.builder() + .metricName("always_on") + .simulation(simulation) + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(600), capacity)) + .build()), + beginAt( + Duration.ZERO, + SimulationServer.builder() + .metricName("always_broken") + .simulation(simulation) + .handler(h -> h.response(500).linearResponseTime(Duration.ofMillis(600), capacity)) + .build()), + beginAt( + Duration.ofSeconds(5), + SimulationServer.builder() + .metricName("added_halfway") + .simulation(simulation) + .handler(h -> h.response(200).linearResponseTime(Duration.ofMillis(600), capacity)) + .build())); + + Channel channel = strategy.getChannel(simulation, servers); + result = Benchmark.builder() + .simulation(simulation) + .requestsPerSecond(40) + .sendUntil(Duration.ofSeconds(10)) + .channel(channel) + .run(); + } + + private Supplier> servers(SimulationServer... values) { + return Suppliers.memoize(() -> Arrays.asList(values)); + } + + /** Use the {@link #beginAt} method to simulate live-reloads. */ + private Supplier> liveReloadingServers( + Supplier>... serverSuppliers) { + return () -> Arrays.stream(serverSuppliers) + .map(Supplier::get) + .filter(Optional::isPresent) + .map(Optional::get) + .collect(Collectors.toList()); + } + + private Supplier> beginAt(Duration beginTime, SimulationServer server) { + boolean[] enabled = {false}; + return () -> { + if (simulation.clock().read() >= beginTime.toNanos()) { + if (!enabled[0]) { + enabled[0] = true; + simulation.events().event("new server: " + server); + } + return Optional.of(server); + } else { + return Optional.empty(); + } + }; + } + + @After + public void after() throws IOException { + Duration serverCpu = Duration.ofNanos(servers.get().stream() // live-reloading breaks this :( + .mapToLong(s -> s.getCumulativeServerTime().toNanos()) + .sum()); + long clientMeanNanos = (long) result.clientHistogram().getMean(); + double clientMeanMillis = TimeUnit.MICROSECONDS.convert(clientMeanNanos, TimeUnit.NANOSECONDS) / 1000d; + + // intentionally using tabs so that opening report.txt with 'cat' aligns columns nicely + String longSummary = String.format( + "success=%s%%\tclient_mean=%-15s\tserver_cpu=%-15s\treceived=%s/%s\tcodes=%s", + result.successPercentage(), + Duration.ofNanos(clientMeanNanos), + serverCpu, + result.numReceived(), + result.numSent(), + result.statusCodes()); + + Path txt = Paths.get("src/test/resources/" + testName.getMethodName() + ".txt"); + String pngPath = "src/test/resources/" + testName.getMethodName() + ".png"; + String onDisk = Files.exists(txt) ? new String(Files.readAllBytes(txt), StandardCharsets.UTF_8) : ""; + boolean txtChanged = !longSummary.equals(onDisk); + + if (txtChanged || !Files.exists(Paths.get(pngPath))) { + // only re-generate PNGs if the txt file changed (as they're slow af) + Stopwatch sw = Stopwatch.createStarted(); + Files.write(txt, longSummary.getBytes(StandardCharsets.UTF_8)); + + XYChart activeRequests = simulation.metrics().chart(Pattern.compile("active")); + activeRequests.setTitle(String.format( + "%s success=%.0f%% client_mean=%.1f ms server_cpu=%s", + strategy, result.successPercentage(), clientMeanMillis, serverCpu)); + XYChart serverRequestCount = simulation.metrics().chart(Pattern.compile("request.*count")); + XYChart clientStuff = simulation.metrics().chart(Pattern.compile("(refusals|starts).count")); + + SimulationMetrics.png(pngPath, activeRequests, serverRequestCount, clientStuff); + log.info("Generated {} ({} ms)", pngPath, sw.elapsed(TimeUnit.MILLISECONDS)); + } + } + + @AfterClass + public static void afterClass() throws IOException { + // squish all txt files together into one report to make it easier to compare during code review + try (Stream list = Files.list(Paths.get("src/test/resources"))) { + String report = list.filter( + p -> p.toString().endsWith(".txt") && !p.toString().endsWith("report.txt")) + .map(p -> { + try { + return String.format( + "%70s:\t%s%n", + p.getFileName().toString(), + new String(Files.readAllBytes(p), StandardCharsets.UTF_8)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }) + .sorted(Comparator.comparing(String::trim)) + .collect(Collectors.joining()); + Files.write(Paths.get("src/test/resources/report.txt"), report.getBytes(StandardCharsets.UTF_8)); + } + } +} diff --git a/simulation/src/test/java/com/palantir/dialogue/core/Strategy.java b/simulation/src/test/java/com/palantir/dialogue/core/Strategy.java new file mode 100644 index 000000000..c9022d8dc --- /dev/null +++ b/simulation/src/test/java/com/palantir/dialogue/core/Strategy.java @@ -0,0 +1,113 @@ +/* + * (c) Copyright 2020 Palantir Technologies Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.palantir.dialogue.core; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Meter; +import com.google.common.util.concurrent.ListenableFuture; +import com.palantir.dialogue.Channel; +import com.palantir.dialogue.Endpoint; +import com.palantir.dialogue.Request; +import com.palantir.dialogue.Response; +import com.palantir.tritium.metrics.registry.DefaultTaggedMetricRegistry; +import java.util.List; +import java.util.Optional; +import java.util.function.BiFunction; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SuppressWarnings("ImmutableEnumChecker") +public enum Strategy { + CONCURRENCY_LIMITER(Strategy::concurrencyLimiter), + ROUND_ROBIN(Strategy::roundRobin); + + private static final Logger log = LoggerFactory.getLogger(Strategy.class); + private final BiFunction>, Channel> getChannel; + + Strategy(BiFunction>, Channel> getChannel) { + this.getChannel = getChannel; + } + + public Channel getChannel(Simulation simulation, Supplier> servers) { + return getChannel.apply(simulation, servers); + } + + private static Channel concurrencyLimiter(Simulation sim, Supplier> channelSupplier) { + return RefreshingChannelFactory.RefreshingChannel.create(channelSupplier, channels -> { + List limitedChannels1 = channels.stream() + .map(c1 -> new ConcurrencyLimitedChannel( + c1, () -> ConcurrencyLimitedChannel.createLimiter(sim.clock()))) + .collect(Collectors.toList()); + LimitedChannel limited1 = new RoundRobinChannel(limitedChannels1); + limited1 = instrumentClient(limited1, sim.metrics()); // just for debugging + Channel channel = new QueuedChannel(limited1, DispatcherMetrics.of(new DefaultTaggedMetricRegistry())); + return new RetryingChannel(channel); + }); + } + + private static Channel roundRobin(Simulation sim, Supplier> channelSupplier) { + return RefreshingChannelFactory.RefreshingChannel.create(channelSupplier, channels -> { + List limitedChannels = + channels.stream().map(Strategy::noOpLimitedChannel).collect(Collectors.toList()); + LimitedChannel limited = new RoundRobinChannel(limitedChannels); + limited = instrumentClient(limited, sim.metrics()); // will always be zero due to the noOpLimitedChannel + Channel channel = new QueuedChannel(limited, DispatcherMetrics.of(new DefaultTaggedMetricRegistry())); + return new RetryingChannel(channel); + }); + } + + private static LimitedChannel instrumentClient(LimitedChannel delegate, SimulationMetrics metrics) { + Meter starts = metrics.meter("test_client.starts"); + Counter metric = metrics.counter("test_client.refusals"); + return new LimitedChannel() { + + @Override + public Optional> maybeExecute(Endpoint endpoint, Request request) { + log.debug( + "starting request={}", + request.headerParams().get(Benchmark.REQUEST_ID_HEADER).get(0)); + starts.mark(); + Optional> response = delegate.maybeExecute(endpoint, request); + if (!response.isPresent()) { + metric.inc(); + } + return response; + } + + @Override + public String toString() { + return delegate.toString(); + } + }; + } + + private static LimitedChannel noOpLimitedChannel(Channel delegate) { + return new LimitedChannel() { + @Override + public Optional> maybeExecute(Endpoint endpoint, Request request) { + return Optional.of(delegate.execute(endpoint, request)); + } + + @Override + public String toString() { + return delegate.toString(); + } + }; + } +} diff --git a/simulation/src/test/resources/all_nodes_500[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/all_nodes_500[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..b5720a2dc Binary files /dev/null and b/simulation/src/test/resources/all_nodes_500[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/all_nodes_500[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/all_nodes_500[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..904a41041 --- /dev/null +++ b/simulation/src/test/resources/all_nodes_500[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=50.0% client_mean=PT0.6S server_cpu=PT2M received=200/200 codes={200=100, 500=100} \ No newline at end of file diff --git a/simulation/src/test/resources/all_nodes_500[ROUND_ROBIN].png b/simulation/src/test/resources/all_nodes_500[ROUND_ROBIN].png new file mode 100644 index 000000000..a84fdcee7 Binary files /dev/null and b/simulation/src/test/resources/all_nodes_500[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/all_nodes_500[ROUND_ROBIN].txt b/simulation/src/test/resources/all_nodes_500[ROUND_ROBIN].txt new file mode 100644 index 000000000..904a41041 --- /dev/null +++ b/simulation/src/test/resources/all_nodes_500[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=50.0% client_mean=PT0.6S server_cpu=PT2M received=200/200 codes={200=100, 500=100} \ No newline at end of file diff --git a/simulation/src/test/resources/black_hole[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/black_hole[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..d018d94a3 Binary files /dev/null and b/simulation/src/test/resources/black_hole[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/black_hole[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/black_hole[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..ab490d3f0 --- /dev/null +++ b/simulation/src/test/resources/black_hole[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=90.0% client_mean=PT0.6S server_cpu=PT1M48S received=180/200 codes={200=180} \ No newline at end of file diff --git a/simulation/src/test/resources/black_hole[ROUND_ROBIN].png b/simulation/src/test/resources/black_hole[ROUND_ROBIN].png new file mode 100644 index 000000000..528d5f5e2 Binary files /dev/null and b/simulation/src/test/resources/black_hole[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/black_hole[ROUND_ROBIN].txt b/simulation/src/test/resources/black_hole[ROUND_ROBIN].txt new file mode 100644 index 000000000..75f764339 --- /dev/null +++ b/simulation/src/test/resources/black_hole[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=65.0% client_mean=PT0.6S server_cpu=PT1M18S received=130/200 codes={200=130} \ No newline at end of file diff --git a/simulation/src/test/resources/drastic_slowdown[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/drastic_slowdown[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..3362c3c2d Binary files /dev/null and b/simulation/src/test/resources/drastic_slowdown[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/drastic_slowdown[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/drastic_slowdown[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..755cb4fa4 --- /dev/null +++ b/simulation/src/test/resources/drastic_slowdown[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=100.0% client_mean=PT0.131675583S server_cpu=PT8M46.702333319S received=4000/4000 codes={200=4000} \ No newline at end of file diff --git a/simulation/src/test/resources/drastic_slowdown[ROUND_ROBIN].png b/simulation/src/test/resources/drastic_slowdown[ROUND_ROBIN].png new file mode 100644 index 000000000..e170e6528 Binary files /dev/null and b/simulation/src/test/resources/drastic_slowdown[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/drastic_slowdown[ROUND_ROBIN].txt b/simulation/src/test/resources/drastic_slowdown[ROUND_ROBIN].txt new file mode 100644 index 000000000..5e045c66a --- /dev/null +++ b/simulation/src/test/resources/drastic_slowdown[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=100.0% client_mean=PT8.340639499S server_cpu=PT9H16M2.557999978S received=4000/4000 codes={200=4000} \ No newline at end of file diff --git a/simulation/src/test/resources/fast_500s_then_revert[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/fast_500s_then_revert[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..31067c379 Binary files /dev/null and b/simulation/src/test/resources/fast_500s_then_revert[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/fast_500s_then_revert[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/fast_500s_then_revert[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..eebb1a6a7 --- /dev/null +++ b/simulation/src/test/resources/fast_500s_then_revert[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=76.7% client_mean=PT0.055281733S server_cpu=PT3M27.306499709S received=3750/3750 codes={200=2875, 500=875} \ No newline at end of file diff --git a/simulation/src/test/resources/fast_500s_then_revert[ROUND_ROBIN].png b/simulation/src/test/resources/fast_500s_then_revert[ROUND_ROBIN].png new file mode 100644 index 000000000..1b125d3b2 Binary files /dev/null and b/simulation/src/test/resources/fast_500s_then_revert[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/fast_500s_then_revert[ROUND_ROBIN].txt b/simulation/src/test/resources/fast_500s_then_revert[ROUND_ROBIN].txt new file mode 100644 index 000000000..eebb1a6a7 --- /dev/null +++ b/simulation/src/test/resources/fast_500s_then_revert[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=76.7% client_mean=PT0.055281733S server_cpu=PT3M27.306499709S received=3750/3750 codes={200=2875, 500=875} \ No newline at end of file diff --git a/simulation/src/test/resources/live_reloading[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/live_reloading[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..697664670 Binary files /dev/null and b/simulation/src/test/resources/live_reloading[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/live_reloading[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/live_reloading[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..9bcad841c --- /dev/null +++ b/simulation/src/test/resources/live_reloading[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=58.3% client_mean=PT0.7228S server_cpu=PT4M49.12S received=400/400 codes={200=233, 500=167} \ No newline at end of file diff --git a/simulation/src/test/resources/live_reloading[ROUND_ROBIN].png b/simulation/src/test/resources/live_reloading[ROUND_ROBIN].png new file mode 100644 index 000000000..fd4ddccba Binary files /dev/null and b/simulation/src/test/resources/live_reloading[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/live_reloading[ROUND_ROBIN].txt b/simulation/src/test/resources/live_reloading[ROUND_ROBIN].txt new file mode 100644 index 000000000..9bcad841c --- /dev/null +++ b/simulation/src/test/resources/live_reloading[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=58.3% client_mean=PT0.7228S server_cpu=PT4M49.12S received=400/400 codes={200=233, 500=167} \ No newline at end of file diff --git a/simulation/src/test/resources/one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..fee8cba1e Binary files /dev/null and b/simulation/src/test/resources/one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..eb682e056 --- /dev/null +++ b/simulation/src/test/resources/one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=67.6% client_mean=PT0.6S server_cpu=PT5M6S received=510/510 codes={200=345, 500=165} \ No newline at end of file diff --git a/simulation/src/test/resources/one_endpoint_dies_on_each_server[ROUND_ROBIN].png b/simulation/src/test/resources/one_endpoint_dies_on_each_server[ROUND_ROBIN].png new file mode 100644 index 000000000..25a9a3bfe Binary files /dev/null and b/simulation/src/test/resources/one_endpoint_dies_on_each_server[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/one_endpoint_dies_on_each_server[ROUND_ROBIN].txt b/simulation/src/test/resources/one_endpoint_dies_on_each_server[ROUND_ROBIN].txt new file mode 100644 index 000000000..eb682e056 --- /dev/null +++ b/simulation/src/test/resources/one_endpoint_dies_on_each_server[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=67.6% client_mean=PT0.6S server_cpu=PT5M6S received=510/510 codes={200=345, 500=165} \ No newline at end of file diff --git a/simulation/src/test/resources/report.txt b/simulation/src/test/resources/report.txt new file mode 100644 index 000000000..206151c99 --- /dev/null +++ b/simulation/src/test/resources/report.txt @@ -0,0 +1,18 @@ + all_nodes_500[CONCURRENCY_LIMITER].txt: success=50.0% client_mean=PT0.6S server_cpu=PT2M received=200/200 codes={200=100, 500=100} + all_nodes_500[ROUND_ROBIN].txt: success=50.0% client_mean=PT0.6S server_cpu=PT2M received=200/200 codes={200=100, 500=100} + black_hole[CONCURRENCY_LIMITER].txt: success=90.0% client_mean=PT0.6S server_cpu=PT1M48S received=180/200 codes={200=180} + black_hole[ROUND_ROBIN].txt: success=65.0% client_mean=PT0.6S server_cpu=PT1M18S received=130/200 codes={200=130} + drastic_slowdown[CONCURRENCY_LIMITER].txt: success=100.0% client_mean=PT0.131675583S server_cpu=PT8M46.702333319S received=4000/4000 codes={200=4000} + drastic_slowdown[ROUND_ROBIN].txt: success=100.0% client_mean=PT8.340639499S server_cpu=PT9H16M2.557999978S received=4000/4000 codes={200=4000} + fast_500s_then_revert[CONCURRENCY_LIMITER].txt: success=76.7% client_mean=PT0.055281733S server_cpu=PT3M27.306499709S received=3750/3750 codes={200=2875, 500=875} + fast_500s_then_revert[ROUND_ROBIN].txt: success=76.7% client_mean=PT0.055281733S server_cpu=PT3M27.306499709S received=3750/3750 codes={200=2875, 500=875} + live_reloading[CONCURRENCY_LIMITER].txt: success=58.3% client_mean=PT0.7228S server_cpu=PT4M49.12S received=400/400 codes={200=233, 500=167} + live_reloading[ROUND_ROBIN].txt: success=58.3% client_mean=PT0.7228S server_cpu=PT4M49.12S received=400/400 codes={200=233, 500=167} + one_endpoint_dies_on_each_server[CONCURRENCY_LIMITER].txt: success=67.6% client_mean=PT0.6S server_cpu=PT5M6S received=510/510 codes={200=345, 500=165} + one_endpoint_dies_on_each_server[ROUND_ROBIN].txt: success=67.6% client_mean=PT0.6S server_cpu=PT5M6S received=510/510 codes={200=345, 500=165} + simplest_possible_case[CONCURRENCY_LIMITER].txt: success=100.0% client_mean=PT0.7998S server_cpu=PT13M19.8S received=1000/1000 codes={200=1000} + simplest_possible_case[ROUND_ROBIN].txt: success=100.0% client_mean=PT0.7998S server_cpu=PT13M19.8S received=1000/1000 codes={200=1000} + slow_503s_then_revert[CONCURRENCY_LIMITER].txt: success=95.7% client_mean=PT0.125944777S server_cpu=PT6M17.834333281S received=3000/3000 codes={200=2870, 503=130} + slow_503s_then_revert[ROUND_ROBIN].txt: success=76.7% client_mean=PT1.185206666S server_cpu=PT59M15.619999978S received=3000/3000 codes={200=2300, 503=700} + slowdown_and_error_thresholds[CONCURRENCY_LIMITER].txt: success=100.0% client_mean=PT1.977419999S server_cpu=PT31M8.473333135S received=1000/1000 codes={200=1000} + slowdown_and_error_thresholds[ROUND_ROBIN].txt: success=77.1% client_mean=PT2.231446666S server_cpu=PT37M11.446666464S received=1000/1000 codes={200=771, 500=229} diff --git a/simulation/src/test/resources/simplest_possible_case[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/simplest_possible_case[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..fb8c23c8f Binary files /dev/null and b/simulation/src/test/resources/simplest_possible_case[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/simplest_possible_case[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/simplest_possible_case[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..1ec3c6a47 --- /dev/null +++ b/simulation/src/test/resources/simplest_possible_case[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=100.0% client_mean=PT0.7998S server_cpu=PT13M19.8S received=1000/1000 codes={200=1000} \ No newline at end of file diff --git a/simulation/src/test/resources/simplest_possible_case[ROUND_ROBIN].png b/simulation/src/test/resources/simplest_possible_case[ROUND_ROBIN].png new file mode 100644 index 000000000..65f5371c4 Binary files /dev/null and b/simulation/src/test/resources/simplest_possible_case[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/simplest_possible_case[ROUND_ROBIN].txt b/simulation/src/test/resources/simplest_possible_case[ROUND_ROBIN].txt new file mode 100644 index 000000000..1ec3c6a47 --- /dev/null +++ b/simulation/src/test/resources/simplest_possible_case[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=100.0% client_mean=PT0.7998S server_cpu=PT13M19.8S received=1000/1000 codes={200=1000} \ No newline at end of file diff --git a/simulation/src/test/resources/slow_503s_then_revert[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/slow_503s_then_revert[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..351819d43 Binary files /dev/null and b/simulation/src/test/resources/slow_503s_then_revert[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/slow_503s_then_revert[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/slow_503s_then_revert[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..eaddd179d --- /dev/null +++ b/simulation/src/test/resources/slow_503s_then_revert[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=95.7% client_mean=PT0.125944777S server_cpu=PT6M17.834333281S received=3000/3000 codes={200=2870, 503=130} \ No newline at end of file diff --git a/simulation/src/test/resources/slow_503s_then_revert[ROUND_ROBIN].png b/simulation/src/test/resources/slow_503s_then_revert[ROUND_ROBIN].png new file mode 100644 index 000000000..d38937d46 Binary files /dev/null and b/simulation/src/test/resources/slow_503s_then_revert[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/slow_503s_then_revert[ROUND_ROBIN].txt b/simulation/src/test/resources/slow_503s_then_revert[ROUND_ROBIN].txt new file mode 100644 index 000000000..6b5f5f3c3 --- /dev/null +++ b/simulation/src/test/resources/slow_503s_then_revert[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=76.7% client_mean=PT1.185206666S server_cpu=PT59M15.619999978S received=3000/3000 codes={200=2300, 503=700} \ No newline at end of file diff --git a/simulation/src/test/resources/slowdown_and_error_thresholds[CONCURRENCY_LIMITER].png b/simulation/src/test/resources/slowdown_and_error_thresholds[CONCURRENCY_LIMITER].png new file mode 100644 index 000000000..e3f1cf37c Binary files /dev/null and b/simulation/src/test/resources/slowdown_and_error_thresholds[CONCURRENCY_LIMITER].png differ diff --git a/simulation/src/test/resources/slowdown_and_error_thresholds[CONCURRENCY_LIMITER].txt b/simulation/src/test/resources/slowdown_and_error_thresholds[CONCURRENCY_LIMITER].txt new file mode 100644 index 000000000..234cc2c2f --- /dev/null +++ b/simulation/src/test/resources/slowdown_and_error_thresholds[CONCURRENCY_LIMITER].txt @@ -0,0 +1 @@ +success=100.0% client_mean=PT1.977419999S server_cpu=PT31M8.473333135S received=1000/1000 codes={200=1000} \ No newline at end of file diff --git a/simulation/src/test/resources/slowdown_and_error_thresholds[ROUND_ROBIN].png b/simulation/src/test/resources/slowdown_and_error_thresholds[ROUND_ROBIN].png new file mode 100644 index 000000000..59f17c6c6 Binary files /dev/null and b/simulation/src/test/resources/slowdown_and_error_thresholds[ROUND_ROBIN].png differ diff --git a/simulation/src/test/resources/slowdown_and_error_thresholds[ROUND_ROBIN].txt b/simulation/src/test/resources/slowdown_and_error_thresholds[ROUND_ROBIN].txt new file mode 100644 index 000000000..f009c1276 --- /dev/null +++ b/simulation/src/test/resources/slowdown_and_error_thresholds[ROUND_ROBIN].txt @@ -0,0 +1 @@ +success=77.1% client_mean=PT2.231446666S server_cpu=PT37M11.446666464S received=1000/1000 codes={200=771, 500=229} \ No newline at end of file diff --git a/versions.lock b/versions.lock index a50365aad..2dc0fb638 100644 --- a/versions.lock +++ b/versions.lock @@ -8,7 +8,7 @@ com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.10.2 (2 constraints: 2529 com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.10.2 (1 constraints: 7b1c92a4) com.fasterxml.jackson.module:jackson-module-afterburner:2.10.2 (2 constraints: 2529c2cb) com.github.ben-manes.caffeine:caffeine:2.8.1 (2 constraints: c017484f) -com.google.code.findbugs:jsr305:3.0.2 (5 constraints: de3b219d) +com.google.code.findbugs:jsr305:3.0.2 (6 constraints: 944b12c7) com.google.errorprone:error_prone_annotations:2.3.4 (5 constraints: 4a4818d6) com.google.guava:failureaccess:1.0.1 (1 constraints: 140ae1b4) com.google.guava:guava:28.1-jre (7 constraints: 7e7647a3) @@ -41,14 +41,28 @@ net.bytebuddy:byte-buddy-agent:1.10.5 (1 constraints: 410b37de) org.assertj:assertj-core:3.15.0 (3 constraints: 4f25f54b) org.checkerframework:checker-qual:3.1.0 (2 constraints: 161ab343) org.codehaus.mojo:animal-sniffer-annotations:1.18 (1 constraints: ee09d9aa) -org.hamcrest:hamcrest:2.2 (1 constraints: 720b95d5) -org.hamcrest:hamcrest-core:2.2 (2 constraints: 730adbbf) +org.hamcrest:hamcrest:2.2 (2 constraints: 8612aa2d) +org.hamcrest:hamcrest-core:2.2 (3 constraints: 2f17d637) org.immutables:value:2.8.3 (1 constraints: 0f051036) org.mockito:mockito-core:3.2.4 (1 constraints: 0b050436) org.objenesis:objenesis:2.6 (1 constraints: b40a14bd) -org.slf4j:slf4j-api:1.7.30 (5 constraints: bb463cfe) +org.slf4j:slf4j-api:1.7.30 (6 constraints: ba50ca8e) [Test dependencies] +com.google.code.findbugs:annotations:3.0.1 (1 constraints: 9e0aafc3) com.palantir.safe-logging:preconditions-assertj:1.13.0 (1 constraints: 3705323b) +commons-logging:commons-logging:1.2 (2 constraints: 8215ead1) +de.erichseifert.vectorgraphics2d:VectorGraphics2D:0.13 (1 constraints: 8c0a80bb) +de.rototor.pdfbox:graphics2d:0.25 (1 constraints: 8f0a84bb) +net.jcip:jcip-annotations:1.0 (1 constraints: 560ff165) +org.apache-extras.beanshell:bsh:2.0b6 (1 constraints: ac07626b) org.apache.commons:commons-lang3:3.9 (1 constraints: b004292c) +org.apache.pdfbox:fontbox:2.0.17 (1 constraints: 180b71d8) +org.apache.pdfbox:pdfbox:2.0.17 (1 constraints: b40c5915) org.assertj:assertj-guava:3.3.0 (1 constraints: 08050336) +org.hamcrest:hamcrest-library:2.1 (1 constraints: 1507415c) +org.jmock:jmock:2.12.0 (1 constraints: 3705353b) +org.jmock:jmock-testjar:2.12.0 (1 constraints: a507a272) +org.knowm.xchart:xchart:3.6.1 (1 constraints: 0c050d36) +org.ow2.asm:asm:7.1 (1 constraints: 1a07505c) +org.slf4j:slf4j-simple:1.7.30 (1 constraints: 3d05453b) diff --git a/versions.props b/versions.props index 9dbaf613d..338c82913 100644 --- a/versions.props +++ b/versions.props @@ -23,6 +23,8 @@ org.hamcrest:hamcrest-core = 2.2 org.mockito:mockito-core = 3.2.4 javax.ws.rs:javax.ws.rs-api = 2.0 org.apache.commons:commons-lang3 = 3.9 +org.jmock:jmock = 2.12.0 +org.knowm.xchart:xchart = 3.6.1 # dependency-upgrader:OFF # Match conjure-java-runtime okhttp version