Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add more specific metrics and logging for DNS lookup failures #2179

Merged
merged 14 commits into from
Feb 28, 2024
2 changes: 2 additions & 0 deletions dialogue-clients/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ Dialogue DNS metrics.
- `success`: DNS resolution succeeded using `InetAddress.getAllByName`.
- `fallback`: DNS resolution using the primary mechanism failed, however addresses were available in the fallback cache.
- `failure`: No addresses could be resolved for the given hostname.
- `client.dns.failure` (meter): DNS resolver query failures.
- `error-type`: Describes the error type returned by getaddrinfo() when lookup fails.

### client.uri
Dialogue URI parsing metrics.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,26 @@
import com.google.common.collect.ImmutableSet;
import com.palantir.dialogue.core.DialogueDnsResolver;
import com.palantir.logsafe.Preconditions;
import com.palantir.logsafe.Safe;
import com.palantir.logsafe.SafeArg;
import com.palantir.logsafe.UnsafeArg;
import com.palantir.logsafe.logger.SafeLogger;
import com.palantir.logsafe.logger.SafeLoggerFactory;
import com.palantir.tritium.metrics.registry.TaggedMetricRegistry;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Objects;
import java.util.Optional;

enum DefaultDialogueDnsResolver implements DialogueDnsResolver {
INSTANCE;

final class DefaultDialogueDnsResolver implements DialogueDnsResolver {
private static final SafeLogger log = SafeLoggerFactory.get(DefaultDialogueDnsResolver.class);

private final ClientDnsMetrics metrics;

DefaultDialogueDnsResolver(TaggedMetricRegistry registry) {
this.metrics = ClientDnsMetrics.of(registry);
}

@Override
public ImmutableSet<InetAddress> resolve(String hostname) {
Preconditions.checkNotNull(hostname, "hostname is required");
Expand All @@ -41,8 +50,77 @@ public ImmutableSet<InetAddress> resolve(String hostname) {
}
return ImmutableSet.copyOf(results);
} catch (UnknownHostException e) {
log.warn("Unknown host '{}'", UnsafeArg.of("hostname", hostname), e);
GaiError gaiError = extractGaiError(e, hostname);
log.warn(
"Unknown host '{}'",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Unknown host '{}'",
"Unknown host '{}'. {}: {}",

SafeArg.of("gaiErrorType", gaiError.name()),
SafeArg.of("gaiErrorMessage", gaiError.errorMessage()),
UnsafeArg.of("hostname", hostname),
e);
metrics.failure(gaiError.name()).mark();
return ImmutableSet.of();
}
}

// these strings were taken from glibc-2.39, but likely have not changed in quite a while
// strings may be different on BSD systems like macos
// TODO(dns): update this list to try to match against known strings on other platforms
private enum GaiError {
EAI_ADDRFAMILY("Address family for hostname not supported"),
EAI_AGAIN("Temporary failure in name resolution"),
EAI_BADFLAGS("Bad value for ai_flags"),
EAI_FAIL("Non-recoverable failure in name resolution"),
EAI_FAMILY("ai_family not supported"),
EAI_MEMORY("Memory allocation failure"),
EAI_NODATA("No address associated with hostname"),
EAI_NONAME("Name or service not known"),
EAI_SERVICE("Servname not supported for ai_socktype"),
EAI_SOCKTYPE("ai_socktype not supported"),
EAI_SYSTEM("System error"),
EAI_INPROGRESS("Processing request in progress"),
EAI_CANCELED("Request canceled"),
EAI_NOTCANCELED("Request not canceled"),
EAI_ALLDONE("All requests done"),
EAI_INTR("Interrupted by a signal"),
EAI_IDN_ENCODE("Parameter string not correctly encoded"),
EAI_OVERFLOW("Result too large for supplied buffer"),
CACHED(),
UNKNOWN();

private final Optional<String> errorMessage;

GaiError() {
this.errorMessage = Optional.empty();
}

GaiError(String errorMessage) {
this.errorMessage = Optional.of(errorMessage);
}

@Safe
String errorMessage() {
return errorMessage.orElseGet(this::name);
}
}

private static GaiError extractGaiError(UnknownHostException exception, String requestedHostname) {
if (exception.getMessage() == null) {
return GaiError.UNKNOWN;
}

try {
if (Objects.equals(requestedHostname, exception.getMessage())) {
return GaiError.CACHED;
}

for (GaiError error : GaiError.values()) {
if (error.errorMessage.isPresent() && exception.getMessage().contains(error.errorMessage.get())) {
return error;
}
}
return GaiError.UNKNOWN;
} catch (Exception e) {
return GaiError.UNKNOWN;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ default ConjureRuntime runtime() {
@Value.Default
default DialogueDnsResolver dnsResolver() {
return new CachingFallbackDnsResolver(
new ProtocolVersionFilteringDialogueDnsResolver(DefaultDialogueDnsResolver.INSTANCE),
new ProtocolVersionFilteringDialogueDnsResolver(new DefaultDialogueDnsResolver(taggedMetrics())),
taggedMetrics());
}

Expand Down
6 changes: 6 additions & 0 deletions dialogue-clients/src/main/metrics/dialogue-core-metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,9 @@ namespaces:
- value: failure
docs: No addresses could be resolved for the given hostname.
docs: DNS resolver query metrics, on a per-hostname basis.
failure:
type: meter
tags:
- name: error-type
docs: Describes the error type returned by getaddrinfo() when lookup fails.
docs: DNS resolver query failures.
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,16 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.assertj.core.api.Assumptions.assumeThat;

import com.google.common.collect.ImmutableSet;
import com.palantir.dialogue.core.DialogueDnsResolver;
import com.palantir.logsafe.exceptions.SafeNullPointerException;
import com.palantir.tritium.metrics.registry.DefaultTaggedMetricRegistry;
import com.palantir.tritium.metrics.registry.TaggedMetricRegistry;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.UUID;
import org.junit.jupiter.api.Test;

class DefaultDialogueDnsResolverTest {
Expand Down Expand Up @@ -66,7 +71,43 @@ void malformedIpv6_noBrackets() {
assertThat(resolve("::z")).isEmpty();
}

@Test
void unknown_host() {
assumeThat(System.getProperty("os.name").toLowerCase().startsWith("linux"))
.describedAs("GAI Error Strings are only defined for Linux environments")
.isTrue();

TaggedMetricRegistry registry = new DefaultTaggedMetricRegistry();
DialogueDnsResolver resolver = new DefaultDialogueDnsResolver(registry);

String badHost = UUID.randomUUID() + ".palantir.com";
ImmutableSet<InetAddress> result = resolver.resolve(badHost);

assertThat(result).isEmpty();
ClientDnsMetrics metrics = ClientDnsMetrics.of(registry);
assertThat(metrics.failure("EAI_NONAME").getCount()).isEqualTo(1);
}

@Test
void unknown_host_from_cache() {
TaggedMetricRegistry registry = new DefaultTaggedMetricRegistry();
DialogueDnsResolver resolver = new DefaultDialogueDnsResolver(registry);
ClientDnsMetrics metrics = ClientDnsMetrics.of(registry);

String badHost = UUID.randomUUID() + ".palantir.com";
ImmutableSet<InetAddress> result = resolver.resolve(badHost);

assertThat(result).isEmpty();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's add assertThat(metrics.lookupError("EAI_NONAME").getCount()).isEqualTo(1); in addition to this assert

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

assertThat(metrics.failure("EAI_NONAME").getCount()).isEqualTo(1);

// should resolve from cache
ImmutableSet<InetAddress> result2 = resolver.resolve(badHost);
assertThat(result2).isEmpty();
assertThat(metrics.failure("CACHED").getCount()).isEqualTo(1);
}

private static ImmutableSet<InetAddress> resolve(String hostname) {
return DefaultDialogueDnsResolver.INSTANCE.resolve(hostname);
DialogueDnsResolver resolver = new DefaultDialogueDnsResolver(new DefaultTaggedMetricRegistry());
return resolver.resolve(hostname);
}
}
Loading