diff --git a/changelog/@unreleased/pr-2179.v2.yml b/changelog/@unreleased/pr-2179.v2.yml new file mode 100644 index 000000000..697c4b87e --- /dev/null +++ b/changelog/@unreleased/pr-2179.v2.yml @@ -0,0 +1,5 @@ +type: improvement +improvement: + description: Log the getaddrinfo error type and message (best effort) when DNS resolution fails, and count failures by error type in the client.dns.failure meter + links: + - https://github.com/palantir/dialogue/pull/2179 diff --git a/dialogue-clients/metrics.md b/dialogue-clients/metrics.md index 09fe0c72e..80353d894 100644 --- a/dialogue-clients/metrics.md +++ b/dialogue-clients/metrics.md @@ -68,6 +68,8 @@ Dialogue DNS metrics. - `success`: DNS resolution succeeded using `InetAddress.getAllByName`. - `fallback`: DNS resolution using the primary mechanism failed, however addresses were available in the fallback cache. - `failure`: No addresses could be resolved for the given hostname. +- `client.dns.failure` (meter): DNS resolver query failures. + - `error-type`: Describes the error type returned by getaddrinfo() when lookup fails. ### client.uri Dialogue URI parsing metrics. 
diff --git a/dialogue-clients/src/main/java/com/palantir/dialogue/clients/DefaultDialogueDnsResolver.java b/dialogue-clients/src/main/java/com/palantir/dialogue/clients/DefaultDialogueDnsResolver.java
index d4f4f173d..98528e439 100644
--- a/dialogue-clients/src/main/java/com/palantir/dialogue/clients/DefaultDialogueDnsResolver.java
+++ b/dialogue-clients/src/main/java/com/palantir/dialogue/clients/DefaultDialogueDnsResolver.java
@@ -19,17 +19,26 @@
 import com.google.common.collect.ImmutableSet;
 import com.palantir.dialogue.core.DialogueDnsResolver;
 import com.palantir.logsafe.Preconditions;
+import com.palantir.logsafe.Safe;
+import com.palantir.logsafe.SafeArg;
 import com.palantir.logsafe.UnsafeArg;
 import com.palantir.logsafe.logger.SafeLogger;
 import com.palantir.logsafe.logger.SafeLoggerFactory;
+import com.palantir.tritium.metrics.registry.TaggedMetricRegistry;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.util.Objects;
+import java.util.Optional;
 
-enum DefaultDialogueDnsResolver implements DialogueDnsResolver {
-    INSTANCE;
-
+final class DefaultDialogueDnsResolver implements DialogueDnsResolver {
     private static final SafeLogger log = SafeLoggerFactory.get(DefaultDialogueDnsResolver.class);
 
+    private final ClientDnsMetrics metrics;
+
+    DefaultDialogueDnsResolver(TaggedMetricRegistry registry) {
+        this.metrics = ClientDnsMetrics.of(registry);
+    }
+
     @Override
     public ImmutableSet resolve(String hostname) {
         Preconditions.checkNotNull(hostname, "hostname is required");
@@ -41,8 +50,101 @@ public ImmutableSet resolve(String hostname) {
             }
             return ImmutableSet.copyOf(results);
         } catch (UnknownHostException e) {
-            log.warn("Unknown host '{}'", UnsafeArg.of("hostname", hostname), e);
+            // Best-effort classification of the getaddrinfo() failure from the exception message.
+            GaiError gaiError = extractGaiError(e, hostname);
+            // Arguments are listed in placeholder order: hostname, error type, error message.
+            log.warn(
+                    "Unknown host '{}'. {}: {}",
+                    UnsafeArg.of("hostname", hostname),
+                    SafeArg.of("gaiErrorType", gaiError.name()),
+                    SafeArg.of("gaiErrorMessage", gaiError.errorMessage()),
+                    e);
+            metrics.failure(gaiError.name()).mark();
             return ImmutableSet.of();
         }
     }
+
+    // TODO(dns): update this list to try to match against known strings on other platforms
+    /**
+     * Error kinds reported by {@code getaddrinfo()}, each paired with the message text used to
+     * recognize it inside an {@link UnknownHostException} message.
+     *
+     * <p>These strings were taken from glibc-2.39, but likely have not changed in quite a while.
+     * Strings may be different on BSD systems like macOS.
+     */
+    private enum GaiError {
+        EAI_ADDRFAMILY("Address family for hostname not supported"),
+        EAI_AGAIN("Temporary failure in name resolution"),
+        EAI_BADFLAGS("Bad value for ai_flags"),
+        EAI_FAIL("Non-recoverable failure in name resolution"),
+        EAI_FAMILY("ai_family not supported"),
+        EAI_MEMORY("Memory allocation failure"),
+        EAI_NODATA("No address associated with hostname"),
+        EAI_NONAME("Name or service not known"),
+        EAI_SERVICE("Servname not supported for ai_socktype"),
+        EAI_SOCKTYPE("ai_socktype not supported"),
+        EAI_SYSTEM("System error"),
+        EAI_INPROGRESS("Processing request in progress"),
+        EAI_CANCELED("Request canceled"),
+        EAI_NOTCANCELED("Request not canceled"),
+        EAI_ALLDONE("All requests done"),
+        EAI_INTR("Interrupted by a signal"),
+        EAI_IDN_ENCODE("Parameter string not correctly encoded"),
+        EAI_OVERFLOW("Result too large for supplied buffer"),
+        // The exception message was exactly the requested hostname (no getaddrinfo() text attached).
+        CACHED(),
+        // No message at all, or a message containing none of the known strings.
+        UNKNOWN();
+
+        // Empty for the CACHED and UNKNOWN kinds, which have no message text to match.
+        private final Optional<String> errorMessage;
+
+        GaiError() {
+            this.errorMessage = Optional.empty();
+        }
+
+        GaiError(String errorMessage) {
+            this.errorMessage = Optional.of(errorMessage);
+        }
+
+        /** Returns the known message text, or the constant's name when it has none. */
+        @Safe
+        String errorMessage() {
+            return errorMessage.orElseGet(this::name);
+        }
+    }
+
+    /**
+     * Classifies a failed lookup by inspecting the message of its {@link UnknownHostException}.
+     * Matching is best-effort: an exception raised while comparing is reported as
+     * {@link GaiError#UNKNOWN} rather than propagated.
+     *
+     * @param exception the failure thrown by the lookup
+     * @param requestedHostname the hostname that was being resolved
+     * @return {@link GaiError#UNKNOWN} when there is no message or no known string matches,
+     *     {@link GaiError#CACHED} when the message equals the hostname, otherwise the first kind
+     *     whose message text is contained in the exception message
+     */
+    private static GaiError extractGaiError(UnknownHostException exception, String requestedHostname) {
+        String message = exception.getMessage();
+        if (message == null) {
+            return GaiError.UNKNOWN;
+        }
+
+        try {
+            if (Objects.equals(requestedHostname, message)) {
+                return GaiError.CACHED;
+            }
+
+            for (GaiError error : GaiError.values()) {
+                if (error.errorMessage.isPresent() && message.contains(error.errorMessage.get())) {
+                    return error;
+                }
+            }
+            return GaiError.UNKNOWN;
+        } catch (Exception ignored) {
+            // Deliberate best-effort: classification must never break DNS failure handling.
+            return GaiError.UNKNOWN;
+        }
+    }
 }
diff --git a/dialogue-clients/src/main/java/com/palantir/dialogue/clients/ReloadingClientFactory.java b/dialogue-clients/src/main/java/com/palantir/dialogue/clients/ReloadingClientFactory.java
index 103b98190..22ad614c4 100644
--- a/dialogue-clients/src/main/java/com/palantir/dialogue/clients/ReloadingClientFactory.java
+++ b/dialogue-clients/src/main/java/com/palantir/dialogue/clients/ReloadingClientFactory.java
@@ -161,7 +161,7 @@ default ConjureRuntime runtime() {
         @Value.Default
         default DialogueDnsResolver dnsResolver() {
             return new CachingFallbackDnsResolver(
-                    new ProtocolVersionFilteringDialogueDnsResolver(DefaultDialogueDnsResolver.INSTANCE),
+                    new ProtocolVersionFilteringDialogueDnsResolver(new DefaultDialogueDnsResolver(taggedMetrics())),
                     taggedMetrics());
         }
 
diff --git a/dialogue-clients/src/main/metrics/dialogue-core-metrics.yml b/dialogue-clients/src/main/metrics/dialogue-core-metrics.yml
index af8a4c9f0..2e614a8e6 100644
--- a/dialogue-clients/src/main/metrics/dialogue-core-metrics.yml
+++ b/dialogue-clients/src/main/metrics/dialogue-core-metrics.yml
@@ -37,3 +37,9 @@ namespaces:
               - value: failure
                 docs: No addresses could be resolved for the given hostname.
         docs: DNS resolver query metrics, on a per-hostname basis.
+      failure:
+        type: meter
+        tags:
+          - name: error-type
+            docs: Describes the error type returned by getaddrinfo() when lookup fails.
+        docs: DNS resolver query failures.
diff --git a/dialogue-clients/src/test/java/com/palantir/dialogue/clients/DefaultDialogueDnsResolverTest.java b/dialogue-clients/src/test/java/com/palantir/dialogue/clients/DefaultDialogueDnsResolverTest.java
index f7db06a3f..b739726ac 100644
--- a/dialogue-clients/src/test/java/com/palantir/dialogue/clients/DefaultDialogueDnsResolverTest.java
+++ b/dialogue-clients/src/test/java/com/palantir/dialogue/clients/DefaultDialogueDnsResolverTest.java
@@ -18,11 +18,17 @@
 
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.assertj.core.api.Assumptions.assumeThat;
 
 import com.google.common.collect.ImmutableSet;
+import com.palantir.dialogue.core.DialogueDnsResolver;
 import com.palantir.logsafe.exceptions.SafeNullPointerException;
+import com.palantir.tritium.metrics.registry.DefaultTaggedMetricRegistry;
+import com.palantir.tritium.metrics.registry.TaggedMetricRegistry;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.util.Locale;
+import java.util.UUID;
 import org.junit.jupiter.api.Test;
 
 class DefaultDialogueDnsResolverTest {
@@ -66,7 +72,53 @@ void malformedIpv6_noBrackets() {
         assertThat(resolve("::z")).isEmpty();
     }
 
+    /** A failed lookup of a random, non-existent host is metered under the EAI_NONAME error type. */
+    @Test
+    void unknown_host() {
+        assumeLinux();
+
+        TaggedMetricRegistry registry = new DefaultTaggedMetricRegistry();
+        DialogueDnsResolver resolver = new DefaultDialogueDnsResolver(registry);
+
+        String badHost = UUID.randomUUID() + ".palantir.com";
+        ImmutableSet result = resolver.resolve(badHost);
+
+        assertThat(result).isEmpty();
+        ClientDnsMetrics metrics = ClientDnsMetrics.of(registry);
+        assertThat(metrics.failure("EAI_NONAME").getCount()).isEqualTo(1);
+    }
+
+    /** Repeating the same failed lookup is expected to be metered as CACHED rather than EAI_NONAME. */
+    @Test
+    void unknown_host_from_cache() {
+        // The first lookup below asserts a glibc error string, so this test needs the same Linux guard.
+        assumeLinux();
+
+        TaggedMetricRegistry registry = new DefaultTaggedMetricRegistry();
+        DialogueDnsResolver resolver = new DefaultDialogueDnsResolver(registry);
+        ClientDnsMetrics metrics = ClientDnsMetrics.of(registry);
+
+        String badHost = UUID.randomUUID() + ".palantir.com";
+        ImmutableSet result = resolver.resolve(badHost);
+
+        assertThat(result).isEmpty();
+        assertThat(metrics.failure("EAI_NONAME").getCount()).isEqualTo(1);
+
+        // should resolve from cache
+        ImmutableSet result2 = resolver.resolve(badHost);
+        assertThat(result2).isEmpty();
+        assertThat(metrics.failure("CACHED").getCount()).isEqualTo(1);
+    }
+
+    /** Skips the calling test unless the os.name system property starts with "linux", ignoring case. */
+    private static void assumeLinux() {
+        assumeThat(System.getProperty("os.name").toLowerCase(Locale.ROOT).startsWith("linux"))
+                .describedAs("GAI Error Strings are only defined for Linux environments")
+                .isTrue();
+    }
+
     private static ImmutableSet resolve(String hostname) {
-        return DefaultDialogueDnsResolver.INSTANCE.resolve(hostname);
+        DialogueDnsResolver resolver = new DefaultDialogueDnsResolver(new DefaultTaggedMetricRegistry());
+        return resolver.resolve(hostname);
     }
 }