From 4e6f8ec924c1e5819af7c6eb6a2cee96685f8fe4 Mon Sep 17 00:00:00 2001 From: Gary Luo Date: Mon, 22 Jan 2018 02:05:55 -0600 Subject: [PATCH] Fix availability incompatibility issue with druid partial data and partion composite table --- CHANGELOG.md | 4 +++ .../table/availability/Availability.java | 2 +- .../availability/PartitionAvailability.java | 15 +++++++-- .../PartitionAvailabilitySpec.groovy | 31 +++++++++++++++++++ 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 876743e51e..66dde68a71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -798,6 +798,10 @@ Removals: ### Fixed: +- [Fix druid partial data and partition table incompatibility](https://github.com/yahoo/fili/pull/615) + * Datasource names returned by partition table now contains only datasources that are actually used in the query + * Fix the problem where uncovered intervals is given by druid for partition table that fili filtered out + - [Fix the generic example for loading multiple tables](https://github.com/yahoo/fili/pull/309) * Loading multiple tables caused it to hang and eventually time out. * Also fixed issue causing all tables to show the same set of dimensions. diff --git a/fili-core/src/main/java/com/yahoo/bard/webservice/table/availability/Availability.java b/fili-core/src/main/java/com/yahoo/bard/webservice/table/availability/Availability.java index d5068237f3..788e9e74dc 100644 --- a/fili-core/src/main/java/com/yahoo/bard/webservice/table/availability/Availability.java +++ b/fili-core/src/main/java/com/yahoo/bard/webservice/table/availability/Availability.java @@ -34,7 +34,7 @@ public interface Availability { */ @Deprecated default Set getDataSourceNames(PhysicalDataSourceConstraint constraint) { - return getDataSourceNames(); + return getDataSourceNames((DataSourceConstraint) constraint); } /** diff --git a/fili-core/src/main/java/com/yahoo/bard/webservice/table/availability/PartitionAvailability.java b/fili-core/src/main/java/com/yahoo/bard/webservice/table/availability/PartitionAvailability.java index f3b61a3d98..dc132c4201 100644 --- a/fili-core/src/main/java/com/yahoo/bard/webservice/table/availability/PartitionAvailability.java +++ b/fili-core/src/main/java/com/yahoo/bard/webservice/table/availability/PartitionAvailability.java @@ -2,12 +2,16 @@ // Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms. package com.yahoo.bard.webservice.table.availability; +import com.yahoo.bard.webservice.data.config.names.DataSourceName; +import com.yahoo.bard.webservice.table.resolver.DataSourceConstraint; import com.yahoo.bard.webservice.table.resolver.DataSourceFilter; import com.yahoo.bard.webservice.table.resolver.PhysicalDataSourceConstraint; import com.yahoo.bard.webservice.util.SimplifiedIntervalList; import java.util.Map; import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; import javax.validation.constraints.NotNull; @@ -70,7 +74,7 @@ public PartitionAvailability(@NotNull Map availa * * @return A stream of availabilities which participate given the constraint */ - private Stream filteredAvailabilities(PhysicalDataSourceConstraint constraint) { + private Stream filteredAvailabilities(DataSourceConstraint constraint) { return availabilityFilters.entrySet().stream() .filter(entry -> entry.getValue().apply(constraint)) .map(Map.Entry::getKey); @@ -83,7 +87,7 @@ private Stream filteredAvailabilities(PhysicalDataSourceConstraint * * @return The intervals which are available for the given constraint */ - private SimplifiedIntervalList mergeAvailabilities(PhysicalDataSourceConstraint constraint) { + private SimplifiedIntervalList mergeAvailabilities(DataSourceConstraint constraint) { return filteredAvailabilities(constraint) .map(availability -> availability.getAvailableIntervals(constraint)) .reduce(SimplifiedIntervalList::intersect).orElse(new SimplifiedIntervalList()); @@ -94,6 +98,13 @@ public SimplifiedIntervalList getAvailableIntervals(PhysicalDataSourceConstraint return mergeAvailabilities(constraint); } + @Override + public Set getDataSourceNames(DataSourceConstraint constraint) { + return filteredAvailabilities(constraint) + .map(availability -> availability.getDataSourceNames(constraint)) + .flatMap(Set::stream).collect(Collectors.toSet()); + } + @Override public String toString() { return availabilityFilters.toString(); diff --git a/fili-core/src/test/groovy/com/yahoo/bard/webservice/table/availability/PartitionAvailabilitySpec.groovy b/fili-core/src/test/groovy/com/yahoo/bard/webservice/table/availability/PartitionAvailabilitySpec.groovy index 58feda31f8..ff8573398f 100644 --- a/fili-core/src/test/groovy/com/yahoo/bard/webservice/table/availability/PartitionAvailabilitySpec.groovy +++ b/fili-core/src/test/groovy/com/yahoo/bard/webservice/table/availability/PartitionAvailabilitySpec.groovy @@ -2,10 +2,12 @@ // Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms. package com.yahoo.bard.webservice.table.availability +import com.yahoo.bard.webservice.data.config.names.DataSourceName import com.yahoo.bard.webservice.data.config.names.TableName import com.yahoo.bard.webservice.data.time.ZonedTimeGrain import com.yahoo.bard.webservice.data.time.ZonedTimeGrainSpec import com.yahoo.bard.webservice.table.Column +import com.yahoo.bard.webservice.table.resolver.DataSourceConstraint import com.yahoo.bard.webservice.table.resolver.DataSourceFilter import com.yahoo.bard.webservice.table.resolver.PhysicalDataSourceConstraint import com.yahoo.bard.webservice.util.SimplifiedIntervalList @@ -17,6 +19,7 @@ import org.joda.time.Interval import spock.lang.Shared import spock.lang.Specification import spock.lang.Unroll + /** * Test for partition availability behavior. */ @@ -71,6 +74,34 @@ class PartitionAvailabilitySpec extends Specification{ [SOURCE1, SOURCE2] | [SOURCE1] | [SOURCE1, SOURCE2] } + def "getDataSourceNames(constraint) returns only datasource names that are actually needed"() { + given: + DataSourceName name1 = DataSourceName.of("datasource1") + DataSourceName name2 = DataSourceName.of("datasource2") + + availability1 = Mock(Availability) + availability2 = Mock(Availability) + + availability1.getDataSourceNames() >> ([name1] as Set) + availability2.getDataSourceNames() >> ([name2] as Set) + + availability1.getDataSourceNames(_ as DataSourceConstraint) >> ([name1] as Set) + availability2.getDataSourceNames(_ as DataSourceConstraint) >> ([name2] as Set) + + DataSourceFilter partition1 = Mock(DataSourceFilter) + DataSourceFilter partition2 = Mock(DataSourceFilter) + + partition1.apply(_ as DataSourceConstraint) >> true + partition2.apply(_ as DataSourceConstraint) >> false + + Map partitionMap = [(availability1): partition1, (availability2) : partition2] + + partitionAvailability = new PartitionAvailability(partitionMap) + + expect: + partitionAvailability.getDataSourceNames(Mock(DataSourceConstraint)) == [name1] as Set + } + @Unroll def "getAllAvailableIntervals returns merged intervals grouped by columns when two availabilities have #caseDescription (metadata map merge)"() { given: