Implement QueryPlanning and DataSource Constraint into resolvers and matchers
michael-mclawhorn committed Mar 6, 2017
1 parent 21ac864 commit 472d905
Showing 23 changed files with 375 additions and 328 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,9 @@ Current
-------

### Added:
- [QueryPlanningConstraint and DataSourceConstraint](https://github.com/yahoo/fili/pull/169)
    * Added `QueryPlanningConstraint` to replace the current Matcher and Resolver argument interface during query planning
    * Added `DataSourceConstraint` to enable the upcoming implementation of `PartitionedFactTable` availability


- [dateTime based sort feature for the final ResultSet added](https://github.com/yahoo/fili/pull/178)
@@ -25,6 +28,9 @@ Current
- [Support timeouts for lucene search provider](https://github.com/yahoo/fili/pull/183)

### Changed:
- [QueryPlanningConstraint and DataSourceConstraint](https://github.com/yahoo/fili/pull/169)
    * `QueryPlanningConstraint` replaces the `DataApiRequest` and `TemplateDruidQuery` arguments previously passed to Matchers and Resolvers during query planning
    * Modified the `findMissingTimeGrainIntervals` method in `PartialDataHandler` to take a set of column names instead of a `DataApiRequest` and `DruidAggregationQuery`

- [Request IDs now support underscores.](https://github.com/yahoo/fili/pull/176)

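Taken together, the two PR 169 changelog entries describe a signature migration: matchers and resolvers now receive one `QueryPlanningConstraint` instead of a `DataApiRequest`/`TemplateDruidQuery` pair, with `DataSourceConstraint` introduced ahead of the `PartitionedFactTable` availability work. A minimal before/after sketch of a calling site, modeled on the `buildQuery` hunk later in this commit; the helper method, its name, and its surroundings are illustrative and not part of the commit:

```java
// Illustrative fragment only; imports and the enclosing class are omitted.
private PhysicalTable resolveTable(
        PhysicalTableResolver resolver,
        TableGroup group,
        DataApiRequest request,
        TemplateDruidQuery template
) throws NoMatchFoundException {
    // Before this commit, the planning inputs were passed individually:
    //     resolver.resolve(group.getPhysicalTables(), request, template);
    // After this commit, they travel together as one constraint object:
    return resolver.resolve(
            group.getPhysicalTables(),
            new QueryPlanningConstraint(request, template)
    );
}
```

One apparent benefit is that resolver and matcher signatures stay stable as more planning inputs are added later.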
@@ -61,10 +61,9 @@ public static PreResponse buildErrorPreResponse(Throwable throwable) {
}

return new PreResponse(
new ResultSet(new ResultSetSchema(
AllGranularity.INSTANCE,
Collections.emptySet()),
Collections.emptyList()),
new ResultSet(
new ResultSetSchema(AllGranularity.INSTANCE, Collections.emptySet()), Collections.emptyList()
),
responseContext
);
}
@@ -1,4 +1,4 @@
// Copyright 2016 Yahoo Inc.
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.data;

@@ -29,6 +29,7 @@
import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.table.TableGroup;
import com.yahoo.bard.webservice.table.TableIdentifier;
import com.yahoo.bard.webservice.table.resolver.QueryPlanningConstraint;
import com.yahoo.bard.webservice.table.resolver.NoMatchFoundException;
import com.yahoo.bard.webservice.table.resolver.PhysicalTableResolver;
import com.yahoo.bard.webservice.web.DataApiRequest;
@@ -120,7 +121,10 @@ public DruidAggregationQuery<?> buildQuery(
TableGroup group = logicalTable.getTableGroup();

// Resolve the table from the group, the combined dimensions in the request, and the template time grain
PhysicalTable table = resolver.resolve(group.getPhysicalTables(), request, template);
PhysicalTable table = resolver.resolve(
group.getPhysicalTables(),
new QueryPlanningConstraint(request, template)
);

return druidTopNMetric != null ?
buildTopNQuery(
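Later hunks in this commit exercise a handful of accessors on the new constraint object (`getAllColumnNames`, `getRequestDimensions`, `getAllDimensionNames`, `getLogicalMetricNames`, `getMinimumTimeGran`). A hedged sketch of the surface those call sites imply; this is inferred from the diff rather than copied from the class in PR 169, and the per-accessor comments are assumptions about how each value might be derived:

```java
import com.yahoo.bard.webservice.data.dimension.Dimension;
import com.yahoo.bard.webservice.druid.model.query.Granularity;

import java.util.Set;

// Illustrative view of the constraint surface used elsewhere in this diff.
public interface QueryPlanningConstraintSketch {
    Set<Dimension> getRequestDimensions();   // presumably DataApiRequest#getDimensions()
    Set<String> getAllDimensionNames();      // API names of every dimension the request touches
    Set<String> getAllColumnNames();         // dimension names plus the columns the metrics depend on
    Set<String> getLogicalMetricNames();     // presumably from DataApiRequest#getLogicalMetrics()
    Granularity getMinimumTimeGran();        // coarsest grain the request can be aggregated from
}
```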
@@ -1,4 +1,4 @@
// Copyright 2016 Yahoo Inc.
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.data;

@@ -57,8 +57,7 @@ public SimplifiedIntervalList findMissingRequestTimeGrainIntervals(
throw new IllegalArgumentException(message);
}
return findMissingTimeGrainIntervals(
apiRequest,
query,
TableUtils.getColumnNames(apiRequest, query),
physicalTables,
new SimplifiedIntervalList(apiRequest.getIntervals()),
apiRequest.getGranularity()
@@ -79,23 +78,21 @@ public SimplifiedIntervalList findMissingRequestTimeGrainIntervals(
* present for a given combination of request metrics and dimensions (pulled from the API request and generated
* druid query) at the specified granularity.
*
* @param apiRequest api request made by the end user
* @param query used to fetch data from druid
* @param columnNames all the column names the request depends on
* @param physicalTables the tables whose column availabilities are checked
* @param requestedIntervals The intervals that may not be fully satisfied
* @param granularity The granularity at which to find missing intervals
*
* @return subintervals of the requested intervals with incomplete data
*/
public SimplifiedIntervalList findMissingTimeGrainIntervals(
DataApiRequest apiRequest,
DruidAggregationQuery<?> query,
Set<String> columnNames,
Set<PhysicalTable> physicalTables,
@NotNull SimplifiedIntervalList requestedIntervals,
Granularity granularity
) {
SimplifiedIntervalList availableIntervals = physicalTables.stream()
.map(table -> getAvailability(table, TableUtils.getColumnNames(apiRequest, query)))
.map(table -> getAvailability(table, columnNames))
.flatMap(SimplifiedIntervalList::stream)
.collect(SimplifiedIntervalList.getCollector());

@@ -104,6 +101,7 @@ public SimplifiedIntervalList findMissingTimeGrainIntervals(
requestedIntervals,
granularity
);

if (granularity instanceof AllGranularity && !missingIntervals.isEmpty()) {
missingIntervals = requestedIntervals;
}
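The reworked `findMissingTimeGrainIntervals` no longer needs the request and query objects themselves, only the column names they imply; the body above unions each table's availability for those columns and subtracts it from the requested intervals, with a special case for `AllGranularity`. A toy, self-contained restatement of that interval arithmetic using plain half-open ranges instead of Fili's `SimplifiedIntervalList`:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public final class MissingIntervalsSketch {

    /** A half-open [start, end) range standing in for a Joda-Time Interval. */
    static final class Range {
        final long start;
        final long end;
        Range(long start, long end) { this.start = start; this.end = end; }
        @Override public String toString() { return "[" + start + ", " + end + ")"; }
    }

    /**
     * Requested ranges minus the ranges the needed columns are available for.
     * The allGranularity flag mirrors the branch in the hunk above: a single
     * all-time bucket is either fully available or fully missing.
     */
    static List<Range> findMissing(List<Range> requested, List<Range> available, boolean allGranularity) {
        List<Range> missing = new ArrayList<>();
        for (Range req : requested) {
            long cursor = req.start;
            for (Range avail : available) {           // available assumed sorted and disjoint
                if (avail.end <= cursor || avail.start >= req.end) {
                    continue;                         // no overlap with the uncovered remainder
                }
                if (avail.start > cursor) {
                    missing.add(new Range(cursor, avail.start));   // gap before this availability
                }
                cursor = Math.max(cursor, avail.end);
            }
            if (cursor < req.end) {
                missing.add(new Range(cursor, req.end));           // uncovered tail
            }
        }
        return (allGranularity && !missing.isEmpty()) ? new ArrayList<>(requested) : missing;
    }

    public static void main(String[] args) {
        List<Range> requested = Arrays.asList(new Range(0, 10));
        List<Range> available = Arrays.asList(new Range(0, 4), new Range(6, 10));
        System.out.println(findMissing(requested, available, false));  // [[4, 6)]
        System.out.println(findMissing(requested, available, true));   // [[0, 10)]
    }
}
```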
@@ -42,7 +42,6 @@ public ResultSet map(ResultSet resultSet) {

@Override
protected Result map(Result result, ResultSetSchema schema) {
// map for rows is not
throw new UnsupportedOperationException("This code should never be reached.");
}

@@ -25,7 +25,7 @@ public class TableDataSource extends DataSource {
public TableDataSource(ConcretePhysicalTable physicalTable) {
super(DefaultDataSourceType.TABLE, Collections.singleton(physicalTable));

this.name = physicalTable.getFactTableName();
this.name = physicalTable.getAvailability().getDataSourceNames().stream().findFirst().get().asName();
}

public String getName() {
@@ -1,15 +1,12 @@
// Copyright 2016 Yahoo Inc.
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.table.resolver;

import static com.yahoo.bard.webservice.web.ErrorMessageFormat.NO_TABLE_FOR_NON_AGGREGATABLE;

import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.data.dimension.Dimension;
import com.yahoo.bard.webservice.data.metric.TemplateDruidQuery;
import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.util.StreamUtils;
import com.yahoo.bard.webservice.util.TableUtils;
import com.yahoo.bard.webservice.web.DataApiRequest;
import com.yahoo.bard.webservice.web.ErrorMessageFormat;

import org.slf4j.Logger;
@@ -29,23 +26,20 @@ public class AggregatableDimensionsMatcher implements PhysicalTableMatcher {

public static final ErrorMessageFormat MESSAGE_FORMAT = NO_TABLE_FOR_NON_AGGREGATABLE;

private final DataApiRequest request;
private final TemplateDruidQuery query;
private final QueryPlanningConstraint requestConstraints;

/**
* Constructor saves metrics, dimensions, coarsest time grain, and logical table name (for logging).
*
* @param request The request whose dimensions are being matched on
* @param query The query whose columns are being matched
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
*/
public AggregatableDimensionsMatcher(DataApiRequest request, TemplateDruidQuery query) {
this.request = request;
this.query = query;
public AggregatableDimensionsMatcher(QueryPlanningConstraint requestConstraints) {
this.requestConstraints = requestConstraints;
}

@Override
public boolean test(PhysicalTable table) {
Set<String> columnNames = TableUtils.getColumnNames(request, query.getInnermostQuery());
Set<String> columnNames = requestConstraints.getAllColumnNames();

// If table contains non-agg dimensions, query must contain all these non-agg dimensions to use this table.
return table.getDimensions().stream()
@@ -56,12 +50,12 @@ public boolean test(PhysicalTable table) {

@Override
public NoMatchFoundException noneFoundException() {
Set<String> aggDimensions = request.getDimensions().stream()
Set<String> aggDimensions = requestConstraints.getRequestDimensions().stream()
.filter(Dimension::isAggregatable)
.map(Dimension::getApiName)
.collect(Collectors.toSet());

Set<String> nonAggDimensions = request.getDimensions().stream()
Set<String> nonAggDimensions = requestConstraints.getRequestDimensions().stream()
.filter(StreamUtils.not(Dimension::isAggregatable))
.map(Dimension::getApiName)
.collect(Collectors.toSet());
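The comment in `test` above states the matching rule: a table carrying non-aggregatable dimensions is usable only if the request asks for all of them. Restated as a standalone subset check in plain Java sets, independent of Fili's types; the class name, method name, and sample dimension names are illustrative:

```java
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

final class AggregatabilityRuleSketch {

    /**
     * A table is usable only when every non-aggregatable dimension it carries
     * also appears among the column names the request/query actually needs.
     */
    static boolean tableIsUsable(Set<String> tableNonAggDimensions, Set<String> requestedColumnNames) {
        return requestedColumnNames.containsAll(tableNonAggDimensions);
    }

    public static void main(String[] args) {
        Set<String> requested = Stream.of("page", "country", "pageViews").collect(Collectors.toSet());
        System.out.println(tableIsUsable(Stream.of("page").collect(Collectors.toSet()), requested));       // true
        System.out.println(tableIsUsable(Stream.of("sessionId").collect(Collectors.toSet()), requested));  // false
    }
}
```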
@@ -1,14 +1,11 @@
// Copyright 2016 Yahoo Inc.
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.table.resolver;

import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.application.MetricRegistryFactory;
import com.yahoo.bard.webservice.data.dimension.Dimension;
import com.yahoo.bard.webservice.data.metric.LogicalMetric;
import com.yahoo.bard.webservice.data.metric.TemplateDruidQuery;
import com.yahoo.bard.webservice.druid.model.query.Granularity;
import com.yahoo.bard.webservice.util.TableUtils;
import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.web.DataApiRequest;
import com.yahoo.bard.webservice.web.ErrorMessageFormat;

@@ -23,7 +20,6 @@
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.BinaryOperator;
import java.util.stream.Collectors;

/**
* Abstract parent with business-rule-agnostic implementations of core methods.
@@ -45,43 +41,36 @@ public BasePhysicalTableResolver() {
/**
* Create a list of matchers based on a request and query.
*
* @param apiRequest The ApiRequest for the query
* @param query a partial query representation
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
*
* @return a list of matchers to be applied, in order
*/
public abstract List<PhysicalTableMatcher> getMatchers(DataApiRequest apiRequest, TemplateDruidQuery query);
public abstract List<PhysicalTableMatcher> getMatchers(QueryPlanningConstraint requestConstraints);

/**
* Create a binary operator which returns the 'better' of two physical tables.
*
* @param apiRequest The ApiRequest for the query
* @param query a partial query representation
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
*
* @return an operator that chooses the better of two physical tables
*/
public abstract BinaryOperator<PhysicalTable> getBetterTableOperator(
DataApiRequest apiRequest,
TemplateDruidQuery query
);
public abstract BinaryOperator<PhysicalTable> getBetterTableOperator(QueryPlanningConstraint requestConstraints);

/**
* Filter to a set of tables matching the rules of this resolver.
*
* @param candidateTables The physical tables being filtered
* @param apiRequest The request being filtered to
* @param query a partial query representation
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
*
* @return a set of physical tables which all match the criteria of a request and partial query
*
* @throws NoMatchFoundException if any of the filters reduce the filter set to empty
*/
public Set<PhysicalTable> filter(
Collection<PhysicalTable> candidateTables,
DataApiRequest apiRequest,
TemplateDruidQuery query
QueryPlanningConstraint requestConstraints
) throws NoMatchFoundException {
return filter(candidateTables, getMatchers(apiRequest, query));
return filter(candidateTables, getMatchers(requestConstraints));
}

/**
@@ -108,15 +97,12 @@ public Set<PhysicalTable> filter(
@Override
public PhysicalTable resolve(
Collection<PhysicalTable> candidateTables,
DataApiRequest apiRequest,
TemplateDruidQuery query
QueryPlanningConstraint requestConstraints
) throws NoMatchFoundException {

// Minimum grain from which the request can be aggregated
Granularity minimumTableTimeGrain = resolveAcceptingGrain.apply(apiRequest, query);
TemplateDruidQuery innerQuery = (TemplateDruidQuery) query.getInnermostQuery();
Set<String> columnNames = TableUtils.getDimensions(apiRequest, innerQuery)
.map(Dimension::getApiName)
.collect(Collectors.toSet());
Granularity minimumTableTimeGrain = requestConstraints.getMinimumTimeGran();
Set<String> columnNames = requestConstraints.getAllColumnNames();
LOG.trace(
"Resolving Table using TimeGrain: {}, dimension API names: {} and TableGroup: {}",
minimumTableTimeGrain,
@@ -125,41 +111,34 @@ public PhysicalTable resolve(
);

try {
Set<PhysicalTable> physicalTables = filter(candidateTables, apiRequest, query);
Set<PhysicalTable> physicalTables = filter(candidateTables, requestConstraints);

BinaryOperator<PhysicalTable> betterTable = getBetterTableOperator(apiRequest, query);
BinaryOperator<PhysicalTable> betterTable = getBetterTableOperator(requestConstraints);
PhysicalTable bestTable = physicalTables.stream().reduce(betterTable).get();

REGISTRY.meter("request.physical.table." + bestTable.getName() + "." + bestTable.getTimeGrain()).mark();
LOG.trace("Found best Table: {}", bestTable);
return bestTable;
} catch (NoMatchFoundException me) {
// Couldn't match a table: log the details and rethrow
logMatchException(apiRequest, minimumTableTimeGrain, innerQuery);
logMatchException(requestConstraints, minimumTableTimeGrain);
throw me;
}
}

/**
* Log out inability to find a matching table.
*
* @param apiRequest Request for which we're trying to find a table
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
* @param minimumTableTimeGrain Minimum grain that we needed to meet
* @param innerQuery Innermost query for the query we were trying to match
*/
public void logMatchException(
DataApiRequest apiRequest,
Granularity minimumTableTimeGrain,
TemplateDruidQuery innerQuery
QueryPlanningConstraint requestConstraints,
Granularity minimumTableTimeGrain
) {
// Get the dimensions and metrics as lists of names
Set<String> requestDimensionNames = TableUtils.getDimensions(apiRequest, innerQuery)
.map(Dimension::getApiName)
.collect(Collectors.toSet());

Set<String> requestMetricNames = apiRequest.getLogicalMetrics().stream()
.map(LogicalMetric::getName)
.collect(Collectors.toCollection(LinkedHashSet::new));
Set<String> requestDimensionNames = requestConstraints.getAllDimensionNames();
Set<String> requestMetricNames = requestConstraints.getLogicalMetricNames();

String msg = ErrorMessageFormat.NO_PHYSICAL_TABLE_MATCHED.logFormat(
requestDimensionNames,
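Stripped of Fili's types, `resolve` above is a filter-then-reduce pipeline: apply each matcher in turn, fail if any step empties the candidate set, then fold the survivors with a "better of two" operator. A generic, self-contained sketch of that pattern; the functional interfaces are stand-ins for `PhysicalTableMatcher` and the better-table `BinaryOperator`, and the exception type is illustrative:

```java
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.BinaryOperator;
import java.util.function.Predicate;
import java.util.stream.Collectors;

final class FilterThenReduceSketch {

    /** Apply matchers in order, failing fast when one eliminates every candidate, then pick the best survivor. */
    static <T> T resolve(Collection<T> candidates, List<Predicate<T>> matchers, BinaryOperator<T> better) {
        Set<T> survivors = new HashSet<>(candidates);
        for (Predicate<T> matcher : matchers) {
            survivors = survivors.stream().filter(matcher).collect(Collectors.toSet());
            if (survivors.isEmpty()) {
                // Analogous to PhysicalTableMatcher#noneFoundException in the diff above.
                throw new IllegalStateException("No candidate satisfied matcher: " + matcher);
            }
        }
        // Mirrors physicalTables.stream().reduce(betterTable).get() in resolve().
        return survivors.stream()
                .reduce(better)
                .orElseThrow(() -> new IllegalStateException("No candidates supplied"));
    }
}
```

Keeping the matchers and the reduction operator as separate, ordered pieces appears to be what lets subclasses vary policy through `getMatchers` and `getBetterTableOperator` while this base class owns the control flow.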