Skip to content

Commit

Permalink
Implement QueryPlanning and DataSource Constraint in to resolvers and…
Browse files Browse the repository at this point in the history
… matchers
  • Loading branch information
garyluoex committed Feb 23, 2017
1 parent e44a25f commit 1252271
Show file tree
Hide file tree
Showing 21 changed files with 405 additions and 360 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,18 @@ Current
-------

### Added:
- [QueryPlanningConstraint and DataSourceConstraint](https://github.com/yahoo/fili/pull/169)
* Added `QueryPlanningConstraint` to replace current interface of Matchers and Resolvers arguments during query planning
* Added DataSourceConstraint to allow implementation of PartitionedFactTable`'s availability in the near future

- [Detect unset userPrincipal in Preface log block](https://github.com/yahoo/fili/pull/154)
* Logs a warning if no userPrincipal is set on the request (ie. we don't know who the user is), and sets the
`user` field in the `Preface` log block to `NO_USER_PRINCIPAL`.

### Changed:
- [QueryPlanningConstraint and DataSourceConstraint](https://github.com/yahoo/fili/pull/169)
* `QueryPlanningConstraint` replaces current interface of Matchers and Resolvers `DataApiRequest` and `TemplateDruidQuery` arguments during query planning
* Modified `findMissingTimeGrainIntervals` method in `PartialDataHandler` to take a set of columns instead of `DataApiRequest` and `DruidAggregationQuery`

- [Add dimension fields to fullView table format](https://github.com/yahoo/fili/pull/155)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2016 Yahoo Inc.
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.data;

Expand Down Expand Up @@ -27,6 +27,7 @@
import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.table.TableGroup;
import com.yahoo.bard.webservice.table.TableIdentifier;
import com.yahoo.bard.webservice.table.resolver.QueryPlanningConstraint;
import com.yahoo.bard.webservice.table.resolver.NoMatchFoundException;
import com.yahoo.bard.webservice.table.resolver.PhysicalTableResolver;
import com.yahoo.bard.webservice.web.DataApiRequest;
Expand Down Expand Up @@ -115,7 +116,10 @@ public DruidAggregationQuery<?> buildQuery(
TableGroup group = logicalTable.getTableGroup();

// Resolve the table from the the group, the combined dimensions in request, and template time grain
PhysicalTable table = resolver.resolve(group.getPhysicalTables(), request, template);
PhysicalTable table = resolver.resolve(
group.getPhysicalTables(),
new QueryPlanningConstraint(request, template)
);

return druidTopNMetric != null ?
buildTopNQuery(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2016 Yahoo Inc.
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.data;

Expand Down Expand Up @@ -35,9 +35,9 @@ public class PartialDataHandler {
* Find the request time grain intervals for which partial data is present for a given combination of request
* metrics and dimensions (pulled from the API request and generated druid query).
*
* @param apiRequest api request made by the end user
* @param query used to fetch data from druid
* @param physicalTables the tables whose column availabilities are checked
* @param apiRequest api request made by the end user
* @param query used to fetch data from druid
* @param physicalTables the tables whose column availabilities are checked
*
* @return list of simplified intervals with incomplete data
*
Expand All @@ -57,8 +57,7 @@ public SimplifiedIntervalList findMissingRequestTimeGrainIntervals(
throw new IllegalArgumentException(message);
}
return findMissingTimeGrainIntervals(
apiRequest,
query,
TableUtils.getColumnNames(apiRequest, query),
physicalTables,
new SimplifiedIntervalList(apiRequest.getIntervals()),
apiRequest.getGranularity()
Expand All @@ -79,23 +78,21 @@ public SimplifiedIntervalList findMissingRequestTimeGrainIntervals(
* present for a given combination of request metrics and dimensions (pulled from the API request and generated
* druid query) at the specified granularity.
*
* @param apiRequest api request made by the end user
* @param query used to fetch data from druid
* @param physicalTables the tables whose column availabilities are checked
* @param requestedIntervals The intervals that may not be fully satisfied
* @param granularity The granularity at which to find missing intervals
* @param columnNames all the column names the request depends on
* @param physicalTables the tables whose column availabilities are checked
* @param requestedIntervals The intervals that may not be fully satisfied
* @param granularity the granularity at which to find missing intervals
*
* @return subintervals of the requested intervals with incomplete data
*/
public SimplifiedIntervalList findMissingTimeGrainIntervals(
DataApiRequest apiRequest,
DruidAggregationQuery<?> query,
Set<String> columnNames,
Set<PhysicalTable> physicalTables,
@NotNull SimplifiedIntervalList requestedIntervals,
Granularity granularity
) {
SimplifiedIntervalList availableIntervals = physicalTables.stream()
.map(table -> getAvailability(table, TableUtils.getColumnNames(apiRequest, query)))
.map(table -> getAvailability(table, columnNames))
.flatMap(SimplifiedIntervalList::stream)
.collect(SimplifiedIntervalList.getCollector());

Expand All @@ -104,6 +101,7 @@ public SimplifiedIntervalList findMissingTimeGrainIntervals(
requestedIntervals,
granularity
);

if (granularity instanceof AllGranularity && !missingIntervals.isEmpty()) {
missingIntervals = requestedIntervals;
}
Expand All @@ -115,8 +113,8 @@ public SimplifiedIntervalList findMissingTimeGrainIntervals(
* Given a table and a list of column names, get the intervals for those columns from the physical table then
* merge into a single availability list.
*
* @param physicalTable The fact source for the columns
* @param columnNames The names of the columns whose availability is being checked
* @param physicalTable the fact source for the columns
* @param columnNames the names of the columns whose availability is being checked
*
* @return the simplified available intervals
*/
Expand All @@ -130,10 +128,10 @@ public SimplifiedIntervalList getAvailability(PhysicalTable physicalTable, Set<S
* Take a list of column names, get the intervals for those columns from the physical table then merge into a
* single availability list by intersecting subintervals.
*
* @param columnNames The names of the columns in the physical table
* @param physicalTable The physical table the columns appear in
* @param columnNames the names of the columns in the physical table
* @param physicalTable the physical table the columns appear in
*
* @return The set of all intervals fully satisfied on the request columns for the physical table
* @return the set of all intervals fully satisfied on the request columns for the physical table
*/
public SimplifiedIntervalList getIntersectSubintervalsForColumns(
Collection<String> columnNames,
Expand All @@ -150,10 +148,10 @@ public SimplifiedIntervalList getIntersectSubintervalsForColumns(
* Take a list of column names, get the intervals for those columns from the physical table then merge into a
* single availability list by unioning subintervals.
*
* @param columnNames The names of the columns in the physical table
* @param physicalTable The physical table the columns appear in
* @param columnNames the names of the columns in the physical table
* @param physicalTable the physical table the columns appear in
*
* @return The set of all intervals partially satisfied on the request columns for the physical table
* @return the set of all intervals partially satisfied on the request columns for the physical table
*/
public SimplifiedIntervalList getUnionSubintervalsForColumns(
Collection<String> columnNames,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@

import com.yahoo.bard.webservice.data.time.ZonedTimeGrain;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.Map;
import java.util.Set;
Expand All @@ -19,9 +16,7 @@
*/
public class PhysicalTableSchema extends BaseSchema implements Schema {

private static final Logger LOG = LoggerFactory.getLogger(PhysicalTableSchema.class);

ZonedTimeGrain timeGrain;
private final ZonedTimeGrain timeGrain;
private final Map<String, String> logicalToPhysicalColumnNames;
private final Map<String, Set<String>> physicalToLogicalColumnNames;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
// Copyright 2016 Yahoo Inc.
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.table.resolver;

import static com.yahoo.bard.webservice.web.ErrorMessageFormat.NO_TABLE_FOR_NON_AGGREGATABLE;

import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.data.dimension.Dimension;
import com.yahoo.bard.webservice.data.metric.TemplateDruidQuery;
import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.util.StreamUtils;
import com.yahoo.bard.webservice.util.TableUtils;
import com.yahoo.bard.webservice.web.DataApiRequest;
import com.yahoo.bard.webservice.web.ErrorMessageFormat;

import org.slf4j.Logger;
Expand All @@ -29,23 +26,20 @@ public class AggregatableDimensionsMatcher implements PhysicalTableMatcher {

public static final ErrorMessageFormat MESSAGE_FORMAT = NO_TABLE_FOR_NON_AGGREGATABLE;

private final DataApiRequest request;
private final TemplateDruidQuery query;
private final QueryPlanningConstraint requestConstraints;

/**
* Constructor saves metrics, dimensions, coarsest time grain, and logical table name (for logging).
*
* @param request The request whose dimensions are being matched on
* @param query The query whose columns are being matched
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
*/
public AggregatableDimensionsMatcher(DataApiRequest request, TemplateDruidQuery query) {
this.request = request;
this.query = query;
public AggregatableDimensionsMatcher(QueryPlanningConstraint requestConstraints) {
this.requestConstraints = requestConstraints;
}

@Override
public boolean test(PhysicalTable table) {
Set<String> columnNames = TableUtils.getColumnNames(request, query.getInnermostQuery());
Set<String> columnNames = requestConstraints.getAllColumnNames();

// If table contains non-agg dimensions, query must contain all these non-agg dimensions to use this table.
return table.getDimensions().stream()
Expand All @@ -56,12 +50,12 @@ public boolean test(PhysicalTable table) {

@Override
public NoMatchFoundException noneFoundException() {
Set<String> aggDimensions = request.getDimensions().stream()
Set<String> aggDimensions = requestConstraints.getRequestDimensions().stream()
.filter(Dimension::isAggregatable)
.map(Dimension::getApiName)
.collect(Collectors.toSet());

Set<String> nonAggDimensions = request.getDimensions().stream()
Set<String> nonAggDimensions = requestConstraints.getRequestDimensions().stream()
.filter(StreamUtils.not(Dimension::isAggregatable))
.map(Dimension::getApiName)
.collect(Collectors.toSet());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
// Copyright 2016 Yahoo Inc.
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.table.resolver;

import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.application.MetricRegistryFactory;
import com.yahoo.bard.webservice.data.dimension.Dimension;
import com.yahoo.bard.webservice.data.metric.LogicalMetric;
import com.yahoo.bard.webservice.data.metric.TemplateDruidQuery;
import com.yahoo.bard.webservice.druid.model.query.Granularity;
import com.yahoo.bard.webservice.util.TableUtils;
import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.web.DataApiRequest;
import com.yahoo.bard.webservice.web.ErrorMessageFormat;

Expand All @@ -23,7 +20,6 @@
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.BinaryOperator;
import java.util.stream.Collectors;

/**
* Abstract parent to with business rule agnostic implementations of core methods.
Expand All @@ -45,56 +41,49 @@ public BasePhysicalTableResolver() {
/**
* Create a list of matchers based on a request and query.
*
* @param apiRequest The ApiRequest for the query
* @param query a partial query representation
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
*
* @return a list of matchers to be applied, in order
*/
public abstract List<PhysicalTableMatcher> getMatchers(DataApiRequest apiRequest, TemplateDruidQuery query);
public abstract List<PhysicalTableMatcher> getMatchers(QueryPlanningConstraint requestConstraints);

/**
* Create a binary operator which returns the 'better' of two physical table.
*
* @param apiRequest The ApiRequest for the query
* @param query a partial query representation
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
*
* @return a list of matchers to be applied, in order
*/
public abstract BinaryOperator<PhysicalTable> getBetterTableOperator(
DataApiRequest apiRequest,
TemplateDruidQuery query
);
public abstract BinaryOperator<PhysicalTable> getBetterTableOperator(QueryPlanningConstraint requestConstraints);

/**
* Filter to a set of tables matching the rules of this resolver.
*
* @param candidateTables The physical tables being filtered
* @param apiRequest The request being filtered to
* @param query a partial query representation
* @param candidateTables the physical tables being filtered
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
*
* @return a set of physical tables which all match the criteria of a request and partial query
*
* @throws NoMatchFoundException if any of the filters reduce the filter set to empty
*/
public Set<PhysicalTable> filter(
protected Set<PhysicalTable> filter(
Collection<PhysicalTable> candidateTables,
DataApiRequest apiRequest,
TemplateDruidQuery query
QueryPlanningConstraint requestConstraints
) throws NoMatchFoundException {
return filter(candidateTables, getMatchers(apiRequest, query));
return filter(candidateTables, getMatchers(requestConstraints));
}

/**
* Filter a list of tables through each matcher using a list of matchers sequentially.
*
* @param candidateTables The collection of tables to be filtered
* @param matchers The matchers to apply, in order
* @param candidateTables the collection of tables to be filtered
* @param matchers the matchers to apply, in order
*
* @return A set of tables which satisfy all matchers
* @return a set of tables which satisfy all matchers
*
* @throws NoMatchFoundException if no tables match the filter
*/
public Set<PhysicalTable> filter(
protected Set<PhysicalTable> filter(
Collection<PhysicalTable> candidateTables,
List<PhysicalTableMatcher> matchers
) throws NoMatchFoundException {
Expand All @@ -108,15 +97,12 @@ public Set<PhysicalTable> filter(
@Override
public PhysicalTable resolve(
Collection<PhysicalTable> candidateTables,
DataApiRequest apiRequest,
TemplateDruidQuery query
QueryPlanningConstraint requestConstraints
) throws NoMatchFoundException {

// Minimum grain at which the request can be aggregated from
Granularity minimumTableTimeGrain = resolveAcceptingGrain.apply(apiRequest, query);
TemplateDruidQuery innerQuery = (TemplateDruidQuery) query.getInnermostQuery();
Set<String> columnNames = TableUtils.getDimensions(apiRequest, innerQuery)
.map(Dimension::getApiName)
.collect(Collectors.toSet());
Granularity minimumTableTimeGrain = requestConstraints.getMinimumTimeGran();
Set<String> columnNames = requestConstraints.getAllColumnNames();
LOG.trace(
"Resolving Table using TimeGrain: {}, dimension API names: {} and TableGroup: {}",
minimumTableTimeGrain,
Expand All @@ -125,41 +111,34 @@ public PhysicalTable resolve(
);

try {
Set<PhysicalTable> physicalTables = filter(candidateTables, apiRequest, query);
Set<PhysicalTable> physicalTables = filter(candidateTables, requestConstraints);

BinaryOperator<PhysicalTable> betterTable = getBetterTableOperator(apiRequest, query);
BinaryOperator<PhysicalTable> betterTable = getBetterTableOperator(requestConstraints);
PhysicalTable bestTable = physicalTables.stream().reduce(betterTable).get();

REGISTRY.meter("request.physical.table." + bestTable.getName() + "." + bestTable.getTimeGrain()).mark();
LOG.trace("Found best Table: {}", bestTable);
return bestTable;
} catch (NoMatchFoundException me) {
// Blow up if we couldn't match a table, log and return if we can
logMatchException(apiRequest, minimumTableTimeGrain, innerQuery);
logMatchException(requestConstraints, minimumTableTimeGrain);
throw me;
}
}

/**
* Log out inability to find a matching table.
*
* @param apiRequest Request for which we're trying to find a table
* @param minimumTableTimeGrain Minimum grain that we needed to meet
* @param innerQuery Innermost query for the query we were trying to match
* @param requestConstraints contains the request constraints extracted from DataApiRequest and TemplateDruidQuery
* @param minimumTableTimeGrain minimum grain that we needed to meet
*/
public void logMatchException(
DataApiRequest apiRequest,
Granularity minimumTableTimeGrain,
TemplateDruidQuery innerQuery
QueryPlanningConstraint requestConstraints,
Granularity minimumTableTimeGrain
) {
// Get the dimensions and metrics as lists of names
Set<String> requestDimensionNames = TableUtils.getDimensions(apiRequest, innerQuery)
.map(Dimension::getApiName)
.collect(Collectors.toSet());

Set<String> requestMetricNames = apiRequest.getLogicalMetrics().stream()
.map(LogicalMetric::getName)
.collect(Collectors.toCollection(LinkedHashSet::new));
Set<String> requestDimensionNames = requestConstraints.getAllDimensionNames();
Set<String> requestMetricNames = requestConstraints.getLogicalMetricNames();

String msg = ErrorMessageFormat.NO_PHYSICAL_TABLE_MATCHED.logFormat(
requestDimensionNames,
Expand Down
Loading

0 comments on commit 1252271

Please sign in to comment.