Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(#392): Cannot fetch bodies for multiple entities from multiple collections #394

Merged
merged 3 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
import io.evitadb.api.requestResponse.data.structure.EntityReference;
import io.evitadb.api.requestResponse.extraResult.QueryTelemetry.QueryPhase;
import io.evitadb.core.query.algebra.Formula;
import io.evitadb.core.query.algebra.prefetch.SelectionFormula.PrefetchFormulaVisitor;
import io.evitadb.core.query.algebra.prefetch.PrefetchFormulaVisitor;
import io.evitadb.core.query.extraResult.ExtraResultProducer;
import io.evitadb.core.query.sort.ConditionalSorter;
import io.evitadb.core.query.sort.Sorter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import io.evitadb.api.query.require.EntityContentRequire;
import io.evitadb.core.query.algebra.Formula;
import io.evitadb.core.query.algebra.base.EmptyFormula;
import io.evitadb.core.query.algebra.prefetch.SelectionFormula.PrefetchFormulaVisitor;
import io.evitadb.core.query.algebra.prefetch.PrefetchFormulaVisitor;
import io.evitadb.core.query.extraResult.ExtraResultProducer;
import io.evitadb.core.query.indexSelection.TargetIndexes;
import io.evitadb.core.query.sort.NoSorter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
import io.evitadb.core.query.algebra.Formula;
import io.evitadb.core.query.algebra.base.NotFormula;
import io.evitadb.core.query.algebra.debug.CacheableVariantsGeneratingVisitor;
import io.evitadb.core.query.algebra.prefetch.PrefetchFormulaVisitor;
import io.evitadb.core.query.algebra.prefetch.SelectionFormula;
import io.evitadb.core.query.algebra.prefetch.SelectionFormula.PrefetchFormulaVisitor;
import io.evitadb.core.query.extraResult.CacheDisabledExtraResultAccessor;
import io.evitadb.core.query.extraResult.CacheTranslatingExtraResultAccessor;
import io.evitadb.core.query.extraResult.CacheableExtraResultProducer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
package io.evitadb.core.query.algebra.attribute;

import io.evitadb.api.query.require.AttributeHistogram;
import io.evitadb.api.query.require.EntityRequire;
import io.evitadb.api.requestResponse.data.AttributesContract.AttributeKey;
import io.evitadb.api.requestResponse.data.AttributesContract.AttributeValue;
import io.evitadb.core.query.algebra.AbstractFormula;
import io.evitadb.core.query.algebra.Formula;
import io.evitadb.core.query.algebra.prefetch.RequirementsDefiner;
import io.evitadb.exception.EvitaInternalError;
import io.evitadb.index.bitmap.Bitmap;
import io.evitadb.index.bitmap.EmptyBitmap;
Expand All @@ -40,6 +42,9 @@
import java.math.BigDecimal;
import java.util.function.Predicate;

import static io.evitadb.api.query.QueryConstraints.attributeContent;
import static io.evitadb.api.query.QueryConstraints.entityFetch;

/**
* Attribute formula envelopes {@link Formula} that compute {@link io.evitadb.api.query.FilterConstraint} targeted
* at attribute values. The formula simply delegates its {@link #compute()} method to the single delegating formula.
Expand All @@ -49,7 +54,7 @@
*
* @author Jan Novotný (novotny@fg.cz), FG Forrest a.s. (c) 2022
*/
public class AttributeFormula extends AbstractFormula {
public class AttributeFormula extends AbstractFormula implements RequirementsDefiner {
private static final long CLASS_ID = 4944486926494447594L;
public static final String ERROR_SINGLE_FORMULA_EXPECTED = "Exactly one inner formula is expected!";
/**
Expand Down Expand Up @@ -139,4 +144,10 @@ protected long getClassId() {
return CLASS_ID;
}

@Nullable
@Override
public EntityRequire getEntityRequire() {
return entityFetch(attributeContent(attributeKey.attributeName()));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import io.evitadb.core.query.algebra.AbstractFormula;
import io.evitadb.core.query.algebra.Formula;
import io.evitadb.core.query.algebra.base.ConstantFormula;
import io.evitadb.core.query.algebra.prefetch.SelectionFormula.PrefetchFormulaVisitor;
import io.evitadb.index.bitmap.Bitmap;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
/*
*
* _ _ ____ ____
* _____ _(_) |_ __ _| _ \| __ )
* / _ \ \ / / | __/ _` | | | | _ \
* | __/\ V /| | || (_| | |_| | |_) |
* \___| \_/ |_|\__\__,_|____/|____/
*
* Copyright (c) 2024
*
* Licensed under the Business Source License, Version 1.1 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://github.com/FgForrest/evitaDB/blob/main/LICENSE
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.evitadb.core.query.algebra.prefetch;

import io.evitadb.api.query.require.EntityContentRequire;
import io.evitadb.api.query.require.EntityFetch;
import io.evitadb.api.query.require.EntityFetchRequire;
import io.evitadb.api.query.require.EntityRequire;
import io.evitadb.api.requestResponse.data.EntityReferenceContract;
import io.evitadb.core.query.QueryContext;
import io.evitadb.core.query.algebra.Formula;
import io.evitadb.core.query.algebra.FormulaPostProcessor;
import io.evitadb.core.query.algebra.FormulaVisitor;
import io.evitadb.core.query.algebra.base.ConstantFormula;
import io.evitadb.core.query.filter.FilterByVisitor;
import io.evitadb.function.ToLongDoubleIntBiFunction;
import io.evitadb.index.bitmap.BaseBitmap;
import io.evitadb.index.bitmap.Bitmap;
import io.evitadb.index.bitmap.EmptyBitmap;
import io.evitadb.index.bitmap.RoaringBitmapBackedBitmap;
import org.roaringbitmap.RoaringBitmap;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Supplier;

/**
* This formula visitor identifies the entity ids that are accessible within conjunction scope from the formula root
* and detects possible {@link SelectionFormula} in the tree. It prepares:
*
* - {@link #getRequirements()} set that to fetch entity with
* - {@link #getConjunctiveEntities()} entity primary keys to fetch
* - {@link #getExpectedComputationalCosts()} costs that is estimated to be paid with regular execution
*/
public class PrefetchFormulaVisitor implements FormulaVisitor, FormulaPostProcessor {
/**
* Threshold where we avoid prefetching entities whatsoever.
*/
private static final int BITMAP_SIZE_THRESHOLD = 1000;
/**
* Contains set of requirements collected from all {@link SelectionFormula} in the tree.
*/
protected final Map<Class<? extends EntityContentRequire>, EntityContentRequire> requirements = new HashMap<>();
/**
* Contains all bitmaps of entity ids found in conjunctive scope of the formula.
*/
@Nonnull private final List<Bitmap> conjunctiveEntityIds = new LinkedList<>();
/**
* Contains set of entity primary keys (masked by {@link QueryContext#translateEntityReference(EntityReferenceContract...)}
* that needs to be prefetched.
*/
@Nonnull private final Bitmap entityReferences = new BaseBitmap();
/**
* Flag that signalizes {@link #visit(Formula)} happens in conjunctive scope.
*/
protected boolean conjunctiveScope = true;
/**
* Result of {@link FormulaPostProcessor} interface - basically root of the formula. This implementation doesn't
* change the input formula tree - just analyzes it.
*/
protected Formula outputFormula;
/**
* Contains sum of all collected bitmap cardinalities. If sum is greater than {@link #BITMAP_SIZE_THRESHOLD}
* the prefetch will automatically signalize it has no sense.
*/
private int estimatedBitmapCardinality = -1;
/**
* Contains aggregated costs that is estimated to be paid with regular execution of the {@link SelectionFormula}.
*/
private long expectedComputationalCosts = 0L;
/**
* Default formula for computing the prefetch costs. Can be overridden in tests so that we can make sure multiple
* paths of the calculation are tested (i.e. with or without prefetch).
*/
private static ToLongDoubleIntBiFunction PREFETCH_COST_ESTIMATOR =
(prefetchedEntityCount, requirementCount) -> prefetchedEntityCount * requirementCount * 148L;

/**
* Method allows to run given {@link Runnable} with custom prefetch cost estimator.
*/
public static <T> T doWithCustomPrefetchCostEstimator(@Nonnull Supplier<T> supplier, @Nonnull ToLongDoubleIntBiFunction costEstimator) {
final ToLongDoubleIntBiFunction old = PREFETCH_COST_ESTIMATOR;
try {
PREFETCH_COST_ESTIMATOR = costEstimator;
return supplier.get();
} finally {
PREFETCH_COST_ESTIMATOR = old;
}
}

/**
* We've performed benchmark of reading data from disk - using Linux file cache the reading performance was:
*
* Benchmark Mode Cnt Score Error Units
* SenesiThroughputBenchmark.memTableRead thrpt 140496.829 ops/s
*
* For two storage parts - this means 280992 reads / sec. When the linux cache would be empty it would require I/O
* which may be 40x times slower (source: https://www.quora.com/Is-the-speed-of-SSD-and-RAM-the-same) for 4kB payload
* it means that the lowest expectations are 6782 reads / sec.
*
* Recomputed on 1. mil operations ({@link io.evitadb.spike.FormulaCostMeasurement}) it's cost of 148.
*/
private static long estimatePrefetchCost(int prefetchedEntityCount, @Nonnull EntityFetchRequire requirements) {
return PREFETCH_COST_ESTIMATOR.apply(prefetchedEntityCount, requirements.getRequirements().length);
}

/**
* We don't alter the input formula - just analyze it.
*/
@Nonnull
@Override
public Formula getPostProcessedFormula() {
final Formula result = outputFormula;
this.outputFormula = null;
return Objects.requireNonNull(result, "The visit method was not executed prior to calling `getPostProcessedFormula`!");
}

/**
* Method allows to add a requirement that will be used by {@link QueryContext#prefetchEntities(Bitmap, EntityFetchRequire)}
* to fetch wide enough scope of the entity so that all filtering/sorting logic would have all data present
* for its evaluation.
*/
public void addRequirement(@Nonnull EntityContentRequire... requirement) {
for (EntityContentRequire theRequirement : requirement) {
requirements.merge(
theRequirement.getClass(), theRequirement,
EntityContentRequire::combineWith
);
}
}

/**
* Method will prefetch the entities identified in {@link PrefetchFormulaVisitor} but only in case the prefetch
* is possible and would "pay off". In case the possible prefetching would be more costly than executing the standard
* filtering logic, the prefetch is not executed.
*/
@Nullable
public Runnable createPrefetchLambdaIfNeededOrWorthwhile(@Nonnull QueryContext queryContext) {
EntityFetchRequire requirements = null;
Bitmap entitiesToPrefetch = null;
// are we forced to prefetch entities from catalog index?
if (!entityReferences.isEmpty()) {
requirements = getRequirements();
entitiesToPrefetch = entityReferences;
}
// do we know entity ids to prefetch?
if (isPrefetchPossible()) {
final Bitmap conjunctiveEntities = getConjunctiveEntities();
requirements = requirements == null ? getRequirements() : requirements;
// does the prefetch pay off?
if (getExpectedComputationalCosts() > estimatePrefetchCost(conjunctiveEntities.size(), requirements)) {
if (entitiesToPrefetch == null) {
entitiesToPrefetch = conjunctiveEntities;
} else {
final RoaringBitmap roaringBitmapA = RoaringBitmapBackedBitmap.getRoaringBitmap(entitiesToPrefetch);
final RoaringBitmap roaringBitmapB = RoaringBitmapBackedBitmap.getRoaringBitmap(conjunctiveEntities);
entitiesToPrefetch = new BaseBitmap(
RoaringBitmap.or(roaringBitmapA, roaringBitmapB)
);
}
}
}

if (entitiesToPrefetch != null && requirements != null) {
final Bitmap finalEntitiesToPrefetch = entitiesToPrefetch;
final EntityFetchRequire finalRequirements = requirements;
return () -> queryContext.prefetchEntities(
finalEntitiesToPrefetch,
finalRequirements
);
} else {
return null;
}
}

@Override
public void visit(@Nonnull Formula formula) {
if (outputFormula == null) {
this.outputFormula = formula;
}
if (formula instanceof final SelectionFormula selectionFormula) {
expectedComputationalCosts += selectionFormula.getDelegate().getEstimatedCost();
}
if (formula instanceof final RequirementsDefiner requirementsDefiner) {
final EntityRequire entityRequire = requirementsDefiner.getEntityRequire();
final EntityContentRequire[] requirements = entityRequire == null ? new EntityContentRequire[0] : entityRequire.getRequirements();
for (EntityContentRequire requirement : requirements) {
addRequirement(requirement);
}
}

if (this.conjunctiveScope && formula instanceof ConstantFormula constantFormula) {
final Bitmap bitmap = constantFormula.getDelegate();
this.conjunctiveEntityIds.add(bitmap);
final int bitmapSize = bitmap.size();
estimatedBitmapCardinality = estimatedBitmapCardinality == -1 || bitmapSize < estimatedBitmapCardinality ?
bitmapSize : estimatedBitmapCardinality;
} else if (formula instanceof final MultipleEntityFormula multipleEntityFormula) {
entityReferences.addAll(multipleEntityFormula.getDirectEntityReferences());
}

traverse(formula);
}

/**
* Returns set of requirements to fetch entities with.
*/
protected EntityFetchRequire getRequirements() {
return new EntityFetch(
requirements.values().toArray(new EntityContentRequire[0])
);
}

/**
* Traverses the formula into its children. Enable recursive traversal through formula tree.
*/
protected void traverse(@Nonnull Formula formula) {
final boolean formerConjunctiveScope = this.conjunctiveScope;
try {
if (!FilterByVisitor.isConjunctiveFormula(formula.getClass())) {
this.conjunctiveScope = false;
}
for (Formula innerFormula : formula.getInnerFormulas()) {
innerFormula.accept(this);
}
} finally {
this.conjunctiveScope = formerConjunctiveScope;
}
}

/**
* Returns entity primary keys to fetch.
*/
private Bitmap getConjunctiveEntities() {
return estimatedBitmapCardinality <= BITMAP_SIZE_THRESHOLD ?
conjunctiveEntityIds.stream()
.reduce((bitmapA, bitmapB) -> {
final RoaringBitmap roaringBitmapA = RoaringBitmapBackedBitmap.getRoaringBitmap(bitmapA);
final RoaringBitmap roaringBitmapB = RoaringBitmapBackedBitmap.getRoaringBitmap(bitmapB);
return new BaseBitmap(
RoaringBitmap.and(roaringBitmapA, roaringBitmapB)
);
})
.orElse(EmptyBitmap.INSTANCE) : EmptyBitmap.INSTANCE;
}

/**
* Returns true if there is any entity id known in conjunction scope and at least single {@link SelectionFormula}
* is present in the formula tree.
*/
private boolean isPrefetchPossible() {
return !conjunctiveEntityIds.isEmpty() && !requirements.isEmpty() && estimatedBitmapCardinality <= BITMAP_SIZE_THRESHOLD;
}

/**
* Method returns the expected computational cost in case prefetch would not be executed. This number plays
* a key role in evaluating whether to execute optional prefetch or not. Only if our estimated price for
* prefetching would be less than result of this method, the prefetch would be executed.
*/
private long getExpectedComputationalCosts() {
return expectedComputationalCosts;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
package io.evitadb.core.query.algebra.prefetch;

import io.evitadb.api.query.require.EntityContentRequire;
import io.evitadb.api.query.require.EntityFetchRequire;
import io.evitadb.api.query.require.EntityRequire;
import io.evitadb.api.requestResponse.data.SealedEntity;
import io.evitadb.core.query.QueryContext;
Expand All @@ -34,7 +35,7 @@
/**
* Implementations of this interface signalize that the entities needs to be prefetched with particular {@link StoragePart}
* loaded in order they can operate correctly. The storage parts loading is triggered by placing respective
* {@link CombinableEntityContentRequire} in {@link QueryContext#fetchEntities(int[], EntityContentRequire...) fetch method}.
* {@link EntityContentRequire} in {@link QueryContext#fetchEntities(String, int[], EntityFetchRequire)} fetch method}.
*
* @author Jan Novotný (novotny@fg.cz), FG Forrest a.s. (c) 2022
*/
Expand Down
Loading
Loading