diff --git a/compaction.go b/compaction.go index 868f9ef825..94ec48717a 100644 --- a/compaction.go +++ b/compaction.go @@ -1244,9 +1244,14 @@ func (d *DB) runIngestFlush(c *compaction) (*manifest.VersionEdit, error) { ingestFlushable.exciseSpan.Contains(d.cmp, file.FileMetadata.Largest) { level = 6 } else { - var err error - level, fileToSplit, err = ingestTargetLevel(ctx, overlapChecker, - baseLevel, d.mu.compact.inProgress, file.FileMetadata, suggestSplit) + // TODO(radu): this can perform I/O; we should not do this while holding DB.mu. + lsmOverlap, err := overlapChecker.DetermineLSMOverlap(ctx, file.UserKeyBounds()) + if err != nil { + return nil, err + } + level, fileToSplit, err = ingestTargetLevel( + ctx, d.cmp, lsmOverlap, baseLevel, d.mu.compact.inProgress, file.FileMetadata, suggestSplit, + ) if err != nil { return nil, err } diff --git a/ingest.go b/ingest.go index 49d24e87c8..c2df83017d 100644 --- a/ingest.go +++ b/ingest.go @@ -831,7 +831,8 @@ func ingestUpdateSeqNum( // is returned as the splitFile. func ingestTargetLevel( ctx context.Context, - overlapChecker *overlapChecker, + cmp base.Compare, + lsmOverlap lsmOverlap, baseLevel int, compactions map[*compaction]struct{}, meta *fileMetadata, @@ -903,46 +904,32 @@ func ingestTargetLevel( // existing point that falls within the ingested table bounds as being "data // overlap". - // This assertion implicitly checks that we have the current version of - // the metadata. - if overlapChecker.v.L0Sublevels == nil { - return 0, nil, base.AssertionFailedf("could not read L0 sublevels") - } - bounds := meta.UserKeyBounds() - - // Check for overlap over the keys of L0. - if overlap, err := overlapChecker.DetermineAnyDataOverlapInLevel(ctx, bounds, 0); err != nil { - return 0, nil, err - } else if overlap { + if lsmOverlap[0].result == dataOverlap { return 0, nil, nil } - + targetLevel = 0 + splitFile = nil for level := baseLevel; level < numLevels; level++ { - dataOverlap, err := overlapChecker.DetermineAnyDataOverlapInLevel(ctx, bounds, level) - if err != nil { - return 0, nil, err - } else if dataOverlap { + var candidateSplitFile *fileMetadata + switch lsmOverlap[level].result { + case dataOverlap: + // We cannot ingest into or under this level; return the best target level + // so far. return targetLevel, splitFile, nil - } - // Check boundary overlap. - var candidateSplitFile *fileMetadata - boundaryOverlaps := overlapChecker.v.Overlaps(level, bounds) - if !boundaryOverlaps.Empty() { - // We are already guaranteed to not have any data overlaps with files - // in boundaryOverlaps, otherwise we'd have returned in the above if - // statements. Use this, plus boundaryOverlaps.Len() == 1 to detect for - // the case where we can slot this file into the current level despite - // a boundary overlap, by splitting one existing file into two virtual - // sstables. - if suggestSplit && boundaryOverlaps.Len() == 1 { - iter := boundaryOverlaps.Iter() - candidateSplitFile = iter.First() - } else { - // We either don't want to suggest ingest-time splits (i.e. - // !suggestSplit), or we boundary-overlapped with more than one file. + case noDataOverlap: + if !suggestSplit || lsmOverlap[level].splitFile == nil { + // We can ingest under this level, but not into this level. continue } + // We can ingest into this level if we split this file. + candidateSplitFile = lsmOverlap[level].splitFile + + case noBoundaryOverlap: + // We can ingest into this level. + + default: + return 0, nil, base.AssertionFailedf("unexpected lsmOverlap result: %v", lsmOverlap[level].result) } // Check boundary overlap with any ongoing compactions. We consider an @@ -964,8 +951,8 @@ func ingestTargetLevel( if c.outputLevel == nil || level != c.outputLevel.level { continue } - if overlapChecker.comparer.Compare(meta.Smallest.UserKey, c.largest.UserKey) <= 0 && - overlapChecker.comparer.Compare(meta.Largest.UserKey, c.smallest.UserKey) >= 0 { + if cmp(meta.Smallest.UserKey, c.largest.UserKey) <= 0 && + cmp(meta.Largest.UserKey, c.smallest.UserKey) >= 0 { overlaps = true break } @@ -2202,14 +2189,20 @@ func (d *DB) ingestApply( f.Level = 6 } } else { - // TODO(bilal): ingestTargetLevel does disk IO (reading files for data - // overlap) even though we're holding onto d.mu. Consider unlocking - // d.mu while we do this. We already hold versions.logLock so we should - // not see any version applications while we're at this. The one - // complication here would be pulling out the mu.compact.inProgress - // check from ingestTargetLevel, as that requires d.mu to be held. - f.Level, splitFile, err = ingestTargetLevel(ctx, overlapChecker, - baseLevel, d.mu.compact.inProgress, m, shouldIngestSplit) + // We check overlap against the LSM without holding DB.mu. Note that we + // are still holding the log lock, so the version cannot change. + // TODO(radu): perform this check optimistically outside of the log lock. + var overlap lsmOverlap + overlap, err = func() (lsmOverlap, error) { + d.mu.Unlock() + defer d.mu.Lock() + return overlapChecker.DetermineLSMOverlap(ctx, m.UserKeyBounds()) + }() + if err == nil { + f.Level, splitFile, err = ingestTargetLevel( + ctx, d.cmp, overlap, baseLevel, d.mu.compact.inProgress, m, shouldIngestSplit, + ) + } } if splitFile != nil { diff --git a/ingest_test.go b/ingest_test.go index edd3e51f3e..b63937402e 100644 --- a/ingest_test.go +++ b/ingest_test.go @@ -2289,8 +2289,12 @@ func TestIngestTargetLevel(t *testing.T) { }, v: d.mu.versions.currentVersion(), } - level, overlapFile, err := ingestTargetLevel(context.Background(), overlapChecker, - 1, d.mu.compact.inProgress, meta, suggestSplit) + lsmOverlap, err := overlapChecker.DetermineLSMOverlap(context.Background(), meta.UserKeyBounds()) + if err != nil { + return err.Error() + } + level, overlapFile, err := ingestTargetLevel( + context.Background(), d.cmp, lsmOverlap, 1, d.mu.compact.inProgress, meta, suggestSplit) if err != nil { return err.Error() } diff --git a/overlap.go b/overlap.go index 7c8cf6ba2c..561ea66094 100644 --- a/overlap.go +++ b/overlap.go @@ -28,6 +28,83 @@ type overlapChecker struct { keyspanLevelIter keyspanimpl.LevelIter } +// levelOverlapResult indicates the result of checking data overlap between a +// key range and a level. We check two types of overlap: +// +// - file boundary overlap: whether the key range overlaps any of the level's +// user key boundaries; +// +// - data overlap: whether the key range overlaps any keys or ranges in the +// level. Data overlap implies file boundary overlap. +type levelOverlapResult uint8 + +const ( + // The key range of interest doesn't overlap any tables on the level. + noBoundaryOverlap levelOverlapResult = iota + 1 + // The key range of interest overlaps some tables on the level in terms of + // boundary, but the tables contain no data within that range. + noDataOverlap + // At least a key or range in the level overlaps with the key range of + // interest. Note that the data overlap check is best-effort and there could + // be false positives. + dataOverlap +) + +// levelOverlap is the result of checking overlap against an LSM level. +type levelOverlap struct { + result levelOverlapResult + // splitFile can be set only when result is noDataOverlap. If it is set, this + // file can be split to free up the range of interest. + splitFile *fileMetadata +} + +// lsmOverlap stores the result of checking for data overlap for the LSM levels, +// starting from the top (L0). The overlap checks are only populated up to the +// first level where we find data overlap. +// +// This is akin to a tetris game where a horizontal bar is falling and we want +// to determine where it lands. +type lsmOverlap [numLevels]levelOverlap + +// DetermineLSMOverlap calculates the lsmOverlap for the given bounds. +func (c *overlapChecker) DetermineLSMOverlap( + ctx context.Context, bounds base.UserKeyBounds, +) (lsmOverlap, error) { + var result lsmOverlap + for level := 0; level < numLevels; level++ { + var err error + result[level], err = c.DetermineLevelOverlap(ctx, bounds, level) + if err != nil || result[level].result == dataOverlap { + return result, err + } + } + return result, nil +} + +func (c *overlapChecker) DetermineLevelOverlap( + ctx context.Context, bounds base.UserKeyBounds, level int, +) (levelOverlap, error) { + // First, check boundary overlap. + boundaryOverlaps := c.v.Overlaps(level, bounds) + if boundaryOverlaps.Empty() { + return levelOverlap{result: noBoundaryOverlap}, nil + } + + overlap, err := c.DetermineAnyDataOverlapInLevel(ctx, bounds, level) + if err != nil || overlap { + return levelOverlap{result: dataOverlap}, err + } + var splitFile *fileMetadata + if boundaryOverlaps.Len() == 1 { + iter := boundaryOverlaps.Iter() + splitFile = iter.First() + } + return levelOverlap{ + result: noDataOverlap, + splitFile: splitFile, + }, nil +} + // DetermineAnyDataOverlapInLevel checks whether any keys within the provided // bounds and level exist within the checker's associated LSM version. This // function checks for actual keys within the bounds, performing I/O if @@ -52,6 +129,9 @@ func (c *overlapChecker) DetermineAnyDataOverlapInLevel( // NB: sublevel 0 contains the newest keys, whereas sublevel n contains the // oldest keys. if level == 0 { + if c.v.L0Sublevels == nil { + return true, base.AssertionFailedf("could not read L0 sublevels") + } for subLevel := 0; subLevel < len(c.v.L0SublevelFiles); subLevel++ { manifestIter := c.v.L0Sublevels.Levels[subLevel].Iter() pointOverlap, err := c.determinePointKeyOverlapInLevel(