From ce955badd0265774b1194b5fd12fa012bd7fabd2 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Mon, 27 Nov 2023 00:15:05 -0800 Subject: [PATCH 01/27] HBASE-25972 Dual File Compactor --- .../org/apache/hadoop/hbase/mob/MobFile.java | 4 +- .../regionserver/DefaultStoreEngine.java | 6 +- .../regionserver/DualFileStoreEngine.java | 127 +++++++++++ .../hbase/regionserver/DualFileWriter.java | 100 +++++++++ .../hadoop/hbase/regionserver/HStore.java | 23 +- .../hadoop/hbase/regionserver/HStoreFile.java | 19 ++ .../hbase/regionserver/StoreFileScanner.java | 17 +- .../hbase/regionserver/StoreScanner.java | 6 +- .../compactions/DualFileCompactor.java | 75 +++++++ .../hbase/regionserver/TestCompaction.java | 1 + .../regionserver/TestFSErrorsExposed.java | 2 +- .../hadoop/hbase/regionserver/TestHStore.java | 4 +- .../regionserver/TestReversibleScanners.java | 7 +- .../compactions/TestCompactor.java | 12 ++ .../compactions/TestDualFileCompactor.java | 199 ++++++++++++++++++ 15 files changed, 575 insertions(+), 27 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java index 3293208771ac..4b2e01315e31 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java @@ -55,7 +55,7 @@ public StoreFileScanner getScanner() throws IOException { List sfs = new ArrayList<>(); sfs.add(sf); List sfScanners = StoreFileScanner.getScannersForStoreFiles(sfs, false, true, - false, false, sf.getMaxMemStoreTS()); + false, false, sf.getMaxMemStoreTS(), false); return sfScanners.get(0); } @@ -82,7 +82,7 @@ public MobCell readCell(Cell search, boolean cacheMobBlocks, long readPt) throws boolean succ = false; try { List sfScanners = StoreFileScanner.getScannersForStoreFiles( - Collections.singletonList(sf), cacheMobBlocks, true, false, false, readPt); + Collections.singletonList(sf), cacheMobBlocks, true, false, false, readPt, false); if (!sfScanners.isEmpty()) { scanner = sfScanners.get(0); if (scanner.seek(search)) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 0c9fb9adcc2c..d29dce6a2523 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -47,11 +47,11 @@ public class DefaultStoreEngine extends StoreEngine DEFAULT_STORE_FLUSHER_CLASS = + public static final Class DEFAULT_STORE_FLUSHER_CLASS = DefaultStoreFlusher.class; - private static final Class DEFAULT_COMPACTOR_CLASS = + public static final Class DEFAULT_COMPACTOR_CLASS = DefaultCompactor.class; - private static final Class DEFAULT_COMPACTION_POLICY_CLASS = + public static final Class DEFAULT_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; @Override diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java new file mode 100644 index 000000000000..b196d42cca4d --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; +import org.apache.hadoop.hbase.regionserver.compactions.DualFileCompactor; +import org.apache.hadoop.hbase.regionserver.compactions.ExploringCompactionPolicy; +import org.apache.hadoop.hbase.regionserver.compactions.RatioBasedCompactionPolicy; +import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; +import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.hbase.util.ReflectionUtils; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * HBASE-25972 This store engine allows us to store data in two files, + * one for the latest put cells and the other for the rest of the cells (i.e., + * older put cells and delete markers). 
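+ * <p>
+ * For illustration only (a sketch, not part of this patch): the engine is selected per column
+ * family through the standard {@code hbase.hstore.engine.class} setting
+ * ({@link StoreEngine#STORE_ENGINE_CLASS_KEY}), so a family could opt in roughly as follows;
+ * the family name {@code "cf"} is just an example.
+ * <pre>
+ * ColumnFamilyDescriptor cf = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("cf"))
+ *   .setConfiguration(StoreEngine.STORE_ENGINE_CLASS_KEY, DualFileStoreEngine.class.getName())
+ *   .build();
+ * </pre>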
+ */ +@InterfaceAudience.Private +public class DualFileStoreEngine extends StoreEngine { + public static final String DUAL_FILE_STORE_FLUSHER_CLASS_KEY = + "hbase.hstore.dualfileengine.storeflusher.class"; + public static final String DUAL_FILE_COMPACTOR_CLASS_KEY = + "hbase.hstore.dualfileengine.compactor.class"; + public static final String DUAL_FILE_COMPACTION_POLICY_CLASS_KEY = + "hbase.hstore.dualfileengine.compactionpolicy.class"; + + public static final Class DUAL_FILE_STORE_FLUSHER_CLASS = + DefaultStoreFlusher.class; + public static final Class DUAL_FILE_COMPACTOR_CLASS = + DualFileCompactor.class; + public static final Class + DUAL_FILE_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; + @Override + public boolean needsCompaction(List filesCompacting) { + return compactionPolicy.needsCompaction(storeFileManager.getStorefiles(), filesCompacting); + } + + @Override + public CompactionContext createCompaction() throws IOException { + return new DualFileCompactionContext(); + } + + @Override + protected void createComponents(Configuration conf, HStore store, CellComparator kvComparator) + throws IOException { + createCompactor(conf, store); + createCompactionPolicy(conf, store); + createStoreFlusher(conf, store); + this.storeFileManager = new DefaultStoreFileManager(kvComparator, + StoreFileComparators.SEQ_ID_MAX_TIMESTAMP, conf, compactionPolicy.getConf()); + } + + protected void createCompactor(Configuration conf, HStore store) throws IOException { + String className = conf.get(DUAL_FILE_COMPACTOR_CLASS_KEY, DUAL_FILE_COMPACTOR_CLASS.getName()); + try { + compactor = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured compactor '" + className + "'", e); + } + } + private void createCompactionPolicy(Configuration conf, HStore store) throws IOException { + String className = + conf.get(DUAL_FILE_COMPACTION_POLICY_CLASS_KEY, DUAL_FILE_COMPACTION_POLICY_CLASS.getName()); + try { + compactionPolicy = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, StoreConfigInformation.class }, + new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured compaction policy '" + className + "'", e); + } + } + + private void createStoreFlusher(Configuration conf, HStore store) throws IOException { + String className = + conf.get(DUAL_FILE_STORE_FLUSHER_CLASS_KEY, DUAL_FILE_STORE_FLUSHER_CLASS.getName()); + try { + storeFlusher = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured store flusher '" + className + "'", e); + } + } + private class DualFileCompactionContext extends CompactionContext { + @Override + public boolean select(List filesCompacting, boolean isUserCompaction, + boolean mayUseOffPeak, boolean forceMajor) throws IOException { + request = compactionPolicy.selectCompaction(storeFileManager.getStorefiles(), filesCompacting, + isUserCompaction, mayUseOffPeak, forceMajor); + return request != null; + } + + @Override + public List compact(ThroughputController throughputController, User user) + throws IOException { + return compactor.compact(request, throughputController, user); + } + + @Override + public List preSelect(List filesCompacting) { + return 
compactionPolicy.preSelectCompactionForCoprocessor(storeFileManager.getStorefiles(), + filesCompacting); + } + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java new file mode 100644 index 000000000000..fd1e0ebf1a31 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.yetus.audience.InterfaceAudience; +import static org.apache.hadoop.hbase.regionserver.HStoreFile.HAS_LATEST_VERSION_KEY; + +/** + * Separates the provided cells into two files, one file for the latest put cells and + * the other for the rest of the cells. + */ +@InterfaceAudience.Private +public class DualFileWriter extends AbstractMultiFileWriter { + + private final CellComparator comparator; + private StoreFileWriter latestVersionWriter; + private StoreFileWriter multiVersionWriter; + + private final List writers; + private Cell lastCell = null; + private boolean deleteFamily = false; + public DualFileWriter(CellComparator comparator) { + this.comparator = comparator; + writers = new ArrayList<>(2); + } + + @Override + public void append(Cell cell) throws IOException { + if (lastCell != null && comparator.compareRows(lastCell, cell) != 0) { + // It is a new row and thus time to reset deleteFamily and lastCell + deleteFamily = false; + lastCell = null; + } + + if (!deleteFamily && cell.getType() == Cell.Type.Put + && (lastCell == null || !CellUtil.matchingColumn(lastCell, cell))) { + // No delete family marker has been seen for the current row and this is a put cell and + // the first cell (i.e., the latest version) of a column. 
We can store it in the latest + // version writer + if (latestVersionWriter == null) { + latestVersionWriter = writerFactory.createWriter(); + writers.add(latestVersionWriter); + } + latestVersionWriter.append(cell); + } else { + if (cell.getType() == Cell.Type.DeleteFamily + || cell.getType() == Cell.Type.DeleteFamilyVersion) { + deleteFamily = true; + } + if (multiVersionWriter == null) { + multiVersionWriter = writerFactory.createWriter(); + writers.add(multiVersionWriter); + } + multiVersionWriter.append(cell); + } + lastCell = cell; + } + + @Override + protected Collection writers() { + return writers; + } + + @Override + protected void preCommitWriters() throws IOException { + if (writers.isEmpty()) { + latestVersionWriter = writerFactory.createWriter(); + writers.add(latestVersionWriter); + } + if (latestVersionWriter != null) { + latestVersionWriter.appendFileInfo(HAS_LATEST_VERSION_KEY, Bytes.toBytes(true)); + } + if (multiVersionWriter != null) { + multiVersionWriter.appendFileInfo(HAS_LATEST_VERSION_KEY, Bytes.toBytes(false)); + } + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 9954c78142e9..addab546984d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -956,10 +956,11 @@ private void notifyChangedReadersObservers(List sfs) throws IOExcept * @return all scanners for this store */ public List getScanners(boolean cacheBlocks, boolean isGet, boolean usePread, - boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow, byte[] stopRow, long readPt) + boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow, byte[] stopRow, long readPt, + boolean onlyLatestVersion) throws IOException { return getScanners(cacheBlocks, usePread, isCompaction, matcher, startRow, true, stopRow, false, - readPt); + readPt, onlyLatestVersion); } /** @@ -977,7 +978,8 @@ public List getScanners(boolean cacheBlocks, boolean isGet, boo */ public List getScanners(boolean cacheBlocks, boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow, boolean includeStartRow, - byte[] stopRow, boolean includeStopRow, long readPt) throws IOException { + byte[] stopRow, boolean includeStopRow, long readPt, boolean onlyLatestVersion) + throws IOException { Collection storeFilesToScan; List memStoreScanners; this.storeEngine.readLock(); @@ -1002,7 +1004,8 @@ public List getScanners(boolean cacheBlocks, boolean usePread, // but now we get them in ascending order, which I think is // actually more correct, since memstore get put at the end. 
List sfScanners = StoreFileScanner.getScannersForStoreFiles( - storeFilesToScan, cacheBlocks, usePread, isCompaction, false, matcher, readPt); + storeFilesToScan, cacheBlocks, usePread, isCompaction, false, matcher, readPt, + onlyLatestVersion); List scanners = new ArrayList<>(sfScanners.size() + 1); scanners.addAll(sfScanners); // Then the memstore scanners @@ -1042,10 +1045,11 @@ private static void clearAndClose(List scanners) { */ public List getScanners(List files, boolean cacheBlocks, boolean isGet, boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, - byte[] startRow, byte[] stopRow, long readPt, boolean includeMemstoreScanner) + byte[] startRow, byte[] stopRow, long readPt, boolean includeMemstoreScanner, + boolean onlyLatestVersion) throws IOException { return getScanners(files, cacheBlocks, usePread, isCompaction, matcher, startRow, true, stopRow, - false, readPt, includeMemstoreScanner); + false, readPt, includeMemstoreScanner, onlyLatestVersion); } /** @@ -1060,6 +1064,7 @@ public List getScanners(List files, boolean cacheBl * @param includeStartRow true to include start row, false if not * @param stopRow the stop row * @param includeStopRow true to include stop row, false if not + * @param onlyLatestVersion true to scan only the store files holding the latest put cell versions * @param readPt the read point of the current scan * @param includeMemstoreScanner true if memstore has to be included * @return scanners on the given files and on the memstore if specified @@ -1067,7 +1072,7 @@ public List getScanners(List files, boolean cacheBl public List getScanners(List files, boolean cacheBlocks, boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow, boolean includeStartRow, byte[] stopRow, boolean includeStopRow, long readPt, - boolean includeMemstoreScanner) throws IOException { + boolean includeMemstoreScanner, boolean onlyLatestVersion) throws IOException { List memStoreScanners = null; if (includeMemstoreScanner) { this.storeEngine.readLock(); @@ -1079,7 +1084,7 @@ public List getScanners(List files, boolean cacheBl } try { List sfScanners = StoreFileScanner.getScannersForStoreFiles(files, - cacheBlocks, usePread, isCompaction, false, matcher, readPt); + cacheBlocks, usePread, isCompaction, false, matcher, readPt, onlyLatestVersion); List scanners = new ArrayList<>(sfScanners.size() + 1); scanners.addAll(sfScanners); // Then the memstore scanners @@ -1762,7 +1767,7 @@ public List recreateScanners(List currentFileS return null; } return getScanners(filesToReopen, cacheBlocks, false, false, matcher, startRow, - includeStartRow, stopRow, includeStopRow, readPt, false); + includeStartRow, stopRow, includeStopRow, readPt, false, false); } finally { this.storeEngine.readUnlock(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java index 5df02bfb26a8..5ffddfa26872 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -125,6 +125,8 @@ public class HStoreFile implements StoreFile { */ public static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID"); + public static final byte[] HAS_LATEST_VERSION_KEY = Bytes.toBytes("HAS_LATEST_VERSION"); + private final StoreFileInfo fileInfo; // StoreFile.Reader @@ -138,6 +140,12 @@ public class HStoreFile implements StoreFile { // Indicates if the file got compacted private volatile
boolean compactedAway = false; + // Indicate if the file contains only latest (i.e., single) cell version for a given column + // in a row. MemStore flushes generate files with multiple cell versions. However, + // compactions can generate two files, one with the latest version cells and the other + // with the remaining (non-latest) cell versions. + private volatile boolean hasLatestVersion = true; + // Keys for metadata stored in backing HFile. // Set when we obtain a Reader. private long sequenceid = -1; @@ -337,6 +345,10 @@ public boolean isCompactedAway() { return compactedAway; } + public boolean hasLatestVersion() { + return hasLatestVersion; + } + public int getRefCount() { return fileInfo.getRefCount(); } @@ -455,6 +467,10 @@ private void open() throws IOException { b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY); this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b)); + b = metadataMap.get(HAS_LATEST_VERSION_KEY); + if (b != null) { + hasLatestVersion = Bytes.toBoolean(b); + } BloomType hfileBloomType = initialReader.getBloomFilterType(); if (cfBloomType != BloomType.NONE) { initialReader.loadBloomfilter(BlockType.GENERAL_BLOOM_META, metrics); @@ -583,6 +599,9 @@ public void markCompactedAway() { this.compactedAway = true; } + public void setHasLatestVersion(boolean hasLatestVersion) { + this.hasLatestVersion = hasLatestVersion; + } @Override public String toString() { return this.fileInfo.toString(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index fd941de4df87..cdbf231372f9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -112,10 +112,11 @@ public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVC * Return an array of scanners corresponding to the given set of store files. */ public static List getScannersForStoreFiles(Collection files, - boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean useDropBehind, long readPt) + boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean useDropBehind, long readPt, + boolean onlyLatestVersion) throws IOException { return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction, useDropBehind, null, - readPt); + readPt, onlyLatestVersion); } /** @@ -124,7 +125,7 @@ public static List getScannersForStoreFiles(Collection getScannersForStoreFiles(Collection files, boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean canUseDrop, - ScanQueryMatcher matcher, long readPt) throws IOException { + ScanQueryMatcher matcher, long readPt, boolean onlyLatestVersion) throws IOException { if (files.isEmpty()) { return Collections.emptyList(); } @@ -135,11 +136,17 @@ public static List getScannersForStoreFiles(Collection sfs, List memStoreSc // store files. In case of stream scanners this eager creation does not induce performance // penalty because in scans (that uses stream scanners) the next() call is bound to happen. 
List scanners = store.getScanners(sfs, cacheBlocks, get, usePread, - isCompaction, matcher, scan.getStartRow(), scan.getStopRow(), this.readPt, false); + isCompaction, matcher, scan.getStartRow(), scan.getStopRow(), + this.readPt, false, !scan.isRaw() && scan.getMaxVersions() == 1); flushedstoreFileScanners.addAll(scanners); if (!CollectionUtils.isEmpty(memStoreScanners)) { clearAndClose(memStoreScannersAfterFlush); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java new file mode 100644 index 000000000000..2cabf8cfd416 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.compactions; + +import java.io.IOException; +import java.util.List; +import java.util.function.Consumer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.regionserver.DualFileWriter; +import org.apache.hadoop.hbase.regionserver.HStore; +import org.apache.hadoop.hbase.regionserver.InternalScanner; +import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; +import org.apache.hadoop.hbase.security.User; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This compactor generates two files, one for the latest put cells and the other for + * the rest of the cells (i.e., older put cells and delete markers). 
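+ * <p>
+ * Illustrative sketch only (example row, family and column names): given put cells for the same
+ * column at timestamps 300, 200 and 100 and no delete markers on the row, only the cell at 300
+ * lands in the "latest version" file; the older puts, all delete markers, and every cell of a
+ * row covered by a delete family marker land in the "multi version" file written by
+ * {@link org.apache.hadoop.hbase.regionserver.DualFileWriter}.
+ * <pre>
+ *   put  row1/cf:c/ts=300          -> latest version file
+ *   put  row1/cf:c/ts=200          -> multi version file
+ *   put  row1/cf:c/ts=100          -> multi version file
+ *   deleteFamily  row2/cf/ts=200   -> multi version file (together with all cells of row2)
+ * </pre>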
+ */ +@InterfaceAudience.Private +public class DualFileCompactor extends AbstractMultiOutputCompactor { + + private static final Logger LOG = LoggerFactory.getLogger(DualFileCompactor.class); + + public DualFileCompactor(Configuration conf, HStore store) { + super(conf, store); + } + + public List compact(final CompactionRequestImpl request, ThroughputController throughputController, + User user) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Executing compaction with " + request.getFiles().size() + " files"); + } + + return compact(request, defaultScannerFactory, + new CellSinkFactory() { + + @Override + public DualFileWriter createWriter(InternalScanner scanner, FileDetails fd, + boolean shouldDropBehind, boolean major, Consumer writerCreationTracker) + throws IOException { + DualFileWriter writer = new DualFileWriter(store.getComparator()); + initMultiWriter(writer, scanner, fd, shouldDropBehind, major, writerCreationTracker); + return writer; + } + }, throughputController, user); + } + + @Override + protected List commitWriter(DualFileWriter writer, FileDetails fd, + CompactionRequestImpl request) throws IOException { + List pathList = + writer.commitWriters(fd.maxSeqId, request.isAllFiles(), request.getFiles()); + return pathList; + } + +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java index c0bc72079cb7..4f3efccaeede 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java @@ -67,6 +67,7 @@ import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequestImpl; import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; +import org.apache.hadoop.hbase.regionserver.compactions.DualFileCompactor; import org.apache.hadoop.hbase.regionserver.throttle.CompactionThroughputControllerFactory; import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java index 55320e94a9f9..9fd4136e05ff 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java @@ -146,7 +146,7 @@ public void testStoreFileScannerThrowsErrors() throws IOException { List scanners = StoreFileScanner.getScannersForStoreFiles( Collections.singletonList(sf), false, true, false, false, // 0 is passed as readpoint because this test operates on HStoreFile directly - 0); + 0, false); KeyValueScanner scanner = scanners.get(0); FaultyInputStream inStream = faultyfs.inStreams.get(0).get(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java index e888639eac4a..979e6b040352 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java @@ -1805,10 +1805,10 @@ private static class MyStore extends HStore { public List getScanners(List files,
boolean cacheBlocks, boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow, boolean includeStartRow, byte[] stopRow, boolean includeStopRow, long readPt, - boolean includeMemstoreScanner) throws IOException { + boolean includeMemstoreScanner, boolean onlyLatestVersion) throws IOException { hook.getScanners(this); return super.getScanners(files, cacheBlocks, usePread, isCompaction, matcher, startRow, true, - stopRow, false, readPt, includeMemstoreScanner); + stopRow, false, readPt, includeMemstoreScanner, onlyLatestVersion); } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java index 391f1bef69cc..e086e340d35e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java @@ -126,13 +126,13 @@ public void testReversibleStoreFileScanner() throws IOException { BloomType.NONE, true); List scanners = StoreFileScanner.getScannersForStoreFiles( - Collections.singletonList(sf), false, true, false, false, Long.MAX_VALUE); + Collections.singletonList(sf), false, true, false, false, Long.MAX_VALUE, false); StoreFileScanner scanner = scanners.get(0); seekTestOfReversibleKeyValueScanner(scanner); for (int readPoint = 0; readPoint < MAXMVCC; readPoint++) { LOG.info("Setting read point to " + readPoint); scanners = StoreFileScanner.getScannersForStoreFiles(Collections.singletonList(sf), false, - true, false, false, readPoint); + true, false, false, readPoint, false); seekTestOfReversibleKeyValueScannerWithMVCC(scanners, readPoint); } } @@ -482,7 +482,8 @@ private ReversedKeyValueHeap getReversibleKeyValueHeap(MemStore memstore, HStore private List getScanners(MemStore memstore, HStoreFile sf1, HStoreFile sf2, byte[] startRow, boolean doSeek, int readPoint) throws IOException { List fileScanners = StoreFileScanner - .getScannersForStoreFiles(Lists.newArrayList(sf1, sf2), false, true, false, false, readPoint); + .getScannersForStoreFiles(Lists.newArrayList(sf1, sf2), false, true, false, false, readPoint, + false); List memScanners = memstore.getScanners(readPoint); List scanners = new ArrayList<>(fileScanners.size() + 1); scanners.addAll(fileScanners); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java index 5359dec2e64d..8ccbc521b75a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java @@ -189,6 +189,18 @@ public void verifyKvs(KeyValue[][] kvss, boolean allFiles, List boundaries } } + public void verifyKvs(KeyValue[][] kvss) { + assertEquals(kvss.length, writers.size()); + for (int i = 0; i < kvss.length; ++i) { + KeyValue[] kvs = kvss[i]; + Writer w = writers.get(i); + assertEquals(kvs.length, w.kvs.size()); + for (int j = 0; j < kvs.length; ++j) { + assertEquals(kvs[j], w.kvs.get(j)); + } + } + } + public List getWriters() { return writers; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java new file mode 100644 
index 000000000000..48102a90bd1e --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.compactions; + +import static org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.createDummyRequest; +import static org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.createDummyStoreFile; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.OptionalLong; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparatorImpl; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.regionserver.CreateStoreFileWriterParams; +import org.apache.hadoop.hbase.regionserver.HStore; +import org.apache.hadoop.hbase.regionserver.HStoreFile; +import org.apache.hadoop.hbase.regionserver.InternalScanner; +import org.apache.hadoop.hbase.regionserver.ScanInfo; +import org.apache.hadoop.hbase.regionserver.ScanType; +import org.apache.hadoop.hbase.regionserver.StoreEngine; +import org.apache.hadoop.hbase.regionserver.StoreFileScanner; +import org.apache.hadoop.hbase.regionserver.StoreUtils; +import org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.Scanner; +import org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.StoreFileWritersCapture; +import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, 
SmallTests.class }) +public class TestDualFileCompactor { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestDualFileCompactor.class); + + private static final byte[] NAME_OF_THINGS = Bytes.toBytes("foo"); + + private static final TableName TABLE_NAME = TableName.valueOf(NAME_OF_THINGS, NAME_OF_THINGS); + + private static final KeyValue KV_A_1 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), + Bytes.toBytes("a"),300L, KeyValue.Type.Put); + private static final KeyValue KV_A_2 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), + Bytes.toBytes("a"),200L, KeyValue.Type.Put); + private static final KeyValue KV_A_3 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), + Bytes.toBytes("a"),100L, KeyValue.Type.Put); + + private static final KeyValue KV_B_Delete_Column = new KeyValue(Bytes.toBytes("123"), + Bytes.toBytes("0"), Bytes.toBytes("b"),200L, KeyValue.Type.DeleteColumn); + private static final KeyValue KV_B = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), + Bytes.toBytes("b"),100L, KeyValue.Type.Put); + + + private static final KeyValue KV_C = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), + Bytes.toBytes("c"),100L, KeyValue.Type.Put); + + private static final KeyValue KV_D_1 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), + Bytes.toBytes("d"),200L, KeyValue.Type.Put); + private static final KeyValue KV_D_2 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), + Bytes.toBytes("d"),100L, KeyValue.Type.Put); + + private static final KeyValue KV_Delete_Family = new KeyValue(Bytes.toBytes("456"), + Bytes.toBytes("0"), null ,200L, KeyValue.Type.DeleteFamily); + private static final KeyValue KV_E = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), + Bytes.toBytes("e"),100L, KeyValue.Type.Put); + private static final KeyValue KV_F = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), + Bytes.toBytes("f"),100L, KeyValue.Type.Put); + private static final KeyValue KV_G = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), + Bytes.toBytes("g"),100L, KeyValue.Type.Put); + + @Parameters(name = "{index}: usePrivateReaders={0}") + public static Iterable data() { + return Arrays.asList(new Object[] { true }, new Object[] { false }); + } + + @Parameter + public boolean usePrivateReaders; + + private DualFileCompactor createCompactor(StoreFileWritersCapture writers, + final KeyValue[] input, List storefiles) throws Exception { + Configuration conf = HBaseConfiguration.create(); + conf.setBoolean("hbase.regionserver.compaction.private.readers", usePrivateReaders); + final Scanner scanner = new Scanner(input); + // Create store mock that is satisfactory for compactor. 
+ ColumnFamilyDescriptor familyDescriptor = ColumnFamilyDescriptorBuilder.of(NAME_OF_THINGS); + ScanInfo si = + new ScanInfo(conf, familyDescriptor, Long.MAX_VALUE, 0, CellComparatorImpl.COMPARATOR); + HStore store = mock(HStore.class); + when(store.getStorefiles()).thenReturn(storefiles); + when(store.getColumnFamilyDescriptor()).thenReturn(familyDescriptor); + when(store.getScanInfo()).thenReturn(si); + when(store.areWritesEnabled()).thenReturn(true); + when(store.getFileSystem()).thenReturn(mock(FileSystem.class)); + when(store.getRegionInfo()).thenReturn(RegionInfoBuilder.newBuilder(TABLE_NAME).build()); + StoreEngine storeEngine = mock(StoreEngine.class); + when(storeEngine.createWriter(any(CreateStoreFileWriterParams.class))).thenAnswer(writers); + when(store.getStoreEngine()).thenReturn(storeEngine); + when(store.getComparator()).thenReturn(CellComparatorImpl.COMPARATOR); + OptionalLong maxSequenceId = StoreUtils.getMaxSequenceIdInList(storefiles); + when(store.getMaxSequenceId()).thenReturn(maxSequenceId); + + return new DualFileCompactor(conf, store) { + @Override + protected InternalScanner createScanner(HStore store, ScanInfo scanInfo, + List scanners, long smallestReadPoint, long earliestPutTs, + byte[] dropDeletesFromRow, byte[] dropDeletesToRow) throws IOException { + return scanner; + } + + @Override + protected InternalScanner createScanner(HStore store, ScanInfo scanInfo, + List scanners, ScanType scanType, long smallestReadPoint, + long earliestPutTs) throws IOException { + return scanner; + } + }; + } + + private void verify(KeyValue[] input, KeyValue[][] output) throws Exception { + StoreFileWritersCapture writers = new StoreFileWritersCapture(); + HStoreFile sf1 = createDummyStoreFile(1L); + HStoreFile sf2 = createDummyStoreFile(2L); + DualFileCompactor dfc = createCompactor(writers, input, Arrays.asList(sf1, sf2)); + List paths = dfc.compact(new CompactionRequestImpl(Arrays.asList(sf1)), + NoLimitThroughputController.INSTANCE, null); + writers.verifyKvs(output); + assertEquals(output.length, paths.size()); + } + + @SuppressWarnings("unchecked") + private static T[] a(T... 
a) { + return a; + } + + @Test + public void test() throws Exception { + verify(a(KV_A_1, KV_A_2, KV_A_3, KV_B_Delete_Column, KV_B, KV_C, KV_D_1, KV_D_2, + KV_Delete_Family, KV_E, KV_F, KV_G), + a( + a(KV_A_1, KV_C, KV_D_1, KV_G), // Latest versions + a(KV_A_2, KV_A_3, KV_B_Delete_Column, KV_B, KV_D_2, KV_Delete_Family, KV_E, KV_F) + )); + } + + @Test + public void testEmptyOutputFile() throws Exception { + StoreFileWritersCapture writers = new StoreFileWritersCapture(); + CompactionRequestImpl request = createDummyRequest(); + DualFileCompactor dtc = + createCompactor(writers, new KeyValue[0], new ArrayList<>(request.getFiles())); + List paths = dtc.compact(request, NoLimitThroughputController.INSTANCE, null); + assertEquals(1, paths.size()); + List dummyWriters = writers.getWriters(); + assertEquals(1, dummyWriters.size()); + StoreFileWritersCapture.Writer dummyWriter = dummyWriters.get(0); + assertTrue(dummyWriter.kvs.isEmpty()); + assertTrue(dummyWriter.hasMetadata); + } +} From ffbccebfbf5a0dfd044019f48ae06d6cfa0f98e4 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Sat, 2 Dec 2023 14:53:09 -0800 Subject: [PATCH 02/27] Refactored StoreEngine --- .../hbase/io/hfile/HFilePrettyPrinter.java | 1 + .../hadoop/hbase/mob/MobStoreEngine.java | 8 +--- .../regionserver/DefaultStoreEngine.java | 35 ++++------------ .../regionserver/DualFileStoreEngine.java | 41 +++---------------- .../hbase/regionserver/StoreEngine.java | 35 ++++++++++++++++ .../hbase/regionserver/StoreFileScanner.java | 2 +- 6 files changed, 52 insertions(+), 70 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index 24db92b4de1c..d035ea63b7a8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -537,6 +537,7 @@ private void printMeta(HFile.Reader reader, Map fileInfo) throws Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY) || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED) || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY) + || Bytes.equals(e.getKey(), HStoreFile.HAS_LATEST_VERSION_KEY) ) { out.println(Bytes.toBoolean(e.getValue())); } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java index 633781d464e0..d2061ee6305b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java @@ -44,12 +44,6 @@ protected void createStoreFlusher(Configuration conf, HStore store) throws IOExc */ @Override protected void createCompactor(Configuration conf, HStore store) throws IOException { - String className = conf.get(MOB_COMPACTOR_CLASS_KEY, DefaultMobStoreCompactor.class.getName()); - try { - compactor = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); - } catch (RuntimeException e) { - throw new IOException("Unable to load configured compactor '" + className + "'", e); - } + createCompactor(conf, store, MOB_COMPACTOR_CLASS_KEY, DefaultMobStoreCompactor.class.getName()); } } diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index d29dce6a2523..5ffc1a18d6d3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -47,11 +47,11 @@ public class DefaultStoreEngine extends StoreEngine DEFAULT_STORE_FLUSHER_CLASS = + private static final Class DEFAULT_STORE_FLUSHER_CLASS = DefaultStoreFlusher.class; - public static final Class DEFAULT_COMPACTOR_CLASS = + private static final Class DEFAULT_COMPACTOR_CLASS = DefaultCompactor.class; - public static final Class DEFAULT_COMPACTION_POLICY_CLASS = + private static final Class DEFAULT_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; @Override @@ -70,36 +70,17 @@ protected void createComponents(Configuration conf, HStore store, CellComparator } protected void createCompactor(Configuration conf, HStore store) throws IOException { - String className = conf.get(DEFAULT_COMPACTOR_CLASS_KEY, DEFAULT_COMPACTOR_CLASS.getName()); - try { - compactor = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured compactor '" + className + "'", e); - } + createCompactor(conf, store, DEFAULT_COMPACTOR_CLASS_KEY, DEFAULT_COMPACTOR_CLASS.getName()); } protected void createCompactionPolicy(Configuration conf, HStore store) throws IOException { - String className = - conf.get(DEFAULT_COMPACTION_POLICY_CLASS_KEY, DEFAULT_COMPACTION_POLICY_CLASS.getName()); - try { - compactionPolicy = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, StoreConfigInformation.class }, - new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured compaction policy '" + className + "'", e); - } + createCompactionPolicy(conf, store, DEFAULT_COMPACTION_POLICY_CLASS_KEY, + DEFAULT_COMPACTION_POLICY_CLASS.getName()); } protected void createStoreFlusher(Configuration conf, HStore store) throws IOException { - String className = - conf.get(DEFAULT_STORE_FLUSHER_CLASS_KEY, DEFAULT_STORE_FLUSHER_CLASS.getName()); - try { - storeFlusher = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured store flusher '" + className + "'", e); - } + createStoreFlusher(conf, store, DEFAULT_STORE_FLUSHER_CLASS_KEY, + DEFAULT_STORE_FLUSHER_CLASS.getName()); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java index b196d42cca4d..d4f63fd177fa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java @@ -28,7 +28,6 @@ import org.apache.hadoop.hbase.regionserver.compactions.RatioBasedCompactionPolicy; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import org.apache.hadoop.hbase.security.User; -import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.yetus.audience.InterfaceAudience; /** @@ 
-65,44 +64,16 @@ public CompactionContext createCompaction() throws IOException { @Override protected void createComponents(Configuration conf, HStore store, CellComparator kvComparator) throws IOException { - createCompactor(conf, store); - createCompactionPolicy(conf, store); - createStoreFlusher(conf, store); + createCompactor(conf, store, DUAL_FILE_COMPACTOR_CLASS_KEY, + DUAL_FILE_COMPACTOR_CLASS.getName()); + createCompactionPolicy(conf, store, DUAL_FILE_COMPACTION_POLICY_CLASS_KEY, + DUAL_FILE_COMPACTION_POLICY_CLASS.getName()); + createStoreFlusher(conf, store, DUAL_FILE_STORE_FLUSHER_CLASS_KEY, + DUAL_FILE_STORE_FLUSHER_CLASS.getName()); this.storeFileManager = new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID_MAX_TIMESTAMP, conf, compactionPolicy.getConf()); } - protected void createCompactor(Configuration conf, HStore store) throws IOException { - String className = conf.get(DUAL_FILE_COMPACTOR_CLASS_KEY, DUAL_FILE_COMPACTOR_CLASS.getName()); - try { - compactor = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured compactor '" + className + "'", e); - } - } - private void createCompactionPolicy(Configuration conf, HStore store) throws IOException { - String className = - conf.get(DUAL_FILE_COMPACTION_POLICY_CLASS_KEY, DUAL_FILE_COMPACTION_POLICY_CLASS.getName()); - try { - compactionPolicy = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, StoreConfigInformation.class }, - new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured compaction policy '" + className + "'", e); - } - } - - private void createStoreFlusher(Configuration conf, HStore store) throws IOException { - String className = - conf.get(DUAL_FILE_STORE_FLUSHER_CLASS_KEY, DUAL_FILE_STORE_FLUSHER_CLASS.getName()); - try { - storeFlusher = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured store flusher '" + className + "'", e); - } - } private class DualFileCompactionContext extends CompactionContext { @Override public boolean select(List filesCompacting, boolean isUserCompaction, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java index 34f882516bae..b0d28c1aa089 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java @@ -544,4 +544,39 @@ ReadWriteLock getLock() { public BloomFilterMetrics getBloomFilterMetrics() { return bloomFilterMetrics; } + + + protected void createCompactor(Configuration conf, HStore store, String classKey, + String defaultClassName) throws IOException { + String className = conf.get(classKey, defaultClassName); + try { + compactor = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured compactor '" + className + "'", e); + } + } + + protected void createCompactionPolicy(Configuration conf, HStore store, String classKey, + String defaultClassName) throws 
IOException { + String className = conf.get(classKey, defaultClassName); + try { + compactionPolicy = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, StoreConfigInformation.class }, + new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured compaction policy '" + className + "'", e); + } + } + + protected void createStoreFlusher(Configuration conf, HStore store, String classKey, + String defaultClassName) throws IOException { + String className = conf.get(classKey, defaultClassName); + try { + storeFlusher = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured store flusher '" + className + "'", e); + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index cdbf231372f9..e5c4886260db 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -146,7 +146,7 @@ public static List getScannersForStoreFiles(Collection Date: Sun, 3 Dec 2023 00:48:50 -0800 Subject: [PATCH 03/27] Added DualFileStoreFileManager --- .../regionserver/DefaultStoreFileManager.java | 10 +- .../regionserver/DualFileStoreEngine.java | 2 +- .../DualFileStoreFileManager.java | 102 ++++++++++++++++++ .../hadoop/hbase/regionserver/HStore.java | 2 +- .../hbase/regionserver/StoreFileManager.java | 9 +- .../hbase/regionserver/StoreFileScanner.java | 10 +- .../regionserver/StripeStoreFileManager.java | 2 +- .../TestStripeStoreFileManager.java | 9 +- 8 files changed, 122 insertions(+), 24 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java index f2d7cd973688..4f3caa28c6e6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java @@ -71,7 +71,7 @@ public DefaultStoreFileManager(CellComparator cellComparator, } @Override - public void loadFiles(List storeFiles) { + public void loadFiles(List storeFiles) throws IOException { this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, storeFiles); } @@ -86,7 +86,7 @@ public Collection getCompactedfiles() { } @Override - public void insertNewFiles(Collection sfs) { + public void insertNewFiles(Collection sfs) throws IOException { this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, Iterables.concat(this.storefiles, sfs)); } @@ -117,7 +117,7 @@ public final int getCompactedFilesCount() { @Override public void addCompactionResults(Collection newCompactedfiles, - Collection results) { + Collection results) throws IOException { this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, Iterables .concat(Iterables.filter(storefiles, sf -> !newCompactedfiles.contains(sf)), results)); // Mark the files as compactedAway once the storefiles and compactedfiles list is finalized @@ -157,8 +157,8 @@ public final Optional getSplitPoint() 
throws IOException { } @Override - public final Collection getFilesForScan(byte[] startRow, boolean includeStartRow, - byte[] stopRow, boolean includeStopRow) { + public Collection getFilesForScan(byte[] startRow, boolean includeStartRow, + byte[] stopRow, boolean includeStopRow, boolean onlyLatestVersion) { // We cannot provide any useful input and already have the files sorted by seqNum. return getStorefiles(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java index d4f63fd177fa..69cae4d4014e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java @@ -70,7 +70,7 @@ protected void createComponents(Configuration conf, HStore store, CellComparator DUAL_FILE_COMPACTION_POLICY_CLASS.getName()); createStoreFlusher(conf, store, DUAL_FILE_STORE_FLUSHER_CLASS_KEY, DUAL_FILE_STORE_FLUSHER_CLASS.getName()); - this.storeFileManager = new DefaultStoreFileManager(kvComparator, + this.storeFileManager = new DualFileStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID_MAX_TIMESTAMP, conf, compactionPolicy.getConf()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java new file mode 100644 index 000000000000..bf04a9905ff8 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration; +import org.apache.yetus.audience.InterfaceAudience; + +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableCollection; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; +import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; + +/** + * Implementation of {@link StoreFileManager} for {@link DualFileStoreEngine}. Not thread-safe. + */ +@InterfaceAudience.Private +class DualFileStoreFileManager extends DefaultStoreFileManager { + /** + * List of store files that include the latest put cells inside this store. This is an + * immutable list that is atomically replaced when its contents change. 
+ */ + private volatile ImmutableList latestVersionStoreFiles = ImmutableList.of(); + + public DualFileStoreFileManager(CellComparator cellComparator, + Comparator storeFileComparator, Configuration conf, + CompactionConfiguration comConf) { + super(cellComparator, storeFileComparator, conf, comConf); + } + + private List extractHasLatestVersionFiles(Collection storeFiles) + throws IOException { + List hasLatestVersionFiles = new ArrayList<>(storeFiles.size()); + for (HStoreFile file : storeFiles) { + file.initReader(); + if (file.hasLatestVersion()) { + hasLatestVersionFiles.add(file); + } + } + return hasLatestVersionFiles; + } + + @Override + public void loadFiles(List storeFiles) throws IOException { + super.loadFiles(storeFiles); + this.latestVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), + extractHasLatestVersionFiles(storeFiles)); + } + + @Override + public void insertNewFiles(Collection sfs) throws IOException { + super.insertNewFiles(sfs); + this.latestVersionStoreFiles = + ImmutableList.sortedCopyOf(getStoreFileComparator(), + Iterables.concat(this.latestVersionStoreFiles, extractHasLatestVersionFiles(sfs))); + } + + @Override + public ImmutableCollection clearFiles() { + latestVersionStoreFiles = ImmutableList.of(); + return super.clearFiles(); + } + + @Override + public void addCompactionResults(Collection newCompactedFiles, + Collection results) throws IOException { + Collection newFilesHasLatestVersion= extractHasLatestVersionFiles(results); + this.latestVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), Iterables + .concat(Iterables.filter(latestVersionStoreFiles, + sf -> !newCompactedFiles.contains(sf)), newFilesHasLatestVersion)); + super.addCompactionResults(newCompactedFiles, results); + } + + @Override + public Collection getFilesForScan(byte[] startRow, boolean includeStartRow, + byte[] stopRow, boolean includeStopRow, boolean onlyLatestVersion) { + if (onlyLatestVersion) { + return latestVersionStoreFiles; + } + return super.getFilesForScan(startRow, includeStartRow, stopRow, includeStopRow, false); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index addab546984d..fb07258f22a8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -985,7 +985,7 @@ public List getScanners(boolean cacheBlocks, boolean usePread, this.storeEngine.readLock(); try { storeFilesToScan = this.storeEngine.getStoreFileManager().getFilesForScan(startRow, - includeStartRow, stopRow, includeStopRow); + includeStartRow, stopRow, includeStopRow, onlyLatestVersion); memStoreScanners = this.memstore.getScanners(readPt); // NOTE: here we must increase the refCount for storeFiles because we would open the // storeFiles and get the StoreFileScanners for them.If we don't increase the refCount here, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java index 387fa559dcd3..9cdd3ca5e8e1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java @@ -50,7 +50,7 @@ public interface StoreFileManager { */ @RestrictedApi(explanation = "Should only be called in 
StoreEngine", link = "", allowedOnPath = ".*(/org/apache/hadoop/hbase/regionserver/StoreEngine.java|/src/test/.*)") - void loadFiles(List storeFiles); + void loadFiles(List storeFiles) throws IOException; /** * Adds new files, either for from MemStore flush or bulk insert, into the structure. @@ -58,7 +58,7 @@ public interface StoreFileManager { */ @RestrictedApi(explanation = "Should only be called in StoreEngine", link = "", allowedOnPath = ".*(/org/apache/hadoop/hbase/regionserver/StoreEngine.java|/src/test/.*)") - void insertNewFiles(Collection sfs); + void insertNewFiles(Collection sfs) throws IOException; /** * Adds only the new compaction results into the structure. @@ -67,7 +67,8 @@ public interface StoreFileManager { */ @RestrictedApi(explanation = "Should only be called in StoreEngine", link = "", allowedOnPath = ".*(/org/apache/hadoop/hbase/regionserver/StoreEngine.java|/src/test/.*)") - void addCompactionResults(Collection compactedFiles, Collection results); + void addCompactionResults(Collection compactedFiles, Collection results) + throws IOException; /** * Remove the compacted files @@ -124,7 +125,7 @@ public interface StoreFileManager { * @return The list of files that are to be read for this request. */ Collection getFilesForScan(byte[] startRow, boolean includeStartRow, byte[] stopRow, - boolean includeStopRow); + boolean includeStopRow, boolean onlyLatestVersion); /** * Gets initial, full list of candidate store files to check for row-key-before. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index e5c4886260db..4128e7b2853c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -136,17 +136,11 @@ public static List getScannersForStoreFiles(Collection getFilesForScan(byte[] startRow, boolean includeStartRow, - byte[] stopRow, boolean includeStopRow) { + byte[] stopRow, boolean includeStopRow, boolean onlyLatestVersion) { if (state.stripeFiles.isEmpty()) { return state.level0Files; // There's just L0. } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java index b61be8de00cc..13a50ccd218e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java @@ -95,15 +95,16 @@ public void testInsertFilesIntoL0() throws Exception { MockHStoreFile sf = createFile(); manager.insertNewFiles(al(sf)); assertEquals(1, manager.getStorefileCount()); - Collection filesForGet = manager.getFilesForScan(KEY_A, true, KEY_A, true); + Collection filesForGet = manager.getFilesForScan(KEY_A, true, KEY_A, true, + false); assertEquals(1, filesForGet.size()); assertTrue(filesForGet.contains(sf)); // Add some stripes and make sure we get this file for every stripe. 
manager.addCompactionResults(al(), al(createFile(OPEN_KEY, KEY_B), createFile(KEY_B, OPEN_KEY))); - assertTrue(manager.getFilesForScan(KEY_A, true, KEY_A, true).contains(sf)); - assertTrue(manager.getFilesForScan(KEY_C, true, KEY_C, true).contains(sf)); + assertTrue(manager.getFilesForScan(KEY_A, true, KEY_A, true, false).contains(sf)); + assertTrue(manager.getFilesForScan(KEY_C, true, KEY_C, true, false).contains(sf)); } @Test @@ -556,7 +557,7 @@ private void verifyGetOrScanScenario(StripeStoreFileManager manager, byte[] star Collection results) throws Exception { start = start != null ? start : HConstants.EMPTY_START_ROW; end = end != null ? end : HConstants.EMPTY_END_ROW; - Collection sfs = manager.getFilesForScan(start, true, end, false); + Collection sfs = manager.getFilesForScan(start, true, end, false, false); assertEquals(results.size(), sfs.size()); for (HStoreFile result : results) { assertTrue(sfs.contains(result)); From 893783a4978eb0faca2fa680fdd1a16cc15d598c Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Mon, 4 Dec 2023 07:58:40 -0800 Subject: [PATCH 04/27] Included the latest delete markers in the latest version files --- .../regionserver/DualFileStoreEngine.java | 2 +- .../DualFileStoreFileManager.java | 2 +- .../hbase/regionserver/DualFileWriter.java | 133 ++++++++++++++---- .../hbase/regionserver/StoreScanner.java | 9 +- .../compactions/DualFileCompactor.java | 2 +- .../compactions/TestDualFileCompactor.java | 32 ++++- 6 files changed, 143 insertions(+), 37 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java index 69cae4d4014e..49fb42d8d90c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java @@ -32,7 +32,7 @@ /** * HBASE-25972 This store engine allows us to store data in two files, - * one for the latest put cells and the other for the rest of the cells (i.e., + * one for the latest cells and the other for the rest of the cells (i.e., * older put cells and delete markers). */ @InterfaceAudience.Private diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java index bf04a9905ff8..461a1edc9712 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java @@ -37,7 +37,7 @@ @InterfaceAudience.Private class DualFileStoreFileManager extends DefaultStoreFileManager { /** - * List of store files that include the latest put cells inside this store. This is an + * List of store files that include the latest cells inside this store. This is an * immutable list that is atomically replaced when its contents change. 
*/ private volatile ImmutableList latestVersionStoreFiles = ImmutableList.of(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java index fd1e0ebf1a31..6c5fe1e54548 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -29,8 +29,11 @@ import static org.apache.hadoop.hbase.regionserver.HStoreFile.HAS_LATEST_VERSION_KEY; /** - * Separates the provided cells into two files, one file for the latest put cells and - * the other for the rest of the cells. + * Separates the provided cells into two files, one file for the latest cells and + * the other for the rest of the cells. The latest cells includes the latest put cells that are + * not deleted by a delete marker, the delete markers that delete latest put cells, and the + * version delete markers (that is, DeleteFamilyVersion and Delete) that are not deleted by other + * delete markers (that is DeleteFamily and DeleteColumn). */ @InterfaceAudience.Private public class DualFileWriter extends AbstractMultiFileWriter { @@ -40,41 +43,121 @@ public class DualFileWriter extends AbstractMultiFileWriter { private StoreFileWriter multiVersionWriter; private final List writers; - private Cell lastCell = null; - private boolean deleteFamily = false; + private Cell lastCell; + private Cell deleteFamily; + private List deleteFamilyVersionList = new ArrayList<>(); + private Cell deleteColumn; + private List deleteColumnVersionList = new ArrayList<>(); + private Cell firstAndPutCellOfAColumn; public DualFileWriter(CellComparator comparator) { this.comparator = comparator; writers = new ArrayList<>(2); + initRowState(); } + private void initRowState() { + deleteFamily = null; + deleteFamilyVersionList.clear(); + lastCell = null; + } + + private void initColumnState() { + deleteColumn = null; + deleteColumnVersionList.clear(); + firstAndPutCellOfAColumn = null; + } + + private void addLatestVersion(Cell cell) throws IOException { + if (latestVersionWriter == null) { + latestVersionWriter = writerFactory.createWriter(); + writers.add(latestVersionWriter); + } + latestVersionWriter.append(cell); + } + + private void addOlderVersion(Cell cell) throws IOException { + if (multiVersionWriter == null) { + multiVersionWriter = writerFactory.createWriter(); + writers.add(multiVersionWriter); + } + multiVersionWriter.append(cell); + } + + private boolean isDeletedByDeleteFamily(Cell cell) { + return deleteFamily != null && deleteFamily.getTimestamp() >= cell.getTimestamp(); + } + + private boolean isDeletedByDeleteFamilyVersion(Cell cell) { + for (Cell deleteFamilyVersion : deleteFamilyVersionList) { + if (deleteFamilyVersion.getTimestamp() == cell.getTimestamp()) return true; + } + return false; + } + + private boolean isDeletedByDeleteColumnVersion(Cell cell) { + for (Cell deleteColumnVersion : deleteColumnVersionList) { + if (deleteColumnVersion.getTimestamp() == cell.getTimestamp()) return true; + } + return false; + } + + private boolean isDeleted(Cell cell) { + return isDeletedByDeleteFamily(cell) || deleteColumn != null + || isDeletedByDeleteFamilyVersion(cell) || isDeletedByDeleteColumnVersion(cell); + } @Override public void append(Cell cell) throws IOException { if (lastCell != null && comparator.compareRows(lastCell, cell) != 0) { - // It is a new row and thus time to reset deleteFamily and 
lastCell - deleteFamily = false; - lastCell = null; + // It is a new row and thus time to reset the state + initRowState(); } - - if (!deleteFamily && cell.getType() == Cell.Type.Put - && (lastCell == null || !CellUtil.matchingColumn(lastCell, cell))) { - // No delete family marker has been seen for the current row and this is a put cell and - // the first cell (i.e., the latest version) of a column. We can store it in the latest - // version writer - if (latestVersionWriter == null) { - latestVersionWriter = writerFactory.createWriter(); - writers.add(latestVersionWriter); + if ((lastCell == null || !CellUtil.matchingColumn(lastCell, cell))) { + initColumnState(); + } + if (cell.getType() == Cell.Type.DeleteFamily) { + if (deleteFamily == null) { + if (cell.getType() == Cell.Type.DeleteFamily) { + deleteFamily = cell; + addLatestVersion(cell); + } else { + addOlderVersion(cell); + } + } + } else if (cell.getType() == Cell.Type.DeleteFamilyVersion) { + if (deleteFamily == null) { + deleteFamilyVersionList.add(cell); + addLatestVersion(cell); + } else { + addOlderVersion(cell); + } + } else if (cell.getType() == Cell.Type.DeleteColumn) { + if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { + deleteColumn = cell; + addLatestVersion(cell); + } else { + addOlderVersion(cell); } - latestVersionWriter.append(cell); - } else { - if (cell.getType() == Cell.Type.DeleteFamily - || cell.getType() == Cell.Type.DeleteFamilyVersion) { - deleteFamily = true; + } else if (cell.getType() == Cell.Type.Delete) { + if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { + deleteColumnVersionList.add(cell); + addLatestVersion(cell); + } else { + addOlderVersion(cell); } - if (multiVersionWriter == null) { - multiVersionWriter = writerFactory.createWriter(); - writers.add(multiVersionWriter); + } else if (cell.getType() == Cell.Type.Put) { + if (firstAndPutCellOfAColumn == null) { + // This is the first put cell (i.e., the latest version) of a column. Is it deleted? + if (!isDeleted(cell)) { + addLatestVersion(cell); + firstAndPutCellOfAColumn = cell; + } else { + // It is deleted + addOlderVersion(cell); + } + } else { + // It is an older put cell + addOlderVersion(cell); } - multiVersionWriter.append(cell); } lastCell = cell; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 4f083105f5f4..cda05427da80 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.DoNotRetryIOException; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.PrivateCellUtil; @@ -223,6 +224,10 @@ private void addCurrentScanners(List scanners) { this.currentScanners.addAll(scanners); } + private static boolean isOnlyLatestVersionScan(Scan scan) { + return !scan.isRaw() && scan.getMaxVersions() == 1 + && scan.getTimeRange().getMax() == HConstants.LATEST_TIMESTAMP; + } /** * Opens a scanner across memstore, snapshot, and all StoreFiles. Assumes we are not in a * compaction. 
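For illustration, the isOnlyLatestVersionScan helper added above can be read as a standalone predicate over a Scan. The sketch below is not part of the patch: the class name and Scan instances are invented for the example, and it mirrors the check as written in this revision (a later patch in this series drops the max-versions condition). It only shows which scan shapes are expected to be served from the latest-version store files alone.

    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.client.Scan;

    public class LatestVersionScanCheckExample {
      // Restates the private check added to StoreScanner in this patch: the scan must not be
      // raw, must request a single version, and must not cap its time range below "now".
      static boolean onlyLatestVersion(Scan scan) {
        return !scan.isRaw() && scan.getMaxVersions() == 1
          && scan.getTimeRange().getMax() == HConstants.LATEST_TIMESTAMP;
      }

      public static void main(String[] args) {
        Scan defaultScan = new Scan();                      // defaults: 1 version, open time range
        Scan rawScan = new Scan().setRaw(true);             // raw scans need delete markers
        Scan multiVersionScan = new Scan().readVersions(3); // needs older versions too

        System.out.println(onlyLatestVersion(defaultScan));      // true  -> latest-version files suffice
        System.out.println(onlyLatestVersion(rawScan));          // false -> must read all store files
        System.out.println(onlyLatestVersion(multiVersionScan)); // false -> must read all store files
      }
    }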
@@ -248,7 +253,7 @@ public StoreScanner(HStore store, ScanInfo scanInfo, Scan scan, NavigableSet sfs, List memStoreSc // penalty because in scans (that uses stream scanners) the next() call is bound to happen. List scanners = store.getScanners(sfs, cacheBlocks, get, usePread, isCompaction, matcher, scan.getStartRow(), scan.getStopRow(), - this.readPt, false, !scan.isRaw() && scan.getMaxVersions() == 1); + this.readPt, false, isOnlyLatestVersionScan(scan)); flushedstoreFileScanners.addAll(scanners); if (!CollectionUtils.isEmpty(memStoreScanners)) { clearAndClose(memStoreScannersAfterFlush); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java index 2cabf8cfd416..bfb9f349c57c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java @@ -32,7 +32,7 @@ import org.slf4j.LoggerFactory; /** - * This compactor generates two files, one for the latest put cells and the other for + * This compactor generates two files, one for the latest cells and the other for * the rest of the cells (i.e., older put cells and delete markers). */ @InterfaceAudience.Private diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java index 48102a90bd1e..34793d71f818 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java @@ -78,6 +78,9 @@ public class TestDualFileCompactor { private static final TableName TABLE_NAME = TableName.valueOf(NAME_OF_THINGS, NAME_OF_THINGS); + private static final KeyValue KV_A_DeleteFamilyVersion = + new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), + null,300L, KeyValue.Type.DeleteFamilyVersion); private static final KeyValue KV_A_1 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), Bytes.toBytes("a"),300L, KeyValue.Type.Put); private static final KeyValue KV_A_2 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), @@ -85,7 +88,7 @@ public class TestDualFileCompactor { private static final KeyValue KV_A_3 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), Bytes.toBytes("a"),100L, KeyValue.Type.Put); - private static final KeyValue KV_B_Delete_Column = new KeyValue(Bytes.toBytes("123"), + private static final KeyValue KV_B_DeleteColumn = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), Bytes.toBytes("b"),200L, KeyValue.Type.DeleteColumn); private static final KeyValue KV_B = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), Bytes.toBytes("b"),100L, KeyValue.Type.Put); @@ -99,13 +102,23 @@ public class TestDualFileCompactor { private static final KeyValue KV_D_2 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), Bytes.toBytes("d"),100L, KeyValue.Type.Put); - private static final KeyValue KV_Delete_Family = new KeyValue(Bytes.toBytes("456"), + private static final KeyValue KV_E_F_DeleteFamily = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), null ,200L, KeyValue.Type.DeleteFamily); private static final KeyValue KV_E = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), Bytes.toBytes("e"),100L, 
KeyValue.Type.Put); private static final KeyValue KV_F = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), Bytes.toBytes("f"),100L, KeyValue.Type.Put); - private static final KeyValue KV_G = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), + private static final KeyValue KV_G_DeleteFamily = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), + null,400L, KeyValue.Type.DeleteFamily); + private static final KeyValue KV_G_DeleteFamilyVersion = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), + null,100L, KeyValue.Type.DeleteFamilyVersion); + private static final KeyValue KV_G_1 = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), + Bytes.toBytes("g"),500L, KeyValue.Type.Put); + private static final KeyValue KV_G_DeleteColumn = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), + null,300L, KeyValue.Type.DeleteColumn); + private static final KeyValue KV_G_DeleteColumnVersion = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), + null,200L, KeyValue.Type.Delete); + private static final KeyValue KV_G_2 = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), Bytes.toBytes("g"),100L, KeyValue.Type.Put); @Parameters(name = "{index}: usePrivateReaders={0}") @@ -174,11 +187,16 @@ private static T[] a(T... a) { @Test public void test() throws Exception { - verify(a(KV_A_1, KV_A_2, KV_A_3, KV_B_Delete_Column, KV_B, KV_C, KV_D_1, KV_D_2, - KV_Delete_Family, KV_E, KV_F, KV_G), + verify(a(KV_A_DeleteFamilyVersion, KV_A_1, KV_A_2, KV_A_3, KV_B_DeleteColumn, KV_B, KV_C, + KV_D_1, KV_D_2, // Row 123 + KV_E_F_DeleteFamily, KV_E, KV_F, // Row 456 + KV_G_DeleteFamily, KV_G_DeleteFamilyVersion, KV_G_1, KV_G_DeleteColumn, + KV_G_DeleteColumnVersion, KV_G_2), // Row 789 a( - a(KV_A_1, KV_C, KV_D_1, KV_G), // Latest versions - a(KV_A_2, KV_A_3, KV_B_Delete_Column, KV_B, KV_D_2, KV_Delete_Family, KV_E, KV_F) + a(KV_A_DeleteFamilyVersion, KV_A_2, KV_B_DeleteColumn, KV_C, KV_D_1, + KV_E_F_DeleteFamily, KV_G_DeleteFamily, KV_G_1), // Latest versions + a(KV_A_1, KV_A_3, KV_B, KV_D_2, KV_E, KV_F, KV_G_DeleteFamilyVersion, KV_G_DeleteColumn, + KV_G_DeleteColumnVersion, KV_G_2) )); } From 5b63d700db59f5310b3c7b623eba38ab2cd4b781 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Mon, 11 Dec 2023 05:16:45 +0300 Subject: [PATCH 05/27] Integrated DualFileWriter and DualFileStoreFileManager with DefaultStoreEngine --- .../client/ColumnFamilyDescriptorBuilder.java | 2 +- .../hbase/io/hfile/HFilePrettyPrinter.java | 2 +- .../hbase/mob/DefaultMobStoreCompactor.java | 20 +++- .../hadoop/hbase/mob/MobStoreEngine.java | 71 +++++++++++- .../regionserver/DefaultStoreEngine.java | 11 +- .../regionserver/DefaultStoreFileManager.java | 2 +- .../regionserver/DualFileStoreEngine.java | 98 ---------------- .../hbase/regionserver/DualFileWriter.java | 106 ++++++++++++------ .../hadoop/hbase/regionserver/HStore.java | 1 - .../hadoop/hbase/regionserver/HStoreFile.java | 22 ++-- .../regionserver/StoreFileComparators.java | 3 +- .../hbase/regionserver/StoreScanner.java | 3 +- .../AbstractMultiOutputCompactor.java | 1 + .../regionserver/compactions/Compactor.java | 2 +- .../compactions/DefaultCompactor.java | 50 ++++----- .../compactions/DualFileCompactor.java | 75 ------------- .../hbase/regionserver/TestCompaction.java | 19 +++- .../regionserver/TestCompactorMemLeak.java | 4 +- .../compactions/TestCompactor.java | 9 ++ ...Compactor.java => TestDualFileWriter.java} | 79 +++++++++---- 20 files changed, 284 insertions(+), 296 deletions(-) delete mode 100644 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java rename hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/{TestDualFileCompactor.java => TestDualFileWriter.java} (76%) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java index 42f25fdc56f4..07eb9ba50d1a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java @@ -398,7 +398,7 @@ public static ColumnFamilyDescriptor of(byte[] name) { return newBuilder(name).build(); } - private ColumnFamilyDescriptorBuilder(final byte[] name) { + public ColumnFamilyDescriptorBuilder(final byte[] name) { this.desc = new ModifyableColumnFamilyDescriptor(name); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index d035ea63b7a8..3fe84310db51 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -537,7 +537,7 @@ private void printMeta(HFile.Reader reader, Map fileInfo) throws Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY) || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED) || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY) - || Bytes.equals(e.getKey(), HStoreFile.HAS_LATEST_VERSION_KEY) + || Bytes.equals(e.getKey(), HStoreFile.HAS_LIVE_VERSIONS_KEY) ) { out.println(Bytes.toBoolean(e.getValue())); } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java index 44f77b62ad8b..39f47fc4cf22 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java @@ -55,7 +55,7 @@ import org.apache.hadoop.hbase.regionserver.compactions.CloseChecker; import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequestImpl; -import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; +import org.apache.hadoop.hbase.regionserver.compactions.Compactor; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputControlUtil; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import org.apache.hadoop.hbase.security.User; @@ -74,7 +74,7 @@ * Compact passed set of files in the mob-enabled column family. 
*/ @InterfaceAudience.Private -public class DefaultMobStoreCompactor extends DefaultCompactor { +public class DefaultMobStoreCompactor extends Compactor { private static final Logger LOG = LoggerFactory.getLogger(DefaultMobStoreCompactor.class); protected long mobSizeThreshold; @@ -172,7 +172,6 @@ public DefaultMobStoreCompactor(Configuration conf, HStore store) { } - @Override public List compact(CompactionRequestImpl request, ThroughputController throughputController, User user) throws IOException { String tableName = store.getTableName().toString(); @@ -715,4 +714,19 @@ private void deleteCommittedMobFiles(List fileNames) { } + @Override + protected final void abortWriter(StoreFileWriter writer) throws IOException { + Path leftoverFile = writer.getPath(); + try { + writer.close(); + } catch (IOException e) { + LOG.warn("Failed to close the writer after an unfinished compaction.", e); + } + try { + store.getFileSystem().delete(leftoverFile, false); + } catch (IOException e) { + LOG.warn("Failed to delete the leftover file {} after an unfinished compaction.", + leftoverFile, e); + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java index d2061ee6305b..750d6b9cc994 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java @@ -18,20 +18,38 @@ package org.apache.hadoop.hbase.mob; import java.io.IOException; +import java.util.List; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.regionserver.DefaultStoreEngine; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.regionserver.DefaultStoreFileManager; +import org.apache.hadoop.hbase.regionserver.DefaultStoreFlusher; import org.apache.hadoop.hbase.regionserver.HStore; -import org.apache.hadoop.hbase.util.ReflectionUtils; +import org.apache.hadoop.hbase.regionserver.HStoreFile; +import org.apache.hadoop.hbase.regionserver.StoreEngine; +import org.apache.hadoop.hbase.regionserver.StoreFileComparators; +import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; +import org.apache.hadoop.hbase.regionserver.compactions.RatioBasedCompactionPolicy; +import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; +import org.apache.hadoop.hbase.security.User; import org.apache.yetus.audience.InterfaceAudience; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY; /** * MobStoreEngine creates the mob specific compactor, and store flusher. 
*/ @InterfaceAudience.Private -public class MobStoreEngine extends DefaultStoreEngine { +public class MobStoreEngine extends StoreEngine { public final static String MOB_COMPACTOR_CLASS_KEY = "hbase.hstore.mobengine.compactor.class"; @Override + public boolean needsCompaction(List filesCompacting) { + return compactionPolicy.needsCompaction(this.storeFileManager.getStorefiles(), filesCompacting); + } + protected void createStoreFlusher(Configuration conf, HStore store) throws IOException { // When using MOB, we use DefaultMobStoreFlusher always // Just use the compactor and compaction policy as that in DefaultStoreEngine. We can have MOB @@ -42,8 +60,53 @@ protected void createStoreFlusher(Configuration conf, HStore store) throws IOExc /** * Creates the DefaultMobCompactor. */ - @Override + protected void createCompactor(Configuration conf, HStore store) throws IOException { createCompactor(conf, store, MOB_COMPACTOR_CLASS_KEY, DefaultMobStoreCompactor.class.getName()); } + @Override + protected void createComponents(Configuration conf, HStore store, CellComparator kvComparator) + throws IOException { + createCompactor(conf, store); + createCompactionPolicy(conf, store); + createStoreFlusher(conf, store); + boolean enableDualFileWriter = conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, + false); + storeFileManager = new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, + compactionPolicy.getConf()); + } + + protected void createCompactionPolicy(Configuration conf, HStore store) throws IOException { + createCompactionPolicy(conf, store, DEFAULT_COMPACTION_POLICY_CLASS_KEY, + DEFAULT_COMPACTION_POLICY_CLASS.getName()); + } + + + @Override + public CompactionContext createCompaction() { + return new MobStoreEngine.DefaultCompactionContext(); + } + + private class DefaultCompactionContext extends CompactionContext { + @Override + public boolean select(List filesCompacting, boolean isUserCompaction, + boolean mayUseOffPeak, boolean forceMajor) throws IOException { + request = compactionPolicy.selectCompaction(storeFileManager.getStorefiles(), filesCompacting, + isUserCompaction, mayUseOffPeak, forceMajor); + return request != null; + } + + @Override + public List compact(ThroughputController throughputController, User user) + throws IOException { + return compactor.compact(request, throughputController, user); + } + + @Override + public List preSelect(List filesCompacting) { + return compactionPolicy.preSelectCompactionForCoprocessor(storeFileManager.getStorefiles(), + filesCompacting); + } + } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 5ffc1a18d6d3..ce931ef9a43d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -46,12 +46,14 @@ public class DefaultStoreEngine extends StoreEngine DEFAULT_STORE_FLUSHER_CLASS = DefaultStoreFlusher.class; private static final Class DEFAULT_COMPACTOR_CLASS = DefaultCompactor.class; - private static final Class DEFAULT_COMPACTION_POLICY_CLASS = + public static final Class DEFAULT_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; @Override @@ -65,7 +67,12 @@ protected void createComponents(Configuration conf, HStore store, CellComparator createCompactor(conf, store); createCompactionPolicy(conf, store); 
createStoreFlusher(conf, store); - storeFileManager = new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, + boolean enableDualFileWriter = conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, + false); + storeFileManager = enableDualFileWriter + ? new DualFileStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, + compactionPolicy.getConf()) + :new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, compactionPolicy.getConf()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java index 4f3caa28c6e6..97a2f2b29f9c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java @@ -41,7 +41,7 @@ * Default implementation of StoreFileManager. Not thread-safe. */ @InterfaceAudience.Private -class DefaultStoreFileManager implements StoreFileManager { +public class DefaultStoreFileManager implements StoreFileManager { private static final Logger LOG = LoggerFactory.getLogger(DefaultStoreFileManager.class); private final CellComparator cellComparator; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java deleted file mode 100644 index 49fb42d8d90c..000000000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreEngine.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.regionserver; - -import java.io.IOException; -import java.util.List; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; -import org.apache.hadoop.hbase.regionserver.compactions.DualFileCompactor; -import org.apache.hadoop.hbase.regionserver.compactions.ExploringCompactionPolicy; -import org.apache.hadoop.hbase.regionserver.compactions.RatioBasedCompactionPolicy; -import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; -import org.apache.hadoop.hbase.security.User; -import org.apache.yetus.audience.InterfaceAudience; - -/** - * HBASE-25972 This store engine allows us to store data in two files, - * one for the latest cells and the other for the rest of the cells (i.e., - * older put cells and delete markers). 
- */ -@InterfaceAudience.Private -public class DualFileStoreEngine extends StoreEngine { - public static final String DUAL_FILE_STORE_FLUSHER_CLASS_KEY = - "hbase.hstore.dualfileengine.storeflusher.class"; - public static final String DUAL_FILE_COMPACTOR_CLASS_KEY = - "hbase.hstore.dualfileengine.compactor.class"; - public static final String DUAL_FILE_COMPACTION_POLICY_CLASS_KEY = - "hbase.hstore.dualfileengine.compactionpolicy.class"; - - public static final Class DUAL_FILE_STORE_FLUSHER_CLASS = - DefaultStoreFlusher.class; - public static final Class DUAL_FILE_COMPACTOR_CLASS = - DualFileCompactor.class; - public static final Class - DUAL_FILE_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; - @Override - public boolean needsCompaction(List filesCompacting) { - return compactionPolicy.needsCompaction(storeFileManager.getStorefiles(), filesCompacting); - } - - @Override - public CompactionContext createCompaction() throws IOException { - return new DualFileCompactionContext(); - } - - @Override - protected void createComponents(Configuration conf, HStore store, CellComparator kvComparator) - throws IOException { - createCompactor(conf, store, DUAL_FILE_COMPACTOR_CLASS_KEY, - DUAL_FILE_COMPACTOR_CLASS.getName()); - createCompactionPolicy(conf, store, DUAL_FILE_COMPACTION_POLICY_CLASS_KEY, - DUAL_FILE_COMPACTION_POLICY_CLASS.getName()); - createStoreFlusher(conf, store, DUAL_FILE_STORE_FLUSHER_CLASS_KEY, - DUAL_FILE_STORE_FLUSHER_CLASS.getName()); - this.storeFileManager = new DualFileStoreFileManager(kvComparator, - StoreFileComparators.SEQ_ID_MAX_TIMESTAMP, conf, compactionPolicy.getConf()); - } - - private class DualFileCompactionContext extends CompactionContext { - @Override - public boolean select(List filesCompacting, boolean isUserCompaction, - boolean mayUseOffPeak, boolean forceMajor) throws IOException { - request = compactionPolicy.selectCompaction(storeFileManager.getStorefiles(), filesCompacting, - isUserCompaction, mayUseOffPeak, forceMajor); - return request != null; - } - - @Override - public List compact(ThroughputController throughputController, User user) - throws IOException { - return compactor.compact(request, throughputController, user); - } - - @Override - public List preSelect(List filesCompacting) { - return compactionPolicy.preSelectCompactionForCoprocessor(storeFileManager.getStorefiles(), - filesCompacting); - } - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java index 6c5fe1e54548..45a2add5178d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -24,9 +24,10 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; -import static org.apache.hadoop.hbase.regionserver.HStoreFile.HAS_LATEST_VERSION_KEY; +import static org.apache.hadoop.hbase.regionserver.HStoreFile.HAS_LIVE_VERSIONS_KEY; /** * Separates the provided cells into two files, one file for the latest cells and @@ -39,18 +40,31 @@ public class DualFileWriter extends AbstractMultiFileWriter { private final CellComparator comparator; - private StoreFileWriter latestVersionWriter; - private StoreFileWriter 
multiVersionWriter; + private StoreFileWriter liveVersionWriter; + private StoreFileWriter historicalVersionWriter; private final List writers; + // The last cell of the current row private Cell lastCell; + // The first (latest) delete family marker of the current row private Cell deleteFamily; + // The list of delete family version markers of the current row private List deleteFamilyVersionList = new ArrayList<>(); + // The first (latest) delete column marker of the current column private Cell deleteColumn; + // The list of delete column version markers of the current column private List deleteColumnVersionList = new ArrayList<>(); - private Cell firstAndPutCellOfAColumn; - public DualFileWriter(CellComparator comparator) { + // The live put cell count for the current column + private int livePutCellCount; + private final boolean dualWriterEnabled; + private final boolean keepDeletedCells; + private final int maxVersions; + public DualFileWriter(CellComparator comparator, int maxVersions, + boolean keepDeletedCells, boolean dualWriterEnabled) { this.comparator = comparator; + this.maxVersions = maxVersions; + this.keepDeletedCells = keepDeletedCells; + this.dualWriterEnabled = dualWriterEnabled; writers = new ArrayList<>(2); initRowState(); } @@ -62,25 +76,29 @@ private void initRowState() { } private void initColumnState() { + livePutCellCount = 0; deleteColumn = null; deleteColumnVersionList.clear(); - firstAndPutCellOfAColumn = null; + } - private void addLatestVersion(Cell cell) throws IOException { - if (latestVersionWriter == null) { - latestVersionWriter = writerFactory.createWriter(); - writers.add(latestVersionWriter); + private void addLiveVersion(Cell cell) throws IOException { + if (liveVersionWriter == null) { + liveVersionWriter = writerFactory.createWriter(); + writers.add(liveVersionWriter); } - latestVersionWriter.append(cell); + liveVersionWriter.append(cell); } - private void addOlderVersion(Cell cell) throws IOException { - if (multiVersionWriter == null) { - multiVersionWriter = writerFactory.createWriter(); - writers.add(multiVersionWriter); + private void addHistoricalVersion(Cell cell) throws IOException { + if (!keepDeletedCells) { + return; + } + if (historicalVersionWriter == null) { + historicalVersionWriter = writerFactory.createWriter(); + writers.add(historicalVersionWriter); } - multiVersionWriter.append(cell); + historicalVersionWriter.append(cell); } private boolean isDeletedByDeleteFamily(Cell cell) { @@ -105,8 +123,15 @@ private boolean isDeleted(Cell cell) { return isDeletedByDeleteFamily(cell) || deleteColumn != null || isDeletedByDeleteFamilyVersion(cell) || isDeletedByDeleteColumnVersion(cell); } + @Override public void append(Cell cell) throws IOException { + if (!dualWriterEnabled) { + // If the dual writer is not enabled then all cells are written to one file. 
We use + // the live version file in this case + addLiveVersion(cell); + return; + } if (lastCell != null && comparator.compareRows(lastCell, cell) != 0) { // It is a new row and thus time to reset the state initRowState(); @@ -118,45 +143,45 @@ public void append(Cell cell) throws IOException { if (deleteFamily == null) { if (cell.getType() == Cell.Type.DeleteFamily) { deleteFamily = cell; - addLatestVersion(cell); + addLiveVersion(cell); } else { - addOlderVersion(cell); + addHistoricalVersion(cell); } } } else if (cell.getType() == Cell.Type.DeleteFamilyVersion) { if (deleteFamily == null) { deleteFamilyVersionList.add(cell); - addLatestVersion(cell); + addLiveVersion(cell); } else { - addOlderVersion(cell); + addHistoricalVersion(cell); } } else if (cell.getType() == Cell.Type.DeleteColumn) { if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { deleteColumn = cell; - addLatestVersion(cell); + addLiveVersion(cell); } else { - addOlderVersion(cell); + addHistoricalVersion(cell); } } else if (cell.getType() == Cell.Type.Delete) { if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { deleteColumnVersionList.add(cell); - addLatestVersion(cell); + addLiveVersion(cell); } else { - addOlderVersion(cell); + addHistoricalVersion(cell); } } else if (cell.getType() == Cell.Type.Put) { - if (firstAndPutCellOfAColumn == null) { - // This is the first put cell (i.e., the latest version) of a column. Is it deleted? + if (livePutCellCount < maxVersions) { + // This is a live put cell (i.e., the latest version) of a column. Is it deleted? if (!isDeleted(cell)) { - addLatestVersion(cell); - firstAndPutCellOfAColumn = cell; + addLiveVersion(cell); + livePutCellCount++; } else { // It is deleted - addOlderVersion(cell); + addHistoricalVersion(cell); } } else { // It is an older put cell - addOlderVersion(cell); + addHistoricalVersion(cell); } } lastCell = cell; @@ -170,14 +195,23 @@ protected Collection writers() { @Override protected void preCommitWriters() throws IOException { if (writers.isEmpty()) { - latestVersionWriter = writerFactory.createWriter(); - writers.add(latestVersionWriter); + liveVersionWriter = writerFactory.createWriter(); + writers.add(liveVersionWriter); } - if (latestVersionWriter != null) { - latestVersionWriter.appendFileInfo(HAS_LATEST_VERSION_KEY, Bytes.toBytes(true)); + if (!dualWriterEnabled) { + return; } - if (multiVersionWriter != null) { - multiVersionWriter.appendFileInfo(HAS_LATEST_VERSION_KEY, Bytes.toBytes(false)); + if (liveVersionWriter != null) { + liveVersionWriter.appendFileInfo(HAS_LIVE_VERSIONS_KEY, Bytes.toBytes(true)); + } + if (historicalVersionWriter != null) { + historicalVersionWriter.appendFileInfo(HAS_LIVE_VERSIONS_KEY, Bytes.toBytes(false)); + } + } + public HFile.Writer getHFileWriter() { + if (writers.isEmpty()) { + return null; } + return writers.get(0).getHFileWriter(); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index fb07258f22a8..4d8520edbb00 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -1064,7 +1064,6 @@ public List getScanners(List files, boolean cacheBl * @param includeStartRow true to include start row, false if not * @param stopRow the stop row * @param includeStopRow true to include stop row, false if not - * @param maxVersions the max number of versions to fetch * @param 
readPt the read point of the current scan * @param includeMemstoreScanner true if memstore has to be included * @return scanners on the given files and on the memstore if specified diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java index 5ffddfa26872..a3a9ecf47d0d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -125,7 +125,7 @@ public class HStoreFile implements StoreFile { */ public static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID"); - public static final byte[] HAS_LATEST_VERSION_KEY = Bytes.toBytes("HAS_LATEST_VERSION"); + public static final byte[] HAS_LIVE_VERSIONS_KEY = Bytes.toBytes("HAS_LIVE_VERSIONS"); private final StoreFileInfo fileInfo; @@ -140,11 +140,11 @@ public class HStoreFile implements StoreFile { // Indicates if the file got compacted private volatile boolean compactedAway = false; - // Indicate if the file contains only latest (i.e., single) cell version for a given column - // in a row. MemStore flushes generate files with multiple cell versions. However, - // compactions can generate two files, one with the latest version cells and the other - // with the remaining (non-latest) cell versions. - private volatile boolean hasLatestVersion = true; + // Indicates whether the file contains live cell versions for a given column + // in a row. MemStore flushes generate files with all cell versions. However, + // compactions can generate two files, one with the live version cells and the other + // with the remaining (historical) cell versions. + private volatile boolean hasLiveVersions = true; // Keys for metadata stored in backing HFile. // Set when we obtain a Reader. @@ -346,7 +346,7 @@ public boolean isCompactedAway() { } public boolean hasLatestVersion() { - return hasLatestVersion; + return hasLiveVersions; } public int getRefCount() { @@ -467,9 +467,9 @@ private void open() throws IOException { b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY); this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b)); - b = metadataMap.get(HAS_LATEST_VERSION_KEY); + b = metadataMap.get(HAS_LIVE_VERSIONS_KEY); if (b != null) { - hasLatestVersion = Bytes.toBoolean(b); + hasLiveVersions = Bytes.toBoolean(b); } BloomType hfileBloomType = initialReader.getBloomFilterType(); if (cfBloomType != BloomType.NONE) { @@ -599,8 +599,8 @@ public void markCompactedAway() { this.compactedAway = true; } - public void setHasLatestVersion(boolean hasLatestVersion) { - this.hasLatestVersion = hasLatestVersion; + public void setHasLiveVersions(boolean hasLiveVersions) { + this.hasLiveVersions = hasLiveVersions; } @Override public String toString() { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java index dc4abf60e896..3f533808fa84 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java @@ -25,8 +25,7 @@ /** * Useful comparators for comparing store files.
*/ -@InterfaceAudience.Private -final class StoreFileComparators { +@InterfaceAudience.Private public final class StoreFileComparators { /** * Comparator that compares based on the Sequence Ids of the the store files. Bulk loads that did * not request a seq ID are given a seq id of -1; thus, they are placed before all non- bulk diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index cda05427da80..c5e690bc9352 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -225,8 +225,7 @@ private void addCurrentScanners(List scanners) { } private static boolean isOnlyLatestVersionScan(Scan scan) { - return !scan.isRaw() && scan.getMaxVersions() == 1 - && scan.getTimeRange().getMax() == HConstants.LATEST_TIMESTAMP; + return !scan.isRaw() && scan.getTimeRange().getMax() == HConstants.LATEST_TIMESTAMP; } /** * Opens a scanner across memstore, snapshot, and all StoreFiles. Assumes we are not in a diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/AbstractMultiOutputCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/AbstractMultiOutputCompactor.java index f5a662ffe14f..0f8409b36785 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/AbstractMultiOutputCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/AbstractMultiOutputCompactor.java @@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.regionserver.AbstractMultiFileWriter; import org.apache.hadoop.hbase.regionserver.AbstractMultiFileWriter.WriterFactory; +import org.apache.hadoop.hbase.regionserver.CreateStoreFileWriterParams; import org.apache.hadoop.hbase.regionserver.HStore; import org.apache.hadoop.hbase.regionserver.InternalScanner; import org.apache.hadoop.hbase.regionserver.StoreFileWriter; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java index d9ad265da64e..09471cdb2b69 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java @@ -105,7 +105,7 @@ public abstract class Compactor { Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>())); // TODO: depending on Store is not good but, realistically, all compactors currently do. 
- Compactor(Configuration conf, HStore store) { + protected Compactor(Configuration conf, HStore store) { this.conf = conf; this.store = store; this.compactionKVMax = diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java index eb803c3e2a88..fba661d71f32 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java @@ -22,6 +22,8 @@ import java.util.function.Consumer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.regionserver.DualFileWriter; import org.apache.hadoop.hbase.regionserver.HStore; import org.apache.hadoop.hbase.regionserver.InternalScanner; import org.apache.hadoop.hbase.regionserver.StoreFileWriter; @@ -32,27 +34,35 @@ import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.collect.Lists; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; /** * Compact passed set of files. Create an instance and then call * {@link #compact(CompactionRequestImpl, ThroughputController, User)} */ @InterfaceAudience.Private -public class DefaultCompactor extends Compactor { +public class DefaultCompactor extends AbstractMultiOutputCompactor { private static final Logger LOG = LoggerFactory.getLogger(DefaultCompactor.class); public DefaultCompactor(Configuration conf, HStore store) { super(conf, store); } - private final CellSinkFactory writerFactory = - new CellSinkFactory() { + private final CellSinkFactory writerFactory = + new CellSinkFactory() { @Override - public StoreFileWriter createWriter(InternalScanner scanner, FileDetails fd, + public DualFileWriter createWriter(InternalScanner scanner, FileDetails fd, boolean shouldDropBehind, boolean major, Consumer writerCreationTracker) throws IOException { - return DefaultCompactor.this.createWriter(fd, shouldDropBehind, major, - writerCreationTracker); + boolean enableDualFileWriter = + conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); + boolean keepDeletedCells = store.getColumnFamilyDescriptor().getKeepDeletedCells() + != KeepDeletedCells.FALSE; + DualFileWriter writer = new DualFileWriter(store.getComparator(), + store.getColumnFamilyDescriptor().getMaxVersions(), keepDeletedCells, + enableDualFileWriter); + initMultiWriter(writer, scanner, fd, shouldDropBehind, major, writerCreationTracker); + return writer; } }; @@ -64,29 +74,11 @@ public List compact(final CompactionRequestImpl request, return compact(request, defaultScannerFactory, writerFactory, throughputController, user); } - @Override - protected List commitWriter(StoreFileWriter writer, FileDetails fd, - CompactionRequestImpl request) throws IOException { - List newFiles = Lists.newArrayList(writer.getPath()); - writer.appendMetadata(fd.maxSeqId, request.isAllFiles(), request.getFiles()); - writer.close(); - return newFiles; - } - @Override - protected final void abortWriter(StoreFileWriter writer) throws IOException { - Path leftoverFile = writer.getPath(); - try { - writer.close(); - } catch (IOException e) { - LOG.warn("Failed to close the writer after an unfinished compaction.", e); - } - try { - store.getFileSystem().delete(leftoverFile, false); - } 
catch (IOException e) { - LOG.warn("Failed to delete the leftover file {} after an unfinished compaction.", - leftoverFile, e); - } + protected List commitWriter(DualFileWriter writer, FileDetails fd, + CompactionRequestImpl request) throws IOException { + List pathList = + writer.commitWriters(fd.maxSeqId, request.isAllFiles(), request.getFiles()); + return pathList; } - } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java deleted file mode 100644 index bfb9f349c57c..000000000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DualFileCompactor.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.regionserver.compactions; - -import java.io.IOException; -import java.util.List; -import java.util.function.Consumer; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.regionserver.DualFileWriter; -import org.apache.hadoop.hbase.regionserver.HStore; -import org.apache.hadoop.hbase.regionserver.InternalScanner; -import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; -import org.apache.hadoop.hbase.security.User; -import org.apache.yetus.audience.InterfaceAudience; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This compactor generates two files, one for the latest cells and the other for - * the rest of the cells (i.e., older put cells and delete markers). 
- */ -@InterfaceAudience.Private -public class DualFileCompactor extends AbstractMultiOutputCompactor { - - private static final Logger LOG = LoggerFactory.getLogger(DualFileCompactor.class); - - public DualFileCompactor(Configuration conf, HStore store) { - super(conf, store); - } - - public List compact(final CompactionRequestImpl request, ThroughputController throughputController, - User user) throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("Executing compaction with "); - } - - return compact(request, defaultScannerFactory, - new CellSinkFactory() { - - @Override - public DualFileWriter createWriter(InternalScanner scanner, FileDetails fd, - boolean shouldDropBehind, boolean major, Consumer writerCreationTracker) - throws IOException { - DualFileWriter writer = new DualFileWriter(store.getComparator()); - initMultiWriter(writer, scanner, fd, shouldDropBehind, major, writerCreationTracker); - return writer; - } - }, throughputController, user); - } - - @Override - protected List commitWriter(DualFileWriter writer, FileDetails fd, - CompactionRequestImpl request) throws IOException { - List pathList = - writer.commitWriters(fd.maxSeqId, request.isAllFiles(), request.getFiles()); - return pathList; - } - -} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java index 4f3efccaeede..57a917c54efa 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.hbase.HBaseTestingUtil.START_KEY; import static org.apache.hadoop.hbase.HBaseTestingUtil.START_KEY_BYTES; import static org.apache.hadoop.hbase.HBaseTestingUtil.fam1; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; import static org.apache.hadoop.hbase.regionserver.Store.PRIORITY_USER; import static org.apache.hadoop.hbase.regionserver.compactions.CloseChecker.SIZE_LIMIT_KEY; import static org.apache.hadoop.hbase.regionserver.compactions.CloseChecker.TIME_LIMIT_KEY; @@ -36,6 +37,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -67,7 +69,6 @@ import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequestImpl; import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; -import org.apache.hadoop.hbase.regionserver.compactions.DualFileCompactor; import org.apache.hadoop.hbase.regionserver.throttle.CompactionThroughputControllerFactory; import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; @@ -86,6 +87,8 @@ import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -93,6 +96,7 @@ /** * Test compaction framework and common functions */ +@RunWith(Parameterized.class) @Category({ RegionServerTests.class, MediumTests.class }) public class TestCompaction { @@ -115,6 +119,13 @@ public class TestCompaction 
{ private static final long MAX_FILES_TO_COMPACT = 10; private final byte[] FAMILY = Bytes.toBytes("cf"); + @Parameterized.Parameters(name = "{index}: enableDualFileWriter={0}") + public static Iterable data() { + return Arrays.asList(new Object[] { true }, new Object[] { false }); + } + + @Parameterized.Parameter + public boolean enableDualFileWriter; /** constructor */ public TestCompaction() { super(); @@ -125,6 +136,7 @@ public TestCompaction() { conf.setLong(HConstants.COMPACTION_SCANNER_SIZE_MAX, 10L); conf.set(CompactionThroughputControllerFactory.HBASE_THROUGHPUT_CONTROLLER_KEY, NoLimitThroughputController.class.getName()); + conf.setBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, enableDualFileWriter); compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3); secondRowBytes = START_KEY_BYTES.clone(); @@ -137,8 +149,9 @@ public TestCompaction() { @Before public void setUp() throws Exception { - TableDescriptorBuilder builder = UTIL.createModifyableTableDescriptor(name.getMethodName()); - if (name.getMethodName().equals("testCompactionSeqId")) { + TableDescriptorBuilder builder = UTIL.createModifyableTableDescriptor( + name.getMethodName().replaceAll("[^A-Za-z0-9-_]", "_")); + if (name.getMethodName().startsWith("testCompactionSeqId")) { UTIL.getConfiguration().set("hbase.hstore.compaction.kv.max", "10"); UTIL.getConfiguration().set(DefaultStoreEngine.DEFAULT_COMPACTOR_CLASS_KEY, DummyCompactor.class.getName()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java index cdabdf27491c..d3ae7608180e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java @@ -127,9 +127,9 @@ public MyCompactor(Configuration conf, HStore store) { } @Override - protected List commitWriter(StoreFileWriter writer, FileDetails fd, + protected List commitWriter(DualFileWriter writer, FileDetails fd, CompactionRequestImpl request) throws IOException { - HFileWriterImpl writerImpl = (HFileWriterImpl) writer.writer; + HFileWriterImpl writerImpl = (HFileWriterImpl) writer.getHFileWriter(); Cell cell = writerImpl.getLastCell(); // The cell should be backend with an KeyOnlyKeyValue. 
IS_LAST_CELL_ON_HEAP.set(cell instanceof KeyOnlyKeyValue); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java index 8ccbc521b75a..a5cf9bd2d759 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java @@ -201,6 +201,15 @@ public void verifyKvs(KeyValue[][] kvss) { } } + public void verifyKv(KeyValue[] kvs) { + assertEquals(1, writers.size()); + Writer w = writers.get(0); + assertEquals(kvs.length, w.kvs.size()); + for (int i = 0; i < kvs.length; ++i) { + assertEquals(kvs[i], w.kvs.get(i)); + } + } + public List getWriters() { return writers; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java similarity index 76% rename from hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java rename to hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java index 34793d71f818..6838a45a3292 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileCompactor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.regionserver.compactions; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; import static org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.createDummyRequest; import static org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.createDummyStoreFile; import static org.junit.Assert.assertEquals; @@ -28,16 +29,15 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.List; import java.util.OptionalLong; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparatorImpl; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.KeepDeletedCells; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; @@ -68,11 +68,11 @@ @RunWith(Parameterized.class) @Category({ RegionServerTests.class, SmallTests.class }) -public class TestDualFileCompactor { +public class TestDualFileWriter { @ClassRule public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestDualFileCompactor.class); + HBaseClassTestRule.forClass(TestDualFileWriter.class); private static final byte[] NAME_OF_THINGS = Bytes.toBytes("foo"); @@ -121,21 +121,30 @@ public class TestDualFileCompactor { private static final KeyValue KV_G_2 = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), Bytes.toBytes("g"),100L, KeyValue.Type.Put); - @Parameters(name = "{index}: usePrivateReaders={0}") + @Parameters(name = "{index}: usePrivateReaders={0}, keepDeletedCells={1}") public static Iterable data() { - return Arrays.asList(new Object[] { true }, new Object[] { 
false }); + return Arrays.asList(new Object[] { true, true }, new Object[] { false, false }); } - @Parameter + @Parameter(0) public boolean usePrivateReaders; - private DualFileCompactor createCompactor(StoreFileWritersCapture writers, + @Parameter(1) + public boolean keepDeletedCells; + + private DefaultCompactor createCompactor(StoreFileWritersCapture writers, final KeyValue[] input, List storefiles) throws Exception { Configuration conf = HBaseConfiguration.create(); conf.setBoolean("hbase.regionserver.compaction.private.readers", usePrivateReaders); + conf.setBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); final Scanner scanner = new Scanner(input); // Create store mock that is satisfactory for compactor. - ColumnFamilyDescriptor familyDescriptor = ColumnFamilyDescriptorBuilder.of(NAME_OF_THINGS); + ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder = + new ColumnFamilyDescriptorBuilder(NAME_OF_THINGS); + columnFamilyDescriptorBuilder.setKeepDeletedCells(keepDeletedCells ? KeepDeletedCells.TRUE + : KeepDeletedCells.FALSE); + ColumnFamilyDescriptor familyDescriptor = columnFamilyDescriptorBuilder.build(); + ScanInfo si = new ScanInfo(conf, familyDescriptor, Long.MAX_VALUE, 0, CellComparatorImpl.COMPARATOR); HStore store = mock(HStore.class); @@ -152,7 +161,7 @@ private DualFileCompactor createCompactor(StoreFileWritersCapture writers, OptionalLong maxSequenceId = StoreUtils.getMaxSequenceIdInList(storefiles); when(store.getMaxSequenceId()).thenReturn(maxSequenceId); - return new DualFileCompactor(conf, store) { + return new DefaultCompactor(conf, store) { @Override protected InternalScanner createScanner(HStore store, ScanInfo scanInfo, List scanners, long smallestReadPoint, long earliestPutTs, @@ -173,12 +182,22 @@ private void verify(KeyValue[] input, KeyValue[][] output) throws Exception { StoreFileWritersCapture writers = new StoreFileWritersCapture(); HStoreFile sf1 = createDummyStoreFile(1L); HStoreFile sf2 = createDummyStoreFile(2L); - DualFileCompactor dfc = createCompactor(writers, input, Arrays.asList(sf1, sf2)); + DefaultCompactor dfc = createCompactor(writers, input, Arrays.asList(sf1, sf2)); List paths = dfc.compact(new CompactionRequestImpl(Arrays.asList(sf1)), NoLimitThroughputController.INSTANCE, null); writers.verifyKvs(output); assertEquals(output.length, paths.size()); } + private void verify(KeyValue[] input, KeyValue[] output) throws Exception { + StoreFileWritersCapture writers = new StoreFileWritersCapture(); + HStoreFile sf1 = createDummyStoreFile(1L); + HStoreFile sf2 = createDummyStoreFile(2L); + DefaultCompactor dfc = createCompactor(writers, input, Arrays.asList(sf1, sf2)); + List paths = dfc.compact(new CompactionRequestImpl(Arrays.asList(sf1)), + NoLimitThroughputController.INSTANCE, null); + writers.verifyKv(output); + assertEquals(1, paths.size()); + } @SuppressWarnings("unchecked") private static T[] a(T... a) { @@ -187,26 +206,38 @@ private static T[] a(T... 
a) { @Test public void test() throws Exception { - verify(a(KV_A_DeleteFamilyVersion, KV_A_1, KV_A_2, KV_A_3, KV_B_DeleteColumn, KV_B, KV_C, - KV_D_1, KV_D_2, // Row 123 - KV_E_F_DeleteFamily, KV_E, KV_F, // Row 456 - KV_G_DeleteFamily, KV_G_DeleteFamilyVersion, KV_G_1, KV_G_DeleteColumn, - KV_G_DeleteColumnVersion, KV_G_2), // Row 789 - a( - a(KV_A_DeleteFamilyVersion, KV_A_2, KV_B_DeleteColumn, KV_C, KV_D_1, - KV_E_F_DeleteFamily, KV_G_DeleteFamily, KV_G_1), // Latest versions - a(KV_A_1, KV_A_3, KV_B, KV_D_2, KV_E, KV_F, KV_G_DeleteFamilyVersion, KV_G_DeleteColumn, - KV_G_DeleteColumnVersion, KV_G_2) - )); + if (!keepDeletedCells){ + verify( + a(KV_A_DeleteFamilyVersion, KV_A_1, KV_A_2, KV_A_3, KV_B_DeleteColumn, KV_B, KV_C, KV_D_1, + KV_D_2, // Row 123 + KV_E_F_DeleteFamily, KV_E, KV_F, // Row 456 + KV_G_DeleteFamily, KV_G_DeleteFamilyVersion, KV_G_1, KV_G_DeleteColumn, + KV_G_DeleteColumnVersion, KV_G_2), // Row 789 + a(KV_A_DeleteFamilyVersion, KV_A_2, KV_B_DeleteColumn, KV_C, KV_D_1, KV_E_F_DeleteFamily, + KV_G_DeleteFamily, KV_G_1)// Latest versions + ); + + } else { + verify( + a(KV_A_DeleteFamilyVersion, KV_A_1, KV_A_2, KV_A_3, KV_B_DeleteColumn, KV_B, KV_C, KV_D_1, + KV_D_2, // Row 123 + KV_E_F_DeleteFamily, KV_E, KV_F, // Row 456 + KV_G_DeleteFamily, KV_G_DeleteFamilyVersion, KV_G_1, KV_G_DeleteColumn, + KV_G_DeleteColumnVersion, KV_G_2), // Row 789 + a(a(KV_A_DeleteFamilyVersion, KV_A_2, KV_B_DeleteColumn, KV_C, KV_D_1, KV_E_F_DeleteFamily, + KV_G_DeleteFamily, KV_G_1), // Latest versions + a(KV_A_1, KV_A_3, KV_B, KV_D_2, KV_E, KV_F, KV_G_DeleteFamilyVersion, KV_G_DeleteColumn, + KV_G_DeleteColumnVersion, KV_G_2))); + } } @Test public void testEmptyOutputFile() throws Exception { StoreFileWritersCapture writers = new StoreFileWritersCapture(); CompactionRequestImpl request = createDummyRequest(); - DualFileCompactor dtc = + DefaultCompactor dfc = createCompactor(writers, new KeyValue[0], new ArrayList<>(request.getFiles())); - List paths = dtc.compact(request, NoLimitThroughputController.INSTANCE, null); + List paths = dfc.compact(request, NoLimitThroughputController.INSTANCE, null); assertEquals(1, paths.size()); List dummyWriters = writers.getWriters(); assertEquals(1, dummyWriters.size()); From 61c6bf3055776c1d1bdb38439385717150d107dd Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Mon, 11 Dec 2023 22:40:41 +0300 Subject: [PATCH 06/27] Minor changes and fixes for spotless check failures --- .../client/ColumnFamilyDescriptorBuilder.java | 2 +- .../hadoop/hbase/mob/MobStoreEngine.java | 10 +- .../regionserver/DefaultStoreEngine.java | 11 +- .../DualFileStoreFileManager.java | 36 +++--- .../hbase/regionserver/DualFileWriter.java | 27 ++--- .../hadoop/hbase/regionserver/HStore.java | 12 +- .../hadoop/hbase/regionserver/HStoreFile.java | 5 +- .../hbase/regionserver/StoreEngine.java | 1 - .../regionserver/StoreFileComparators.java | 3 +- .../hbase/regionserver/StoreFileScanner.java | 3 +- .../hbase/regionserver/StoreScanner.java | 7 +- .../AbstractMultiOutputCompactor.java | 1 - .../compactions/DefaultCompactor.java | 12 +- .../hbase/regionserver/TestCompaction.java | 5 +- .../regionserver/TestReversibleScanners.java | 5 +- .../TestStripeStoreFileManager.java | 3 +- .../compactions/TestDualFileWriter.java | 108 ++++++------------ 17 files changed, 102 insertions(+), 149 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java index 07eb9ba50d1a..42f25fdc56f4 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java @@ -398,7 +398,7 @@ public static ColumnFamilyDescriptor of(byte[] name) { return newBuilder(name).build(); } - public ColumnFamilyDescriptorBuilder(final byte[] name) { + private ColumnFamilyDescriptorBuilder(final byte[] name) { this.desc = new ModifyableColumnFamilyDescriptor(name); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java index 750d6b9cc994..5ee7ad8f9341 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hbase.mob; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY; + import java.io.IOException; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -33,9 +36,6 @@ import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import org.apache.hadoop.hbase.security.User; import org.apache.yetus.audience.InterfaceAudience; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY; /** * MobStoreEngine creates the mob specific compactor, and store flusher. 
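The next hunks drop the dual file writer flag handling from MobStoreEngine and leave it to DefaultStoreEngine, which reads the flag to pick its store file manager. As a point of reference, a minimal, hypothetical sketch of turning the feature on from configuration; only the DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY constant on DefaultStoreEngine comes from this series, while everything else below (including the class name) is generic setup assumed for illustration and not part of the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.regionserver.DefaultStoreEngine;

public class EnableDualFileWriterSketch {
  public static void main(String[] args) {
    // Sketch only: the key's string value is defined on DefaultStoreEngine and not repeated here.
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean(DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true);
    // With the flag on, DefaultCompactor wraps compaction output in a DualFileWriter, so a
    // compaction may emit two HFiles per store: one with live cell versions, one with the rest.
    System.out.println("dual file writer enabled: " + conf
      .getBoolean(DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, false));
  }
}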
@@ -64,14 +64,13 @@ protected void createStoreFlusher(Configuration conf, HStore store) throws IOExc protected void createCompactor(Configuration conf, HStore store) throws IOException { createCompactor(conf, store, MOB_COMPACTOR_CLASS_KEY, DefaultMobStoreCompactor.class.getName()); } + @Override protected void createComponents(Configuration conf, HStore store, CellComparator kvComparator) throws IOException { createCompactor(conf, store); createCompactionPolicy(conf, store); createStoreFlusher(conf, store); - boolean enableDualFileWriter = conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, - false); storeFileManager = new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, compactionPolicy.getConf()); } @@ -81,7 +80,6 @@ protected void createCompactionPolicy(Configuration conf, HStore store) throws I DEFAULT_COMPACTION_POLICY_CLASS.getName()); } - @Override public CompactionContext createCompaction() { return new MobStoreEngine.DefaultCompactionContext(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index ce931ef9a43d..57e711bd4a53 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hbase.regionserver.compactions.RatioBasedCompactionPolicy; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import org.apache.hadoop.hbase.security.User; -import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.yetus.audience.InterfaceAudience; /** @@ -67,13 +66,13 @@ protected void createComponents(Configuration conf, HStore store, CellComparator createCompactor(conf, store); createCompactionPolicy(conf, store); createStoreFlusher(conf, store); - boolean enableDualFileWriter = conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, - false); + boolean enableDualFileWriter = + conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, false); storeFileManager = enableDualFileWriter ? new DualFileStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, - compactionPolicy.getConf()) - :new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, - compactionPolicy.getConf()); + compactionPolicy.getConf()) + : new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, + compactionPolicy.getConf()); } protected void createCompactor(Configuration conf, HStore store) throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java index 461a1edc9712..9e66341be6f0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java @@ -32,13 +32,13 @@ import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; /** - * Implementation of {@link StoreFileManager} for {@link DualFileStoreEngine}. Not thread-safe. + * Implementation of {@link StoreFileManager} for {@link DualFileWriter}. Not thread-safe. 
*/ @InterfaceAudience.Private -class DualFileStoreFileManager extends DefaultStoreFileManager { +public class DualFileStoreFileManager extends DefaultStoreFileManager { /** - * List of store files that include the latest cells inside this store. This is an - * immutable list that is atomically replaced when its contents change. + * List of store files that include the latest cells inside this store. This is an immutable list + * that is atomically replaced when its contents change. */ private volatile ImmutableList latestVersionStoreFiles = ImmutableList.of(); @@ -48,31 +48,30 @@ public DualFileStoreFileManager(CellComparator cellComparator, super(cellComparator, storeFileComparator, conf, comConf); } - private List extractHasLatestVersionFiles(Collection storeFiles) + private List extractHasLiveVersionFiles(Collection storeFiles) throws IOException { - List hasLatestVersionFiles = new ArrayList<>(storeFiles.size()); + List hasLiveVersionFiles = new ArrayList<>(storeFiles.size()); for (HStoreFile file : storeFiles) { file.initReader(); - if (file.hasLatestVersion()) { - hasLatestVersionFiles.add(file); + if (file.hasLiveVersion()) { + hasLiveVersionFiles.add(file); } } - return hasLatestVersionFiles; + return hasLiveVersionFiles; } @Override public void loadFiles(List storeFiles) throws IOException { super.loadFiles(storeFiles); - this.latestVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), - extractHasLatestVersionFiles(storeFiles)); + this.latestVersionStoreFiles = + ImmutableList.sortedCopyOf(getStoreFileComparator(), extractHasLiveVersionFiles(storeFiles)); } @Override public void insertNewFiles(Collection sfs) throws IOException { super.insertNewFiles(sfs); - this.latestVersionStoreFiles = - ImmutableList.sortedCopyOf(getStoreFileComparator(), - Iterables.concat(this.latestVersionStoreFiles, extractHasLatestVersionFiles(sfs))); + this.latestVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), + Iterables.concat(this.latestVersionStoreFiles, extractHasLiveVersionFiles(sfs))); } @Override @@ -84,10 +83,11 @@ public ImmutableCollection clearFiles() { @Override public void addCompactionResults(Collection newCompactedFiles, Collection results) throws IOException { - Collection newFilesHasLatestVersion= extractHasLatestVersionFiles(results); - this.latestVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), Iterables - .concat(Iterables.filter(latestVersionStoreFiles, - sf -> !newCompactedFiles.contains(sf)), newFilesHasLatestVersion)); + Collection newFilesHasLatestVersion = extractHasLiveVersionFiles(results); + this.latestVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), + Iterables.concat( + Iterables.filter(latestVersionStoreFiles, sf -> !newCompactedFiles.contains(sf)), + newFilesHasLatestVersion)); super.addCompactionResults(newCompactedFiles, results); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java index 45a2add5178d..039309ed3876 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hbase.regionserver; +import static org.apache.hadoop.hbase.regionserver.HStoreFile.HAS_LIVE_VERSIONS_KEY; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -27,14 
+29,13 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; -import static org.apache.hadoop.hbase.regionserver.HStoreFile.HAS_LIVE_VERSIONS_KEY; /** - * Separates the provided cells into two files, one file for the latest cells and - * the other for the rest of the cells. The latest cells includes the latest put cells that are - * not deleted by a delete marker, the delete markers that delete latest put cells, and the - * version delete markers (that is, DeleteFamilyVersion and Delete) that are not deleted by other - * delete markers (that is DeleteFamily and DeleteColumn). + * Separates the provided cells into two files, one file for the latest cells and the other for the + * rest of the cells. The latest cells includes the latest put cells that are not deleted by a + * delete marker, the delete markers that delete latest put cells, and the version delete markers + * (that is, DeleteFamilyVersion and Delete) that are not deleted by other delete markers (that is + * DeleteFamily and DeleteColumn). */ @InterfaceAudience.Private public class DualFileWriter extends AbstractMultiFileWriter { @@ -57,13 +58,11 @@ public class DualFileWriter extends AbstractMultiFileWriter { // The live put cell count for the current column private int livePutCellCount; private final boolean dualWriterEnabled; - private final boolean keepDeletedCells; private final int maxVersions; - public DualFileWriter(CellComparator comparator, int maxVersions, - boolean keepDeletedCells, boolean dualWriterEnabled) { + + public DualFileWriter(CellComparator comparator, int maxVersions, boolean dualWriterEnabled) { this.comparator = comparator; this.maxVersions = maxVersions; - this.keepDeletedCells = keepDeletedCells; this.dualWriterEnabled = dualWriterEnabled; writers = new ArrayList<>(2); initRowState(); @@ -82,7 +81,7 @@ private void initColumnState() { } - private void addLiveVersion(Cell cell) throws IOException { + private void addLiveVersion(Cell cell) throws IOException { if (liveVersionWriter == null) { liveVersionWriter = writerFactory.createWriter(); writers.add(liveVersionWriter); @@ -90,10 +89,7 @@ private void addLiveVersion(Cell cell) throws IOException { liveVersionWriter.append(cell); } - private void addHistoricalVersion(Cell cell) throws IOException { - if (!keepDeletedCells) { - return; - } + private void addHistoricalVersion(Cell cell) throws IOException { if (historicalVersionWriter == null) { historicalVersionWriter = writerFactory.createWriter(); writers.add(historicalVersionWriter); @@ -208,6 +204,7 @@ protected void preCommitWriters() throws IOException { historicalVersionWriter.appendFileInfo(HAS_LIVE_VERSIONS_KEY, Bytes.toBytes(false)); } } + public HFile.Writer getHFileWriter() { if (writers.isEmpty()) { return null; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 4d8520edbb00..8e0e7c38babe 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -957,8 +957,7 @@ private void notifyChangedReadersObservers(List sfs) throws IOExcept */ public List getScanners(boolean cacheBlocks, boolean isGet, boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow, byte[] stopRow, long readPt, - boolean onlyLatestVersion) - throws IOException { + boolean 
onlyLatestVersion) throws IOException { return getScanners(cacheBlocks, usePread, isCompaction, matcher, startRow, true, stopRow, false, readPt, onlyLatestVersion); } @@ -1003,9 +1002,9 @@ public List getScanners(boolean cacheBlocks, boolean usePread, // TODO this used to get the store files in descending order, // but now we get them in ascending order, which I think is // actually more correct, since memstore get put at the end. - List sfScanners = StoreFileScanner.getScannersForStoreFiles( - storeFilesToScan, cacheBlocks, usePread, isCompaction, false, matcher, readPt, - onlyLatestVersion); + List sfScanners = + StoreFileScanner.getScannersForStoreFiles(storeFilesToScan, cacheBlocks, usePread, + isCompaction, false, matcher, readPt, onlyLatestVersion); List scanners = new ArrayList<>(sfScanners.size() + 1); scanners.addAll(sfScanners); // Then the memstore scanners @@ -1046,8 +1045,7 @@ private static void clearAndClose(List scanners) { public List getScanners(List files, boolean cacheBlocks, boolean isGet, boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow, byte[] stopRow, long readPt, boolean includeMemstoreScanner, - boolean onlyLatestVersion) - throws IOException { + boolean onlyLatestVersion) throws IOException { return getScanners(files, cacheBlocks, usePread, isCompaction, matcher, startRow, true, stopRow, false, readPt, includeMemstoreScanner, onlyLatestVersion); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java index a3a9ecf47d0d..a3c0172629a9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -142,7 +142,7 @@ public class HStoreFile implements StoreFile { // Indicate if the file contains live cell versions for a given column // in a row. MemStore flushes generate files with all cell versions. However, - // compactions can generate two files, one with the liver version cells and the other + // compactions can generate two files, one with the live cell versions and the other // with the remaining (historical) cell versions. 
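To make the flag described in the comment above concrete, here is a small, hypothetical helper that mirrors the live version file selection this series adds to DefaultStoreFileManager; it assumes a collection of HStoreFile instances from an open store and is an illustration, not code from the patch.

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.regionserver.HStoreFile;

public final class LiveVersionFileFilterSketch {
  private LiveVersionFileFilterSketch() {
  }

  // Keeps only the files that may hold live cell versions. Flush output starts with
  // hasLiveVersion() == true, while the historical half of a dual file compaction is written
  // with HAS_LIVE_VERSIONS_KEY = false, so a latest-version scan can skip it entirely.
  public static List<HStoreFile> liveVersionFiles(Collection<HStoreFile> storeFiles)
    throws IOException {
    List<HStoreFile> result = new ArrayList<>(storeFiles.size());
    for (HStoreFile file : storeFiles) {
      file.initReader(); // loads the file info block that carries the flag
      if (file.hasLiveVersion()) {
        result.add(file);
      }
    }
    return result;
  }
}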
private volatile boolean hasLiveVersions = true; @@ -345,7 +345,7 @@ public boolean isCompactedAway() { return compactedAway; } - public boolean hasLatestVersion() { + public boolean hasLiveVersion() { return hasLiveVersions; } @@ -602,6 +602,7 @@ public void markCompactedAway() { public void setHasLiveVersions(boolean hasLiveVersions) { this.hasLiveVersions = hasLiveVersions; } + @Override public String toString() { return this.fileInfo.toString(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java index b0d28c1aa089..e2a5fbf7c4be 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java @@ -545,7 +545,6 @@ public BloomFilterMetrics getBloomFilterMetrics() { return bloomFilterMetrics; } - protected void createCompactor(Configuration conf, HStore store, String classKey, String defaultClassName) throws IOException { String className = conf.get(classKey, defaultClassName); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java index 3f533808fa84..9e485ba807c6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java @@ -25,7 +25,8 @@ /** * Useful comparators for comparing store files. */ -@InterfaceAudience.Private public final class StoreFileComparators { +@InterfaceAudience.Private +public final class StoreFileComparators { /** * Comparator that compares based on the Sequence Ids of the the store files. Bulk loads that did * not request a seq ID are given a seq id of -1; thus, they are placed before all non- bulk diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index 4128e7b2853c..d84782107bf4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -113,8 +113,7 @@ public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVC */ public static List getScannersForStoreFiles(Collection files, boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean useDropBehind, long readPt, - boolean onlyLatestVersion) - throws IOException { + boolean onlyLatestVersion) throws IOException { return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction, useDropBehind, null, readPt, onlyLatestVersion); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index c5e690bc9352..675a811bee6e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -227,6 +227,7 @@ private void addCurrentScanners(List scanners) { private static boolean isOnlyLatestVersionScan(Scan scan) { return !scan.isRaw() && scan.getTimeRange().getMax() == HConstants.LATEST_TIMESTAMP; } + /** * Opens a scanner across memstore, snapshot, and all StoreFiles. 
Assumes we are not in a * compaction. @@ -1000,9 +1001,9 @@ public void updateReaders(List sfs, List memStoreSc // Eagerly creating scanners so that we have the ref counting ticking on the newly created // store files. In case of stream scanners this eager creation does not induce performance // penalty because in scans (that uses stream scanners) the next() call is bound to happen. - List scanners = store.getScanners(sfs, cacheBlocks, get, usePread, - isCompaction, matcher, scan.getStartRow(), scan.getStopRow(), - this.readPt, false, isOnlyLatestVersionScan(scan)); + List scanners = + store.getScanners(sfs, cacheBlocks, get, usePread, isCompaction, matcher, + scan.getStartRow(), scan.getStopRow(), this.readPt, false, isOnlyLatestVersionScan(scan)); flushedstoreFileScanners.addAll(scanners); if (!CollectionUtils.isEmpty(memStoreScanners)) { clearAndClose(memStoreScannersAfterFlush); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/AbstractMultiOutputCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/AbstractMultiOutputCompactor.java index 0f8409b36785..f5a662ffe14f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/AbstractMultiOutputCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/AbstractMultiOutputCompactor.java @@ -24,7 +24,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.regionserver.AbstractMultiFileWriter; import org.apache.hadoop.hbase.regionserver.AbstractMultiFileWriter.WriterFactory; -import org.apache.hadoop.hbase.regionserver.CreateStoreFileWriterParams; import org.apache.hadoop.hbase.regionserver.HStore; import org.apache.hadoop.hbase.regionserver.InternalScanner; import org.apache.hadoop.hbase.regionserver.StoreFileWriter; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java index fba661d71f32..45465e4a1b8d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java @@ -17,25 +17,22 @@ */ package org.apache.hadoop.hbase.regionserver.compactions; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; + import java.io.IOException; import java.util.List; import java.util.function.Consumer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.KeepDeletedCells; import org.apache.hadoop.hbase.regionserver.DualFileWriter; import org.apache.hadoop.hbase.regionserver.HStore; import org.apache.hadoop.hbase.regionserver.InternalScanner; -import org.apache.hadoop.hbase.regionserver.StoreFileWriter; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import org.apache.hadoop.hbase.security.User; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hbase.thirdparty.com.google.common.collect.Lists; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; - /** * Compact passed set of files. 
Create an instance and then call * {@link #compact(CompactionRequestImpl, ThroughputController, User)} @@ -56,10 +53,8 @@ public DualFileWriter createWriter(InternalScanner scanner, FileDetails fd, throws IOException { boolean enableDualFileWriter = conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); - boolean keepDeletedCells = store.getColumnFamilyDescriptor().getKeepDeletedCells() - != KeepDeletedCells.FALSE; DualFileWriter writer = new DualFileWriter(store.getComparator(), - store.getColumnFamilyDescriptor().getMaxVersions(), keepDeletedCells, + store.getColumnFamilyDescriptor().getMaxVersions(), enableDualFileWriter); initMultiWriter(writer, scanner, fd, shouldDropBehind, major, writerCreationTracker); return writer; @@ -74,7 +69,6 @@ public List compact(final CompactionRequestImpl request, return compact(request, defaultScannerFactory, writerFactory, throughputController, user); } - protected List commitWriter(DualFileWriter writer, FileDetails fd, CompactionRequestImpl request) throws IOException { List pathList = diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java index 57a917c54efa..2c7496023439 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java @@ -126,6 +126,7 @@ public static Iterable data() { @Parameterized.Parameter public boolean enableDualFileWriter; + /** constructor */ public TestCompaction() { super(); @@ -149,8 +150,8 @@ public TestCompaction() { @Before public void setUp() throws Exception { - TableDescriptorBuilder builder = UTIL.createModifyableTableDescriptor( - name.getMethodName().replaceAll("[^A-Za-z0-9-_]", "_")); + TableDescriptorBuilder builder = + UTIL.createModifyableTableDescriptor(name.getMethodName().replaceAll("[^A-Za-z0-9-_]", "_")); if (name.getMethodName().startsWith("testCompactionSeqId")) { UTIL.getConfiguration().set("hbase.hstore.compaction.kv.max", "10"); UTIL.getConfiguration().set(DefaultStoreEngine.DEFAULT_COMPACTOR_CLASS_KEY, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java index e086e340d35e..6ab6a7e18b98 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java @@ -481,9 +481,8 @@ private ReversedKeyValueHeap getReversibleKeyValueHeap(MemStore memstore, HStore private List getScanners(MemStore memstore, HStoreFile sf1, HStoreFile sf2, byte[] startRow, boolean doSeek, int readPoint) throws IOException { - List fileScanners = StoreFileScanner - .getScannersForStoreFiles(Lists.newArrayList(sf1, sf2), false, true, false, false, readPoint, - false); + List fileScanners = StoreFileScanner.getScannersForStoreFiles( + Lists.newArrayList(sf1, sf2), false, true, false, false, readPoint, false); List memScanners = memstore.getScanners(readPoint); List scanners = new ArrayList<>(fileScanners.size() + 1); scanners.addAll(fileScanners); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java index 13a50ccd218e..3a5cab23fba3 100644 --- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java @@ -95,8 +95,7 @@ public void testInsertFilesIntoL0() throws Exception { MockHStoreFile sf = createFile(); manager.insertNewFiles(al(sf)); assertEquals(1, manager.getStorefileCount()); - Collection filesForGet = manager.getFilesForScan(KEY_A, true, KEY_A, true, - false); + Collection filesForGet = manager.getFilesForScan(KEY_A, true, KEY_A, true, false); assertEquals(1, filesForGet.size()); assertTrue(filesForGet.contains(sf)); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java index 6838a45a3292..8fc27cb9e9b2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java @@ -78,72 +78,63 @@ public class TestDualFileWriter { private static final TableName TABLE_NAME = TableName.valueOf(NAME_OF_THINGS, NAME_OF_THINGS); - private static final KeyValue KV_A_DeleteFamilyVersion = - new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - null,300L, KeyValue.Type.DeleteFamilyVersion); + private static final KeyValue KV_A_DeleteFamilyVersion = new KeyValue(Bytes.toBytes("123"), + Bytes.toBytes("0"), null, 300L, KeyValue.Type.DeleteFamilyVersion); private static final KeyValue KV_A_1 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("a"),300L, KeyValue.Type.Put); + Bytes.toBytes("a"), 300L, KeyValue.Type.Put); private static final KeyValue KV_A_2 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("a"),200L, KeyValue.Type.Put); + Bytes.toBytes("a"), 200L, KeyValue.Type.Put); private static final KeyValue KV_A_3 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("a"),100L, KeyValue.Type.Put); + Bytes.toBytes("a"), 100L, KeyValue.Type.Put); private static final KeyValue KV_B_DeleteColumn = new KeyValue(Bytes.toBytes("123"), - Bytes.toBytes("0"), Bytes.toBytes("b"),200L, KeyValue.Type.DeleteColumn); + Bytes.toBytes("0"), Bytes.toBytes("b"), 200L, KeyValue.Type.DeleteColumn); private static final KeyValue KV_B = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("b"),100L, KeyValue.Type.Put); - + Bytes.toBytes("b"), 100L, KeyValue.Type.Put); private static final KeyValue KV_C = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("c"),100L, KeyValue.Type.Put); + Bytes.toBytes("c"), 100L, KeyValue.Type.Put); private static final KeyValue KV_D_1 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("d"),200L, KeyValue.Type.Put); + Bytes.toBytes("d"), 200L, KeyValue.Type.Put); private static final KeyValue KV_D_2 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("d"),100L, KeyValue.Type.Put); + Bytes.toBytes("d"), 100L, KeyValue.Type.Put); - private static final KeyValue KV_E_F_DeleteFamily = new KeyValue(Bytes.toBytes("456"), - Bytes.toBytes("0"), null ,200L, KeyValue.Type.DeleteFamily); + private static final KeyValue KV_E_F_DeleteFamily = + new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), null, 200L, KeyValue.Type.DeleteFamily); private static final KeyValue KV_E = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), - 
Bytes.toBytes("e"),100L, KeyValue.Type.Put); + Bytes.toBytes("e"), 100L, KeyValue.Type.Put); private static final KeyValue KV_F = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), - Bytes.toBytes("f"),100L, KeyValue.Type.Put); - private static final KeyValue KV_G_DeleteFamily = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), - null,400L, KeyValue.Type.DeleteFamily); - private static final KeyValue KV_G_DeleteFamilyVersion = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), - null,100L, KeyValue.Type.DeleteFamilyVersion); + Bytes.toBytes("f"), 100L, KeyValue.Type.Put); + private static final KeyValue KV_G_DeleteFamily = + new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), null, 400L, KeyValue.Type.DeleteFamily); + private static final KeyValue KV_G_DeleteFamilyVersion = new KeyValue(Bytes.toBytes("789"), + Bytes.toBytes("0"), null, 100L, KeyValue.Type.DeleteFamilyVersion); private static final KeyValue KV_G_1 = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), - Bytes.toBytes("g"),500L, KeyValue.Type.Put); - private static final KeyValue KV_G_DeleteColumn = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), - null,300L, KeyValue.Type.DeleteColumn); - private static final KeyValue KV_G_DeleteColumnVersion = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), - null,200L, KeyValue.Type.Delete); + Bytes.toBytes("g"), 500L, KeyValue.Type.Put); + private static final KeyValue KV_G_DeleteColumn = + new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), null, 300L, KeyValue.Type.DeleteColumn); + private static final KeyValue KV_G_DeleteColumnVersion = + new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), null, 200L, KeyValue.Type.Delete); private static final KeyValue KV_G_2 = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), - Bytes.toBytes("g"),100L, KeyValue.Type.Put); + Bytes.toBytes("g"), 100L, KeyValue.Type.Put); @Parameters(name = "{index}: usePrivateReaders={0}, keepDeletedCells={1}") public static Iterable data() { - return Arrays.asList(new Object[] { true, true }, new Object[] { false, false }); + return Arrays.asList(new Object[] { true }, new Object[] { false }); } @Parameter(0) public boolean usePrivateReaders; - @Parameter(1) - public boolean keepDeletedCells; - - private DefaultCompactor createCompactor(StoreFileWritersCapture writers, - final KeyValue[] input, List storefiles) throws Exception { + private DefaultCompactor createCompactor(StoreFileWritersCapture writers, final KeyValue[] input, + List storefiles) throws Exception { Configuration conf = HBaseConfiguration.create(); conf.setBoolean("hbase.regionserver.compaction.private.readers", usePrivateReaders); conf.setBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); final Scanner scanner = new Scanner(input); // Create store mock that is satisfactory for compactor. - ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder = - new ColumnFamilyDescriptorBuilder(NAME_OF_THINGS); - columnFamilyDescriptorBuilder.setKeepDeletedCells(keepDeletedCells ? 
KeepDeletedCells.TRUE - : KeepDeletedCells.FALSE); - ColumnFamilyDescriptor familyDescriptor = columnFamilyDescriptorBuilder.build(); + ColumnFamilyDescriptor familyDescriptor = ColumnFamilyDescriptorBuilder.of(NAME_OF_THINGS); ScanInfo si = new ScanInfo(conf, familyDescriptor, Long.MAX_VALUE, 0, CellComparatorImpl.COMPARATOR); @@ -188,16 +179,6 @@ private void verify(KeyValue[] input, KeyValue[][] output) throws Exception { writers.verifyKvs(output); assertEquals(output.length, paths.size()); } - private void verify(KeyValue[] input, KeyValue[] output) throws Exception { - StoreFileWritersCapture writers = new StoreFileWritersCapture(); - HStoreFile sf1 = createDummyStoreFile(1L); - HStoreFile sf2 = createDummyStoreFile(2L); - DefaultCompactor dfc = createCompactor(writers, input, Arrays.asList(sf1, sf2)); - List paths = dfc.compact(new CompactionRequestImpl(Arrays.asList(sf1)), - NoLimitThroughputController.INSTANCE, null); - writers.verifyKv(output); - assertEquals(1, paths.size()); - } @SuppressWarnings("unchecked") private static T[] a(T... a) { @@ -206,29 +187,16 @@ private static T[] a(T... a) { @Test public void test() throws Exception { - if (!keepDeletedCells){ - verify( - a(KV_A_DeleteFamilyVersion, KV_A_1, KV_A_2, KV_A_3, KV_B_DeleteColumn, KV_B, KV_C, KV_D_1, - KV_D_2, // Row 123 - KV_E_F_DeleteFamily, KV_E, KV_F, // Row 456 - KV_G_DeleteFamily, KV_G_DeleteFamilyVersion, KV_G_1, KV_G_DeleteColumn, - KV_G_DeleteColumnVersion, KV_G_2), // Row 789 - a(KV_A_DeleteFamilyVersion, KV_A_2, KV_B_DeleteColumn, KV_C, KV_D_1, KV_E_F_DeleteFamily, - KV_G_DeleteFamily, KV_G_1)// Latest versions - ); - - } else { - verify( - a(KV_A_DeleteFamilyVersion, KV_A_1, KV_A_2, KV_A_3, KV_B_DeleteColumn, KV_B, KV_C, KV_D_1, - KV_D_2, // Row 123 - KV_E_F_DeleteFamily, KV_E, KV_F, // Row 456 - KV_G_DeleteFamily, KV_G_DeleteFamilyVersion, KV_G_1, KV_G_DeleteColumn, - KV_G_DeleteColumnVersion, KV_G_2), // Row 789 - a(a(KV_A_DeleteFamilyVersion, KV_A_2, KV_B_DeleteColumn, KV_C, KV_D_1, KV_E_F_DeleteFamily, - KV_G_DeleteFamily, KV_G_1), // Latest versions - a(KV_A_1, KV_A_3, KV_B, KV_D_2, KV_E, KV_F, KV_G_DeleteFamilyVersion, KV_G_DeleteColumn, - KV_G_DeleteColumnVersion, KV_G_2))); - } + verify( + a(KV_A_DeleteFamilyVersion, KV_A_1, KV_A_2, KV_A_3, KV_B_DeleteColumn, KV_B, KV_C, KV_D_1, + KV_D_2, // Row 123 + KV_E_F_DeleteFamily, KV_E, KV_F, // Row 456 + KV_G_DeleteFamily, KV_G_DeleteFamilyVersion, KV_G_1, KV_G_DeleteColumn, + KV_G_DeleteColumnVersion, KV_G_2), // Row 789 + a(a(KV_A_DeleteFamilyVersion, KV_A_2, KV_B_DeleteColumn, KV_C, KV_D_1, KV_E_F_DeleteFamily, + KV_G_DeleteFamily, KV_G_1), // Latest versions + a(KV_A_1, KV_A_3, KV_B, KV_D_2, KV_E, KV_F, KV_G_DeleteFamilyVersion, KV_G_DeleteColumn, + KV_G_DeleteColumnVersion, KV_G_2))); } @Test From 2e4325ded64c026457cbf2c5434c1e5144d8342c Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Tue, 12 Dec 2023 10:47:39 +0300 Subject: [PATCH 07/27] Removed DualFileStoreFileManager --- .../regionserver/DefaultStoreEngine.java | 9 +- .../regionserver/DefaultStoreFileManager.java | 43 ++++++++ .../DualFileStoreFileManager.java | 102 ------------------ .../compactions/DefaultCompactor.java | 3 +- .../compactions/TestDualFileWriter.java | 1 - 5 files changed, 46 insertions(+), 112 deletions(-) delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 57e711bd4a53..4033844f6a16 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -66,13 +66,8 @@ protected void createComponents(Configuration conf, HStore store, CellComparator createCompactor(conf, store); createCompactionPolicy(conf, store); createStoreFlusher(conf, store); - boolean enableDualFileWriter = - conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, false); - storeFileManager = enableDualFileWriter - ? new DualFileStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, - compactionPolicy.getConf()) - : new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, - compactionPolicy.getConf()); + storeFileManager = new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, + compactionPolicy.getConf()); } protected void createCompactor(Configuration conf, HStore store) throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java index 97a2f2b29f9c..30fb52e96af6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java @@ -17,7 +17,10 @@ */ package org.apache.hadoop.hbase.regionserver; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; + import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.Iterator; @@ -53,12 +56,18 @@ public class DefaultStoreFileManager implements StoreFileManager { * when its contents change. */ private volatile ImmutableList storefiles = ImmutableList.of(); + /** + * List of store files that include the latest cells inside this store. This is an immutable list + * that is atomically replaced when its contents change. + */ + private volatile ImmutableList liveVersionStoreFiles = ImmutableList.of(); /** * List of compacted files inside this store that needs to be excluded in reads because further * new reads will be using only the newly created files out of compaction. These compacted files * will be deleted/cleared once all the existing readers on these compacted files are done. 
*/ private volatile ImmutableList compactedfiles = ImmutableList.of(); + private final boolean enableLiveVersionFiles; public DefaultStoreFileManager(CellComparator cellComparator, Comparator storeFileComparator, Configuration conf, @@ -68,10 +77,28 @@ public DefaultStoreFileManager(CellComparator cellComparator, this.comConf = comConf; this.blockingFileCount = conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT); + this.enableLiveVersionFiles = + conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); + } + + private List getLiveVersionFiles(Collection storeFiles) + throws IOException { + List hasLiveVersionFiles = new ArrayList<>(storeFiles.size()); + for (HStoreFile file : storeFiles) { + file.initReader(); + if (file.hasLiveVersion()) { + hasLiveVersionFiles.add(file); + } + } + return hasLiveVersionFiles; } @Override public void loadFiles(List storeFiles) throws IOException { + if (enableLiveVersionFiles) { + this.liveVersionStoreFiles = + ImmutableList.sortedCopyOf(getStoreFileComparator(), getLiveVersionFiles(storeFiles)); + } this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, storeFiles); } @@ -87,12 +114,19 @@ public Collection getCompactedfiles() { @Override public void insertNewFiles(Collection sfs) throws IOException { + if (enableLiveVersionFiles) { + this.liveVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), + Iterables.concat(this.liveVersionStoreFiles, getLiveVersionFiles(sfs))); + } this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, Iterables.concat(this.storefiles, sfs)); } @Override public ImmutableCollection clearFiles() { + if (enableLiveVersionFiles) { + liveVersionStoreFiles = ImmutableList.of(); + } ImmutableList result = storefiles; storefiles = ImmutableList.of(); return result; @@ -118,6 +152,12 @@ public final int getCompactedFilesCount() { @Override public void addCompactionResults(Collection newCompactedfiles, Collection results) throws IOException { + if (enableLiveVersionFiles) { + this.liveVersionStoreFiles = ImmutableList.sortedCopyOf(storeFileComparator, + Iterables.concat( + Iterables.filter(liveVersionStoreFiles, sf -> !newCompactedfiles.contains(sf)), + getLiveVersionFiles(results))); + } this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, Iterables .concat(Iterables.filter(storefiles, sf -> !newCompactedfiles.contains(sf)), results)); // Mark the files as compactedAway once the storefiles and compactedfiles list is finalized @@ -159,6 +199,9 @@ public final Optional getSplitPoint() throws IOException { @Override public Collection getFilesForScan(byte[] startRow, boolean includeStartRow, byte[] stopRow, boolean includeStopRow, boolean onlyLatestVersion) { + if (onlyLatestVersion && enableLiveVersionFiles) { + return liveVersionStoreFiles; + } // We cannot provide any useful input and already have the files sorted by seqNum. return getStorefiles(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java deleted file mode 100644 index 9e66341be6f0..000000000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileStoreFileManager.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.regionserver; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.List; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration; -import org.apache.yetus.audience.InterfaceAudience; - -import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableCollection; -import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; -import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; - -/** - * Implementation of {@link StoreFileManager} for {@link DualFileWriter}. Not thread-safe. - */ -@InterfaceAudience.Private -public class DualFileStoreFileManager extends DefaultStoreFileManager { - /** - * List of store files that include the latest cells inside this store. This is an immutable list - * that is atomically replaced when its contents change. - */ - private volatile ImmutableList latestVersionStoreFiles = ImmutableList.of(); - - public DualFileStoreFileManager(CellComparator cellComparator, - Comparator storeFileComparator, Configuration conf, - CompactionConfiguration comConf) { - super(cellComparator, storeFileComparator, conf, comConf); - } - - private List extractHasLiveVersionFiles(Collection storeFiles) - throws IOException { - List hasLiveVersionFiles = new ArrayList<>(storeFiles.size()); - for (HStoreFile file : storeFiles) { - file.initReader(); - if (file.hasLiveVersion()) { - hasLiveVersionFiles.add(file); - } - } - return hasLiveVersionFiles; - } - - @Override - public void loadFiles(List storeFiles) throws IOException { - super.loadFiles(storeFiles); - this.latestVersionStoreFiles = - ImmutableList.sortedCopyOf(getStoreFileComparator(), extractHasLiveVersionFiles(storeFiles)); - } - - @Override - public void insertNewFiles(Collection sfs) throws IOException { - super.insertNewFiles(sfs); - this.latestVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), - Iterables.concat(this.latestVersionStoreFiles, extractHasLiveVersionFiles(sfs))); - } - - @Override - public ImmutableCollection clearFiles() { - latestVersionStoreFiles = ImmutableList.of(); - return super.clearFiles(); - } - - @Override - public void addCompactionResults(Collection newCompactedFiles, - Collection results) throws IOException { - Collection newFilesHasLatestVersion = extractHasLiveVersionFiles(results); - this.latestVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), - Iterables.concat( - Iterables.filter(latestVersionStoreFiles, sf -> !newCompactedFiles.contains(sf)), - newFilesHasLatestVersion)); - super.addCompactionResults(newCompactedFiles, results); - } - - @Override - public Collection 
getFilesForScan(byte[] startRow, boolean includeStartRow, - byte[] stopRow, boolean includeStopRow, boolean onlyLatestVersion) { - if (onlyLatestVersion) { - return latestVersionStoreFiles; - } - return super.getFilesForScan(startRow, includeStartRow, stopRow, includeStopRow, false); - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java index 45465e4a1b8d..f2cd742aba5f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java @@ -54,8 +54,7 @@ public DualFileWriter createWriter(InternalScanner scanner, FileDetails fd, boolean enableDualFileWriter = conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); DualFileWriter writer = new DualFileWriter(store.getComparator(), - store.getColumnFamilyDescriptor().getMaxVersions(), - enableDualFileWriter); + store.getColumnFamilyDescriptor().getMaxVersions(), enableDualFileWriter); initMultiWriter(writer, scanner, fd, shouldDropBehind, major, writerCreationTracker); return writer; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java index 8fc27cb9e9b2..edcec5e25e37 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java @@ -37,7 +37,6 @@ import org.apache.hadoop.hbase.CellComparatorImpl; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.KeepDeletedCells; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; From ccc458293dc0f27bed627a784b0f7981324fc0d3 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Fri, 15 Dec 2023 05:57:52 +0300 Subject: [PATCH 08/27] Made dual file compaction disabled by default --- .../apache/hadoop/hbase/regionserver/DefaultStoreEngine.java | 1 + .../hadoop/hbase/regionserver/DefaultStoreFileManager.java | 5 +++-- .../hbase/regionserver/compactions/DefaultCompactor.java | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 4033844f6a16..6793e4e4ec5a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -54,6 +54,7 @@ public class DefaultStoreEngine extends StoreEngine DEFAULT_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; + public static boolean DEFAULT_ENABLE_DUAL_FILE_WRITER = false; @Override public boolean needsCompaction(List filesCompacting) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java index 30fb52e96af6..8a2464e661bb 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.regionserver; import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_ENABLE_DUAL_FILE_WRITER; import java.io.IOException; import java.util.ArrayList; @@ -77,8 +78,8 @@ public DefaultStoreFileManager(CellComparator cellComparator, this.comConf = comConf; this.blockingFileCount = conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT); - this.enableLiveVersionFiles = - conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); + this.enableLiveVersionFiles = conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, + DEFAULT_ENABLE_DUAL_FILE_WRITER); } private List getLiveVersionFiles(Collection storeFiles) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java index f2cd742aba5f..08fdc7c5c805 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.regionserver.compactions; import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_ENABLE_DUAL_FILE_WRITER; import java.io.IOException; import java.util.List; @@ -51,8 +52,8 @@ public DefaultCompactor(Configuration conf, HStore store) { public DualFileWriter createWriter(InternalScanner scanner, FileDetails fd, boolean shouldDropBehind, boolean major, Consumer writerCreationTracker) throws IOException { - boolean enableDualFileWriter = - conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); + boolean enableDualFileWriter = conf.getBoolean( + DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, DEFAULT_ENABLE_DUAL_FILE_WRITER); DualFileWriter writer = new DualFileWriter(store.getComparator(), store.getColumnFamilyDescriptor().getMaxVersions(), enableDualFileWriter); initMultiWriter(writer, scanner, fd, shouldDropBehind, major, writerCreationTracker); From 63b1aa357b485cd7ecc22371cc3c341df24f90f6 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Thu, 21 Dec 2023 10:51:34 +0300 Subject: [PATCH 09/27] Changes for compaction performace improvement and delete use cases in PerformanceEvaluation --- .../hadoop/hbase/PerformanceEvaluation.java | 50 +++++++++++++++- .../hadoop/hbase/regionserver/CellSink.java | 12 ++++ .../regionserver/DefaultStoreEngine.java | 2 +- .../hbase/regionserver/DualFileWriter.java | 58 +++++++++++++------ .../regionserver/compactions/Compactor.java | 2 +- 5 files changed, 103 insertions(+), 21 deletions(-) diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java index 97fcefe4a70c..69f923ade844 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java @@ 
-181,8 +181,12 @@ public class PerformanceEvaluation extends Configured implements Tool { addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000", "Run random seek scan with both start and stop row (max 10000 rows)"); addCommandDescriptor(RandomWriteTest.class, "randomWrite", "Run random write test"); + addCommandDescriptor(RandomDeleteTest.class, "randomDelete", + "Run random delete test"); addCommandDescriptor(SequentialReadTest.class, "sequentialRead", "Run sequential read test"); addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite", "Run sequential write test"); + addCommandDescriptor(SequentialDeleteTest.class, "sequentialDelete", + "Run sequential delete test"); addCommandDescriptor(MetaWriteTest.class, "metaWrite", "Populate meta table;used with 1 thread; to be cleaned up by cleanMeta"); addCommandDescriptor(ScanTest.class, "scan", "Run scan test (read every row)"); @@ -352,7 +356,8 @@ static boolean checkTable(Admin admin, TestOptions opts) throws IOException { boolean needsDelete = false, exists = admin.tableExists(tableName); boolean isReadCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("read") || opts.cmdName.toLowerCase(Locale.ROOT).contains("scan"); - if (!exists && isReadCmd) { + boolean isDeleteCmd = opts.cmdName.toLowerCase(Locale.ROOT).contains("delete"); + if (!exists && (isReadCmd || isDeleteCmd)) { throw new IllegalStateException( "Must specify an existing table for read commands. Run a write command first."); } @@ -367,7 +372,8 @@ static boolean checkTable(Admin admin, TestOptions opts) throws IOException { && opts.presplitRegions != admin.getRegions(tableName).size()) || (!isReadCmd && desc != null && !StringUtils.equals(desc.getRegionSplitPolicyClassName(), opts.splitPolicy)) - || (!isReadCmd && desc != null && desc.getRegionReplication() != opts.replicas) + || (!(isReadCmd || isDeleteCmd) && desc != null + && desc.getRegionReplication() != opts.replicas) || (desc != null && desc.getColumnFamilyCount() != opts.families) ) { needsDelete = true; @@ -2071,6 +2077,18 @@ protected byte[] generateRow(final int i) { } + static class RandomDeleteTest extends SequentialDeleteTest { + RandomDeleteTest(Connection con, TestOptions options, Status status) { + super(con, options, status); + } + + @Override + protected byte[] generateRow(final int i) { + return getRandomRow(this.rand, opts.totalRows); + } + + } + static class ScanTest extends TableTest { private ResultScanner testScanner; @@ -2406,6 +2424,34 @@ boolean testRow(final int i, final long startTime) throws IOException { } } + static class SequentialDeleteTest extends BufferedMutatorTest { + + SequentialDeleteTest(Connection con, TestOptions options, Status status) { + super(con, options, status); + } + + protected byte[] generateRow(final int i) { + return format(i); + } + + @Override + boolean testRow(final int i, final long startTime) throws IOException { + byte[] row = generateRow(i); + Delete delete = new Delete(row); + for (int family = 0; family < opts.families; family++) { + byte familyName[] = Bytes.toBytes(FAMILY_NAME_BASE + family); + delete.addFamily(familyName); + } + delete.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL); + if (opts.autoFlush) { + table.delete(delete); + } else { + mutator.mutate(delete); + } + return true; + } + } + /* * Insert fake regions into meta table with contiguous split keys. 
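The sequentialDelete and randomDelete commands added to PerformanceEvaluation above delete every configured column family of a row and honor the autoFlush and writeToWAL options. The sketch below shows the equivalent client-side call sequence with the standard HBase client API; the family naming scheme and the parameter list are illustrative rather than copied from PerformanceEvaluation.

import java.io.IOException;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

final class DeleteRowExample {
  private DeleteRowExample() {
  }

  // Deletes all column families of the given row, mirroring what the new
  // delete commands do per generated row key.
  static void deleteRow(Table table, BufferedMutator mutator, byte[] row, int families,
    boolean autoFlush, boolean writeToWAL) throws IOException {
    Delete delete = new Delete(row);
    for (int family = 0; family < families; family++) {
      delete.addFamily(Bytes.toBytes("family" + family)); // hypothetical family names
    }
    delete.setDurability(writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
    if (autoFlush) {
      table.delete(delete); // synchronous, one RPC per row
    } else {
      mutator.mutate(delete); // buffered, flushed in batches
    }
  }
}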
*/ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSink.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSink.java index c7587a147a6f..9426cd8cc9f6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSink.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSink.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.regionserver; import java.io.IOException; +import java.util.List; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.util.BloomFilterWriter; import org.apache.yetus.audience.InterfaceAudience; @@ -34,4 +35,15 @@ public interface CellSink { * @param cell the cell to be added */ void append(Cell cell) throws IOException; + + /** + * Append the given (possibly partial) list of cells of a row + * @param cellList the cell list to be added + * @throws IOException + */ + default void appendAll(List cellList) throws IOException { + for (Cell cell : cellList) { + append(cell); + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 6793e4e4ec5a..85886ced624d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -54,7 +54,7 @@ public class DefaultStoreEngine extends StoreEngine DEFAULT_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; - public static boolean DEFAULT_ENABLE_DUAL_FILE_WRITER = false; + public static final boolean DEFAULT_ENABLE_DUAL_FILE_WRITER = false; @Override public boolean needsCompaction(List filesCompacting) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java index 039309ed3876..eec47c1f31bf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -31,11 +31,9 @@ import org.apache.yetus.audience.InterfaceAudience; /** - * Separates the provided cells into two files, one file for the latest cells and the other for the - * rest of the cells. The latest cells includes the latest put cells that are not deleted by a - * delete marker, the delete markers that delete latest put cells, and the version delete markers - * (that is, DeleteFamilyVersion and Delete) that are not deleted by other delete markers (that is - * DeleteFamily and DeleteColumn). + * Separates the provided cells into two files, one file for the live cells and the other for the + * rest of the cells (historical cells). The live cells includes the live put cells, delete + * all and version delete markers that are not masked by other delete all markers. */ @InterfaceAudience.Private public class DualFileWriter extends AbstractMultiFileWriter { @@ -120,18 +118,7 @@ private boolean isDeleted(Cell cell) { || isDeletedByDeleteFamilyVersion(cell) || isDeletedByDeleteColumnVersion(cell); } - @Override - public void append(Cell cell) throws IOException { - if (!dualWriterEnabled) { - // If the dual writer is not enabled then all cells are written to one file. 
We use - // the live version file in this case - addLiveVersion(cell); - return; - } - if (lastCell != null && comparator.compareRows(lastCell, cell) != 0) { - // It is a new row and thus time to reset the state - initRowState(); - } + private void appendCell(Cell cell) throws IOException { if ((lastCell == null || !CellUtil.matchingColumn(lastCell, cell))) { initColumnState(); } @@ -183,6 +170,43 @@ public void append(Cell cell) throws IOException { lastCell = cell; } + @Override + public void appendAll(List cellList) throws IOException { + if (!dualWriterEnabled) { + // If the dual writer is not enabled then all cells are written to one file. We use + // the live version file in this case + for (Cell cell : cellList) { + addLiveVersion(cell); + } + return; + } + if (cellList.isEmpty()) { + return; + } + if (lastCell != null && comparator.compareRows(lastCell, cellList.get(0)) != 0) { + // It is a new row and thus time to reset the state + initRowState(); + } + for (Cell cell : cellList) { + appendCell(cell); + } + } + + @Override + public void append(Cell cell) throws IOException { + if (!dualWriterEnabled) { + // If the dual writer is not enabled then all cells are written to one file. We use + // the live version file in this case + addLiveVersion(cell); + return; + } + if (lastCell != null && comparator.compareRows(lastCell, cell) != 0) { + // It is a new row and thus time to reset the state + initRowState(); + } + appendCell(cell); + } + @Override protected Collection writers() { return writers; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java index 09471cdb2b69..715b12c36aa0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java @@ -464,7 +464,6 @@ protected boolean performCompaction(FileDetails fd, InternalScanner scanner, Cel lastCleanCell = null; lastCleanCellSeqId = 0; } - writer.append(c); int len = c.getSerializedSize(); ++progress.currentCompactedKVs; progress.totalCompactedSize += len; @@ -478,6 +477,7 @@ protected boolean performCompaction(FileDetails fd, InternalScanner scanner, Cel return false; } } + writer.appendAll(cells); if (shipper != null && bytesWrittenProgressForShippedCall > shippedCallSizeLimit) { if (lastCleanCell != null) { // HBASE-16931, set back sequence id to avoid affecting scan order unexpectedly. 
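The CellSink change above adds a default appendAll method so existing single-cell sinks keep working unchanged, while Compactor#performCompaction now hands the writer a (possibly partial) list of a row's cells in one call; DualFileWriter overrides appendAll and resets its per-row tracking state once per batch instead of comparing row keys on every append. A simplified restatement of that interface shape, not the patched CellSink itself:

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.Cell;

// Sink-side batching sketch: the default method preserves the one-cell-at-a-time
// contract, and writers that care about row boundaries can override appendAll to
// do their per-row bookkeeping once per batch.
interface BatchedCellSink {
  void append(Cell cell) throws IOException;

  default void appendAll(List<Cell> cellList) throws IOException {
    for (Cell cell : cellList) {
      append(cell);
    }
  }
}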
From 6efd551e2d9979928f6e6b1f87135ff87324a447 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Thu, 28 Dec 2023 10:08:13 +0100 Subject: [PATCH 10/27] Handled the new version behavior for delete markers --- .../hadoop/hbase/PerformanceEvaluation.java | 5 ++- .../hadoop/hbase/regionserver/CellSink.java | 1 - .../hbase/regionserver/DualFileWriter.java | 31 ++++++++++++++----- .../compactions/DefaultCompactor.java | 3 +- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java index 69f923ade844..4c65f3cfce0d 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java @@ -181,8 +181,7 @@ public class PerformanceEvaluation extends Configured implements Tool { addCommandDescriptor(RandomScanWithRange10000Test.class, "scanRange10000", "Run random seek scan with both start and stop row (max 10000 rows)"); addCommandDescriptor(RandomWriteTest.class, "randomWrite", "Run random write test"); - addCommandDescriptor(RandomDeleteTest.class, "randomDelete", - "Run random delete test"); + addCommandDescriptor(RandomDeleteTest.class, "randomDelete", "Run random delete test"); addCommandDescriptor(SequentialReadTest.class, "sequentialRead", "Run sequential read test"); addCommandDescriptor(SequentialWriteTest.class, "sequentialWrite", "Run sequential write test"); addCommandDescriptor(SequentialDeleteTest.class, "sequentialDelete", @@ -2444,7 +2443,7 @@ boolean testRow(final int i, final long startTime) throws IOException { } delete.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL); if (opts.autoFlush) { - table.delete(delete); + table.delete(delete); } else { mutator.mutate(delete); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSink.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSink.java index 9426cd8cc9f6..1d838d86abcf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSink.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSink.java @@ -39,7 +39,6 @@ public interface CellSink { /** * Append the given (possibly partial) list of cells of a row * @param cellList the cell list to be added - * @throws IOException */ default void appendAll(List cellList) throws IOException { for (Cell cell : cellList) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java index eec47c1f31bf..eb8c5be26a7d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -32,8 +32,8 @@ /** * Separates the provided cells into two files, one file for the live cells and the other for the - * rest of the cells (historical cells). The live cells includes the live put cells, delete - * all and version delete markers that are not masked by other delete all markers. + * rest of the cells (historical cells). The live cells includes the live put cells, delete all and + * version delete markers that are not masked by other delete all markers. 
*/ @InterfaceAudience.Private public class DualFileWriter extends AbstractMultiFileWriter { @@ -57,11 +57,14 @@ public class DualFileWriter extends AbstractMultiFileWriter { private int livePutCellCount; private final boolean dualWriterEnabled; private final int maxVersions; + private final boolean newVersionBehavior; - public DualFileWriter(CellComparator comparator, int maxVersions, boolean dualWriterEnabled) { + public DualFileWriter(CellComparator comparator, int maxVersions, boolean dualWriterEnabled, + boolean newVersionBehavior) { this.comparator = comparator; this.maxVersions = maxVersions; this.dualWriterEnabled = dualWriterEnabled; + this.newVersionBehavior = newVersionBehavior; writers = new ArrayList<>(2); initRowState(); } @@ -96,25 +99,39 @@ private void addHistoricalVersion(Cell cell) throws IOException { } private boolean isDeletedByDeleteFamily(Cell cell) { - return deleteFamily != null && deleteFamily.getTimestamp() >= cell.getTimestamp(); + return deleteFamily != null && (deleteFamily.getTimestamp() > cell.getTimestamp() + || (deleteFamily.getTimestamp() == cell.getTimestamp() + && (!newVersionBehavior || cell.getSequenceId() < deleteFamily.getSequenceId()))); } private boolean isDeletedByDeleteFamilyVersion(Cell cell) { for (Cell deleteFamilyVersion : deleteFamilyVersionList) { - if (deleteFamilyVersion.getTimestamp() == cell.getTimestamp()) return true; + if ( + deleteFamilyVersion.getTimestamp() == cell.getTimestamp() + && (!newVersionBehavior || cell.getSequenceId() < deleteFamilyVersion.getSequenceId()) + ) return true; } return false; } + private boolean isDeletedByDeleteColumn(Cell cell) { + return deleteColumn != null && (deleteColumn.getTimestamp() > cell.getTimestamp() + || (deleteColumn.getTimestamp() == cell.getTimestamp() + && (!newVersionBehavior || cell.getSequenceId() < deleteColumn.getSequenceId()))); + } + private boolean isDeletedByDeleteColumnVersion(Cell cell) { for (Cell deleteColumnVersion : deleteColumnVersionList) { - if (deleteColumnVersion.getTimestamp() == cell.getTimestamp()) return true; + if ( + deleteColumnVersion.getTimestamp() == cell.getTimestamp() + && (!newVersionBehavior || cell.getSequenceId() < deleteColumnVersion.getSequenceId()) + ) return true; } return false; } private boolean isDeleted(Cell cell) { - return isDeletedByDeleteFamily(cell) || deleteColumn != null + return isDeletedByDeleteFamily(cell) || isDeletedByDeleteColumn(cell) || isDeletedByDeleteFamilyVersion(cell) || isDeletedByDeleteColumnVersion(cell); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java index 08fdc7c5c805..334833209cf2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java @@ -55,7 +55,8 @@ public DualFileWriter createWriter(InternalScanner scanner, FileDetails fd, boolean enableDualFileWriter = conf.getBoolean( DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, DEFAULT_ENABLE_DUAL_FILE_WRITER); DualFileWriter writer = new DualFileWriter(store.getComparator(), - store.getColumnFamilyDescriptor().getMaxVersions(), enableDualFileWriter); + store.getColumnFamilyDescriptor().getMaxVersions(), enableDualFileWriter, + store.getColumnFamilyDescriptor().isNewVersionBehavior()); initMultiWriter(writer, scanner, fd, shouldDropBehind, major, 
writerCreationTracker); return writer; } From 0fdee043956024f4d1b513a8a761b1ac267e555f Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Fri, 5 Jan 2024 13:39:42 +0300 Subject: [PATCH 11/27] Changes for the review comments by Viraj --- .../java/org/apache/hadoop/hbase/mob/MobFile.java | 4 ++-- .../hbase/regionserver/DefaultStoreEngine.java | 2 +- .../hadoop/hbase/regionserver/DualFileWriter.java | 14 ++++++-------- .../apache/hadoop/hbase/regionserver/HStore.java | 7 +++---- .../hadoop/hbase/regionserver/HStoreFile.java | 13 +++++++++---- .../hbase/regionserver/StoreFileScanner.java | 8 ++++---- .../hadoop/hbase/regionserver/StoreScanner.java | 2 ++ .../regionserver/compactions/DefaultCompactor.java | 3 --- .../hbase/regionserver/TestCompactorMemLeak.java | 2 +- .../hbase/regionserver/TestFSErrorsExposed.java | 2 +- .../hbase/regionserver/TestReversibleScanners.java | 8 ++++---- 11 files changed, 33 insertions(+), 32 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java index 4b2e01315e31..3293208771ac 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java @@ -55,7 +55,7 @@ public StoreFileScanner getScanner() throws IOException { List sfs = new ArrayList<>(); sfs.add(sf); List sfScanners = StoreFileScanner.getScannersForStoreFiles(sfs, false, true, - false, false, sf.getMaxMemStoreTS(), false); + false, false, sf.getMaxMemStoreTS()); return sfScanners.get(0); } @@ -82,7 +82,7 @@ public MobCell readCell(Cell search, boolean cacheMobBlocks, long readPt) throws boolean succ = false; try { List sfScanners = StoreFileScanner.getScannersForStoreFiles( - Collections.singletonList(sf), cacheMobBlocks, true, false, false, readPt, false); + Collections.singletonList(sf), cacheMobBlocks, true, false, false, readPt); if (!sfScanners.isEmpty()) { scanner = sfScanners.get(0); if (scanner.seek(search)) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 85886ced624d..b372ce7e399d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -54,7 +54,7 @@ public class DefaultStoreEngine extends StoreEngine DEFAULT_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; - public static final boolean DEFAULT_ENABLE_DUAL_FILE_WRITER = false; + public static final boolean DEFAULT_ENABLE_DUAL_FILE_WRITER = true; @Override public boolean needsCompaction(List filesCompacting) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java index eb8c5be26a7d..2bd9651b452d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -141,15 +141,13 @@ private void appendCell(Cell cell) throws IOException { } if (cell.getType() == Cell.Type.DeleteFamily) { if (deleteFamily == null) { - if (cell.getType() == Cell.Type.DeleteFamily) { - deleteFamily = cell; - addLiveVersion(cell); - } else { - addHistoricalVersion(cell); - } + deleteFamily = cell; + addLiveVersion(cell); + } else { 
+ addHistoricalVersion(cell); } } else if (cell.getType() == Cell.Type.DeleteFamilyVersion) { - if (deleteFamily == null) { + if (!isDeletedByDeleteFamily(cell)) { deleteFamilyVersionList.add(cell); addLiveVersion(cell); } else { @@ -246,7 +244,7 @@ protected void preCommitWriters() throws IOException { } } - public HFile.Writer getHFileWriter() { + public HFile.Writer getLiveVersionHFileWriter() { if (writers.isEmpty()) { return null; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 8e0e7c38babe..7f06759a7eee 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -1002,9 +1002,8 @@ public List getScanners(boolean cacheBlocks, boolean usePread, // TODO this used to get the store files in descending order, // but now we get them in ascending order, which I think is // actually more correct, since memstore get put at the end. - List sfScanners = - StoreFileScanner.getScannersForStoreFiles(storeFilesToScan, cacheBlocks, usePread, - isCompaction, false, matcher, readPt, onlyLatestVersion); + List sfScanners = StoreFileScanner.getScannersForStoreFiles( + storeFilesToScan, cacheBlocks, usePread, isCompaction, false, matcher, readPt); List scanners = new ArrayList<>(sfScanners.size() + 1); scanners.addAll(sfScanners); // Then the memstore scanners @@ -1081,7 +1080,7 @@ public List getScanners(List files, boolean cacheBl } try { List sfScanners = StoreFileScanner.getScannersForStoreFiles(files, - cacheBlocks, usePread, isCompaction, false, matcher, readPt, onlyLatestVersion); + cacheBlocks, usePread, isCompaction, false, matcher, readPt); List scanners = new ArrayList<>(sfScanners.size() + 1); scanners.addAll(sfScanners); // Then the memstore scanners diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java index a3c0172629a9..216e086d4063 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -140,10 +140,15 @@ public class HStoreFile implements StoreFile { // Indicates if the file got compacted private volatile boolean compactedAway = false; - // Indicate if the file contains live cell versions for a given column - // in a row. MemStore flushes generate files with all cell versions. However, - // compactions can generate two files, one with the live cell versions and the other - // with the remaining (historical) cell versions. + // Indicate if the file contains live cell versions. This is used when + // hbase.hstore.defaultengine.enable.dualfilewriter is enabled. In that case, compactions + // can generate two files, one with the live cell versions and the other with the remaining + // (historical) cell versions. Even when hbase.hstore.defaultengine.enable.dualfilewriter is + // enabled, the files generated by memstore flushes do not include the HAS_LIVE_VERSIONS HFile + // metadata key since memstore does not use compaction writers. However, these files will include + // live version cells. Thus, when the HAS_LIVE_VERSIONS HFile metadata key is not included in + // the HFile, we still want to return true for HStoreFile#hasLiveVersion(). That is why + // the default value for hasLiveVersions is true. 
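The predicates added to DualFileWriter above encode when a cell is masked by a delete-family or delete-column marker once the column family's new version behavior (ColumnFamilyDescriptor#isNewVersionBehavior) is taken into account: under the old behavior a delete-all marker masks any cell with an equal or older timestamp, while under the new behavior an equal-timestamp cell survives if it was written after the marker, that is, if it has a higher sequence id. A standalone restatement of that rule with purely illustrative timestamps and sequence ids:

// Restates the masking rule from the DualFileWriter hunks above; not HBase code.
final class DeleteMaskRule {
  private DeleteMaskRule() {
  }

  static boolean isMaskedByDeleteAll(long markerTs, long markerSeqId, long cellTs, long cellSeqId,
    boolean newVersionBehavior) {
    return markerTs > cellTs
      || (markerTs == cellTs && (!newVersionBehavior || cellSeqId < markerSeqId));
  }

  public static void main(String[] args) {
    // Same timestamp, cell written after the marker (higher sequence id):
    // masked under the old behavior, still live under the new behavior.
    System.out.println(isMaskedByDeleteAll(10L, 5L, 10L, 7L, false)); // true
    System.out.println(isMaskedByDeleteAll(10L, 5L, 10L, 7L, true)); // false
  }
}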
private volatile boolean hasLiveVersions = true; // Keys for metadata stored in backing HFile. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index d84782107bf4..fd941de4df87 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -112,10 +112,10 @@ public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVC * Return an array of scanners corresponding to the given set of store files. */ public static List getScannersForStoreFiles(Collection files, - boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean useDropBehind, long readPt, - boolean onlyLatestVersion) throws IOException { + boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean useDropBehind, long readPt) + throws IOException { return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction, useDropBehind, null, - readPt, onlyLatestVersion); + readPt); } /** @@ -124,7 +124,7 @@ public static List getScannersForStoreFiles(Collection getScannersForStoreFiles(Collection files, boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean canUseDrop, - ScanQueryMatcher matcher, long readPt, boolean onlyLatestVersion) throws IOException { + ScanQueryMatcher matcher, long readPt) throws IOException { if (files.isEmpty()) { return Collections.emptyList(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 675a811bee6e..eabb65ad2802 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -225,6 +225,8 @@ private void addCurrentScanners(List scanners) { } private static boolean isOnlyLatestVersionScan(Scan scan) { + // No need to check for Scan#getMaxVersions because live version files generated by dual file + // writer retains max versions specified in ColumnFamilyDescriptor for the given CF return !scan.isRaw() && scan.getTimeRange().getMax() == HConstants.LATEST_TIMESTAMP; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java index 334833209cf2..16d2f5a135e1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java @@ -31,8 +31,6 @@ import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import org.apache.hadoop.hbase.security.User; import org.apache.yetus.audience.InterfaceAudience; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Compact passed set of files. 
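For reference, the StoreScanner check touched above decides whether a scan may be served from the live-version files alone: the scan must not be raw and must not cap its time range, and max versions need no extra check because the live files already retain the family's configured maximum. The sketch below restates that predicate with the public client API; the wrapper class is illustrative.

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;

final class LatestVersionScanCheck {
  private LatestVersionScanCheck() {
  }

  // Same condition as the private StoreScanner helper shown above.
  static boolean isOnlyLatestVersionScan(Scan scan) {
    return !scan.isRaw() && scan.getTimeRange().getMax() == HConstants.LATEST_TIMESTAMP;
  }

  public static void main(String[] args) throws Exception {
    Scan plain = new Scan(); // asks for the current state: true
    Scan timeTravel = new Scan().setTimeRange(0L, 1000L); // capped time range: false
    System.out.println(isOnlyLatestVersionScan(plain));
    System.out.println(isOnlyLatestVersionScan(timeTravel));
  }
}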
Create an instance and then call @@ -40,7 +38,6 @@ */ @InterfaceAudience.Private public class DefaultCompactor extends AbstractMultiOutputCompactor { - private static final Logger LOG = LoggerFactory.getLogger(DefaultCompactor.class); public DefaultCompactor(Configuration conf, HStore store) { super(conf, store); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java index d3ae7608180e..b72b5d822ce7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java @@ -129,7 +129,7 @@ public MyCompactor(Configuration conf, HStore store) { @Override protected List commitWriter(DualFileWriter writer, FileDetails fd, CompactionRequestImpl request) throws IOException { - HFileWriterImpl writerImpl = (HFileWriterImpl) writer.getHFileWriter(); + HFileWriterImpl writerImpl = (HFileWriterImpl) writer.getLiveVersionHFileWriter(); Cell cell = writerImpl.getLastCell(); // The cell should be backend with an KeyOnlyKeyValue. IS_LAST_CELL_ON_HEAP.set(cell instanceof KeyOnlyKeyValue); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java index 9fd4136e05ff..55320e94a9f9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java @@ -146,7 +146,7 @@ public void testStoreFileScannerThrowsErrors() throws IOException { List scanners = StoreFileScanner.getScannersForStoreFiles( Collections.singletonList(sf), false, true, false, false, // 0 is passed as readpoint because this test operates on HStoreFile directly - 0, false); + 0); KeyValueScanner scanner = scanners.get(0); FaultyInputStream inStream = faultyfs.inStreams.get(0).get(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java index 6ab6a7e18b98..391f1bef69cc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java @@ -126,13 +126,13 @@ public void testReversibleStoreFileScanner() throws IOException { BloomType.NONE, true); List scanners = StoreFileScanner.getScannersForStoreFiles( - Collections.singletonList(sf), false, true, false, false, Long.MAX_VALUE, false); + Collections.singletonList(sf), false, true, false, false, Long.MAX_VALUE); StoreFileScanner scanner = scanners.get(0); seekTestOfReversibleKeyValueScanner(scanner); for (int readPoint = 0; readPoint < MAXMVCC; readPoint++) { LOG.info("Setting read point to " + readPoint); scanners = StoreFileScanner.getScannersForStoreFiles(Collections.singletonList(sf), false, - true, false, false, readPoint, false); + true, false, false, readPoint); seekTestOfReversibleKeyValueScannerWithMVCC(scanners, readPoint); } } @@ -481,8 +481,8 @@ private ReversedKeyValueHeap getReversibleKeyValueHeap(MemStore memstore, HStore private List getScanners(MemStore memstore, HStoreFile sf1, HStoreFile sf2, byte[] startRow, boolean doSeek, int readPoint) throws IOException { - List fileScanners = 
StoreFileScanner.getScannersForStoreFiles( - Lists.newArrayList(sf1, sf2), false, true, false, false, readPoint, false); + List fileScanners = StoreFileScanner + .getScannersForStoreFiles(Lists.newArrayList(sf1, sf2), false, true, false, false, readPoint); List memScanners = memstore.getScanners(readPoint); List scanners = new ArrayList<>(fileScanners.size() + 1); scanners.addAll(fileScanners); From e65141ad0f71d46a049fbb74b2cee000d4b836a4 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Sat, 6 Jan 2024 14:42:16 +0300 Subject: [PATCH 12/27] Further changes for review comments by Viraj --- .../regionserver/DefaultStoreEngine.java | 2 +- .../hbase/regionserver/DualFileWriter.java | 22 +++++++++++++++++-- .../hbase/regionserver/StoreFileManager.java | 1 + 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index b372ce7e399d..85886ced624d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -54,7 +54,7 @@ public class DefaultStoreEngine extends StoreEngine DEFAULT_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; - public static final boolean DEFAULT_ENABLE_DUAL_FILE_WRITER = true; + public static final boolean DEFAULT_ENABLE_DUAL_FILE_WRITER = false; @Override public boolean needsCompaction(List filesCompacting) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java index 2bd9651b452d..54acf06957ef 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -149,7 +149,16 @@ private void appendCell(Cell cell) throws IOException { } else if (cell.getType() == Cell.Type.DeleteFamilyVersion) { if (!isDeletedByDeleteFamily(cell)) { deleteFamilyVersionList.add(cell); - addLiveVersion(cell); + if (deleteFamily != null && deleteFamily.getTimestamp() == cell.getTimestamp()) { + // This means both the delete-family and delete-family-version markers have the same + // timestamp but the sequence id of delete-family-version marker is higher than that of + // the delete-family marker. In this case, there is no need to add the + // delete-family-version marker to the live version file. This case happens only with + // the new version behavior. + addHistoricalVersion(cell); + } else { + addLiveVersion(cell); + } } else { addHistoricalVersion(cell); } @@ -163,7 +172,16 @@ private void appendCell(Cell cell) throws IOException { } else if (cell.getType() == Cell.Type.Delete) { if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { deleteColumnVersionList.add(cell); - addLiveVersion(cell); + if (deleteFamily != null && deleteFamily.getTimestamp() == cell.getTimestamp()) { + // This means both the delete-family and delete-column-version markers have the same + // timestamp but the sequence id of delete-column-version marker is higher than that of + // the delete-family marker. In this case, there is no need to add the + // delete-column-version marker to the live version file. This case happens only with + // the new version behavior. 
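The two DualFileWriter hunks above handle a corner case of the new version behavior: a DeleteFamilyVersion or Delete marker can survive the delete-family check only because its sequence id is higher, yet still share the delete-family marker's timestamp; such a marker adds nothing for live reads, so it is tracked for masking but written to the historical file. A compact restatement of that destination choice, with illustrative inputs rather than the patched method:

// Destination choice for a version delete marker that has already been found
// NOT to be masked by the delete-family marker (new version behavior only).
final class VersionDeleteRouting {
  enum Destination {
    LIVE,
    HISTORICAL
  }

  private VersionDeleteRouting() {
  }

  static Destination routeUnmaskedVersionDelete(boolean hasDeleteFamily, long deleteFamilyTs,
    long markerTs) {
    if (hasDeleteFamily && deleteFamilyTs == markerTs) {
      // Same timestamp, higher sequence id: the delete-family marker already
      // covers live reads, so keep this marker in the historical file.
      return Destination.HISTORICAL;
    }
    return Destination.LIVE;
  }
}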
+ addHistoricalVersion(cell); + } else { + addLiveVersion(cell); + } } else { addHistoricalVersion(cell); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java index 9cdd3ca5e8e1..7edc10c545f6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java @@ -122,6 +122,7 @@ void addCompactionResults(Collection compactedFiles, Collection getFilesForScan(byte[] startRow, boolean includeStartRow, byte[] stopRow, From 82884434d3999a8ef927f043cab30857c005333e Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Sat, 13 Jan 2024 14:17:21 +0300 Subject: [PATCH 13/27] Java doc comment edit --- .../apache/hadoop/hbase/regionserver/StoreFileManager.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java index 7edc10c545f6..9d918374702f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java @@ -120,9 +120,9 @@ void addCompactionResults(Collection compactedFiles, Collection getFilesForScan(byte[] startRow, boolean includeStartRow, byte[] stopRow, From 558253e58a387914bc7847f9b3312b1e63ee8012 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Tue, 6 Feb 2024 07:20:31 -0800 Subject: [PATCH 14/27] Fixed remaining minor checkstyle warnings --- .../org/apache/hadoop/hbase/PerformanceEvaluation.java | 2 +- .../apache/hadoop/hbase/regionserver/DualFileWriter.java | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java index 4c65f3cfce0d..9f97002f4b44 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java @@ -2438,7 +2438,7 @@ boolean testRow(final int i, final long startTime) throws IOException { byte[] row = generateRow(i); Delete delete = new Delete(row); for (int family = 0; family < opts.families; family++) { - byte familyName[] = Bytes.toBytes(FAMILY_NAME_BASE + family); + byte[] familyName = Bytes.toBytes(FAMILY_NAME_BASE + family); delete.addFamily(familyName); } delete.setDurability(opts.writeToWAL ? 
Durability.SYNC_WAL : Durability.SKIP_WAL); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java index 54acf06957ef..4308c86f1ff2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java @@ -109,7 +109,9 @@ private boolean isDeletedByDeleteFamilyVersion(Cell cell) { if ( deleteFamilyVersion.getTimestamp() == cell.getTimestamp() && (!newVersionBehavior || cell.getSequenceId() < deleteFamilyVersion.getSequenceId()) - ) return true; + ) { + return true; + } } return false; } @@ -125,7 +127,9 @@ private boolean isDeletedByDeleteColumnVersion(Cell cell) { if ( deleteColumnVersion.getTimestamp() == cell.getTimestamp() && (!newVersionBehavior || cell.getSequenceId() < deleteColumnVersion.getSequenceId()) - ) return true; + ) { + return true; + } } return false; } From 55fc059d31cfe0a3ee92feb6d3ccd0597845584e Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Tue, 13 Feb 2024 11:20:01 -0800 Subject: [PATCH 15/27] Bump up minFilesToCompact by one when DualFileWriter is enabled --- .../compactions/CompactionConfiguration.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java index 251c8227da00..e482009f84a4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hbase.regionserver.compactions; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_ENABLE_DUAL_FILE_WRITER; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.client.RegionInfo; @@ -143,6 +146,15 @@ public class CompactionConfiguration { conf.getLong(HBASE_HSTORE_COMPACTION_MIN_SIZE_KEY, storeConfigInfo.getMemStoreFlushSize()); minFilesToCompact = Math.max(2, conf.getInt(HBASE_HSTORE_COMPACTION_MIN_KEY, conf.getInt(HBASE_HSTORE_COMPACTION_MIN_KEY_OLD, 3))); + if ( + conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, + DEFAULT_ENABLE_DUAL_FILE_WRITER) + ) { + // If DualFileWriter is enabled, we bump up the min value by one as DualFileWriter compacts + // files into two files, live and historical, instead of one. 
This also eliminates infinite + // re-compaction when the min value is set to 2 + minFilesToCompact += 1; + } maxFilesToCompact = conf.getInt(HBASE_HSTORE_COMPACTION_MAX_KEY, 10); compactionRatio = conf.getFloat(HBASE_HSTORE_COMPACTION_RATIO_KEY, 1.2F); offPeakCompactionRatio = conf.getFloat(HBASE_HSTORE_COMPACTION_RATIO_OFFPEAK_KEY, 5.0F); From 251263877bee8f02c4c7cbce06ba731c3342e0f2 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Thu, 14 Mar 2024 13:30:35 -0700 Subject: [PATCH 16/27] Moved the dual file writing to StoreFileWriter --- .../hbase/io/hfile/HFilePrettyPrinter.java | 2 +- .../hbase/mob/DefaultMobStoreCompactor.java | 20 +- .../hadoop/hbase/mob/MobStoreEngine.java | 75 +- .../regionserver/DefaultStoreEngine.java | 35 +- .../regionserver/DefaultStoreFileManager.java | 54 +- .../hbase/regionserver/DualFileWriter.java | 275 ------ .../hadoop/hbase/regionserver/HStoreFile.java | 27 +- .../hbase/regionserver/StoreContext.java | 8 + .../hbase/regionserver/StoreEngine.java | 34 - .../regionserver/StoreFileComparators.java | 2 +- .../hbase/regionserver/StoreFileWriter.java | 851 +++++++++++++----- .../hbase/regionserver/StoreScanner.java | 2 +- .../compactions/CompactionConfiguration.java | 14 +- .../regionserver/compactions/Compactor.java | 2 +- .../compactions/DefaultCompactor.java | 54 +- .../StoreFileTrackerBase.java | 4 +- .../hbase/regionserver/TestCompaction.java | 19 +- .../regionserver/TestCompactorMemLeak.java | 4 +- .../regionserver/TestStoreFileWriter.java | 462 ++++++++++ .../compactions/TestCompactor.java | 21 - .../compactions/TestDualFileWriter.java | 215 ----- 21 files changed, 1234 insertions(+), 946 deletions(-) delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index 3fe84310db51..0c32303746c0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -537,7 +537,7 @@ private void printMeta(HFile.Reader reader, Map fileInfo) throws Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY) || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED) || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY) - || Bytes.equals(e.getKey(), HStoreFile.HAS_LIVE_VERSIONS_KEY) + || Bytes.equals(e.getKey(), HStoreFile.HISTORICAL_KEY) ) { out.println(Bytes.toBoolean(e.getValue())); } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java index 39f47fc4cf22..44f77b62ad8b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java @@ -55,7 +55,7 @@ import org.apache.hadoop.hbase.regionserver.compactions.CloseChecker; import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequestImpl; 
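The CompactionConfiguration hunk above raises the minimum number of files per compaction by one whenever the dual file writer is enabled, because a minor compaction can now emit two output files (live and historical); with a minimum of two, a two-in/two-out compaction would leave the file count unchanged and retrigger itself forever. A simplified sketch of the resulting value; the legacy fallback property is omitted, and the dual-writer key and default follow what the patch describes at this point in the series.

import org.apache.hadoop.conf.Configuration;

final class MinFilesToCompactSketch {
  private MinFilesToCompactSketch() {
  }

  // Simplified: the real code also honors a legacy property name for the minimum.
  static int effectiveMinFilesToCompact(Configuration conf) {
    int minFiles = Math.max(2, conf.getInt("hbase.hstore.compaction.min", 3));
    if (conf.getBoolean("hbase.hstore.defaultengine.enable.dualfilewriter", false)) {
      // Dual-file compactions can write two files, so require one extra input.
      minFiles += 1;
    }
    return minFiles;
  }
}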
-import org.apache.hadoop.hbase.regionserver.compactions.Compactor; +import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputControlUtil; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import org.apache.hadoop.hbase.security.User; @@ -74,7 +74,7 @@ * Compact passed set of files in the mob-enabled column family. */ @InterfaceAudience.Private -public class DefaultMobStoreCompactor extends Compactor { +public class DefaultMobStoreCompactor extends DefaultCompactor { private static final Logger LOG = LoggerFactory.getLogger(DefaultMobStoreCompactor.class); protected long mobSizeThreshold; @@ -172,6 +172,7 @@ public DefaultMobStoreCompactor(Configuration conf, HStore store) { } + @Override public List compact(CompactionRequestImpl request, ThroughputController throughputController, User user) throws IOException { String tableName = store.getTableName().toString(); @@ -714,19 +715,4 @@ private void deleteCommittedMobFiles(List fileNames) { } - @Override - protected final void abortWriter(StoreFileWriter writer) throws IOException { - Path leftoverFile = writer.getPath(); - try { - writer.close(); - } catch (IOException e) { - LOG.warn("Failed to close the writer after an unfinished compaction.", e); - } - try { - store.getFileSystem().delete(leftoverFile, false); - } catch (IOException e) { - LOG.warn("Failed to delete the leftover file {} after an unfinished compaction.", - leftoverFile, e); - } - } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java index 5ee7ad8f9341..633781d464e0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobStoreEngine.java @@ -17,39 +17,21 @@ */ package org.apache.hadoop.hbase.mob; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY; - import java.io.IOException; -import java.util.List; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.regionserver.DefaultStoreFileManager; -import org.apache.hadoop.hbase.regionserver.DefaultStoreFlusher; +import org.apache.hadoop.hbase.regionserver.DefaultStoreEngine; import org.apache.hadoop.hbase.regionserver.HStore; -import org.apache.hadoop.hbase.regionserver.HStoreFile; -import org.apache.hadoop.hbase.regionserver.StoreEngine; -import org.apache.hadoop.hbase.regionserver.StoreFileComparators; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; -import org.apache.hadoop.hbase.regionserver.compactions.RatioBasedCompactionPolicy; -import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; -import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.yetus.audience.InterfaceAudience; /** * MobStoreEngine creates the mob specific compactor, and store flusher. 
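This patch reworks MobStoreEngine to extend DefaultStoreEngine and override only the compactor and flusher factories; as the hunks that follow show, both engines resolve the concrete class name from configuration and instantiate it reflectively through a constructor with a fixed (Configuration, HStore) signature via ReflectionUtils.instantiateWithCustomCtor. The plain-JDK sketch below illustrates the general loading pattern only; the helper class, its key parameter, and the single-argument constructor convention are assumptions for the example.

import java.lang.reflect.Constructor;
import org.apache.hadoop.conf.Configuration;

final class PluggableComponentLoader {
  private PluggableComponentLoader() {
  }

  // Loads a component whose class name is read from configuration, falling back
  // to a default, and builds it through a (Configuration) constructor.
  static <T> T load(Configuration conf, String key, String defaultClassName, Class<T> type)
    throws Exception {
    String className = conf.get(key, defaultClassName);
    Class<? extends T> clazz = Class.forName(className).asSubclass(type);
    Constructor<? extends T> ctor = clazz.getConstructor(Configuration.class);
    return ctor.newInstance(conf);
  }
}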
*/ @InterfaceAudience.Private -public class MobStoreEngine extends StoreEngine { +public class MobStoreEngine extends DefaultStoreEngine { public final static String MOB_COMPACTOR_CLASS_KEY = "hbase.hstore.mobengine.compactor.class"; @Override - public boolean needsCompaction(List filesCompacting) { - return compactionPolicy.needsCompaction(this.storeFileManager.getStorefiles(), filesCompacting); - } - protected void createStoreFlusher(Configuration conf, HStore store) throws IOException { // When using MOB, we use DefaultMobStoreFlusher always // Just use the compactor and compaction policy as that in DefaultStoreEngine. We can have MOB @@ -60,51 +42,14 @@ protected void createStoreFlusher(Configuration conf, HStore store) throws IOExc /** * Creates the DefaultMobCompactor. */ - - protected void createCompactor(Configuration conf, HStore store) throws IOException { - createCompactor(conf, store, MOB_COMPACTOR_CLASS_KEY, DefaultMobStoreCompactor.class.getName()); - } - - @Override - protected void createComponents(Configuration conf, HStore store, CellComparator kvComparator) - throws IOException { - createCompactor(conf, store); - createCompactionPolicy(conf, store); - createStoreFlusher(conf, store); - storeFileManager = new DefaultStoreFileManager(kvComparator, StoreFileComparators.SEQ_ID, conf, - compactionPolicy.getConf()); - } - - protected void createCompactionPolicy(Configuration conf, HStore store) throws IOException { - createCompactionPolicy(conf, store, DEFAULT_COMPACTION_POLICY_CLASS_KEY, - DEFAULT_COMPACTION_POLICY_CLASS.getName()); - } - @Override - public CompactionContext createCompaction() { - return new MobStoreEngine.DefaultCompactionContext(); - } - - private class DefaultCompactionContext extends CompactionContext { - @Override - public boolean select(List filesCompacting, boolean isUserCompaction, - boolean mayUseOffPeak, boolean forceMajor) throws IOException { - request = compactionPolicy.selectCompaction(storeFileManager.getStorefiles(), filesCompacting, - isUserCompaction, mayUseOffPeak, forceMajor); - return request != null; - } - - @Override - public List compact(ThroughputController throughputController, User user) - throws IOException { - return compactor.compact(request, throughputController, user); - } - - @Override - public List preSelect(List filesCompacting) { - return compactionPolicy.preSelectCompactionForCoprocessor(storeFileManager.getStorefiles(), - filesCompacting); + protected void createCompactor(Configuration conf, HStore store) throws IOException { + String className = conf.get(MOB_COMPACTOR_CLASS_KEY, DefaultMobStoreCompactor.class.getName()); + try { + compactor = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); + } catch (RuntimeException e) { + throw new IOException("Unable to load configured compactor '" + className + "'", e); } } - } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 85886ced624d..0c9fb9adcc2c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.regionserver.compactions.RatioBasedCompactionPolicy; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import 
org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.yetus.audience.InterfaceAudience; /** @@ -45,16 +46,13 @@ public class DefaultStoreEngine extends StoreEngine DEFAULT_STORE_FLUSHER_CLASS = DefaultStoreFlusher.class; private static final Class DEFAULT_COMPACTOR_CLASS = DefaultCompactor.class; - public static final Class DEFAULT_COMPACTION_POLICY_CLASS = + private static final Class DEFAULT_COMPACTION_POLICY_CLASS = ExploringCompactionPolicy.class; - public static final boolean DEFAULT_ENABLE_DUAL_FILE_WRITER = false; @Override public boolean needsCompaction(List filesCompacting) { @@ -72,17 +70,36 @@ protected void createComponents(Configuration conf, HStore store, CellComparator } protected void createCompactor(Configuration conf, HStore store) throws IOException { - createCompactor(conf, store, DEFAULT_COMPACTOR_CLASS_KEY, DEFAULT_COMPACTOR_CLASS.getName()); + String className = conf.get(DEFAULT_COMPACTOR_CLASS_KEY, DEFAULT_COMPACTOR_CLASS.getName()); + try { + compactor = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured compactor '" + className + "'", e); + } } protected void createCompactionPolicy(Configuration conf, HStore store) throws IOException { - createCompactionPolicy(conf, store, DEFAULT_COMPACTION_POLICY_CLASS_KEY, - DEFAULT_COMPACTION_POLICY_CLASS.getName()); + String className = + conf.get(DEFAULT_COMPACTION_POLICY_CLASS_KEY, DEFAULT_COMPACTION_POLICY_CLASS.getName()); + try { + compactionPolicy = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, StoreConfigInformation.class }, + new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured compaction policy '" + className + "'", e); + } } protected void createStoreFlusher(Configuration conf, HStore store) throws IOException { - createStoreFlusher(conf, store, DEFAULT_STORE_FLUSHER_CLASS_KEY, - DEFAULT_STORE_FLUSHER_CLASS.getName()); + String className = + conf.get(DEFAULT_STORE_FLUSHER_CLASS_KEY, DEFAULT_STORE_FLUSHER_CLASS.getName()); + try { + storeFlusher = ReflectionUtils.instantiateWithCustomCtor(className, + new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); + } catch (Exception e) { + throw new IOException("Unable to load configured store flusher '" + className + "'", e); + } } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java index 8a2464e661bb..cef269821090 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hbase.regionserver; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_ENABLE_DUAL_FILE_WRITER; +import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES; +import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES; import java.io.IOException; import java.util.ArrayList; @@ -45,7 
+45,7 @@ * Default implementation of StoreFileManager. Not thread-safe. */ @InterfaceAudience.Private -public class DefaultStoreFileManager implements StoreFileManager { +class DefaultStoreFileManager implements StoreFileManager { private static final Logger LOG = LoggerFactory.getLogger(DefaultStoreFileManager.class); private final CellComparator cellComparator; @@ -61,14 +61,14 @@ public class DefaultStoreFileManager implements StoreFileManager { * List of store files that include the latest cells inside this store. This is an immutable list * that is atomically replaced when its contents change. */ - private volatile ImmutableList liveVersionStoreFiles = ImmutableList.of(); + private volatile ImmutableList liveStoreFiles = ImmutableList.of(); /** * List of compacted files inside this store that needs to be excluded in reads because further * new reads will be using only the newly created files out of compaction. These compacted files * will be deleted/cleared once all the existing readers on these compacted files are done. */ private volatile ImmutableList compactedfiles = ImmutableList.of(); - private final boolean enableLiveVersionFiles; + private final boolean enableLiveFileTracking; public DefaultStoreFileManager(CellComparator cellComparator, Comparator storeFileComparator, Configuration conf, @@ -78,27 +78,26 @@ public DefaultStoreFileManager(CellComparator cellComparator, this.comConf = comConf; this.blockingFileCount = conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT); - this.enableLiveVersionFiles = conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, - DEFAULT_ENABLE_DUAL_FILE_WRITER); + this.enableLiveFileTracking = conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, + DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES); } - private List getLiveVersionFiles(Collection storeFiles) - throws IOException { - List hasLiveVersionFiles = new ArrayList<>(storeFiles.size()); + private List getLiveFiles(Collection storeFiles) throws IOException { + List liveFiles = new ArrayList<>(storeFiles.size()); for (HStoreFile file : storeFiles) { file.initReader(); - if (file.hasLiveVersion()) { - hasLiveVersionFiles.add(file); + if (!file.isHistorical()) { + liveFiles.add(file); } } - return hasLiveVersionFiles; + return liveFiles; } @Override public void loadFiles(List storeFiles) throws IOException { - if (enableLiveVersionFiles) { - this.liveVersionStoreFiles = - ImmutableList.sortedCopyOf(getStoreFileComparator(), getLiveVersionFiles(storeFiles)); + if (enableLiveFileTracking) { + this.liveStoreFiles = + ImmutableList.sortedCopyOf(getStoreFileComparator(), getLiveFiles(storeFiles)); } this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, storeFiles); } @@ -115,9 +114,9 @@ public Collection getCompactedfiles() { @Override public void insertNewFiles(Collection sfs) throws IOException { - if (enableLiveVersionFiles) { - this.liveVersionStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), - Iterables.concat(this.liveVersionStoreFiles, getLiveVersionFiles(sfs))); + if (enableLiveFileTracking) { + this.liveStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), + Iterables.concat(this.liveStoreFiles, getLiveFiles(sfs))); } this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, Iterables.concat(this.storefiles, sfs)); @@ -125,8 +124,8 @@ public void insertNewFiles(Collection sfs) throws IOException { @Override public ImmutableCollection clearFiles() { - if (enableLiveVersionFiles) { - liveVersionStoreFiles = 
ImmutableList.of(); + if (enableLiveFileTracking) { + liveStoreFiles = ImmutableList.of(); } ImmutableList result = storefiles; storefiles = ImmutableList.of(); @@ -153,11 +152,10 @@ public final int getCompactedFilesCount() { @Override public void addCompactionResults(Collection newCompactedfiles, Collection results) throws IOException { - if (enableLiveVersionFiles) { - this.liveVersionStoreFiles = ImmutableList.sortedCopyOf(storeFileComparator, - Iterables.concat( - Iterables.filter(liveVersionStoreFiles, sf -> !newCompactedfiles.contains(sf)), - getLiveVersionFiles(results))); + if (enableLiveFileTracking) { + this.liveStoreFiles = ImmutableList.sortedCopyOf(storeFileComparator, + Iterables.concat(Iterables.filter(liveStoreFiles, sf -> !newCompactedfiles.contains(sf)), + getLiveFiles(results))); } this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, Iterables .concat(Iterables.filter(storefiles, sf -> !newCompactedfiles.contains(sf)), results)); @@ -200,8 +198,8 @@ public final Optional getSplitPoint() throws IOException { @Override public Collection getFilesForScan(byte[] startRow, boolean includeStartRow, byte[] stopRow, boolean includeStopRow, boolean onlyLatestVersion) { - if (onlyLatestVersion && enableLiveVersionFiles) { - return liveVersionStoreFiles; + if (onlyLatestVersion && enableLiveFileTracking) { + return liveStoreFiles; } // We cannot provide any useful input and already have the files sorted by seqNum. return getStorefiles(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java deleted file mode 100644 index 4308c86f1ff2..000000000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DualFileWriter.java +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.regionserver; - -import static org.apache.hadoop.hbase.regionserver.HStoreFile.HAS_LIVE_VERSIONS_KEY; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.io.hfile.HFile; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.yetus.audience.InterfaceAudience; - -/** - * Separates the provided cells into two files, one file for the live cells and the other for the - * rest of the cells (historical cells). The live cells includes the live put cells, delete all and - * version delete markers that are not masked by other delete all markers. 
- */ -@InterfaceAudience.Private -public class DualFileWriter extends AbstractMultiFileWriter { - - private final CellComparator comparator; - private StoreFileWriter liveVersionWriter; - private StoreFileWriter historicalVersionWriter; - - private final List writers; - // The last cell of the current row - private Cell lastCell; - // The first (latest) delete family marker of the current row - private Cell deleteFamily; - // The list of delete family version markers of the current row - private List deleteFamilyVersionList = new ArrayList<>(); - // The first (latest) delete column marker of the current column - private Cell deleteColumn; - // The list of delete column version markers of the current column - private List deleteColumnVersionList = new ArrayList<>(); - // The live put cell count for the current column - private int livePutCellCount; - private final boolean dualWriterEnabled; - private final int maxVersions; - private final boolean newVersionBehavior; - - public DualFileWriter(CellComparator comparator, int maxVersions, boolean dualWriterEnabled, - boolean newVersionBehavior) { - this.comparator = comparator; - this.maxVersions = maxVersions; - this.dualWriterEnabled = dualWriterEnabled; - this.newVersionBehavior = newVersionBehavior; - writers = new ArrayList<>(2); - initRowState(); - } - - private void initRowState() { - deleteFamily = null; - deleteFamilyVersionList.clear(); - lastCell = null; - } - - private void initColumnState() { - livePutCellCount = 0; - deleteColumn = null; - deleteColumnVersionList.clear(); - - } - - private void addLiveVersion(Cell cell) throws IOException { - if (liveVersionWriter == null) { - liveVersionWriter = writerFactory.createWriter(); - writers.add(liveVersionWriter); - } - liveVersionWriter.append(cell); - } - - private void addHistoricalVersion(Cell cell) throws IOException { - if (historicalVersionWriter == null) { - historicalVersionWriter = writerFactory.createWriter(); - writers.add(historicalVersionWriter); - } - historicalVersionWriter.append(cell); - } - - private boolean isDeletedByDeleteFamily(Cell cell) { - return deleteFamily != null && (deleteFamily.getTimestamp() > cell.getTimestamp() - || (deleteFamily.getTimestamp() == cell.getTimestamp() - && (!newVersionBehavior || cell.getSequenceId() < deleteFamily.getSequenceId()))); - } - - private boolean isDeletedByDeleteFamilyVersion(Cell cell) { - for (Cell deleteFamilyVersion : deleteFamilyVersionList) { - if ( - deleteFamilyVersion.getTimestamp() == cell.getTimestamp() - && (!newVersionBehavior || cell.getSequenceId() < deleteFamilyVersion.getSequenceId()) - ) { - return true; - } - } - return false; - } - - private boolean isDeletedByDeleteColumn(Cell cell) { - return deleteColumn != null && (deleteColumn.getTimestamp() > cell.getTimestamp() - || (deleteColumn.getTimestamp() == cell.getTimestamp() - && (!newVersionBehavior || cell.getSequenceId() < deleteColumn.getSequenceId()))); - } - - private boolean isDeletedByDeleteColumnVersion(Cell cell) { - for (Cell deleteColumnVersion : deleteColumnVersionList) { - if ( - deleteColumnVersion.getTimestamp() == cell.getTimestamp() - && (!newVersionBehavior || cell.getSequenceId() < deleteColumnVersion.getSequenceId()) - ) { - return true; - } - } - return false; - } - - private boolean isDeleted(Cell cell) { - return isDeletedByDeleteFamily(cell) || isDeletedByDeleteColumn(cell) - || isDeletedByDeleteFamilyVersion(cell) || isDeletedByDeleteColumnVersion(cell); - } - - private void appendCell(Cell cell) throws IOException { - if 
((lastCell == null || !CellUtil.matchingColumn(lastCell, cell))) { - initColumnState(); - } - if (cell.getType() == Cell.Type.DeleteFamily) { - if (deleteFamily == null) { - deleteFamily = cell; - addLiveVersion(cell); - } else { - addHistoricalVersion(cell); - } - } else if (cell.getType() == Cell.Type.DeleteFamilyVersion) { - if (!isDeletedByDeleteFamily(cell)) { - deleteFamilyVersionList.add(cell); - if (deleteFamily != null && deleteFamily.getTimestamp() == cell.getTimestamp()) { - // This means both the delete-family and delete-family-version markers have the same - // timestamp but the sequence id of delete-family-version marker is higher than that of - // the delete-family marker. In this case, there is no need to add the - // delete-family-version marker to the live version file. This case happens only with - // the new version behavior. - addHistoricalVersion(cell); - } else { - addLiveVersion(cell); - } - } else { - addHistoricalVersion(cell); - } - } else if (cell.getType() == Cell.Type.DeleteColumn) { - if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { - deleteColumn = cell; - addLiveVersion(cell); - } else { - addHistoricalVersion(cell); - } - } else if (cell.getType() == Cell.Type.Delete) { - if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { - deleteColumnVersionList.add(cell); - if (deleteFamily != null && deleteFamily.getTimestamp() == cell.getTimestamp()) { - // This means both the delete-family and delete-column-version markers have the same - // timestamp but the sequence id of delete-column-version marker is higher than that of - // the delete-family marker. In this case, there is no need to add the - // delete-column-version marker to the live version file. This case happens only with - // the new version behavior. - addHistoricalVersion(cell); - } else { - addLiveVersion(cell); - } - } else { - addHistoricalVersion(cell); - } - } else if (cell.getType() == Cell.Type.Put) { - if (livePutCellCount < maxVersions) { - // This is a live put cell (i.e., the latest version) of a column. Is it deleted? - if (!isDeleted(cell)) { - addLiveVersion(cell); - livePutCellCount++; - } else { - // It is deleted - addHistoricalVersion(cell); - } - } else { - // It is an older put cell - addHistoricalVersion(cell); - } - } - lastCell = cell; - } - - @Override - public void appendAll(List cellList) throws IOException { - if (!dualWriterEnabled) { - // If the dual writer is not enabled then all cells are written to one file. We use - // the live version file in this case - for (Cell cell : cellList) { - addLiveVersion(cell); - } - return; - } - if (cellList.isEmpty()) { - return; - } - if (lastCell != null && comparator.compareRows(lastCell, cellList.get(0)) != 0) { - // It is a new row and thus time to reset the state - initRowState(); - } - for (Cell cell : cellList) { - appendCell(cell); - } - } - - @Override - public void append(Cell cell) throws IOException { - if (!dualWriterEnabled) { - // If the dual writer is not enabled then all cells are written to one file. 
We use - // the live version file in this case - addLiveVersion(cell); - return; - } - if (lastCell != null && comparator.compareRows(lastCell, cell) != 0) { - // It is a new row and thus time to reset the state - initRowState(); - } - appendCell(cell); - } - - @Override - protected Collection writers() { - return writers; - } - - @Override - protected void preCommitWriters() throws IOException { - if (writers.isEmpty()) { - liveVersionWriter = writerFactory.createWriter(); - writers.add(liveVersionWriter); - } - if (!dualWriterEnabled) { - return; - } - if (liveVersionWriter != null) { - liveVersionWriter.appendFileInfo(HAS_LIVE_VERSIONS_KEY, Bytes.toBytes(true)); - } - if (historicalVersionWriter != null) { - historicalVersionWriter.appendFileInfo(HAS_LIVE_VERSIONS_KEY, Bytes.toBytes(false)); - } - } - - public HFile.Writer getLiveVersionHFileWriter() { - if (writers.isEmpty()) { - return null; - } - return writers.get(0).getHFileWriter(); - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java index 216e086d4063..a9951aa9260c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -125,7 +125,7 @@ public class HStoreFile implements StoreFile { */ public static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID"); - public static final byte[] HAS_LIVE_VERSIONS_KEY = Bytes.toBytes("HAS_LIVE_VERSIONS"); + public static final byte[] HISTORICAL_KEY = Bytes.toBytes("HISTORICAL"); private final StoreFileInfo fileInfo; @@ -140,16 +140,11 @@ public class HStoreFile implements StoreFile { // Indicates if the file got compacted private volatile boolean compactedAway = false; - // Indicate if the file contains live cell versions. This is used when - // hbase.hstore.defaultengine.enable.dualfilewriter is enabled. In that case, compactions + // Indicate if the file contains historical cell versions. This is used when + // hbase.enable.historical.compaction.files is set to true. In that case, compactions // can generate two files, one with the live cell versions and the other with the remaining - // (historical) cell versions. Even when hbase.hstore.defaultengine.enable.dualfilewriter is - // enabled, the files generated by memstore flushes do not include the HAS_LIVE_VERSIONS HFile - // metadata key since memstore does not use compaction writers. However, these files will include - // live version cells. Thus, when the HAS_LIVE_VERSIONS HFile metadata key is not included in - // the HFile, we still want to return true for HStoreFile#hasLiveVersion(). That is why - // the default value for hasLiveVersions is true. - private volatile boolean hasLiveVersions = true; + // (historical) cell versions. + private volatile boolean isHistorical = false; // Keys for metadata stored in backing HFile. // Set when we obtain a Reader. 
@@ -350,8 +345,8 @@ public boolean isCompactedAway() { return compactedAway; } - public boolean hasLiveVersion() { - return hasLiveVersions; + public boolean isHistorical() { + return isHistorical; } public int getRefCount() { @@ -472,9 +467,9 @@ private void open() throws IOException { b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY); this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b)); - b = metadataMap.get(HAS_LIVE_VERSIONS_KEY); + b = metadataMap.get(HISTORICAL_KEY); if (b != null) { - hasLiveVersions = Bytes.toBoolean(b); + isHistorical = Bytes.toBoolean(b); } BloomType hfileBloomType = initialReader.getBloomFilterType(); if (cfBloomType != BloomType.NONE) { @@ -604,10 +599,6 @@ public void markCompactedAway() { this.compactedAway = true; } - public void setHasLiveVersions(boolean hasLiveVersions) { - this.hasLiveVersions = hasLiveVersions; - } - @Override public String toString() { return this.fileInfo.toString(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreContext.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreContext.java index 48618a6976ce..7bb800a1d39c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreContext.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreContext.java @@ -118,6 +118,14 @@ public RegionInfo getRegionInfo() { return regionFileSystem.getRegionInfo(); } + public int getMaxVersions() { + return family.getMaxVersions(); + } + + public boolean getNewVersionBehavior() { + return family.isNewVersionBehavior(); + } + public boolean isPrimaryReplicaStore() { return getRegionInfo().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java index e2a5fbf7c4be..34f882516bae 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java @@ -544,38 +544,4 @@ ReadWriteLock getLock() { public BloomFilterMetrics getBloomFilterMetrics() { return bloomFilterMetrics; } - - protected void createCompactor(Configuration conf, HStore store, String classKey, - String defaultClassName) throws IOException { - String className = conf.get(classKey, defaultClassName); - try { - compactor = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, HStore.class }, new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured compactor '" + className + "'", e); - } - } - - protected void createCompactionPolicy(Configuration conf, HStore store, String classKey, - String defaultClassName) throws IOException { - String className = conf.get(classKey, defaultClassName); - try { - compactionPolicy = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, StoreConfigInformation.class }, - new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured compaction policy '" + className + "'", e); - } - } - - protected void createStoreFlusher(Configuration conf, HStore store, String classKey, - String defaultClassName) throws IOException { - String className = conf.get(classKey, defaultClassName); - try { - storeFlusher = ReflectionUtils.instantiateWithCustomCtor(className, - new Class[] { Configuration.class, 
HStore.class }, new Object[] { conf, store }); - } catch (Exception e) { - throw new IOException("Unable to load configured store flusher '" + className + "'", e); - } - } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java index 9e485ba807c6..dc4abf60e896 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileComparators.java @@ -26,7 +26,7 @@ * Useful comparators for comparing store files. */ @InterfaceAudience.Private -public final class StoreFileComparators { +final class StoreFileComparators { /** * Comparator that compares based on the Sequence Ids of the the store files. Bulk loads that did * not request a seq ID are given a seq id of -1; thus, they are placed before all non- bulk diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java index 17e0001fb0cc..e982367acbd7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.hbase.regionserver.HStoreFile.COMPACTION_EVENT_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.DELETE_FAMILY_COUNT; import static org.apache.hadoop.hbase.regionserver.HStoreFile.EARLIEST_PUT_TS; +import static org.apache.hadoop.hbase.regionserver.HStoreFile.HISTORICAL_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAX_SEQ_ID_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT; @@ -30,9 +31,11 @@ import java.io.IOException; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.UUID; import java.util.function.Consumer; @@ -43,6 +46,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.PrivateCellUtil; @@ -68,6 +73,7 @@ import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.base.Strings; +import org.apache.hbase.thirdparty.com.google.common.collect.Lists; import org.apache.hbase.thirdparty.com.google.common.collect.SetMultimap; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; @@ -79,24 +85,42 @@ @InterfaceAudience.Private public class StoreFileWriter implements CellSink, ShipperListener { private static final Logger LOG = LoggerFactory.getLogger(StoreFileWriter.class.getName()); + public static final String ENABLE_HISTORICAL_COMPACTION_FILES = + "hbase.enable.historical.compaction.files"; + public static final boolean DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES = false; private static final Pattern dash = Pattern.compile("-"); - private final BloomFilterWriter generalBloomFilterWriter; - private final BloomFilterWriter 
deleteFamilyBloomFilterWriter; + private SingleStoreFileWriter liveFileWriter; + private SingleStoreFileWriter historicalFileWriter; + private final FileSystem fs; + private final Path historicalFilePath; + private final Configuration conf; + private final CacheConfig cacheConf; private final BloomType bloomType; - private byte[] bloomParam = null; - private long earliestPutTs = HConstants.LATEST_TIMESTAMP; - private long deleteFamilyCnt = 0; - private BloomContext bloomContext = null; - private BloomContext deleteFamilyBloomContext = null; - private final TimeRangeTracker timeRangeTracker; + private final long maxKeys; + private final InetSocketAddress[] favoredNodes; + private final HFileContext fileContext; + private final boolean shouldDropCacheBehind; private final Supplier> compactedFilesSupplier; - - protected HFile.Writer writer; + private final CellComparator comparator; + private Cell lastCell; + // The first (latest) delete family marker of the current row + private Cell deleteFamily; + // The list of delete family version markers of the current row + private List deleteFamilyVersionList = new ArrayList<>(); + // The first (latest) delete column marker of the current column + private Cell deleteColumn; + // The list of delete column version markers of the current column + private List deleteColumnVersionList = new ArrayList<>(); + // The live put cell count for the current column + private int livePutCellCount; + private final int maxVersions; + private final boolean newVersionBehavior; /** * Creates an HFile.Writer that also write helpful meta data. * @param fs file system to write to - * @param path file name to create + * @param liveFilePath the name of the live file to create + * @param historicalFilePath the name of the historical file to create + * @param conf user configuration * @param bloomType bloom filter setting * @param maxKeys the expected maximum number of keys to be added. Was used for @@ -105,72 +129,35 @@ public class StoreFileWriter implements CellSink, ShipperListener { * @param fileContext The HFile context * @param shouldDropCacheBehind Drop pages written to page cache after writing the store file. 
* @param compactedFilesSupplier Returns the {@link HStore} compacted files which not archived + * @param comparator Cell comparator + * @param maxVersions max cell versions + * @param newVersionBehavior enable new version behavior * @throws IOException problem writing to FS */ - private StoreFileWriter(FileSystem fs, Path path, final Configuration conf, CacheConfig cacheConf, - BloomType bloomType, long maxKeys, InetSocketAddress[] favoredNodes, HFileContext fileContext, - boolean shouldDropCacheBehind, Supplier> compactedFilesSupplier) - throws IOException { + private StoreFileWriter(FileSystem fs, Path liveFilePath, Path historicalFilePath, + final Configuration conf, CacheConfig cacheConf, BloomType bloomType, long maxKeys, + InetSocketAddress[] favoredNodes, HFileContext fileContext, boolean shouldDropCacheBehind, + Supplier> compactedFilesSupplier, CellComparator comparator, + int maxVersions, boolean newVersionBehavior) throws IOException { + this.fs = fs; + this.historicalFilePath = historicalFilePath; + this.conf = conf; + this.cacheConf = cacheConf; + this.bloomType = bloomType; + this.maxKeys = maxKeys; + this.favoredNodes = favoredNodes; + this.fileContext = fileContext; + this.shouldDropCacheBehind = shouldDropCacheBehind; this.compactedFilesSupplier = compactedFilesSupplier; - this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC); - // TODO : Change all writers to be specifically created for compaction context - writer = - HFile.getWriterFactory(conf, cacheConf).withPath(fs, path).withFavoredNodes(favoredNodes) - .withFileContext(fileContext).withShouldDropCacheBehind(shouldDropCacheBehind).create(); - - generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite(conf, cacheConf, - bloomType, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); - - if (generalBloomFilterWriter != null) { - this.bloomType = bloomType; - this.bloomParam = BloomFilterUtil.getBloomFilterParam(bloomType, conf); - if (LOG.isTraceEnabled()) { - LOG.trace("Bloom filter type for " + path + ": " + this.bloomType + ", param: " - + (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH - ? Bytes.toInt(bloomParam) - : Bytes.toStringBinary(bloomParam)) - + ", " + generalBloomFilterWriter.getClass().getSimpleName()); - } - // init bloom context - switch (bloomType) { - case ROW: - bloomContext = - new RowBloomContext(generalBloomFilterWriter, fileContext.getCellComparator()); - break; - case ROWCOL: - bloomContext = - new RowColBloomContext(generalBloomFilterWriter, fileContext.getCellComparator()); - break; - case ROWPREFIX_FIXED_LENGTH: - bloomContext = new RowPrefixFixedLengthBloomContext(generalBloomFilterWriter, - fileContext.getCellComparator(), Bytes.toInt(bloomParam)); - break; - default: - throw new IOException( - "Invalid Bloom filter type: " + bloomType + " (ROW or ROWCOL or ROWPREFIX expected)"); - } - } else { - // Not using Bloom filters. 
- this.bloomType = BloomType.NONE; - } - - // initialize delete family Bloom filter when there is NO RowCol Bloom filter - if (this.bloomType != BloomType.ROWCOL) { - this.deleteFamilyBloomFilterWriter = BloomFilterFactory.createDeleteBloomAtWrite(conf, - cacheConf, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); - deleteFamilyBloomContext = - new RowBloomContext(deleteFamilyBloomFilterWriter, fileContext.getCellComparator()); - } else { - deleteFamilyBloomFilterWriter = null; - } - if (deleteFamilyBloomFilterWriter != null && LOG.isTraceEnabled()) { - LOG.trace("Delete Family Bloom filter type for " + path + ": " - + deleteFamilyBloomFilterWriter.getClass().getSimpleName()); - } + this.comparator = comparator; + this.maxVersions = maxVersions; + this.newVersionBehavior = newVersionBehavior; + liveFileWriter = new SingleStoreFileWriter(fs, liveFilePath, conf, cacheConf, bloomType, + maxKeys, favoredNodes, fileContext, shouldDropCacheBehind, compactedFilesSupplier); } public long getPos() throws IOException { - return ((HFileWriterImpl) writer).getPos(); + return liveFileWriter.getPos(); } /** @@ -181,7 +168,10 @@ public long getPos() throws IOException { */ public void appendMetadata(final long maxSequenceId, final boolean majorCompaction) throws IOException { - appendMetadata(maxSequenceId, majorCompaction, Collections.emptySet()); + liveFileWriter.appendMetadata(maxSequenceId, majorCompaction); + if (historicalFileWriter != null) { + historicalFileWriter.appendMetadata(maxSequenceId, majorCompaction); + } } /** @@ -193,37 +183,10 @@ public void appendMetadata(final long maxSequenceId, final boolean majorCompacti */ public void appendMetadata(final long maxSequenceId, final boolean majorCompaction, final Collection storeFiles) throws IOException { - writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId)); - writer.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction)); - writer.appendFileInfo(COMPACTION_EVENT_KEY, toCompactionEventTrackerBytes(storeFiles)); - appendTrackedTimestampsToMetadata(); - } - - /** - * Used when write {@link HStoreFile#COMPACTION_EVENT_KEY} to new file's file info. The compacted - * store files's name is needed. But if the compacted store file is a result of compaction, it's - * compacted files which still not archived is needed, too. And don't need to add compacted files - * recursively. If file A, B, C compacted to new file D, and file D compacted to new file E, will - * write A, B, C, D to file E's compacted files. So if file E compacted to new file F, will add E - * to F's compacted files first, then add E's compacted files: A, B, C, D to it. And no need to - * add D's compacted file, as D's compacted files has been in E's compacted files, too. See - * HBASE-20724 for more details. 
- * @param storeFiles The compacted store files to generate this new file - * @return bytes of CompactionEventTracker - */ - private byte[] toCompactionEventTrackerBytes(Collection storeFiles) { - Set notArchivedCompactedStoreFiles = this.compactedFilesSupplier.get().stream() - .map(sf -> sf.getPath().getName()).collect(Collectors.toSet()); - Set compactedStoreFiles = new HashSet<>(); - for (HStoreFile storeFile : storeFiles) { - compactedStoreFiles.add(storeFile.getFileInfo().getPath().getName()); - for (String csf : storeFile.getCompactedStoreFiles()) { - if (notArchivedCompactedStoreFiles.contains(csf)) { - compactedStoreFiles.add(csf); - } - } + liveFileWriter.appendMetadata(maxSequenceId, majorCompaction, storeFiles); + if (historicalFileWriter != null) { + historicalFileWriter.appendMetadata(maxSequenceId, majorCompaction, storeFiles); } - return ProtobufUtil.toCompactionEventTrackerBytes(compactedStoreFiles); } /** @@ -235,10 +198,10 @@ private byte[] toCompactionEventTrackerBytes(Collection storeFiles) */ public void appendMetadata(final long maxSequenceId, final boolean majorCompaction, final long mobCellsCount) throws IOException { - writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId)); - writer.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction)); - writer.appendFileInfo(MOB_CELLS_COUNT, Bytes.toBytes(mobCellsCount)); - appendTrackedTimestampsToMetadata(); + liveFileWriter.appendMetadata(maxSequenceId, majorCompaction, mobCellsCount); + if (historicalFileWriter != null) { + historicalFileWriter.appendMetadata(maxSequenceId, majorCompaction, mobCellsCount); + } } /** @@ -247,7 +210,10 @@ public void appendMetadata(final long maxSequenceId, final boolean majorCompacti * @throws IOException problem writing to FS */ public void appendMobMetadata(SetMultimap mobRefSet) throws IOException { - writer.appendFileInfo(MOB_FILE_REFS, MobUtils.serializeMobFileRefs(mobRefSet)); + liveFileWriter.appendMobMetadata(mobRefSet); + if (historicalFileWriter != null) { + historicalFileWriter.appendMobMetadata(mobRefSet); + } } /** @@ -256,8 +222,10 @@ public void appendMobMetadata(SetMultimap mobRefSet) throws I public void appendTrackedTimestampsToMetadata() throws IOException { // TODO: The StoreFileReader always converts the byte[] to TimeRange // via TimeRangeTracker, so we should write the serialization data of TimeRange directly. - appendFileInfo(TIMERANGE_KEY, TimeRangeTracker.toByteArray(timeRangeTracker)); - appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs)); + liveFileWriter.appendTrackedTimestampsToMetadata(); + if (historicalFileWriter != null) { + historicalFileWriter.appendTrackedTimestampsToMetadata(); + } } /** @@ -265,147 +233,560 @@ public void appendTrackedTimestampsToMetadata() throws IOException { * to include the timestamp of this key */ public void trackTimestamps(final Cell cell) { - if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) { - earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp()); + liveFileWriter.trackTimestamps(cell); + if (historicalFileWriter != null) { + historicalFileWriter.trackTimestamps(cell); } - timeRangeTracker.includeTimestamp(cell); } - private void appendGeneralBloomfilter(final Cell cell) throws IOException { - if (this.generalBloomFilterWriter != null) { - /* - * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png - * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + Timestamp 3 Types of - * Filtering: 1. Row = Row 2. 
RowCol = Row + Qualifier 3. RowPrefixFixedLength = Fixed Length - * Row Prefix - */ - bloomContext.writeBloom(cell); + @Override + public void beforeShipped() throws IOException { + liveFileWriter.beforeShipped(); + if (historicalFileWriter != null) { + historicalFileWriter.beforeShipped(); } } - private void appendDeleteFamilyBloomFilter(final Cell cell) throws IOException { - if (!PrivateCellUtil.isDeleteFamily(cell) && !PrivateCellUtil.isDeleteFamilyVersion(cell)) { - return; - } + public Path getPath() { + return liveFileWriter.getPath(); + } - // increase the number of delete family in the store file - deleteFamilyCnt++; - if (this.deleteFamilyBloomFilterWriter != null) { - deleteFamilyBloomContext.writeBloom(cell); + public List getPaths() { + if (historicalFileWriter == null) { + return Lists.newArrayList(liveFileWriter.getPath()); } + return Lists.newArrayList(liveFileWriter.getPath(), historicalFileWriter.getPath()); } - @Override - public void append(final Cell cell) throws IOException { - appendGeneralBloomfilter(cell); - appendDeleteFamilyBloomFilter(cell); - writer.append(cell); - trackTimestamps(cell); + public boolean hasGeneralBloom() { + return liveFileWriter.hasGeneralBloom(); } - @Override - public void beforeShipped() throws IOException { - // For now these writer will always be of type ShipperListener true. - // TODO : Change all writers to be specifically created for compaction context - writer.beforeShipped(); - if (generalBloomFilterWriter != null) { - generalBloomFilterWriter.beforeShipped(); - } - if (deleteFamilyBloomFilterWriter != null) { - deleteFamilyBloomFilterWriter.beforeShipped(); + /** + * For unit testing only. + * @return the Bloom filter used by this writer. + */ + BloomFilterWriter getGeneralBloomWriter() { + return liveFileWriter.generalBloomFilterWriter; + } + + public void close() throws IOException { + // The live file holds the latest cell versions, so it is marked as not historical; only the + // optional historical file is flagged with HISTORICAL=true. + LOG.debug( + "closing liveFileWriter " + liveFileWriter.getPath() + " pos " + liveFileWriter.getPos()); + liveFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(false)); + liveFileWriter.close(); + if (historicalFileWriter != null) { + historicalFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(true)); + LOG.debug("closing historicalFileWriter " + historicalFileWriter.getPath() + " pos " + + historicalFileWriter.getPos()); + historicalFileWriter.close(); } } - public Path getPath() { - return this.writer.getPath(); + public void appendFileInfo(byte[] key, byte[] value) throws IOException { + liveFileWriter.appendFileInfo(key, value); + if (historicalFileWriter != null) { + historicalFileWriter.appendFileInfo(key, value); + } } - public boolean hasGeneralBloom() { - return this.generalBloomFilterWriter != null; + /** + * For use in testing. + */ + HFile.Writer getHFileWriter() { + return liveFileWriter.getHFileWriter(); } /** - * For unit testing only. - * @return the Bloom filter used by this writer. + * @param dir Directory to create file in. 
+ * @return random filename inside passed dir */ - BloomFilterWriter getGeneralBloomWriter() { - return generalBloomFilterWriter; + public static Path getUniqueFile(final FileSystem fs, final Path dir) throws IOException { + if (!fs.getFileStatus(dir).isDirectory()) { + throw new IOException("Expecting " + dir.toString() + " to be a directory"); + } + return new Path(dir, dash.matcher(UUID.randomUUID().toString()).replaceAll("")); } - private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException { - boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0); - if (haveBloom) { - bfw.compactBloom(); + private SingleStoreFileWriter getHistoricalFileWriter() throws IOException { + if (historicalFileWriter == null) { + historicalFileWriter = + new SingleStoreFileWriter(fs, historicalFilePath, conf, cacheConf, bloomType, maxKeys, + favoredNodes, fileContext, shouldDropCacheBehind, compactedFilesSupplier); } - return haveBloom; + return historicalFileWriter; + } + + private void initRowState() { + deleteFamily = null; + deleteFamilyVersionList.clear(); + lastCell = null; } - private boolean closeGeneralBloomFilter() throws IOException { - boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter); + private void initColumnState() { + livePutCellCount = 0; + deleteColumn = null; + deleteColumnVersionList.clear(); - // add the general Bloom filter writer and append file info - if (hasGeneralBloom) { - writer.addGeneralBloomFilter(generalBloomFilterWriter); - writer.appendFileInfo(BLOOM_FILTER_TYPE_KEY, Bytes.toBytes(bloomType.toString())); - if (bloomParam != null) { - writer.appendFileInfo(BLOOM_FILTER_PARAM_KEY, bloomParam); + } + + private boolean isDeletedByDeleteFamily(Cell cell) { + return deleteFamily != null && (deleteFamily.getTimestamp() > cell.getTimestamp() + || (deleteFamily.getTimestamp() == cell.getTimestamp() + && (!newVersionBehavior || cell.getSequenceId() < deleteFamily.getSequenceId()))); + } + + private boolean isDeletedByDeleteFamilyVersion(Cell cell) { + for (Cell deleteFamilyVersion : deleteFamilyVersionList) { + if ( + deleteFamilyVersion.getTimestamp() == cell.getTimestamp() + && (!newVersionBehavior || cell.getSequenceId() < deleteFamilyVersion.getSequenceId()) + ) { + return true; } - bloomContext.addLastBloomKey(writer); } - return hasGeneralBloom; + return false; } - private boolean closeDeleteFamilyBloomFilter() throws IOException { - boolean hasDeleteFamilyBloom = closeBloomFilter(deleteFamilyBloomFilterWriter); + private boolean isDeletedByDeleteColumn(Cell cell) { + return deleteColumn != null && (deleteColumn.getTimestamp() > cell.getTimestamp() + || (deleteColumn.getTimestamp() == cell.getTimestamp() + && (!newVersionBehavior || cell.getSequenceId() < deleteColumn.getSequenceId()))); + } - // add the delete family Bloom filter writer - if (hasDeleteFamilyBloom) { - writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter); + private boolean isDeletedByDeleteColumnVersion(Cell cell) { + for (Cell deleteColumnVersion : deleteColumnVersionList) { + if ( + deleteColumnVersion.getTimestamp() == cell.getTimestamp() + && (!newVersionBehavior || cell.getSequenceId() < deleteColumnVersion.getSequenceId()) + ) { + return true; + } } + return false; + } - // append file info about the number of delete family kvs - // even if there is no delete family Bloom. 
- writer.appendFileInfo(DELETE_FAMILY_COUNT, Bytes.toBytes(this.deleteFamilyCnt)); + private boolean isDeleted(Cell cell) { + return isDeletedByDeleteFamily(cell) || isDeletedByDeleteColumn(cell) + || isDeletedByDeleteFamilyVersion(cell) || isDeletedByDeleteColumnVersion(cell); + } - return hasDeleteFamilyBloom + private void appendCell(Cell cell) throws IOException { + if ((lastCell == null || !CellUtil.matchingColumn(lastCell, cell))) { + initColumnState(); + } + if (cell.getType() == Cell.Type.DeleteFamily) { + if (deleteFamily == null) { + deleteFamily = cell; + liveFileWriter.append(cell); + } else { + getHistoricalFileWriter().append(cell); + } + } else if (cell.getType() == Cell.Type.DeleteFamilyVersion) { + if (!isDeletedByDeleteFamily(cell)) { + deleteFamilyVersionList.add(cell); + if (deleteFamily != null && deleteFamily.getTimestamp() == cell.getTimestamp()) { + // This means both the delete-family and delete-family-version markers have the same + // timestamp but the sequence id of delete-family-version marker is higher than that of + // the delete-family marker. In this case, there is no need to add the + // delete-family-version marker to the live version file. This case happens only with + // the new version behavior. + getHistoricalFileWriter().append(cell); + } else { + liveFileWriter.append(cell); + } + } else { + getHistoricalFileWriter().append(cell); + } + } else if (cell.getType() == Cell.Type.DeleteColumn) { + if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { + deleteColumn = cell; + liveFileWriter.append(cell); + } else { + getHistoricalFileWriter().append(cell); + } + } else if (cell.getType() == Cell.Type.Delete) { + if (!isDeletedByDeleteFamily(cell) && deleteColumn == null) { + deleteColumnVersionList.add(cell); + if (deleteFamily != null && deleteFamily.getTimestamp() == cell.getTimestamp()) { + // This means both the delete-family and delete-column-version markers have the same + // timestamp but the sequence id of delete-column-version marker is higher than that of + // the delete-family marker. In this case, there is no need to add the + // delete-column-version marker to the live version file. This case happens only with + // the new version behavior. + getHistoricalFileWriter().append(cell); + } else { + liveFileWriter.append(cell); + } + } else { + getHistoricalFileWriter().append(cell); + } + } else if (cell.getType() == Cell.Type.Put) { + if (livePutCellCount < maxVersions) { + // This is a live put cell (i.e., the latest version) of a column. Is it deleted? + if (!isDeleted(cell)) { + liveFileWriter.append(cell); + livePutCellCount++; + } else { + // It is deleted + getHistoricalFileWriter().append(cell); + } + } else { + // It is an older put cell + getHistoricalFileWriter().append(cell); + } + } + lastCell = cell; + } @Override + public void appendAll(List cellList) throws IOException { + if (historicalFilePath == null) { + // The dual writing is not enabled and all cells are written to one file. 
We use + // the live version file in this case + for (Cell cell : cellList) { + liveFileWriter.append(cell); + } + return; + } + if (cellList.isEmpty()) { + return; + } + if (lastCell != null && comparator.compareRows(lastCell, cellList.get(0)) != 0) { + // It is a new row and thus time to reset the state + initRowState(); + } + for (Cell cell : cellList) { + appendCell(cell); + } + } + + @Override + public void append(Cell cell) throws IOException { + if (historicalFilePath == null) { + // The dual writing is not enabled and all cells are written to one file. We use + // the live version file in this case + liveFileWriter.append(cell); + return; + } + if (lastCell != null && comparator.compareRows(lastCell, cell) != 0) { + // It is a new row and thus time to reset the state + initRowState(); + } + appendCell(cell); + } + + private static class SingleStoreFileWriter { + private final BloomFilterWriter generalBloomFilterWriter; + private final BloomFilterWriter deleteFamilyBloomFilterWriter; + private final BloomType bloomType; + private byte[] bloomParam = null; + private long earliestPutTs = HConstants.LATEST_TIMESTAMP; + private long deleteFamilyCnt = 0; + private BloomContext bloomContext = null; + private BloomContext deleteFamilyBloomContext = null; + private final TimeRangeTracker timeRangeTracker; + private final Supplier> compactedFilesSupplier; - writer.close(); + private HFile.Writer writer; - // Log final Bloom filter statistics. This needs to be done after close() - // because compound Bloom filters might be finalized as part of closing. - if (LOG.isTraceEnabled()) { - LOG.trace( - (hasGeneralBloom ? "" : "NO ") + "General Bloom and " + (hasDeleteFamilyBloom ? "" : "NO ") - + "DeleteFamily" + " was added to HFile " + getPath()); + /** + * Creates an HFile.Writer that also write helpful meta data. + * @param fs file system to write to + * @param path file name to create + * @param conf user configuration + * @param bloomType bloom filter setting + * @param maxKeys the expected maximum number of keys to be added. Was used for + * Bloom filter size in {@link HFile} format version 1. + * @param favoredNodes an array of favored nodes or possibly null + * @param fileContext The HFile context + * @param shouldDropCacheBehind Drop pages written to page cache after writing the store file. + * @param compactedFilesSupplier Returns the {@link HStore} compacted files which not archived + * @throws IOException problem writing to FS + */ + private SingleStoreFileWriter(FileSystem fs, Path path, final Configuration conf, + CacheConfig cacheConf, BloomType bloomType, long maxKeys, InetSocketAddress[] favoredNodes, + HFileContext fileContext, boolean shouldDropCacheBehind, + Supplier> compactedFilesSupplier) throws IOException { + this.compactedFilesSupplier = compactedFilesSupplier; + this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC); + // TODO : Change all writers to be specifically created for compaction context + writer = + HFile.getWriterFactory(conf, cacheConf).withPath(fs, path).withFavoredNodes(favoredNodes) + .withFileContext(fileContext).withShouldDropCacheBehind(shouldDropCacheBehind).create(); + + generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite(conf, cacheConf, + bloomType, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); + + if (generalBloomFilterWriter != null) { + this.bloomType = bloomType; + this.bloomParam = BloomFilterUtil.getBloomFilterParam(bloomType, conf); + if (LOG.isTraceEnabled()) { + LOG.trace("Bloom filter type for " + path + ": " + this.bloomType + ", param: " + + (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH + ? 
Bytes.toInt(bloomParam) + : Bytes.toStringBinary(bloomParam)) + + ", " + generalBloomFilterWriter.getClass().getSimpleName()); + } + // init bloom context + switch (bloomType) { + case ROW: + bloomContext = + new RowBloomContext(generalBloomFilterWriter, fileContext.getCellComparator()); + break; + case ROWCOL: + bloomContext = + new RowColBloomContext(generalBloomFilterWriter, fileContext.getCellComparator()); + break; + case ROWPREFIX_FIXED_LENGTH: + bloomContext = new RowPrefixFixedLengthBloomContext(generalBloomFilterWriter, + fileContext.getCellComparator(), Bytes.toInt(bloomParam)); + break; + default: + throw new IOException( + "Invalid Bloom filter type: " + bloomType + " (ROW or ROWCOL or ROWPREFIX expected)"); + } + } else { + // Not using Bloom filters. + this.bloomType = BloomType.NONE; + } + + // initialize delete family Bloom filter when there is NO RowCol Bloom filter + if (this.bloomType != BloomType.ROWCOL) { + this.deleteFamilyBloomFilterWriter = BloomFilterFactory.createDeleteBloomAtWrite(conf, + cacheConf, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); + deleteFamilyBloomContext = + new RowBloomContext(deleteFamilyBloomFilterWriter, fileContext.getCellComparator()); + } else { + deleteFamilyBloomFilterWriter = null; + } + if (deleteFamilyBloomFilterWriter != null && LOG.isTraceEnabled()) { + LOG.trace("Delete Family Bloom filter type for " + path + ": " + + deleteFamilyBloomFilterWriter.getClass().getSimpleName()); + } } - } + private long getPos() throws IOException { + return ((HFileWriterImpl) writer).getPos(); + } - public void appendFileInfo(byte[] key, byte[] value) throws IOException { - writer.appendFileInfo(key, value); - } + /** + * Writes meta data. Call before {@link #close()} since its written as meta data to this file. + * @param maxSequenceId Maximum sequence id. + * @param majorCompaction True if this file is product of a major compaction + * @throws IOException problem writing to FS + */ + private void appendMetadata(final long maxSequenceId, final boolean majorCompaction) + throws IOException { + appendMetadata(maxSequenceId, majorCompaction, Collections.emptySet()); + } - /** - * For use in testing. - */ - HFile.Writer getHFileWriter() { - return writer; - } + /** + * Writes meta data. Call before {@link #close()} since its written as meta data to this file. + * @param maxSequenceId Maximum sequence id. + * @param majorCompaction True if this file is product of a major compaction + * @param storeFiles The compacted store files to generate this new file + * @throws IOException problem writing to FS + */ + private void appendMetadata(final long maxSequenceId, final boolean majorCompaction, + final Collection storeFiles) throws IOException { + writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId)); + writer.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction)); + writer.appendFileInfo(COMPACTION_EVENT_KEY, toCompactionEventTrackerBytes(storeFiles)); + appendTrackedTimestampsToMetadata(); + } - /** - * @param dir Directory to create file in. - * @return random filename inside passed dir - */ - public static Path getUniqueFile(final FileSystem fs, final Path dir) throws IOException { - if (!fs.getFileStatus(dir).isDirectory()) { - throw new IOException("Expecting " + dir.toString() + " to be a directory"); + /** + * Used when write {@link HStoreFile#COMPACTION_EVENT_KEY} to new file's file info. The + * compacted store files's name is needed. 
But if the compacted store file is a result of + * compaction, it's compacted files which still not archived is needed, too. And don't need to + * add compacted files recursively. If file A, B, C compacted to new file D, and file D + * compacted to new file E, will write A, B, C, D to file E's compacted files. So if file E + * compacted to new file F, will add E to F's compacted files first, then add E's compacted + * files: A, B, C, D to it. And no need to add D's compacted file, as D's compacted files has + * been in E's compacted files, too. See HBASE-20724 for more details. + * @param storeFiles The compacted store files to generate this new file + * @return bytes of CompactionEventTracker + */ + private byte[] toCompactionEventTrackerBytes(Collection storeFiles) { + Set notArchivedCompactedStoreFiles = this.compactedFilesSupplier.get().stream() + .map(sf -> sf.getPath().getName()).collect(Collectors.toSet()); + Set compactedStoreFiles = new HashSet<>(); + for (HStoreFile storeFile : storeFiles) { + compactedStoreFiles.add(storeFile.getFileInfo().getPath().getName()); + for (String csf : storeFile.getCompactedStoreFiles()) { + if (notArchivedCompactedStoreFiles.contains(csf)) { + compactedStoreFiles.add(csf); + } + } + } + return ProtobufUtil.toCompactionEventTrackerBytes(compactedStoreFiles); + } + + /** + * Writes meta data. Call before {@link #close()} since its written as meta data to this file. + * @param maxSequenceId Maximum sequence id. + * @param majorCompaction True if this file is product of a major compaction + * @param mobCellsCount The number of mob cells. + * @throws IOException problem writing to FS + */ + private void appendMetadata(final long maxSequenceId, final boolean majorCompaction, + final long mobCellsCount) throws IOException { + writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId)); + writer.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction)); + writer.appendFileInfo(MOB_CELLS_COUNT, Bytes.toBytes(mobCellsCount)); + appendTrackedTimestampsToMetadata(); + } + + /** + * Appends MOB - specific metadata (even if it is empty) + * @param mobRefSet - original table -> set of MOB file names + * @throws IOException problem writing to FS + */ + private void appendMobMetadata(SetMultimap mobRefSet) throws IOException { + writer.appendFileInfo(MOB_FILE_REFS, MobUtils.serializeMobFileRefs(mobRefSet)); + } + + /** + * Add TimestampRange and earliest put timestamp to Metadata + */ + private void appendTrackedTimestampsToMetadata() throws IOException { + // TODO: The StoreFileReader always converts the byte[] to TimeRange + // via TimeRangeTracker, so we should write the serialization data of TimeRange directly. + appendFileInfo(TIMERANGE_KEY, TimeRangeTracker.toByteArray(timeRangeTracker)); + appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs)); + } + + /** + * Record the earlest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker + * to include the timestamp of this key + */ + private void trackTimestamps(final Cell cell) { + if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) { + earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp()); + } + timeRangeTracker.includeTimestamp(cell); + } + + private void appendGeneralBloomfilter(final Cell cell) throws IOException { + if (this.generalBloomFilterWriter != null) { + /* + * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue. 
+ * png Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + Timestamp 3 Types of + * Filtering: 1. Row = Row 2. RowCol = Row + Qualifier 3. RowPrefixFixedLength = Fixed + * Length Row Prefix + */ + bloomContext.writeBloom(cell); + } + } + + private void appendDeleteFamilyBloomFilter(final Cell cell) throws IOException { + if (!PrivateCellUtil.isDeleteFamily(cell) && !PrivateCellUtil.isDeleteFamilyVersion(cell)) { + return; + } + + // increase the number of delete family in the store file + deleteFamilyCnt++; + if (this.deleteFamilyBloomFilterWriter != null) { + deleteFamilyBloomContext.writeBloom(cell); + } + } + + private void append(final Cell cell) throws IOException { + appendGeneralBloomfilter(cell); + appendDeleteFamilyBloomFilter(cell); + writer.append(cell); + trackTimestamps(cell); + } + + private void beforeShipped() throws IOException { + // For now these writer will always be of type ShipperListener true. + // TODO : Change all writers to be specifically created for compaction context + writer.beforeShipped(); + if (generalBloomFilterWriter != null) { + generalBloomFilterWriter.beforeShipped(); + } + if (deleteFamilyBloomFilterWriter != null) { + deleteFamilyBloomFilterWriter.beforeShipped(); + } + } + + private Path getPath() { + return this.writer.getPath(); + } + + private boolean hasGeneralBloom() { + return this.generalBloomFilterWriter != null; + } + + /** + * For unit testing only. + * @return the Bloom filter used by this writer. + */ + BloomFilterWriter getGeneralBloomWriter() { + return generalBloomFilterWriter; + } + + private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException { + boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0); + if (haveBloom) { + bfw.compactBloom(); + } + return haveBloom; + } + + private boolean closeGeneralBloomFilter() throws IOException { + boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter); + + // add the general Bloom filter writer and append file info + if (hasGeneralBloom) { + writer.addGeneralBloomFilter(generalBloomFilterWriter); + writer.appendFileInfo(BLOOM_FILTER_TYPE_KEY, Bytes.toBytes(bloomType.toString())); + if (bloomParam != null) { + writer.appendFileInfo(BLOOM_FILTER_PARAM_KEY, bloomParam); + } + bloomContext.addLastBloomKey(writer); + } + return hasGeneralBloom; + } + + private boolean closeDeleteFamilyBloomFilter() throws IOException { + boolean hasDeleteFamilyBloom = closeBloomFilter(deleteFamilyBloomFilterWriter); + + // add the delete family Bloom filter writer + if (hasDeleteFamilyBloom) { + writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter); + } + + // append file info about the number of delete family kvs + // even if there is no delete family Bloom. + writer.appendFileInfo(DELETE_FAMILY_COUNT, Bytes.toBytes(this.deleteFamilyCnt)); + + return hasDeleteFamilyBloom; + } + + private void close() throws IOException { + boolean hasGeneralBloom = this.closeGeneralBloomFilter(); + boolean hasDeleteFamilyBloom = this.closeDeleteFamilyBloomFilter(); + + writer.close(); + + // Log final Bloom filter statistics. This needs to be done after close() + // because compound Bloom filters might be finalized as part of closing. + if (LOG.isTraceEnabled()) { + LOG.trace((hasGeneralBloom ? "" : "NO ") + "General Bloom and " + + (hasDeleteFamilyBloom ? 
"" : "NO ") + "DeleteFamily" + " was added to HFile " + + getPath()); + } + + } + + private void appendFileInfo(byte[] key, byte[] value) throws IOException { + writer.appendFileInfo(key, value); + } + + /** + * For use in testing. + */ + private HFile.Writer getHFileWriter() { + return writer; } - return new Path(dir, dash.matcher(UUID.randomUUID().toString()).replaceAll("")); } @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "ICAST_INTEGER_MULTIPLY_CAST_TO_LONG", @@ -418,7 +799,9 @@ public static class Builder { private BloomType bloomType = BloomType.NONE; private long maxKeyCount = 0; private Path dir; - private Path filePath; + private Path liveFilePath; + private Path historicalFilePath; + private InetSocketAddress[] favoredNodes; private HFileContext fileContext; private boolean shouldDropCacheBehind; @@ -430,6 +813,10 @@ public static class Builder { // store files which are not recorded in the SFT, but for the newly created store file writer, // they are not tracked in SFT, so here we need to record them and treat them specially. private Consumer writerCreationTracker; + private int maxVersions; + private boolean newVersionBehavior; + private CellComparator comparator; + private boolean isCompaction; public Builder(Configuration conf, CacheConfig cacheConf, FileSystem fs) { this.conf = conf; @@ -465,7 +852,7 @@ public Builder withOutputDir(Path dir) { */ public Builder withFilePath(Path filePath) { Preconditions.checkNotNull(filePath); - this.filePath = filePath; + this.liveFilePath = filePath; return this; } @@ -519,17 +906,37 @@ public Builder withWriterCreationTracker(Consumer writerCreationTracker) { return this; } + public Builder withMaxVersions(int maxVersions) { + this.maxVersions = maxVersions; + return this; + } + + public Builder withNewVersionBehavior(boolean newVersionBehavior) { + this.newVersionBehavior = newVersionBehavior; + return this; + } + + public Builder withCellComparator(CellComparator comparator) { + this.comparator = comparator; + return this; + } + + public Builder withIsCompaction(boolean isCompaction) { + this.isCompaction = isCompaction; + return this; + } + /** * Create a store file writer. Client is responsible for closing file when done. If metadata, * add BEFORE closing using {@link StoreFileWriter#appendMetadata}. */ public StoreFileWriter build() throws IOException { - if ((dir == null ? 0 : 1) + (filePath == null ? 0 : 1) != 1) { + if ((dir == null ? 0 : 1) + (liveFilePath == null ? 0 : 1) != 1) { throw new IllegalArgumentException("Either specify parent directory " + "or file path"); } if (dir == null) { - dir = filePath.getParent(); + dir = liveFilePath.getParent(); } if (!fs.exists(dir)) { @@ -545,7 +952,7 @@ public StoreFileWriter build() throws IOException { } CommonFSUtils.setStoragePolicy(this.fs, dir, policyName); - if (filePath == null) { + if (liveFilePath == null) { // The stored file and related blocks will used the directory based StoragePolicy. // Because HDFS DistributedFileSystem does not support create files with storage policy // before version 3.3.0 (See HDFS-13209). 
Use child dir here is to make stored files @@ -560,21 +967,37 @@ public StoreFileWriter build() throws IOException { } CommonFSUtils.setStoragePolicy(this.fs, dir, fileStoragePolicy); } - filePath = getUniqueFile(fs, dir); + liveFilePath = getUniqueFile(fs, dir); if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) { bloomType = BloomType.NONE; } } + + if ( + isCompaction && conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, + DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES) + ) { + historicalFilePath = getUniqueFile(fs, dir); + LOG.info("Dual file compaction is enabled liveFilePath " + liveFilePath + + " historicalFilePath " + historicalFilePath); + } else { + LOG.info("Dual file compaction is not enabled liveFilePath " + liveFilePath); + } + // make sure we call this before actually create the writer // in fact, it is not a big deal to even add an inexistent file to the track, as we will never // try to delete it and finally we will clean the tracker up after compaction. But if the file // cleaner find the file but we haven't recorded it yet, it may accidentally delete the file // and cause problem. if (writerCreationTracker != null) { - writerCreationTracker.accept(filePath); + writerCreationTracker.accept(liveFilePath); + if (historicalFilePath != null) { + writerCreationTracker.accept(historicalFilePath); + } } - return new StoreFileWriter(fs, filePath, conf, cacheConf, bloomType, maxKeyCount, - favoredNodes, fileContext, shouldDropCacheBehind, compactedFilesSupplier); + return new StoreFileWriter(fs, liveFilePath, historicalFilePath, conf, cacheConf, bloomType, + maxKeyCount, favoredNodes, fileContext, shouldDropCacheBehind, compactedFilesSupplier, + comparator, maxVersions, newVersionBehavior); } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index eabb65ad2802..89d4aa34e78c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -225,7 +225,7 @@ private void addCurrentScanners(List scanners) { } private static boolean isOnlyLatestVersionScan(Scan scan) { - // No need to check for Scan#getMaxVersions because live version files generated by dual file + // No need to check for Scan#getMaxVersions because live version files generated by store file // writer retains max versions specified in ColumnFamilyDescriptor for the given CF return !scan.isRaw() && scan.getTimeRange().getMax() == HConstants.LATEST_TIMESTAMP; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java index e482009f84a4..ea15c320d35c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hbase.regionserver.compactions; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_ENABLE_DUAL_FILE_WRITER; +import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES; +import static 
org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; @@ -147,12 +147,12 @@ public class CompactionConfiguration { minFilesToCompact = Math.max(2, conf.getInt(HBASE_HSTORE_COMPACTION_MIN_KEY, conf.getInt(HBASE_HSTORE_COMPACTION_MIN_KEY_OLD, 3))); if ( - conf.getBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, - DEFAULT_ENABLE_DUAL_FILE_WRITER) + conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, + DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES) ) { - // If DualFileWriter is enabled, we bump up the min value by one as DualFileWriter compacts - // files into two files, live and historical, instead of one. This also eliminates infinite - // re-compaction when the min value is set to 2 + // If historical file writing is enabled, we bump up the min value by one as DualFileWriter + // compacts files into two files, live and historical, instead of one. This also eliminates + // infinite re-compaction when the min value is set to 2 minFilesToCompact += 1; } maxFilesToCompact = conf.getInt(HBASE_HSTORE_COMPACTION_MAX_KEY, 10); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java index 715b12c36aa0..e58c53c355f4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java @@ -105,7 +105,7 @@ public abstract class Compactor { Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>())); // TODO: depending on Store is not good but, realistically, all compactors currently do. - protected Compactor(Configuration conf, HStore store) { + Compactor(Configuration conf, HStore store) { this.conf = conf; this.store = store; this.compactionKVMax = diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java index 16d2f5a135e1..bcc84230952f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/DefaultCompactor.java @@ -17,45 +17,40 @@ */ package org.apache.hadoop.hbase.regionserver.compactions; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_ENABLE_DUAL_FILE_WRITER; - import java.io.IOException; import java.util.List; import java.util.function.Consumer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.regionserver.DualFileWriter; import org.apache.hadoop.hbase.regionserver.HStore; import org.apache.hadoop.hbase.regionserver.InternalScanner; +import org.apache.hadoop.hbase.regionserver.StoreFileWriter; import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; import org.apache.hadoop.hbase.security.User; import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Compact passed set of files. 
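 * When historical compaction files are enabled, the single {@link StoreFileWriter} created for a
 * compaction transparently writes a live file and, when needed, a separate historical file, so a
 * compaction may commit one or two new paths. As a rough sketch (assuming the default store engine
 * and default compactor, the only combination for which the feature takes effect), the behavior is
 * toggled with:
 *
 * <pre>
 * {@code
 * Configuration conf = HBaseConfiguration.create();
 * conf.setBoolean("hbase.enable.historical.compaction.files", true);
 * }
 * </pre>
 *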
Create an instance and then call * {@link #compact(CompactionRequestImpl, ThroughputController, User)} */ @InterfaceAudience.Private -public class DefaultCompactor extends AbstractMultiOutputCompactor { +public class DefaultCompactor extends Compactor { + private static final Logger LOG = LoggerFactory.getLogger(DefaultCompactor.class); public DefaultCompactor(Configuration conf, HStore store) { super(conf, store); } - private final CellSinkFactory writerFactory = - new CellSinkFactory() { + private final CellSinkFactory writerFactory = + new CellSinkFactory() { @Override - public DualFileWriter createWriter(InternalScanner scanner, FileDetails fd, + public StoreFileWriter createWriter(InternalScanner scanner, FileDetails fd, boolean shouldDropBehind, boolean major, Consumer writerCreationTracker) throws IOException { - boolean enableDualFileWriter = conf.getBoolean( - DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, DEFAULT_ENABLE_DUAL_FILE_WRITER); - DualFileWriter writer = new DualFileWriter(store.getComparator(), - store.getColumnFamilyDescriptor().getMaxVersions(), enableDualFileWriter, - store.getColumnFamilyDescriptor().isNewVersionBehavior()); - initMultiWriter(writer, scanner, fd, shouldDropBehind, major, writerCreationTracker); - return writer; + return DefaultCompactor.this.createWriter(fd, shouldDropBehind, major, + writerCreationTracker); } }; @@ -67,10 +62,31 @@ public List compact(final CompactionRequestImpl request, return compact(request, defaultScannerFactory, writerFactory, throughputController, user); } - protected List commitWriter(DualFileWriter writer, FileDetails fd, + @Override + protected List commitWriter(StoreFileWriter writer, FileDetails fd, CompactionRequestImpl request) throws IOException { - List pathList = - writer.commitWriters(fd.maxSeqId, request.isAllFiles(), request.getFiles()); - return pathList; + List newFiles = writer.getPaths(); + writer.appendMetadata(fd.maxSeqId, request.isAllFiles(), request.getFiles()); + writer.close(); + return newFiles; } + + @Override + protected final void abortWriter(StoreFileWriter writer) throws IOException { + List leftoverFiles = writer.getPaths(); + try { + writer.close(); + } catch (IOException e) { + LOG.warn("Failed to close the writer after an unfinished compaction.", e); + } + try { + for (Path path : leftoverFiles) { + store.getFileSystem().delete(path, false); + } + } catch (IOException e) { + LOG.warn("Failed to delete the leftover file {} after an unfinished compaction.", + leftoverFiles, e); + } + } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileTrackerBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileTrackerBase.java index bdf3b92db65d..794a707062e5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileTrackerBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileTrackerBase.java @@ -185,7 +185,9 @@ public final StoreFileWriter createWriter(CreateStoreFileWriterParams params) th .withFileContext(hFileContext).withShouldDropCacheBehind(params.shouldDropBehind()) .withCompactedFilesSupplier(ctx.getCompactedFilesSupplier()) .withFileStoragePolicy(params.fileStoragePolicy()) - .withWriterCreationTracker(params.writerCreationTracker()); + .withWriterCreationTracker(params.writerCreationTracker()) + .withMaxVersions(ctx.getMaxVersions()).withNewVersionBehavior(ctx.getNewVersionBehavior()) + 
.withCellComparator(ctx.getComparator()).withIsCompaction(params.isCompaction()); return builder.build(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java index 2c7496023439..c0bc72079cb7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java @@ -20,7 +20,6 @@ import static org.apache.hadoop.hbase.HBaseTestingUtil.START_KEY; import static org.apache.hadoop.hbase.HBaseTestingUtil.START_KEY_BYTES; import static org.apache.hadoop.hbase.HBaseTestingUtil.fam1; -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; import static org.apache.hadoop.hbase.regionserver.Store.PRIORITY_USER; import static org.apache.hadoop.hbase.regionserver.compactions.CloseChecker.SIZE_LIMIT_KEY; import static org.apache.hadoop.hbase.regionserver.compactions.CloseChecker.TIME_LIMIT_KEY; @@ -37,7 +36,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -87,8 +85,6 @@ import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -96,7 +92,6 @@ /** * Test compaction framework and common functions */ -@RunWith(Parameterized.class) @Category({ RegionServerTests.class, MediumTests.class }) public class TestCompaction { @@ -119,14 +114,6 @@ public class TestCompaction { private static final long MAX_FILES_TO_COMPACT = 10; private final byte[] FAMILY = Bytes.toBytes("cf"); - @Parameterized.Parameters(name = "{index}: enableDualFileWriter={0}") - public static Iterable data() { - return Arrays.asList(new Object[] { true }, new Object[] { false }); - } - - @Parameterized.Parameter - public boolean enableDualFileWriter; - /** constructor */ public TestCompaction() { super(); @@ -137,7 +124,6 @@ public TestCompaction() { conf.setLong(HConstants.COMPACTION_SCANNER_SIZE_MAX, 10L); conf.set(CompactionThroughputControllerFactory.HBASE_THROUGHPUT_CONTROLLER_KEY, NoLimitThroughputController.class.getName()); - conf.setBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, enableDualFileWriter); compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3); secondRowBytes = START_KEY_BYTES.clone(); @@ -150,9 +136,8 @@ public TestCompaction() { @Before public void setUp() throws Exception { - TableDescriptorBuilder builder = - UTIL.createModifyableTableDescriptor(name.getMethodName().replaceAll("[^A-Za-z0-9-_]", "_")); - if (name.getMethodName().startsWith("testCompactionSeqId")) { + TableDescriptorBuilder builder = UTIL.createModifyableTableDescriptor(name.getMethodName()); + if (name.getMethodName().equals("testCompactionSeqId")) { UTIL.getConfiguration().set("hbase.hstore.compaction.kv.max", "10"); UTIL.getConfiguration().set(DefaultStoreEngine.DEFAULT_COMPACTOR_CLASS_KEY, DummyCompactor.class.getName()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java index b72b5d822ce7..ad478bceac71 100644 --- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java @@ -127,9 +127,9 @@ public MyCompactor(Configuration conf, HStore store) { } @Override - protected List commitWriter(DualFileWriter writer, FileDetails fd, + protected List commitWriter(StoreFileWriter writer, FileDetails fd, CompactionRequestImpl request) throws IOException { - HFileWriterImpl writerImpl = (HFileWriterImpl) writer.getLiveVersionHFileWriter(); + HFileWriterImpl writerImpl = (HFileWriterImpl) writer.getHFileWriter(); Cell cell = writerImpl.getLastCell(); // The cell should be backend with an KeyOnlyKeyValue. IS_LAST_CELL_ON_HEAP.set(cell instanceof KeyOnlyKeyValue); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java new file mode 100644 index 000000000000..cb0a5a9938af --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java @@ -0,0 +1,462 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Random; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +
+/** + * The store file writer does not do any compaction; each cell is written to either the live or the + * historical file. Regular (i.e., not-raw) scans that read the latest put cells scan only live + * files. To ensure the correctness of the store file writer, we need to verify that live files + * include all live cells. This test verifies that indirectly as follows. The test creates two + * tables, each with one region and one store. Dual file writing (live vs historical) is enabled on + * only one of the tables. The test generates the exact same set of mutations on both tables. These + * mutations include all types of cells, and the cells are written to multiple files using multiple + * memstore flushes. After writing all cells, the test first verifies that both tables return the + * same set of cells for regular and raw scans. The same verification is then repeated after the + * tables are minor compacted and finally major compacted. The test also verifies that flushes do + * not generate historical files and that historical files are generated only when historical file + * generation is enabled (by the config hbase.enable.historical.compaction.files). The test + * maintains the information about the inserted cells in memory and compares this in-memory state + * with the state on disk. Mismatches are currently only logged instead of asserted on, as the test + * finds inconsistencies. These inconsistencies (data integrity issues) are due to the current + * mishandling of version delete markers in HBase (see HBASE-XXXXXX).
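+ * <p>
+ * As a rough sketch of the setup (mirroring the {@code createTable} helper below), dual file
+ * writing is enabled for one of the two tables through its table descriptor:
+ * <pre>
+ * {@code
+ * TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName)
+ *   .setColumnFamily(familyDescriptor)
+ *   .setValue("hbase.enable.historical.compaction.files", "true");
+ * }
+ * </pre>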
+ */ +@Category({ MediumTests.class, RegionServerTests.class }) +@RunWith(Parameterized.class) +public class TestStoreFileWriter { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestStoreFileWriter.class); + private static final Logger LOG = LoggerFactory.getLogger(RegionScannerImpl.class); + private final int ROW_NUM = 100; + private final Random RANDOM = new Random(11); + private final HBaseTestingUtil testUtil = new HBaseTestingUtil(); + private HRegion[] regions = new HRegion[2]; + private final byte[][] qualifiers = + { Bytes.toBytes("0"), Bytes.toBytes("1"), Bytes.toBytes("2") }; + private ArrayList>> insertedCells; + private TableName[] tableName = new TableName[2]; + private final Configuration conf = testUtil.getConfiguration(); + private int flushCount = 0; + + @Parameterized.Parameter(0) + public KeepDeletedCells keepDeletedCells; + @Parameterized.Parameter(1) + public int maxVersions; + + @Parameterized.Parameters(name = "keepDeletedCells={0}, maxVersions={1}") + public static synchronized Collection data() { + return Arrays.asList(new Object[][] { { KeepDeletedCells.FALSE, 1 }, + { KeepDeletedCells.FALSE, 2 }, { KeepDeletedCells.FALSE, 2 }, { KeepDeletedCells.TRUE, 1 }, + { KeepDeletedCells.TRUE, 2 }, { KeepDeletedCells.TRUE, 3 } }); + } + + private static class CellInfo { + long timestamp; + Cell.Type type; + int flushCount; + + CellInfo(long timestamp, Cell.Type type, int flushCount) { + this.timestamp = timestamp; + this.type = type; + this.flushCount = flushCount; + } + } + + private void createTable(int index, boolean enableDualFileWriter) throws IOException { + tableName[index] = TableName.valueOf(getClass().getSimpleName() + "_" + index); + ColumnFamilyDescriptor familyDescriptor = + ColumnFamilyDescriptorBuilder.newBuilder(HBaseTestingUtil.fam1).setMaxVersions(maxVersions) + .setKeepDeletedCells(keepDeletedCells).build(); + TableDescriptorBuilder builder = + TableDescriptorBuilder.newBuilder(tableName[index]).setColumnFamily(familyDescriptor) + .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, Boolean.toString(enableDualFileWriter)); + testUtil.createTable(builder.build(), null); + regions[index] = testUtil.getMiniHBaseCluster().getRegions(tableName[index]).get(0); + } + + @Before + public void setUp() throws Exception { + conf.setInt(CompactionConfiguration.HBASE_HSTORE_COMPACTION_MAX_KEY, 6); + testUtil.startMiniCluster(); + createTable(0, false); + createTable(1, true); + insertedCells = new ArrayList<>(ROW_NUM); + for (int r = 0; r < ROW_NUM; r++) { + insertedCells.add(new ArrayList<>(qualifiers.length)); + for (int q = 0; q < qualifiers.length; q++) { + insertedCells.get(r).add(new ArrayList<>(10)); + } + } + } + + @After + public void tearDown() throws Exception { + this.testUtil.shutdownMiniCluster(); + testUtil.cleanupTestDir(); + } + + @Test + public void testCompactedFiles() throws Exception { + Scan scan = new Scan(); + scan.readAllVersions(); + + for (int i = 0; i < 10; i++) { + putRows(ROW_NUM / 2); + deleteRows(ROW_NUM / 8); + deleteRowVersions(ROW_NUM / 8); + deleteColumns(ROW_NUM / 8); + deleteColumnVersions(ROW_NUM / 8); + flushRegion(); + } + + verifyCells(scan, getLiveCellCount(), getAllCellCount(), "Flush"); + + HStore[] stores = new HStore[2]; + + stores[0] = regions[0].getStore(HBaseTestingUtil.fam1); + assertEquals(flushCount, stores[0].getStorefilesCount()); + + stores[1] = regions[1].getStore(HBaseTestingUtil.fam1); + assertEquals(flushCount, stores[1].getStorefilesCount()); + + 
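+ // Minor compaction: the single-file store (regions[0]) is expected to replace the compacted
+ // files with one new file, while the dual-file store (regions[1]) is expected to replace them
+ // with two files, one live and one historical. The assertions below check exactly that.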
regions[0].compact(false); + assertEquals(flushCount - stores[0].getCompactedFiles().size() + 1, + stores[0].getStorefilesCount()); + + regions[1].compact(false); + assertEquals(flushCount - stores[1].getCompactedFiles().size() + 2, + stores[1].getStorefilesCount()); + + verifyCells(scan, getLiveCellCount(), getAllCellCount(), "Minor Compaction"); + + regions[0].compact(true); + assertEquals(1, stores[0].getStorefilesCount()); + + regions[1].compact(true); + assertEquals(keepDeletedCells == KeepDeletedCells.FALSE ? 1 : 2, + stores[1].getStorefilesCount()); + + verifyCells(scan, getLiveCellCount(), + keepDeletedCells == KeepDeletedCells.FALSE ? getLiveCellCount() : getAllCellCount(), + "Major Compaction"); + } + + private void verifyCells(Scan scan, int expectedLiveCellCount, int expectedAllCellCount, + String phase) throws Exception { + scan.setRaw(false); + LOG.info("[" + phase + "] Live cell count expected: " + expectedLiveCellCount + " actual: " + + scanAndVerifyAndCountCells(regions[0])); + scan.setRaw(true); + LOG.info("[" + phase + "] All cell count expected: " + expectedAllCellCount + " actual: " + + scanAndCompareAndCountCells(regions[0], regions[1], scan)); + } + + private int getLiveCellCount(int row, int q) { + int count = 0; + List cellTypeList = insertedCells.get(row).get(q); + for (int version = 1; version <= maxVersions; version++) { + if (getPutCellTimestamp(cellTypeList, version) != -1) { + count++; + } else { + break; + } + } + return count; + } + + private int getLiveCellCount(int row) { + int count = 0; + for (int q = 0; q < qualifiers.length; q++) { + count += getLiveCellCount(row, q); + } + return count; + } + + private int getLiveCellCount() { + int count = 0; + for (int r = 0; r < ROW_NUM; r++) { + count += getLiveCellCount(r); + } + return count; + } + + private int getAllCellCount() { + int count = 0; + for (int r = 0; r < ROW_NUM; r++) { + for (int q = 0; q < qualifiers.length; q++) { + count += insertedCells.get(r).get(q).size(); + } + } + return count; + } + + private void flushRegion() throws Exception { + regions[0].flush(true); + regions[1].flush(true); + flushCount++; + } + + private Long getRowTimestamp(int row) { + Long maxTimestamp = null; + for (int q = 0; q < qualifiers.length; q++) { + int size = insertedCells.get(row).get(q).size(); + if (size > 0) { + CellInfo mostRecentCellInfo = insertedCells.get(row).get(q).get(size - 1); + if (mostRecentCellInfo.type == Cell.Type.Put) { + if (maxTimestamp == null || maxTimestamp < mostRecentCellInfo.timestamp) { + maxTimestamp = mostRecentCellInfo.timestamp; + } + } + } + } + return maxTimestamp; + } + + private void putRows(int rowCount) throws Exception { + int row; + long timestamp = System.currentTimeMillis(); + for (int r = 0; r < rowCount; r++) { + row = RANDOM.nextInt(ROW_NUM); + Put put = new Put(Bytes.toBytes(String.valueOf(row)), timestamp); + for (int q = 0; q < qualifiers.length; q++) { + put.addColumn(HBaseTestingUtil.fam1, qualifiers[q], + Bytes.toBytes(String.valueOf(timestamp))); + insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put, flushCount)); + } + regions[0].put(put); + regions[1].put(put); + long newTimestamp = System.currentTimeMillis(); + if (timestamp == newTimestamp) { + Thread.sleep(1); + newTimestamp = System.currentTimeMillis(); + assert (timestamp < newTimestamp); + } + timestamp = newTimestamp; + } + } + + private void deleteRows(int rowCount) throws Exception { + int row; + for (int r = 0; r < rowCount; r++) { + long timestamp = System.currentTimeMillis(); + 
row = RANDOM.nextInt(ROW_NUM); + Delete delete = new Delete(Bytes.toBytes(String.valueOf(row))); + regions[0].delete(delete); + regions[1].delete(delete); + for (int q = 0; q < qualifiers.length; q++) { + insertedCells.get(row).get(q) + .add(new CellInfo(timestamp, Cell.Type.DeleteFamily, flushCount)); + } + } + } + + private void deleteSingleRowVersion(int row, long timestamp) throws IOException { + Delete delete = new Delete(Bytes.toBytes(String.valueOf(row))); + delete.addFamilyVersion(HBaseTestingUtil.fam1, timestamp); + regions[0].delete(delete); + regions[1].delete(delete); + for (int q = 0; q < qualifiers.length; q++) { + insertedCells.get(row).get(q) + .add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion, flushCount)); + } + } + + private void deleteRowVersions(int rowCount) throws Exception { + int row; + for (int r = 0; r < rowCount; r++) { + row = RANDOM.nextInt(ROW_NUM); + Long timestamp = getRowTimestamp(row); + if (timestamp != null) { + deleteSingleRowVersion(row, timestamp); + } + } + // Just insert one more delete marker possibly does not delete any row version + row = RANDOM.nextInt(ROW_NUM); + deleteSingleRowVersion(row, System.currentTimeMillis()); + } + + private void deleteColumns(int rowCount) throws Exception { + int row; + for (int r = 0; r < rowCount; r++) { + long timestamp = System.currentTimeMillis(); + row = RANDOM.nextInt(ROW_NUM); + int q = RANDOM.nextInt(qualifiers.length); + Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)), timestamp); + delete.addColumns(HBaseTestingUtil.fam1, qualifiers[q], timestamp); + regions[0].delete(delete); + regions[1].delete(delete); + insertedCells.get(row).get(q) + .add(new CellInfo(timestamp, Cell.Type.DeleteColumn, flushCount)); + } + } + + private void deleteColumnVersions(int rowCount) throws Exception { + int row; + for (int r = 0; r < rowCount; r++) { + row = RANDOM.nextInt(ROW_NUM); + Long timestamp = getRowTimestamp(row); + if (timestamp != null) { + Delete delete = new Delete(Bytes.toBytes(String.valueOf(row))); + int q = RANDOM.nextInt(qualifiers.length); + delete.addColumn(HBaseTestingUtil.fam1, qualifiers[q], timestamp); + regions[0].delete(delete); + regions[1].delete(delete); + insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete, flushCount)); + } + } + } + + private long getPutCellTimestamp(List cellList, int version) { + if (cellList.isEmpty()) { + return -1; + } + int currentVersion = 0; + CellInfo previousDeleteVersionCellInfo = null; + int size = cellList.size(); + for (int i = size - 1; i >= 0; i--) { + CellInfo cellInfo = cellList.get(i); + if (cellInfo.type == Cell.Type.Put) { + if (previousDeleteVersionCellInfo != null) { + if (previousDeleteVersionCellInfo.timestamp != cellInfo.timestamp) { + previousDeleteVersionCellInfo = null; + currentVersion++; + if (currentVersion == version) { + return cellInfo.timestamp; + } + } + // Skip this cell as it is deleted by a family version delete marker + } else { + currentVersion++; + if (currentVersion == version) { + return cellInfo.timestamp; + } + } + } else + if (cellInfo.type == Cell.Type.DeleteFamily || cellInfo.type == Cell.Type.DeleteColumn) { + return -1; + } else { + previousDeleteVersionCellInfo = cellInfo; + } + } + return -1; + } + + private int scanAndVerifyAndCountCells(HRegion region) throws Exception { + int cellCount = 0; + Scan scan = new Scan(); + scan.readAllVersions(); + + try (RegionScanner regionScanner = region.getScanner(scan)) { + boolean hasMore; + do { + List rowList = new ArrayList<>(); + 
hasMore = regionScanner.nextRaw(rowList); + cellCount += rowList.size(); + int previousColumn = -1; + int version = 1; + int row = 0; + for (Cell cell : rowList) { + row = Integer.valueOf(Bytes.toString(CellUtil.cloneRow(cell))); + int q = Integer.valueOf(Bytes.toString(CellUtil.cloneQualifier(cell))); + if (q == previousColumn) { + version++; + } else { + previousColumn = q; + version = 1; + } + long expected = getPutCellTimestamp(insertedCells.get(row).get(q), version); + long actual = cell.getTimestamp(); + if (expected != actual) { + LOG.info("Row: " + row + " qualifier: " + q + " cell timestamp expected: " + expected + + " actual: " + actual); + } + } + if (!rowList.isEmpty() && rowList.size() != getLiveCellCount(row)) { + LOG.info("Row: " + row + " live cell count expected: " + getLiveCellCount(row) + + " actual: " + rowList.size()); + } + } while (hasMore); + } + return cellCount; + } + + private int scanAndCompareAndCountCells(HRegion firstRegion, HRegion secondRegion, Scan scan) + throws Exception { + int cellCount = 0; + try (RegionScanner firstRS = firstRegion.getScanner(scan)) { + try (RegionScanner secondRS = secondRegion.getScanner(scan)) { + boolean firstHasMore; + boolean secondHasMore; + do { + List firstRowList = new ArrayList<>(); + List secondRowList = new ArrayList<>(); + firstHasMore = firstRS.nextRaw(firstRowList); + secondHasMore = secondRS.nextRaw(secondRowList); + assertEquals(firstRowList.size(), secondRowList.size()); + cellCount += firstRowList.size(); + int size = firstRowList.size(); + for (int i = 0; i < size; i++) { + Cell firstCell = firstRowList.get(i); + Cell secondCell = secondRowList.get(i); + assert (CellUtil.matchingRowColumn(firstCell, secondCell)); + assert (firstCell.getType() == secondCell.getType()); + assert (Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(firstCell))); + } + } while (firstHasMore && secondHasMore); + assertEquals(firstHasMore, secondHasMore); + } + } + return cellCount; + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java index a5cf9bd2d759..5359dec2e64d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestCompactor.java @@ -189,27 +189,6 @@ public void verifyKvs(KeyValue[][] kvss, boolean allFiles, List boundaries } } - public void verifyKvs(KeyValue[][] kvss) { - assertEquals(kvss.length, writers.size()); - for (int i = 0; i < kvss.length; ++i) { - KeyValue[] kvs = kvss[i]; - Writer w = writers.get(i); - assertEquals(kvs.length, w.kvs.size()); - for (int j = 0; j < kvs.length; ++j) { - assertEquals(kvs[j], w.kvs.get(j)); - } - } - } - - public void verifyKv(KeyValue[] kvs) { - assertEquals(1, writers.size()); - Writer w = writers.get(0); - assertEquals(kvs.length, w.kvs.size()); - for (int i = 0; i < kvs.length; ++i) { - assertEquals(kvs[i], w.kvs.get(i)); - } - } - public List getWriters() { return writers; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java deleted file mode 100644 index edcec5e25e37..000000000000 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestDualFileWriter.java +++ /dev/null @@ -1,215 +0,0 @@ 
-/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.regionserver.compactions; - -import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY; -import static org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.createDummyRequest; -import static org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.createDummyStoreFile; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.OptionalLong; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.CellComparatorImpl; -import org.apache.hadoop.hbase.HBaseClassTestRule; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; -import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; -import org.apache.hadoop.hbase.client.RegionInfoBuilder; -import org.apache.hadoop.hbase.regionserver.CreateStoreFileWriterParams; -import org.apache.hadoop.hbase.regionserver.HStore; -import org.apache.hadoop.hbase.regionserver.HStoreFile; -import org.apache.hadoop.hbase.regionserver.InternalScanner; -import org.apache.hadoop.hbase.regionserver.ScanInfo; -import org.apache.hadoop.hbase.regionserver.ScanType; -import org.apache.hadoop.hbase.regionserver.StoreEngine; -import org.apache.hadoop.hbase.regionserver.StoreFileScanner; -import org.apache.hadoop.hbase.regionserver.StoreUtils; -import org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.Scanner; -import org.apache.hadoop.hbase.regionserver.compactions.TestCompactor.StoreFileWritersCapture; -import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; -import org.apache.hadoop.hbase.testclassification.RegionServerTests; -import org.apache.hadoop.hbase.testclassification.SmallTests; -import org.apache.hadoop.hbase.util.Bytes; -import org.junit.ClassRule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) -@Category({ RegionServerTests.class, SmallTests.class }) -public class TestDualFileWriter { - - @ClassRule - public static final HBaseClassTestRule 
CLASS_RULE = - HBaseClassTestRule.forClass(TestDualFileWriter.class); - - private static final byte[] NAME_OF_THINGS = Bytes.toBytes("foo"); - - private static final TableName TABLE_NAME = TableName.valueOf(NAME_OF_THINGS, NAME_OF_THINGS); - - private static final KeyValue KV_A_DeleteFamilyVersion = new KeyValue(Bytes.toBytes("123"), - Bytes.toBytes("0"), null, 300L, KeyValue.Type.DeleteFamilyVersion); - private static final KeyValue KV_A_1 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("a"), 300L, KeyValue.Type.Put); - private static final KeyValue KV_A_2 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("a"), 200L, KeyValue.Type.Put); - private static final KeyValue KV_A_3 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("a"), 100L, KeyValue.Type.Put); - - private static final KeyValue KV_B_DeleteColumn = new KeyValue(Bytes.toBytes("123"), - Bytes.toBytes("0"), Bytes.toBytes("b"), 200L, KeyValue.Type.DeleteColumn); - private static final KeyValue KV_B = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("b"), 100L, KeyValue.Type.Put); - - private static final KeyValue KV_C = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("c"), 100L, KeyValue.Type.Put); - - private static final KeyValue KV_D_1 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("d"), 200L, KeyValue.Type.Put); - private static final KeyValue KV_D_2 = new KeyValue(Bytes.toBytes("123"), Bytes.toBytes("0"), - Bytes.toBytes("d"), 100L, KeyValue.Type.Put); - - private static final KeyValue KV_E_F_DeleteFamily = - new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), null, 200L, KeyValue.Type.DeleteFamily); - private static final KeyValue KV_E = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), - Bytes.toBytes("e"), 100L, KeyValue.Type.Put); - private static final KeyValue KV_F = new KeyValue(Bytes.toBytes("456"), Bytes.toBytes("0"), - Bytes.toBytes("f"), 100L, KeyValue.Type.Put); - private static final KeyValue KV_G_DeleteFamily = - new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), null, 400L, KeyValue.Type.DeleteFamily); - private static final KeyValue KV_G_DeleteFamilyVersion = new KeyValue(Bytes.toBytes("789"), - Bytes.toBytes("0"), null, 100L, KeyValue.Type.DeleteFamilyVersion); - private static final KeyValue KV_G_1 = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), - Bytes.toBytes("g"), 500L, KeyValue.Type.Put); - private static final KeyValue KV_G_DeleteColumn = - new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), null, 300L, KeyValue.Type.DeleteColumn); - private static final KeyValue KV_G_DeleteColumnVersion = - new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), null, 200L, KeyValue.Type.Delete); - private static final KeyValue KV_G_2 = new KeyValue(Bytes.toBytes("789"), Bytes.toBytes("0"), - Bytes.toBytes("g"), 100L, KeyValue.Type.Put); - - @Parameters(name = "{index}: usePrivateReaders={0}, keepDeletedCells={1}") - public static Iterable data() { - return Arrays.asList(new Object[] { true }, new Object[] { false }); - } - - @Parameter(0) - public boolean usePrivateReaders; - - private DefaultCompactor createCompactor(StoreFileWritersCapture writers, final KeyValue[] input, - List storefiles) throws Exception { - Configuration conf = HBaseConfiguration.create(); - conf.setBoolean("hbase.regionserver.compaction.private.readers", usePrivateReaders); - conf.setBoolean(DEFAULT_COMPACTION_ENABLE_DUAL_FILE_WRITER_KEY, true); - final Scanner scanner = new 
Scanner(input); - // Create store mock that is satisfactory for compactor. - ColumnFamilyDescriptor familyDescriptor = ColumnFamilyDescriptorBuilder.of(NAME_OF_THINGS); - - ScanInfo si = - new ScanInfo(conf, familyDescriptor, Long.MAX_VALUE, 0, CellComparatorImpl.COMPARATOR); - HStore store = mock(HStore.class); - when(store.getStorefiles()).thenReturn(storefiles); - when(store.getColumnFamilyDescriptor()).thenReturn(familyDescriptor); - when(store.getScanInfo()).thenReturn(si); - when(store.areWritesEnabled()).thenReturn(true); - when(store.getFileSystem()).thenReturn(mock(FileSystem.class)); - when(store.getRegionInfo()).thenReturn(RegionInfoBuilder.newBuilder(TABLE_NAME).build()); - StoreEngine storeEngine = mock(StoreEngine.class); - when(storeEngine.createWriter(any(CreateStoreFileWriterParams.class))).thenAnswer(writers); - when(store.getStoreEngine()).thenReturn(storeEngine); - when(store.getComparator()).thenReturn(CellComparatorImpl.COMPARATOR); - OptionalLong maxSequenceId = StoreUtils.getMaxSequenceIdInList(storefiles); - when(store.getMaxSequenceId()).thenReturn(maxSequenceId); - - return new DefaultCompactor(conf, store) { - @Override - protected InternalScanner createScanner(HStore store, ScanInfo scanInfo, - List scanners, long smallestReadPoint, long earliestPutTs, - byte[] dropDeletesFromRow, byte[] dropDeletesToRow) throws IOException { - return scanner; - } - - @Override - protected InternalScanner createScanner(HStore store, ScanInfo scanInfo, - List scanners, ScanType scanType, long smallestReadPoint, - long earliestPutTs) throws IOException { - return scanner; - } - }; - } - - private void verify(KeyValue[] input, KeyValue[][] output) throws Exception { - StoreFileWritersCapture writers = new StoreFileWritersCapture(); - HStoreFile sf1 = createDummyStoreFile(1L); - HStoreFile sf2 = createDummyStoreFile(2L); - DefaultCompactor dfc = createCompactor(writers, input, Arrays.asList(sf1, sf2)); - List paths = dfc.compact(new CompactionRequestImpl(Arrays.asList(sf1)), - NoLimitThroughputController.INSTANCE, null); - writers.verifyKvs(output); - assertEquals(output.length, paths.size()); - } - - @SuppressWarnings("unchecked") - private static T[] a(T... 
a) { - return a; - } - - @Test - public void test() throws Exception { - verify( - a(KV_A_DeleteFamilyVersion, KV_A_1, KV_A_2, KV_A_3, KV_B_DeleteColumn, KV_B, KV_C, KV_D_1, - KV_D_2, // Row 123 - KV_E_F_DeleteFamily, KV_E, KV_F, // Row 456 - KV_G_DeleteFamily, KV_G_DeleteFamilyVersion, KV_G_1, KV_G_DeleteColumn, - KV_G_DeleteColumnVersion, KV_G_2), // Row 789 - a(a(KV_A_DeleteFamilyVersion, KV_A_2, KV_B_DeleteColumn, KV_C, KV_D_1, KV_E_F_DeleteFamily, - KV_G_DeleteFamily, KV_G_1), // Latest versions - a(KV_A_1, KV_A_3, KV_B, KV_D_2, KV_E, KV_F, KV_G_DeleteFamilyVersion, KV_G_DeleteColumn, - KV_G_DeleteColumnVersion, KV_G_2))); - } - - @Test - public void testEmptyOutputFile() throws Exception { - StoreFileWritersCapture writers = new StoreFileWritersCapture(); - CompactionRequestImpl request = createDummyRequest(); - DefaultCompactor dfc = - createCompactor(writers, new KeyValue[0], new ArrayList<>(request.getFiles())); - List paths = dfc.compact(request, NoLimitThroughputController.INSTANCE, null); - assertEquals(1, paths.size()); - List dummyWriters = writers.getWriters(); - assertEquals(1, dummyWriters.size()); - StoreFileWritersCapture.Writer dummyWriter = dummyWriters.get(0); - assertTrue(dummyWriter.kvs.isEmpty()); - assertTrue(dummyWriter.hasMetadata); - } -} From 266b4c3e3649fc3c1e25fb3936731a24cb1e65cb Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Thu, 14 Mar 2024 15:00:26 -0700 Subject: [PATCH 17/27] Fixed the test failure due to a log message --- .../org/apache/hadoop/hbase/regionserver/StoreFileWriter.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java index e982367acbd7..c69fe76707ea 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java @@ -271,14 +271,10 @@ BloomFilterWriter getGeneralBloomWriter() { } public void close() throws IOException { - LOG.info( - "closing liveFileWriter " + liveFileWriter.getPath() + " pos " + liveFileWriter.getPos()); liveFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(true)); liveFileWriter.close(); if (historicalFileWriter != null) { historicalFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(false)); - LOG.info("closing historicalFileWriter " + historicalFileWriter.getPath() + " pos " - + historicalFileWriter.getPos()); historicalFileWriter.close(); } } From 3dd18ecabc66d52ac6bb6e5db21bda6bb8f4a289 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Thu, 14 Mar 2024 21:51:01 -0700 Subject: [PATCH 18/27] Test code minor fix and edits --- .../regionserver/TestStoreFileWriter.java | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java index cb0a5a9938af..a49c72130884 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.MemoryCompactionPolicy; import 
org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; @@ -84,6 +85,8 @@ public class TestStoreFileWriter { private HRegion[] regions = new HRegion[2]; private final byte[][] qualifiers = { Bytes.toBytes("0"), Bytes.toBytes("1"), Bytes.toBytes("2") }; + // This keeps track of all cells. It is a list of rows, each row is a list of columns, each + // column is a list of CellInfo object private ArrayList>> insertedCells; private TableName[] tableName = new TableName[2]; private final Configuration conf = testUtil.getConfiguration(); @@ -97,10 +100,13 @@ public class TestStoreFileWriter { @Parameterized.Parameters(name = "keepDeletedCells={0}, maxVersions={1}") public static synchronized Collection data() { return Arrays.asList(new Object[][] { { KeepDeletedCells.FALSE, 1 }, - { KeepDeletedCells.FALSE, 2 }, { KeepDeletedCells.FALSE, 2 }, { KeepDeletedCells.TRUE, 1 }, + { KeepDeletedCells.FALSE, 2 }, { KeepDeletedCells.FALSE, 3 }, { KeepDeletedCells.TRUE, 1 }, { KeepDeletedCells.TRUE, 2 }, { KeepDeletedCells.TRUE, 3 } }); } + // In memory representation of a cell. We only need to know timestamp and type field for our + // testing for cell. Please note the row for the cell is implicit in insertedCells. The fied + // flushCount is only for debugging private static class CellInfo { long timestamp; Cell.Type type; @@ -128,6 +134,8 @@ private void createTable(int index, boolean enableDualFileWriter) throws IOExcep @Before public void setUp() throws Exception { conf.setInt(CompactionConfiguration.HBASE_HSTORE_COMPACTION_MAX_KEY, 6); + conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY, + String.valueOf(MemoryCompactionPolicy.NONE)); testUtil.startMiniCluster(); createTable(0, false); createTable(1, true); @@ -152,7 +160,7 @@ public void testCompactedFiles() throws Exception { scan.readAllVersions(); for (int i = 0; i < 10; i++) { - putRows(ROW_NUM / 2); + insertRows(ROW_NUM * maxVersions); deleteRows(ROW_NUM / 8); deleteRowVersions(ROW_NUM / 8); deleteColumns(ROW_NUM / 8); @@ -196,7 +204,7 @@ private void verifyCells(Scan scan, int expectedLiveCellCount, int expectedAllCe String phase) throws Exception { scan.setRaw(false); LOG.info("[" + phase + "] Live cell count expected: " + expectedLiveCellCount + " actual: " - + scanAndVerifyAndCountCells(regions[0])); + + scanAndVerifyAndCountLiveCells(regions[0])); scan.setRaw(true); LOG.info("[" + phase + "] All cell count expected: " + expectedAllCellCount + " actual: " + scanAndCompareAndCountCells(regions[0], regions[1], scan)); @@ -237,6 +245,18 @@ private int getAllCellCount() { for (int q = 0; q < qualifiers.length; q++) { count += insertedCells.get(r).get(q).size(); } + // For simplicity, the family delete markers are inserted for all columns instead of + // allocating a separate column for them. 
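+ // Each family-level delete marker therefore appears once per qualifier in insertedCells, but
+ // only once per row in the actual scan results.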
So we need to adjust the count + for (int q = 1; q < qualifiers.length; q++) { + for (CellInfo cellInfo : insertedCells.get(r).get(q)) { + if ( + cellInfo.type == Cell.Type.DeleteFamily + || cellInfo.type == Cell.Type.DeleteFamilyVersion + ) { + count--; + } + } + } } return count; } @@ -263,7 +283,7 @@ private Long getRowTimestamp(int row) { return maxTimestamp; } - private void putRows(int rowCount) throws Exception { + private void insertRows(int rowCount) throws Exception { int row; long timestamp = System.currentTimeMillis(); for (int r = 0; r < rowCount; r++) { @@ -294,6 +314,9 @@ private void deleteRows(int rowCount) throws Exception { Delete delete = new Delete(Bytes.toBytes(String.valueOf(row))); regions[0].delete(delete); regions[1].delete(delete); + // For simplicity, the family delete markers are inserted for all columns (instead of + // allocating a separate column for them) in the memory representation of the data stored + // to HBase for (int q = 0; q < qualifiers.length; q++) { insertedCells.get(row).get(q) .add(new CellInfo(timestamp, Cell.Type.DeleteFamily, flushCount)); @@ -306,6 +329,9 @@ private void deleteSingleRowVersion(int row, long timestamp) throws IOException delete.addFamilyVersion(HBaseTestingUtil.fam1, timestamp); regions[0].delete(delete); regions[1].delete(delete); + // For simplicity, the family delete version markers are inserted for all columns (instead of + // allocating a separate column for them) in the memory representation of the data stored + // to HBase for (int q = 0; q < qualifiers.length; q++) { insertedCells.get(row).get(q) .add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion, flushCount)); @@ -369,6 +395,8 @@ private long getPutCellTimestamp(List cellList, int version) { if (cellInfo.type == Cell.Type.Put) { if (previousDeleteVersionCellInfo != null) { if (previousDeleteVersionCellInfo.timestamp != cellInfo.timestamp) { + // Delete marker for this column is superfluous as its timestamp does not match with + // the next put cell previousDeleteVersionCellInfo = null; currentVersion++; if (currentVersion == version) { @@ -392,7 +420,7 @@ private long getPutCellTimestamp(List cellList, int version) { return -1; } - private int scanAndVerifyAndCountCells(HRegion region) throws Exception { + private int scanAndVerifyAndCountLiveCells(HRegion region) throws Exception { int cellCount = 0; Scan scan = new Scan(); scan.readAllVersions(); From 561ad7d9033e7610980067ee6649ce658ea42030 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Fri, 15 Mar 2024 10:51:04 -0700 Subject: [PATCH 19/27] Historical files are generated only with default store engine and default compactor --- .../regionserver/DefaultStoreFileManager.java | 6 ++---- .../hbase/regionserver/StoreFileWriter.java | 21 ++++++++++++------- .../compactions/CompactionConfiguration.java | 8 ++----- .../regionserver/TestStoreFileWriter.java | 4 ++-- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java index cef269821090..fdc16c5777be 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java @@ -17,8 +17,7 @@ */ package org.apache.hadoop.hbase.regionserver; -import static 
org.apache.hadoop.hbase.regionserver.StoreFileWriter.DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES; -import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES; +import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.shouldEnableHistoricalCompactionFiles; import java.io.IOException; import java.util.ArrayList; @@ -78,8 +77,7 @@ public DefaultStoreFileManager(CellComparator cellComparator, this.comConf = comConf; this.blockingFileCount = conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT); - this.enableLiveFileTracking = conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, - DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES); + this.enableLiveFileTracking = shouldEnableHistoricalCompactionFiles(conf); } private List getLiveFiles(Collection storeFiles) throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java index c69fe76707ea..568986e12fb3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.regionserver; +import static org.apache.hadoop.hbase.regionserver.DefaultStoreEngine.DEFAULT_COMPACTOR_CLASS_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_PARAM_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_TYPE_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.COMPACTION_EVENT_KEY; @@ -28,6 +29,7 @@ import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_FILE_REFS; import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY; +import static org.apache.hadoop.hbase.regionserver.StoreEngine.STORE_ENGINE_CLASS_KEY; import java.io.IOException; import java.net.InetSocketAddress; @@ -58,6 +60,7 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl; import org.apache.hadoop.hbase.mob.MobUtils; +import org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor; import org.apache.hadoop.hbase.util.BloomContext; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.BloomFilterUtil; @@ -156,6 +159,15 @@ private StoreFileWriter(FileSystem fs, Path liveFilePath, Path historicalFilePat maxKeys, favoredNodes, fileContext, shouldDropCacheBehind, compactedFilesSupplier); } + public static boolean shouldEnableHistoricalCompactionFiles(Configuration conf) { + return conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, + DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES) + && conf.get(STORE_ENGINE_CLASS_KEY, DefaultStoreEngine.class.getName()) + .equals(DefaultStoreEngine.class.getName()) + && conf.get(DEFAULT_COMPACTOR_CLASS_KEY, DefaultCompactor.class.getName()) + .equals(DefaultCompactor.class.getName()); + } + public long getPos() throws IOException { return liveFileWriter.getPos(); } @@ -969,15 +981,8 @@ public StoreFileWriter build() throws IOException { } } - if ( - isCompaction && conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, - DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES) - ) { + if (isCompaction && shouldEnableHistoricalCompactionFiles(conf)) { historicalFilePath = getUniqueFile(fs, dir); - LOG.info("Dual file compaction 
is enabled liveFilePath " + liveFilePath - + " historicalFilePath " + historicalFilePath); - } else { - LOG.info("Dual file compaction is not enabled liveFilePath " + liveFilePath); } // make sure we call this before actually create the writer diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java index ea15c320d35c..538efecb4018 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/CompactionConfiguration.java @@ -17,8 +17,7 @@ */ package org.apache.hadoop.hbase.regionserver.compactions; -import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES; -import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES; +import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.shouldEnableHistoricalCompactionFiles; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; @@ -146,10 +145,7 @@ public class CompactionConfiguration { conf.getLong(HBASE_HSTORE_COMPACTION_MIN_SIZE_KEY, storeConfigInfo.getMemStoreFlushSize()); minFilesToCompact = Math.max(2, conf.getInt(HBASE_HSTORE_COMPACTION_MIN_KEY, conf.getInt(HBASE_HSTORE_COMPACTION_MIN_KEY_OLD, 3))); - if ( - conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, - DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES) - ) { + if (shouldEnableHistoricalCompactionFiles(conf)) { // If historical file writing is enabled, we bump up the min value by one as DualFileWriter // compacts files into two files, live and historical, instead of one. This also eliminates // infinite re-compaction when the min value is set to 2 diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java index a49c72130884..cadd02bcd2b3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java @@ -69,8 +69,8 @@ * hbase.enable.historical.compaction.files). The test maintains the information about cells * inserted in memory and compares in memory state with the state on disk. The mismatches are * currently logged only now instead of asserting on them as the test finds inconsistencies. These - * inconsistencies (data integrity issues) are due to mishandling of version delete markers - * currently in HBase (see HBASE-XXXXXX). + * inconsistencies (data integrity issues) were due to mishandling of version delete markers in + * HBase at the time this test is introduced. 
*/ @Category({ MediumTests.class, RegionServerTests.class }) @RunWith(Parameterized.class) From 1e394559925ac3eb7281cb0d82bb36e882b99e97 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Fri, 15 Mar 2024 20:05:32 -0700 Subject: [PATCH 20/27] Simplified the test code and fix some issues --- .../hbase/regionserver/StoreFileWriter.java | 8 +- .../regionserver/TestStoreFileWriter.java | 220 ++++-------------- 2 files changed, 47 insertions(+), 181 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java index 568986e12fb3..ec486b604a69 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java @@ -283,10 +283,10 @@ BloomFilterWriter getGeneralBloomWriter() { } public void close() throws IOException { - liveFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(true)); + liveFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(false)); liveFileWriter.close(); if (historicalFileWriter != null) { - historicalFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(false)); + historicalFileWriter.appendFileInfo(HISTORICAL_KEY, Bytes.toBytes(true)); historicalFileWriter.close(); } } @@ -438,6 +438,10 @@ private void appendCell(Cell cell) throws IOException { } else { // It is deleted getHistoricalFileWriter().append(cell); + if (newVersionBehavior) { + // Deleted versions are considered toward total version count when newVersionBehavior + livePutCellCount++; + } } } else { // It is an older put cell diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java index cadd02bcd2b3..f50e80865832 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.regionserver; +import static org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder.NEW_VERSION_BEHAVIOR; import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES; import static org.junit.Assert.assertEquals; @@ -51,8 +52,6 @@ import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Store file writer does not do any compaction. Each cell written to either the live or historical @@ -66,11 +65,7 @@ * regular and raw scans. Then the same verification is done after tables are minor and finally * major compacted. The test also verifies that flushes do not generate historical files and the * historical files are generated only when historical file generation is enabled (by the config - * hbase.enable.historical.compaction.files). The test maintains the information about cells - * inserted in memory and compares in memory state with the state on disk. The mismatches are - * currently logged only now instead of asserting on them as the test finds inconsistencies. These - * inconsistencies (data integrity issues) were due to mishandling of version delete markers in - * HBase at the time this test is introduced. + * hbase.enable.historical.compaction.files). 
*/ @Category({ MediumTests.class, RegionServerTests.class }) @RunWith(Parameterized.class) @@ -78,7 +73,6 @@ public class TestStoreFileWriter { @ClassRule public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestStoreFileWriter.class); - private static final Logger LOG = LoggerFactory.getLogger(RegionScannerImpl.class); private final int ROW_NUM = 100; private final Random RANDOM = new Random(11); private final HBaseTestingUtil testUtil = new HBaseTestingUtil(); @@ -96,26 +90,26 @@ public class TestStoreFileWriter { public KeepDeletedCells keepDeletedCells; @Parameterized.Parameter(1) public int maxVersions; + @Parameterized.Parameter(2) + public boolean newVersionBehavior; - @Parameterized.Parameters(name = "keepDeletedCells={0}, maxVersions={1}") + @Parameterized.Parameters(name = "keepDeletedCells={0}, maxVersions={1}, newVersionBehavior={2}") public static synchronized Collection data() { - return Arrays.asList(new Object[][] { { KeepDeletedCells.FALSE, 1 }, - { KeepDeletedCells.FALSE, 2 }, { KeepDeletedCells.FALSE, 3 }, { KeepDeletedCells.TRUE, 1 }, - { KeepDeletedCells.TRUE, 2 }, { KeepDeletedCells.TRUE, 3 } }); + return Arrays.asList( + new Object[][] { { KeepDeletedCells.FALSE, 1, true }, { KeepDeletedCells.FALSE, 2, false }, + { KeepDeletedCells.FALSE, 3, true }, { KeepDeletedCells.TRUE, 1, false }, + { KeepDeletedCells.TRUE, 2, true }, { KeepDeletedCells.TRUE, 3, false } }); } // In memory representation of a cell. We only need to know timestamp and type field for our - // testing for cell. Please note the row for the cell is implicit in insertedCells. The fied - // flushCount is only for debugging + // testing for cell. Please note the row for the cell is implicit in insertedCells. private static class CellInfo { long timestamp; Cell.Type type; - int flushCount; - CellInfo(long timestamp, Cell.Type type, int flushCount) { + CellInfo(long timestamp, Cell.Type type) { this.timestamp = timestamp; this.type = type; - this.flushCount = flushCount; } } @@ -123,7 +117,8 @@ private void createTable(int index, boolean enableDualFileWriter) throws IOExcep tableName[index] = TableName.valueOf(getClass().getSimpleName() + "_" + index); ColumnFamilyDescriptor familyDescriptor = ColumnFamilyDescriptorBuilder.newBuilder(HBaseTestingUtil.fam1).setMaxVersions(maxVersions) - .setKeepDeletedCells(keepDeletedCells).build(); + .setKeepDeletedCells(keepDeletedCells) + .setValue(NEW_VERSION_BEHAVIOR, Boolean.toString(newVersionBehavior)).build(); TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName[index]).setColumnFamily(familyDescriptor) .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, Boolean.toString(enableDualFileWriter)); @@ -156,9 +151,6 @@ public void tearDown() throws Exception { @Test public void testCompactedFiles() throws Exception { - Scan scan = new Scan(); - scan.readAllVersions(); - for (int i = 0; i < 10; i++) { insertRows(ROW_NUM * maxVersions); deleteRows(ROW_NUM / 8); @@ -168,7 +160,7 @@ public void testCompactedFiles() throws Exception { flushRegion(); } - verifyCells(scan, getLiveCellCount(), getAllCellCount(), "Flush"); + verifyCells(); HStore[] stores = new HStore[2]; @@ -186,7 +178,7 @@ public void testCompactedFiles() throws Exception { assertEquals(flushCount - stores[1].getCompactedFiles().size() + 2, stores[1].getStorefilesCount()); - verifyCells(scan, getLiveCellCount(), getAllCellCount(), "Minor Compaction"); + verifyCells(); regions[0].compact(true); assertEquals(1, stores[0].getStorefilesCount()); @@ -195,70 +187,12 @@ 
public void testCompactedFiles() throws Exception { assertEquals(keepDeletedCells == KeepDeletedCells.FALSE ? 1 : 2, stores[1].getStorefilesCount()); - verifyCells(scan, getLiveCellCount(), - keepDeletedCells == KeepDeletedCells.FALSE ? getLiveCellCount() : getAllCellCount(), - "Major Compaction"); + verifyCells(); } - private void verifyCells(Scan scan, int expectedLiveCellCount, int expectedAllCellCount, - String phase) throws Exception { - scan.setRaw(false); - LOG.info("[" + phase + "] Live cell count expected: " + expectedLiveCellCount + " actual: " - + scanAndVerifyAndCountLiveCells(regions[0])); - scan.setRaw(true); - LOG.info("[" + phase + "] All cell count expected: " + expectedAllCellCount + " actual: " - + scanAndCompareAndCountCells(regions[0], regions[1], scan)); - } - - private int getLiveCellCount(int row, int q) { - int count = 0; - List cellTypeList = insertedCells.get(row).get(q); - for (int version = 1; version <= maxVersions; version++) { - if (getPutCellTimestamp(cellTypeList, version) != -1) { - count++; - } else { - break; - } - } - return count; - } - - private int getLiveCellCount(int row) { - int count = 0; - for (int q = 0; q < qualifiers.length; q++) { - count += getLiveCellCount(row, q); - } - return count; - } - - private int getLiveCellCount() { - int count = 0; - for (int r = 0; r < ROW_NUM; r++) { - count += getLiveCellCount(r); - } - return count; - } - - private int getAllCellCount() { - int count = 0; - for (int r = 0; r < ROW_NUM; r++) { - for (int q = 0; q < qualifiers.length; q++) { - count += insertedCells.get(r).get(q).size(); - } - // For simplicity, the family delete markers are inserted for all columns instead of - // allocating a separate column for them. So we need to adjust the count - for (int q = 1; q < qualifiers.length; q++) { - for (CellInfo cellInfo : insertedCells.get(r).get(q)) { - if ( - cellInfo.type == Cell.Type.DeleteFamily - || cellInfo.type == Cell.Type.DeleteFamilyVersion - ) { - count--; - } - } - } - } - return count; + private void verifyCells() throws Exception { + scanAndCompare(false); + scanAndCompare(true); } private void flushRegion() throws Exception { @@ -283,6 +217,16 @@ private Long getRowTimestamp(int row) { return maxTimestamp; } + private long getNewTimestamp(long timestamp) throws Exception { + long newTimestamp = System.currentTimeMillis(); + if (timestamp == newTimestamp) { + Thread.sleep(1); + newTimestamp = System.currentTimeMillis(); + assert (timestamp < newTimestamp); + } + return newTimestamp; + } + private void insertRows(int rowCount) throws Exception { int row; long timestamp = System.currentTimeMillis(); @@ -292,17 +236,11 @@ private void insertRows(int rowCount) throws Exception { for (int q = 0; q < qualifiers.length; q++) { put.addColumn(HBaseTestingUtil.fam1, qualifiers[q], Bytes.toBytes(String.valueOf(timestamp))); - insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put, flushCount)); + insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put)); } regions[0].put(put); regions[1].put(put); - long newTimestamp = System.currentTimeMillis(); - if (timestamp == newTimestamp) { - Thread.sleep(1); - newTimestamp = System.currentTimeMillis(); - assert (timestamp < newTimestamp); - } - timestamp = newTimestamp; + timestamp = getNewTimestamp(timestamp); } } @@ -318,8 +256,7 @@ private void deleteRows(int rowCount) throws Exception { // allocating a separate column for them) in the memory representation of the data stored // to HBase for (int q = 0; q < qualifiers.length; 
q++) { - insertedCells.get(row).get(q) - .add(new CellInfo(timestamp, Cell.Type.DeleteFamily, flushCount)); + insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamily)); } } } @@ -333,8 +270,7 @@ private void deleteSingleRowVersion(int row, long timestamp) throws IOException // allocating a separate column for them) in the memory representation of the data stored // to HBase for (int q = 0; q < qualifiers.length; q++) { - insertedCells.get(row).get(q) - .add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion, flushCount)); + insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion)); } } @@ -362,8 +298,7 @@ private void deleteColumns(int rowCount) throws Exception { delete.addColumns(HBaseTestingUtil.fam1, qualifiers[q], timestamp); regions[0].delete(delete); regions[1].delete(delete); - insertedCells.get(row).get(q) - .add(new CellInfo(timestamp, Cell.Type.DeleteColumn, flushCount)); + insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteColumn)); } } @@ -378,92 +313,21 @@ private void deleteColumnVersions(int rowCount) throws Exception { delete.addColumn(HBaseTestingUtil.fam1, qualifiers[q], timestamp); regions[0].delete(delete); regions[1].delete(delete); - insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete, flushCount)); + insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete)); } } } - private long getPutCellTimestamp(List cellList, int version) { - if (cellList.isEmpty()) { - return -1; - } - int currentVersion = 0; - CellInfo previousDeleteVersionCellInfo = null; - int size = cellList.size(); - for (int i = size - 1; i >= 0; i--) { - CellInfo cellInfo = cellList.get(i); - if (cellInfo.type == Cell.Type.Put) { - if (previousDeleteVersionCellInfo != null) { - if (previousDeleteVersionCellInfo.timestamp != cellInfo.timestamp) { - // Delete marker for this column is superfluous as its timestamp does not match with - // the next put cell - previousDeleteVersionCellInfo = null; - currentVersion++; - if (currentVersion == version) { - return cellInfo.timestamp; - } - } - // Skip this cell as it is deleted by a family version delete marker - } else { - currentVersion++; - if (currentVersion == version) { - return cellInfo.timestamp; - } - } - } else - if (cellInfo.type == Cell.Type.DeleteFamily || cellInfo.type == Cell.Type.DeleteColumn) { - return -1; - } else { - previousDeleteVersionCellInfo = cellInfo; - } - } - return -1; - } - - private int scanAndVerifyAndCountLiveCells(HRegion region) throws Exception { - int cellCount = 0; + private Scan createScan(boolean raw) { Scan scan = new Scan(); scan.readAllVersions(); - - try (RegionScanner regionScanner = region.getScanner(scan)) { - boolean hasMore; - do { - List rowList = new ArrayList<>(); - hasMore = regionScanner.nextRaw(rowList); - cellCount += rowList.size(); - int previousColumn = -1; - int version = 1; - int row = 0; - for (Cell cell : rowList) { - row = Integer.valueOf(Bytes.toString(CellUtil.cloneRow(cell))); - int q = Integer.valueOf(Bytes.toString(CellUtil.cloneQualifier(cell))); - if (q == previousColumn) { - version++; - } else { - previousColumn = q; - version = 1; - } - long expected = getPutCellTimestamp(insertedCells.get(row).get(q), version); - long actual = cell.getTimestamp(); - if (expected != actual) { - LOG.info("Row: " + row + " qualifier: " + q + " cell timestamp expected: " + expected - + " actual: " + actual); - } - } - if (!rowList.isEmpty() && rowList.size() != getLiveCellCount(row)) { 
- LOG.info("Row: " + row + " live cell count expected: " + getLiveCellCount(row) - + " actual: " + rowList.size()); - } - } while (hasMore); - } - return cellCount; + scan.setRaw(raw); + return scan; } - private int scanAndCompareAndCountCells(HRegion firstRegion, HRegion secondRegion, Scan scan) - throws Exception { - int cellCount = 0; - try (RegionScanner firstRS = firstRegion.getScanner(scan)) { - try (RegionScanner secondRS = secondRegion.getScanner(scan)) { + private void scanAndCompare(boolean raw) throws Exception { + try (RegionScanner firstRS = regions[0].getScanner(createScan(raw))) { + try (RegionScanner secondRS = regions[1].getScanner(createScan(raw))) { boolean firstHasMore; boolean secondHasMore; do { @@ -472,7 +336,6 @@ private int scanAndCompareAndCountCells(HRegion firstRegion, HRegion secondRegio firstHasMore = firstRS.nextRaw(firstRowList); secondHasMore = secondRS.nextRaw(secondRowList); assertEquals(firstRowList.size(), secondRowList.size()); - cellCount += firstRowList.size(); int size = firstRowList.size(); for (int i = 0; i < size; i++) { Cell firstCell = firstRowList.get(i); @@ -485,6 +348,5 @@ private int scanAndCompareAndCountCells(HRegion firstRegion, HRegion secondRegio assertEquals(firstHasMore, secondHasMore); } } - return cellCount; } } From 3d25055f96486d321edd90656bb66c403f15d344 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Sat, 30 Mar 2024 11:20:56 -0700 Subject: [PATCH 21/27] Changes for review comments --- .../org/apache/hadoop/hbase/regionserver/HStoreFile.java | 8 ++++++-- .../apache/hadoop/hbase/regionserver/StoreFileWriter.java | 2 +- .../hadoop/hbase/regionserver/CreateRandomStoreFile.java | 2 +- .../hadoop/hbase/regionserver/TestCompactorMemLeak.java | 2 +- .../org/apache/hadoop/hbase/regionserver/TestHStore.java | 2 +- .../hadoop/hbase/regionserver/TestStoreFileWriter.java | 3 ++- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java index a9951aa9260c..b2e222428bac 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -140,10 +140,14 @@ public class HStoreFile implements StoreFile { // Indicates if the file got compacted private volatile boolean compactedAway = false; - // Indicate if the file contains historical cell versions. This is used when + // Indicates if the file contains historical cell versions. This is used when // hbase.enable.historical.compaction.files is set to true. In that case, compactions // can generate two files, one with the live cell versions and the other with the remaining - // (historical) cell versions. + // (historical) cell versions. If isHistorical is true then the hfile is historical. + // Historical files are skipped for regular (not raw) scans for latest row versions. + // When hbase.enable.historical.compaction.files is false, isHistorical will be false + // for all files. This means all files will be treated as live files. Historical files are + // generated only when hbase.enable.historical.compaction.files is true. private volatile boolean isHistorical = false; // Keys for metadata stored in backing HFile. 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java index ec486b604a69..6a678338f60a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java @@ -301,7 +301,7 @@ public void appendFileInfo(byte[] key, byte[] value) throws IOException { /** * For use in testing. */ - HFile.Writer getHFileWriter() { + HFile.Writer getLiveFileWriter() { return liveFileWriter.getHFileWriter(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java index 4754c5ba530b..320fc99f15b7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java @@ -193,7 +193,7 @@ public boolean run(String[] args) throws IOException { int numMetaBlocks = ThreadLocalRandom.current().nextInt(10) + 1; LOG.info("Writing " + numMetaBlocks + " meta blocks"); for (int metaI = 0; metaI < numMetaBlocks; ++metaI) { - sfw.getHFileWriter().appendMetaBlock(generateString(), new BytesWritable(generateValue())); + sfw.getLiveFileWriter().appendMetaBlock(generateString(), new BytesWritable(generateValue())); } sfw.close(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java index ad478bceac71..08bbed6e18ed 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactorMemLeak.java @@ -129,7 +129,7 @@ public MyCompactor(Configuration conf, HStore store) { @Override protected List commitWriter(StoreFileWriter writer, FileDetails fd, CompactionRequestImpl request) throws IOException { - HFileWriterImpl writerImpl = (HFileWriterImpl) writer.getHFileWriter(); + HFileWriterImpl writerImpl = (HFileWriterImpl) writer.getLiveFileWriter(); Cell cell = writerImpl.getLastCell(); // The cell should be backend with an KeyOnlyKeyValue. 
IS_LAST_CELL_ON_HEAP.set(cell instanceof KeyOnlyKeyValue); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java index 979e6b040352..b0ad3083de78 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java @@ -1958,7 +1958,7 @@ public void testHFileContextSetWithCFAndTable() throws Exception { .createWriter(CreateStoreFileWriterParams.create().maxKeyCount(10000L) .compression(Compression.Algorithm.NONE).isCompaction(true).includeMVCCReadpoint(true) .includesTag(false).shouldDropBehind(true)); - HFileContext hFileContext = writer.getHFileWriter().getFileContext(); + HFileContext hFileContext = writer.getLiveFileWriter().getFileContext(); assertArrayEquals(family, hFileContext.getColumnFamily()); assertArrayEquals(table, hFileContext.getTableName()); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java index f50e80865832..6c880a5f5f4e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java @@ -98,7 +98,8 @@ public static synchronized Collection data() { return Arrays.asList( new Object[][] { { KeepDeletedCells.FALSE, 1, true }, { KeepDeletedCells.FALSE, 2, false }, { KeepDeletedCells.FALSE, 3, true }, { KeepDeletedCells.TRUE, 1, false }, - { KeepDeletedCells.TRUE, 2, true }, { KeepDeletedCells.TRUE, 3, false } }); + // { KeepDeletedCells.TRUE, 2, true }, see HBASE-28442 + { KeepDeletedCells.TRUE, 3, false } }); } // In memory representation of a cell. We only need to know timestamp and type field for our From 6632743478043acb1fcf17e627756f94dfff3fde Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Sun, 31 Mar 2024 11:43:15 -0700 Subject: [PATCH 22/27] Added warning logs --- .../hbase/regionserver/StoreFileWriter.java | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java index 6a678338f60a..da8c226b53dd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java @@ -160,12 +160,29 @@ private StoreFileWriter(FileSystem fs, Path liveFilePath, Path historicalFilePat } public static boolean shouldEnableHistoricalCompactionFiles(Configuration conf) { - return conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, - DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES) - && conf.get(STORE_ENGINE_CLASS_KEY, DefaultStoreEngine.class.getName()) - .equals(DefaultStoreEngine.class.getName()) - && conf.get(DEFAULT_COMPACTOR_CLASS_KEY, DefaultCompactor.class.getName()) - .equals(DefaultCompactor.class.getName()); + if ( + conf.getBoolean(ENABLE_HISTORICAL_COMPACTION_FILES, + DEFAULT_ENABLE_HISTORICAL_COMPACTION_FILES) + ) { + // Historical compaction files are supported only for default store engine with + // default compactor. 
+ String storeEngine = conf.get(STORE_ENGINE_CLASS_KEY, DefaultStoreEngine.class.getName()); + if (!storeEngine.equals(DefaultStoreEngine.class.getName())) { + LOG.warn("Historical compaction file generation is ignored for " + storeEngine + + ". hbase.enable.historical.compaction.files can be set to true only for the " + + "default compaction (DefaultStoreEngine and DefaultCompactor)"); + return false; + } + String compactor = conf.get(DEFAULT_COMPACTOR_CLASS_KEY, DefaultCompactor.class.getName()); + if (!compactor.equals(DefaultCompactor.class.getName())) { + LOG.warn("Historical compaction file generation is ignored for " + compactor + + ". hbase.enable.historical.compaction.files can be set to true only for the " + + "default compaction (DefaultStoreEngine and DefaultCompactor)"); + return false; + } + return true; + } + return false; } public long getPos() throws IOException { From 975cf3a14e16299c018beec8eb56662b065d275c Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Sat, 20 Apr 2024 11:34:02 -0700 Subject: [PATCH 23/27] Removed public trackTimestamps() from StoreFileWriter --- .../hadoop/hbase/regionserver/StoreFileWriter.java | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java index da8c226b53dd..67fa2244e957 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java @@ -257,17 +257,6 @@ public void appendTrackedTimestampsToMetadata() throws IOException { } } - /** - * Record the earlest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker - * to include the timestamp of this key - */ - public void trackTimestamps(final Cell cell) { - liveFileWriter.trackTimestamps(cell); - if (historicalFileWriter != null) { - historicalFileWriter.trackTimestamps(cell); - } - } - @Override public void beforeShipped() throws IOException { liveFileWriter.beforeShipped(); From ed61712b6b5ad807ae6fd84a72acf6da1bf6e913 Mon Sep 17 00:00:00 2001 From: Kadir Ozdemir Date: Tue, 23 Apr 2024 23:23:37 -0700 Subject: [PATCH 24/27] Changes for consistent view while getting storefile list --- .../regionserver/BrokenStoreFileCleaner.java | 2 +- .../regionserver/DateTieredStoreEngine.java | 6 +- .../regionserver/DefaultStoreEngine.java | 6 +- .../regionserver/DefaultStoreFileManager.java | 106 ++++++++++-------- .../hadoop/hbase/regionserver/HStore.java | 22 ++-- .../hbase/regionserver/StoreEngine.java | 2 +- .../hbase/regionserver/StoreFileManager.java | 2 +- .../regionserver/StripeStoreFileManager.java | 2 +- .../compactions/StripeCompactionPolicy.java | 8 +- .../hadoop/hbase/regionserver/TestHStore.java | 2 +- .../TestStripeStoreFileManager.java | 4 +- .../TestStripeCompactionPolicy.java | 12 +- 12 files changed, 96 insertions(+), 78 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BrokenStoreFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BrokenStoreFileCleaner.java index ba223de966c0..c235bdc29dc9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BrokenStoreFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BrokenStoreFileCleaner.java @@ -162,7 +162,7 @@ private boolean isCompactedFile(FileStatus file, HStore store) { } private boolean 
isActiveStorefile(FileStatus file, HStore store) { - return store.getStoreEngine().getStoreFileManager().getStorefiles().stream() + return store.getStoreEngine().getStoreFileManager().getStoreFiles().stream() .anyMatch(sf -> sf.getPath().equals(file.getPath())); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java index d15a6c92ef0b..ded6564bce53 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java @@ -43,7 +43,7 @@ public class DateTieredStoreEngine extends StoreEngine { @Override public boolean needsCompaction(List filesCompacting) { - return compactionPolicy.needsCompaction(storeFileManager.getStorefiles(), filesCompacting); + return compactionPolicy.needsCompaction(storeFileManager.getStoreFiles(), filesCompacting); } @Override @@ -65,14 +65,14 @@ private final class DateTieredCompactionContext extends CompactionContext { @Override public List preSelect(List filesCompacting) { - return compactionPolicy.preSelectCompactionForCoprocessor(storeFileManager.getStorefiles(), + return compactionPolicy.preSelectCompactionForCoprocessor(storeFileManager.getStoreFiles(), filesCompacting); } @Override public boolean select(List filesCompacting, boolean isUserCompaction, boolean mayUseOffPeak, boolean forceMajor) throws IOException { - request = compactionPolicy.selectCompaction(storeFileManager.getStorefiles(), filesCompacting, + request = compactionPolicy.selectCompaction(storeFileManager.getStoreFiles(), filesCompacting, isUserCompaction, mayUseOffPeak, forceMajor); return request != null; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java index 0c9fb9adcc2c..7b095596a3da 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreEngine.java @@ -56,7 +56,7 @@ public class DefaultStoreEngine extends StoreEngine filesCompacting) { - return compactionPolicy.needsCompaction(this.storeFileManager.getStorefiles(), filesCompacting); + return compactionPolicy.needsCompaction(this.storeFileManager.getStoreFiles(), filesCompacting); } @Override @@ -111,7 +111,7 @@ private class DefaultCompactionContext extends CompactionContext { @Override public boolean select(List filesCompacting, boolean isUserCompaction, boolean mayUseOffPeak, boolean forceMajor) throws IOException { - request = compactionPolicy.selectCompaction(storeFileManager.getStorefiles(), filesCompacting, + request = compactionPolicy.selectCompaction(storeFileManager.getStoreFiles(), filesCompacting, isUserCompaction, mayUseOffPeak, forceMajor); return request != null; } @@ -124,7 +124,7 @@ public List compact(ThroughputController throughputController, User user) @Override public List preSelect(List filesCompacting) { - return compactionPolicy.preSelectCompactionForCoprocessor(storeFileManager.getStorefiles(), + return compactionPolicy.preSelectCompactionForCoprocessor(storeFileManager.getStoreFiles(), filesCompacting); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java index fdc16c5777be..b20f75e1656f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java @@ -19,6 +19,7 @@ import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.shouldEnableHistoricalCompactionFiles; +import edu.umd.cs.findbugs.annotations.Nullable; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -51,16 +52,31 @@ class DefaultStoreFileManager implements StoreFileManager { private final CompactionConfiguration comConf; private final int blockingFileCount; private final Comparator storeFileComparator; - /** - * List of store files inside this store. This is an immutable list that is atomically replaced - * when its contents change. - */ - private volatile ImmutableList storefiles = ImmutableList.of(); - /** - * List of store files that include the latest cells inside this store. This is an immutable list - * that is atomically replaced when its contents change. - */ - private volatile ImmutableList liveStoreFiles = ImmutableList.of(); + + static class StoreFileList { + /** + * List of store files inside this store. This is an immutable list that is atomically replaced + * when its contents change. + */ + final ImmutableList all; + /** + * List of store files that include the latest cells inside this store. This is an immutable + * list that is atomically replaced when its contents change. + */ + @Nullable + final ImmutableList live; + + StoreFileList(ImmutableList storeFiles, ImmutableList liveStoreFiles) { + this.all = storeFiles; + this.live = liveStoreFiles; + } + } + + private static final StoreFileList EMPTY_STORE_FILE_LIST = + new StoreFileList(ImmutableList.of(), null); + + private volatile StoreFileList storeFiles = EMPTY_STORE_FILE_LIST; + /** * List of compacted files inside this store that needs to be excluded in reads because further * new reads will be using only the newly created files out of compaction. These compacted files @@ -93,16 +109,15 @@ private List getLiveFiles(Collection storeFiles) throws @Override public void loadFiles(List storeFiles) throws IOException { - if (enableLiveFileTracking) { - this.liveStoreFiles = - ImmutableList.sortedCopyOf(getStoreFileComparator(), getLiveFiles(storeFiles)); - } - this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, storeFiles); + this.storeFiles = new StoreFileList(ImmutableList.sortedCopyOf(storeFileComparator, storeFiles), + enableLiveFileTracking + ? ImmutableList.sortedCopyOf(storeFileComparator, getLiveFiles(storeFiles)) + : null); } @Override - public final Collection getStorefiles() { - return storefiles; + public final Collection getStoreFiles() { + return storeFiles.all; } @Override @@ -112,21 +127,19 @@ public Collection getCompactedfiles() { @Override public void insertNewFiles(Collection sfs) throws IOException { - if (enableLiveFileTracking) { - this.liveStoreFiles = ImmutableList.sortedCopyOf(getStoreFileComparator(), - Iterables.concat(this.liveStoreFiles, getLiveFiles(sfs))); - } - this.storefiles = - ImmutableList.sortedCopyOf(storeFileComparator, Iterables.concat(this.storefiles, sfs)); + storeFiles = new StoreFileList( + ImmutableList.sortedCopyOf(storeFileComparator, Iterables.concat(storeFiles.all, sfs)), + enableLiveFileTracking + ? 
ImmutableList.sortedCopyOf(storeFileComparator, + Iterables.concat(storeFiles.live, getLiveFiles(sfs))) + : null); } @Override public ImmutableCollection clearFiles() { - if (enableLiveFileTracking) { - liveStoreFiles = ImmutableList.of(); - } - ImmutableList result = storefiles; - storefiles = ImmutableList.of(); + ImmutableList result = storeFiles.all; + storeFiles = + new StoreFileList(ImmutableList.of(), enableLiveFileTracking ? ImmutableList.of() : null); return result; } @@ -139,7 +152,7 @@ public Collection clearCompactedFiles() { @Override public final int getStorefileCount() { - return storefiles.size(); + return storeFiles.all.size(); } @Override @@ -150,32 +163,37 @@ public final int getCompactedFilesCount() { @Override public void addCompactionResults(Collection newCompactedfiles, Collection results) throws IOException { + ImmutableList liveStoreFiles = null; if (enableLiveFileTracking) { - this.liveStoreFiles = ImmutableList.sortedCopyOf(storeFileComparator, - Iterables.concat(Iterables.filter(liveStoreFiles, sf -> !newCompactedfiles.contains(sf)), + liveStoreFiles = ImmutableList.sortedCopyOf(storeFileComparator, + Iterables.concat(Iterables.filter(storeFiles.live, sf -> !newCompactedfiles.contains(sf)), getLiveFiles(results))); } - this.storefiles = ImmutableList.sortedCopyOf(storeFileComparator, Iterables - .concat(Iterables.filter(storefiles, sf -> !newCompactedfiles.contains(sf)), results)); + storeFiles = + new StoreFileList( + ImmutableList + .sortedCopyOf(storeFileComparator, + Iterables.concat( + Iterables.filter(storeFiles.all, sf -> !newCompactedfiles.contains(sf)), results)), + liveStoreFiles); // Mark the files as compactedAway once the storefiles and compactedfiles list is finalized // Let a background thread close the actual reader on these compacted files and also // ensure to evict the blocks from block cache so that they are no longer in // cache newCompactedfiles.forEach(HStoreFile::markCompactedAway); - this.compactedfiles = ImmutableList.sortedCopyOf(storeFileComparator, - Iterables.concat(this.compactedfiles, newCompactedfiles)); + compactedfiles = ImmutableList.sortedCopyOf(storeFileComparator, + Iterables.concat(compactedfiles, newCompactedfiles)); } @Override public void removeCompactedFiles(Collection removedCompactedfiles) { - this.compactedfiles = - this.compactedfiles.stream().filter(sf -> !removedCompactedfiles.contains(sf)) - .sorted(storeFileComparator).collect(ImmutableList.toImmutableList()); + compactedfiles = compactedfiles.stream().filter(sf -> !removedCompactedfiles.contains(sf)) + .sorted(storeFileComparator).collect(ImmutableList.toImmutableList()); } @Override public final Iterator getCandidateFilesForRowKeyBefore(KeyValue targetKey) { - return this.storefiles.reverse().iterator(); + return storeFiles.all.reverse().iterator(); } @Override @@ -190,28 +208,28 @@ public Iterator updateCandidateFilesForRowKeyBefore( @Override public final Optional getSplitPoint() throws IOException { - return StoreUtils.getSplitPoint(storefiles, cellComparator); + return StoreUtils.getSplitPoint(storeFiles.all, cellComparator); } @Override public Collection getFilesForScan(byte[] startRow, boolean includeStartRow, byte[] stopRow, boolean includeStopRow, boolean onlyLatestVersion) { if (onlyLatestVersion && enableLiveFileTracking) { - return liveStoreFiles; + return storeFiles.live; } // We cannot provide any useful input and already have the files sorted by seqNum. 
- return getStorefiles(); + return getStoreFiles(); } @Override public int getStoreCompactionPriority() { - int priority = blockingFileCount - storefiles.size(); + int priority = blockingFileCount - storeFiles.all.size(); return (priority == HStore.PRIORITY_USER) ? priority + 1 : priority; } @Override public Collection getUnneededFiles(long maxTs, List filesCompacting) { - ImmutableList files = storefiles; + ImmutableList files = storeFiles.all; // 1) We can never get rid of the last file which has the maximum seqid. // 2) Files that are not the latest can't become one due to (1), so the rest are fair game. return files.stream().limit(Math.max(0, files.size() - 1)).filter(sf -> { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 7f06759a7eee..3c879dbdb730 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -593,7 +593,7 @@ public long timeOfOldestEdit() { /** Returns All store files. */ @Override public Collection getStorefiles() { - return this.storeEngine.getStoreFileManager().getStorefiles(); + return this.storeEngine.getStoreFileManager().getStoreFiles(); } @Override @@ -1429,7 +1429,7 @@ public CompactionProgress getCompactionProgress() { @Override public boolean shouldPerformMajorCompaction() throws IOException { - for (HStoreFile sf : this.storeEngine.getStoreFileManager().getStorefiles()) { + for (HStoreFile sf : this.storeEngine.getStoreFileManager().getStoreFiles()) { // TODO: what are these reader checks all over the place? if (sf.getReader() == null) { LOG.debug("StoreFile {} has null Reader", sf); @@ -1437,7 +1437,7 @@ public boolean shouldPerformMajorCompaction() throws IOException { } } return storeEngine.getCompactionPolicy() - .shouldPerformMajorCompaction(this.storeEngine.getStoreFileManager().getStorefiles()); + .shouldPerformMajorCompaction(this.storeEngine.getStoreFileManager().getStoreFiles()); } public Optional requestCompaction() throws IOException { @@ -1615,7 +1615,7 @@ private void finishCompactionRequest(CompactionRequestImpl cr) { protected void refreshStoreSizeAndTotalBytes() throws IOException { this.storeSize.set(0L); this.totalUncompressedBytes.set(0L); - for (HStoreFile hsf : this.storeEngine.getStoreFileManager().getStorefiles()) { + for (HStoreFile hsf : this.storeEngine.getStoreFileManager().getStoreFiles()) { StoreFileReader r = hsf.getReader(); if (r == null) { LOG.debug("StoreFile {} has a null Reader", hsf); @@ -1785,7 +1785,7 @@ public int getCompactedFilesCount() { } private LongStream getStoreFileAgeStream() { - return this.storeEngine.getStoreFileManager().getStorefiles().stream().filter(sf -> { + return this.storeEngine.getStoreFileManager().getStoreFiles().stream().filter(sf -> { if (sf.getReader() == null) { LOG.debug("StoreFile {} has a null Reader", sf); return false; @@ -1813,13 +1813,13 @@ public OptionalDouble getAvgStoreFileAge() { @Override public long getNumReferenceFiles() { - return this.storeEngine.getStoreFileManager().getStorefiles().stream() + return this.storeEngine.getStoreFileManager().getStoreFiles().stream() .filter(HStoreFile::isReference).count(); } @Override public long getNumHFiles() { - return this.storeEngine.getStoreFileManager().getStorefiles().stream() + return this.storeEngine.getStoreFileManager().getStoreFiles().stream() .filter(HStoreFile::isHFile).count(); } @@ -1831,19 
+1831,19 @@ public long getStoreSizeUncompressed() { @Override public long getStorefilesSize() { // Include all StoreFiles - return StoreUtils.getStorefilesSize(this.storeEngine.getStoreFileManager().getStorefiles(), + return StoreUtils.getStorefilesSize(this.storeEngine.getStoreFileManager().getStoreFiles(), sf -> true); } @Override public long getHFilesSize() { // Include only StoreFiles which are HFiles - return StoreUtils.getStorefilesSize(this.storeEngine.getStoreFileManager().getStorefiles(), + return StoreUtils.getStorefilesSize(this.storeEngine.getStoreFileManager().getStoreFiles(), HStoreFile::isHFile); } private long getStorefilesFieldSize(ToLongFunction f) { - return this.storeEngine.getStoreFileManager().getStorefiles().stream() + return this.storeEngine.getStoreFileManager().getStoreFiles().stream() .mapToLong(file -> StoreUtils.getStorefileFieldSize(file, f)).sum(); } @@ -2416,7 +2416,7 @@ public int getCurrentParallelPutCount() { } public int getStoreRefCount() { - return this.storeEngine.getStoreFileManager().getStorefiles().stream() + return this.storeEngine.getStoreFileManager().getStoreFiles().stream() .filter(sf -> sf.getReader() != null).filter(HStoreFile::isHFile) .mapToInt(HStoreFile::getRefCount).sum(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java index 34f882516bae..fbf9a4ffb135 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreEngine.java @@ -360,7 +360,7 @@ public void refreshStoreFiles(Collection newFiles) throws IOException { * replicas to keep up to date with the primary region files. 
*/ private void refreshStoreFilesInternal(Collection newFiles) throws IOException { - Collection currentFiles = storeFileManager.getStorefiles(); + Collection currentFiles = storeFileManager.getStoreFiles(); Collection compactedFiles = storeFileManager.getCompactedfiles(); if (currentFiles == null) { currentFiles = Collections.emptySet(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java index 9d918374702f..86a14047f138 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileManager.java @@ -96,7 +96,7 @@ void addCompactionResults(Collection compactedFiles, Collection getStorefiles(); + Collection getStoreFiles(); /** * List of compacted files inside this store that needs to be excluded in reads because further diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeStoreFileManager.java index 57b723992a15..8ac8397b868c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeStoreFileManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StripeStoreFileManager.java @@ -136,7 +136,7 @@ public void loadFiles(List storeFiles) { } @Override - public Collection getStorefiles() { + public Collection getStoreFiles() { return state.allFilesCached; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/StripeCompactionPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/StripeCompactionPolicy.java index f5be2b380382..9a00508cd00d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/StripeCompactionPolicy.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/StripeCompactionPolicy.java @@ -66,7 +66,7 @@ public List preSelectFilesForCoprocessor(StripeInformationProvider s // We sincerely hope nobody is messing with us with their coprocessors. // If they do, they are very likely to shoot themselves in the foot. // We'll just exclude all the filesCompacting from the list. - ArrayList candidateFiles = new ArrayList<>(si.getStorefiles()); + ArrayList candidateFiles = new ArrayList<>(si.getStoreFiles()); candidateFiles.removeAll(filesCompacting); return candidateFiles; } @@ -114,7 +114,7 @@ public StripeCompactionRequest selectCompaction(StripeInformationProvider si, // This can happen due to region split. We can skip it later; for now preserve // compact-all-things behavior. - Collection allFiles = si.getStorefiles(); + Collection allFiles = si.getStoreFiles(); if (StoreUtils.hasReferences(allFiles)) { LOG.debug("There are references in the store; compacting all files"); long targetKvs = estimateTargetKvs(allFiles, config.getInitialCount()).getFirst(); @@ -165,7 +165,7 @@ public StripeCompactionRequest selectCompaction(StripeInformationProvider si, public boolean needsCompactions(StripeInformationProvider si, List filesCompacting) { // Approximation on whether we need compaction. 
- return filesCompacting.isEmpty() && (StoreUtils.hasReferences(si.getStorefiles()) + return filesCompacting.isEmpty() && (StoreUtils.hasReferences(si.getStoreFiles()) || (si.getLevel0Files().size() >= this.config.getLevel0MinFiles()) || needsSingleStripeCompaction(si) || hasExpiredStripes(si) || allL0FilesExpired(si)); } @@ -577,7 +577,7 @@ public void setMajorRangeFull() { /** The information about stripes that the policy needs to do its stuff */ public static interface StripeInformationProvider { - public Collection getStorefiles(); + public Collection getStoreFiles(); /** * Gets the start row for a given stripe. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java index b0ad3083de78..ccc755a03580 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStore.java @@ -1764,7 +1764,7 @@ public void testAge() throws IOException { Arrays.asList(mockStoreFile(currentTime - 10), mockStoreFile(currentTime - 100), mockStoreFile(currentTime - 1000), mockStoreFile(currentTime - 10000)); StoreFileManager sfm = mock(StoreFileManager.class); - when(sfm.getStorefiles()).thenReturn(storefiles); + when(sfm.getStoreFiles()).thenReturn(storefiles); StoreEngine storeEngine = mock(StoreEngine.class); when(storeEngine.getStoreFileManager()).thenReturn(sfm); return storeEngine; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java index 3a5cab23fba3..ec5401a08b99 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStripeStoreFileManager.java @@ -117,7 +117,7 @@ public void testClearFiles() throws Exception { Collection allFiles = manager.clearFiles(); assertEquals(4, allFiles.size()); assertEquals(0, manager.getStorefileCount()); - assertEquals(0, manager.getStorefiles().size()); + assertEquals(0, manager.getStoreFiles().size()); } private static ArrayList dumpIterator(Iterator iter) { @@ -541,7 +541,7 @@ private void testPriorityScenario(int expectedPriority, int limit, int stripes, private void verifyInvalidCompactionScenario(StripeStoreFileManager manager, ArrayList filesToCompact, ArrayList filesToInsert) throws Exception { - Collection allFiles = manager.getStorefiles(); + Collection allFiles = manager.getStoreFiles(); assertThrows(IllegalStateException.class, () -> manager.addCompactionResults(filesToCompact, filesToInsert)); verifyAllFiles(manager, allFiles); // must have the same files. 
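The StoreFileList holder that PATCH 24/27 adds to DefaultStoreFileManager exists to give readers a consistent view: the full file list and the live-file subset are replaced together through a single volatile field instead of two independently updated fields. A minimal sketch of that publication pattern, with an illustrative SnapshotHolder class that is not the patch's API:

    import java.util.Collections;
    import java.util.List;

    final class SnapshotHolder<T> {
      static final class Snapshot<U> {
        final List<U> all;   // every store file
        final List<U> live;  // files holding the latest cell versions
        Snapshot(List<U> all, List<U> live) {
          this.all = Collections.unmodifiableList(all);
          this.live = Collections.unmodifiableList(live);
        }
      }

      private volatile Snapshot<T> current =
        new Snapshot<>(Collections.emptyList(), Collections.emptyList());

      // A single volatile read hands back a mutually consistent pair of lists.
      Snapshot<T> read() {
        return current;
      }

      // Writers build a fresh immutable pair and publish it atomically.
      void publish(List<T> all, List<T> live) {
        current = new Snapshot<>(all, live);
      }
    }

This mirrors why getStoreFiles() and getFilesForScan() in the patch read one storeFiles reference rather than separate storefiles and liveStoreFiles fields.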
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestStripeCompactionPolicy.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestStripeCompactionPolicy.java index c4f98f4d94ad..295d0cc4c2fc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestStripeCompactionPolicy.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/compactions/TestStripeCompactionPolicy.java @@ -245,13 +245,13 @@ public void testWithReferences() throws Exception { when(ref.isReference()).thenReturn(true); StripeInformationProvider si = mock(StripeInformationProvider.class); Collection sfs = al(ref, createFile()); - when(si.getStorefiles()).thenReturn(sfs); + when(si.getStoreFiles()).thenReturn(sfs); assertTrue(policy.needsCompactions(si, al())); StripeCompactionPolicy.StripeCompactionRequest scr = policy.selectCompaction(si, al(), false); // UnmodifiableCollection does not implement equals so we need to change it here to a // collection that implements it. - assertEquals(si.getStorefiles(), new ArrayList<>(scr.getRequest().getFiles())); + assertEquals(si.getStoreFiles(), new ArrayList<>(scr.getRequest().getFiles())); scr.execute(sc, NoLimitThroughputController.INSTANCE, null); verify(sc, only()).compact(eq(scr.getRequest()), anyInt(), anyLong(), aryEq(OPEN_KEY), aryEq(OPEN_KEY), aryEq(OPEN_KEY), aryEq(OPEN_KEY), any(), any()); @@ -264,11 +264,11 @@ public void testInitialCountFromL0() throws Exception { StripeCompactionPolicy policy = createPolicy(conf, defaultSplitSize, defaultSplitCount, 2, false); StripeCompactionPolicy.StripeInformationProvider si = createStripesL0Only(3, 8); - verifyCompaction(policy, si, si.getStorefiles(), true, 2, 12L, OPEN_KEY, OPEN_KEY, true); + verifyCompaction(policy, si, si.getStoreFiles(), true, 2, 12L, OPEN_KEY, OPEN_KEY, true); si = createStripesL0Only(3, 10); // If result would be too large, split into smaller parts. 
-    verifyCompaction(policy, si, si.getStorefiles(), true, 3, 10L, OPEN_KEY, OPEN_KEY, true);
+    verifyCompaction(policy, si, si.getStoreFiles(), true, 3, 10L, OPEN_KEY, OPEN_KEY, true);
     policy = createPolicy(conf, defaultSplitSize, defaultSplitCount, 6, false);
-    verifyCompaction(policy, si, si.getStorefiles(), true, 6, 5L, OPEN_KEY, OPEN_KEY, true);
+    verifyCompaction(policy, si, si.getStoreFiles(), true, 6, 5L, OPEN_KEY, OPEN_KEY, true);
   }

   @Test
@@ -857,7 +857,7 @@ private static StripeInformationProvider createStripesWithFiles(List bou
     ConcatenatedLists sfs = new ConcatenatedLists<>();
     sfs.addAllSublists(stripes);
     sfs.addSublist(l0Files);
-    when(si.getStorefiles()).thenReturn(sfs);
+    when(si.getStoreFiles()).thenReturn(sfs);
     when(si.getStripes()).thenReturn(stripes);
     when(si.getStripeBoundaries()).thenReturn(boundariesList);
     when(si.getStripeCount()).thenReturn(stripes.size());

From 99e791ead7695922d1cc315244f04529b051df0c Mon Sep 17 00:00:00 2001
From: Kadir Ozdemir
Date: Wed, 15 May 2024 20:19:30 -0700
Subject: [PATCH 25/27] Set liveStoreFile to null only when live file tracking is disabled

---
 .../hbase/regionserver/DefaultStoreFileManager.java | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
index b20f75e1656f..5cba513a73c5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
@@ -72,10 +72,7 @@ static class StoreFileList {
     }
   }

-  private static final StoreFileList EMPTY_STORE_FILE_LIST =
-    new StoreFileList(ImmutableList.of(), null);
-
-  private volatile StoreFileList storeFiles = EMPTY_STORE_FILE_LIST;
+  private volatile StoreFileList storeFiles;

   /**
    * List of compacted files inside this store that needs to be excluded in reads because further
@@ -91,9 +88,11 @@ public DefaultStoreFileManager(CellComparator cellComparator,
     this.cellComparator = cellComparator;
     this.storeFileComparator = storeFileComparator;
     this.comConf = comConf;
-    this.blockingFileCount =
+    blockingFileCount =
       conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT);
-    this.enableLiveFileTracking = shouldEnableHistoricalCompactionFiles(conf);
+    enableLiveFileTracking = shouldEnableHistoricalCompactionFiles(conf);
+    storeFiles = new StoreFileList(ImmutableList.of(),
+      enableLiveFileTracking ? ImmutableList.of() : null);
   }

   private List getLiveFiles(Collection storeFiles) throws IOException {
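The constructor change above drops the shared EMPTY_STORE_FILE_LIST constant and instead builds the initial list per instance: the live-file member starts as an empty immutable list when historical compaction file tracking is enabled and is null only when tracking is disabled, so a null live list always means "tracking off" rather than "not yet initialized". A minimal standalone sketch of that initialization choice follows; the class and field names are illustrative stand-ins, not the actual HBase types.

  import java.util.Collections;
  import java.util.List;

  public class LiveFileListInitSketch {

    // Stand-in for StoreFileList: "live" is the subset of files holding the latest cells,
    // or null when live-file tracking is disabled.
    static final class FileList {
      final List<String> all;
      final List<String> live;

      FileList(List<String> all, List<String> live) {
        this.all = all;
        this.live = live;
      }
    }

    // Mirrors the constructor change: decide empty-vs-null once, up front.
    static FileList initialList(boolean enableLiveFileTracking) {
      return new FileList(Collections.<String> emptyList(),
        enableLiveFileTracking ? Collections.<String> emptyList() : null);
    }

    public static void main(String[] args) {
      System.out.println(initialList(true).live);  // []   -> tracking enabled, no files yet
      System.out.println(initialList(false).live); // null -> tracking disabled
    }
  }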
From a5c4e29d9c194b273dbd23e27f3a6ebb49b376bb Mon Sep 17 00:00:00 2001
From: Kadir Ozdemir
Date: Wed, 15 May 2024 22:27:33 -0700
Subject: [PATCH 26/27] Changes for spotless

---
 .../hadoop/hbase/regionserver/DefaultStoreFileManager.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
index 5cba513a73c5..920a490daa2a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DefaultStoreFileManager.java
@@ -91,8 +91,8 @@ public DefaultStoreFileManager(CellComparator cellComparator,
     blockingFileCount =
       conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT);
     enableLiveFileTracking = shouldEnableHistoricalCompactionFiles(conf);
-    storeFiles = new StoreFileList(ImmutableList.of(),
-      enableLiveFileTracking ? ImmutableList.of() : null);
+    storeFiles =
+      new StoreFileList(ImmutableList.of(), enableLiveFileTracking ? ImmutableList.of() : null);
   }

   private List getLiveFiles(Collection storeFiles) throws IOException {

From 29218e6e096dbe13f1eb67776bfcfd77eb4533ec Mon Sep 17 00:00:00 2001
From: Kadir Ozdemir
Date: Thu, 16 May 2024 19:07:42 -0700
Subject: [PATCH 27/27] Use assertTrue instead of assert

---
 .../hadoop/hbase/regionserver/TestStoreFileWriter.java | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java
index 6c880a5f5f4e..6146605cd23e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java
@@ -20,6 +20,7 @@
 import static org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder.NEW_VERSION_BEHAVIOR;
 import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;

 import java.io.IOException;
 import java.util.ArrayList;
@@ -223,7 +224,7 @@ private long getNewTimestamp(long timestamp) throws Exception {
     if (timestamp == newTimestamp) {
       Thread.sleep(1);
       newTimestamp = System.currentTimeMillis();
-      assert (timestamp < newTimestamp);
+      assertTrue(timestamp < newTimestamp);
     }
     return newTimestamp;
   }
@@ -341,9 +342,10 @@ private void scanAndCompare(boolean raw) throws Exception {
       for (int i = 0; i < size; i++) {
         Cell firstCell = firstRowList.get(i);
         Cell secondCell = secondRowList.get(i);
-        assert (CellUtil.matchingRowColumn(firstCell, secondCell));
-        assert (firstCell.getType() == secondCell.getType());
-        assert (Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(firstCell)));
+        assertTrue(CellUtil.matchingRowColumn(firstCell, secondCell));
+        assertTrue(firstCell.getType() == secondCell.getType());
+        assertTrue(
+          Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(secondCell)));
       }
     } while (firstHasMore && secondHasMore);
     assertEquals(firstHasMore, secondHasMore);
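The last commit replaces plain Java assert statements with JUnit's assertTrue. The distinction matters because a plain assert is evaluated only when the JVM is started with assertions enabled (-ea) and is otherwise a no-op, whereas assertTrue is evaluated unconditionally and fails the test whenever the condition is false. A minimal JUnit 4 sketch of the difference; the class and test names are illustrative and not part of the patch.

  import static org.junit.Assert.assertTrue;

  import org.junit.Test;

  public class AssertVsAssertTrueSketch {

    @Test
    public void timestampsIncrease() {
      long earlier = System.currentTimeMillis();
      long later = earlier + 1;

      // Checked only when the JVM runs with -ea; otherwise this line does nothing.
      assert earlier < later;

      // Always checked by JUnit, regardless of JVM flags.
      assertTrue(earlier < later);
    }
  }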