diff --git a/Classes/LevelDB.h b/Classes/LevelDB.h index 3fbf1a0..0c81e3b 100644 --- a/Classes/LevelDB.h +++ b/Classes/LevelDB.h @@ -333,9 +333,9 @@ NSData * NSDataFromLevelDBKey (LevelDBKey * key); */ - (void) enumerateKeysAndObjectsBackward:(BOOL)backward lazily:(BOOL)lazily - startingAtKey:(id)key - filteredByPredicate:(NSPredicate *)predicate - andPrefix:(id)prefix - usingBlock:(id)block; + startingAtKey:(id _Nullable)key + filteredByPredicate:(NSPredicate * _Nullable )predicate + andPrefix:(id _Nullable)prefix + usingBlock:(void(^ _Nonnull)(LevelDBKey * _Nonnull key, id _Nonnull value, BOOL * _Nonnull stop))block; @end diff --git a/Objective-LevelDB.xcodeproj/project.pbxproj b/Objective-LevelDB.xcodeproj/project.pbxproj new file mode 100644 index 0000000..8508883 --- /dev/null +++ b/Objective-LevelDB.xcodeproj/project.pbxproj @@ -0,0 +1,808 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 2D46163B23F31910006C4C7A /* Objective_LevelDB.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D46163923F31910006C4C7A /* Objective_LevelDB.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 2D46164923F31977006C4C7A /* LDBSnapshot.mm in Sources */ = {isa = PBXBuildFile; fileRef = 2D46164223F31976006C4C7A /* LDBSnapshot.mm */; settings = {COMPILER_FLAGS = "-fno-objc-arc -w -Xanalyzer -analyzer-disable-all-checks"; }; }; + 2D46164A23F31977006C4C7A /* LDBSnapshot.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D46164323F31976006C4C7A /* LDBSnapshot.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 2D46164B23F31977006C4C7A /* LevelDB.mm in Sources */ = {isa = PBXBuildFile; fileRef = 2D46164423F31976006C4C7A /* LevelDB.mm */; settings = {COMPILER_FLAGS = "-fno-objc-arc -w -Xanalyzer -analyzer-disable-all-checks"; }; }; + 2D46164C23F31977006C4C7A /* LevelDB.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D46164523F31976006C4C7A /* LevelDB.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 2D46164D23F31977006C4C7A /* LDBCommon.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D46164623F31976006C4C7A /* LDBCommon.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 2D46164E23F31977006C4C7A /* LDBWriteBatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D46164723F31976006C4C7A /* LDBWriteBatch.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 2D46164F23F31977006C4C7A /* LDBWriteBatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = 2D46164823F31976006C4C7A /* LDBWriteBatch.mm */; settings = {COMPILER_FLAGS = "-fno-objc-arc -w -Xanalyzer -analyzer-disable-all-checks"; }; }; + 2D461B3623F325EE006C4C7A /* testutil.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AD223F325EE006C4C7A /* testutil.cc */; }; + 2D461B3723F325EE006C4C7A /* status.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AD323F325EE006C4C7A /* status.cc */; }; + 2D461B3823F325EE006C4C7A /* arena.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AD423F325EE006C4C7A /* arena.cc */; }; + 2D461B3923F325EE006C4C7A /* bloom.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AD523F325EE006C4C7A /* bloom.cc */; }; + 2D461B3A23F325EE006C4C7A /* posix_logger.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AD623F325EE006C4C7A /* posix_logger.h */; }; + 2D461B3B23F325EE006C4C7A /* testharness.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AD723F325EE006C4C7A /* testharness.h */; }; + 2D461B3C23F325EE006C4C7A /* hash.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AD823F325EE006C4C7A /* hash.cc */; }; + 2D461B3D23F325EE006C4C7A /* coding.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AD923F325EE006C4C7A /* coding.h */; }; + 2D461B3E23F325EE006C4C7A /* logging.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461ADA23F325EE006C4C7A /* logging.h */; }; + 2D461B3F23F325EE006C4C7A /* filter_policy.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461ADB23F325EE006C4C7A /* filter_policy.cc */; }; + 2D461B4023F325EE006C4C7A /* histogram.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461ADC23F325EE006C4C7A /* histogram.cc */; }; + 2D461B4123F325EE006C4C7A /* crc32c.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461ADD23F325EE006C4C7A /* crc32c.cc */; }; + 2D461B4223F325EE006C4C7A /* mutexlock.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461ADE23F325EE006C4C7A /* mutexlock.h */; }; + 2D461B4323F325EE006C4C7A /* cache.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461ADF23F325EE006C4C7A /* cache.cc */; }; + 2D461B4423F325EE006C4C7A /* testharness.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AE023F325EE006C4C7A /* testharness.cc */; }; + 2D461B4523F325EE006C4C7A /* histogram.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AE123F325EE006C4C7A /* histogram.h */; }; + 2D461B4623F325EE006C4C7A /* crc32c.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AE223F325EE006C4C7A /* crc32c.h */; }; + 2D461B4723F325EE006C4C7A /* logging.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AE323F325EE006C4C7A /* logging.cc */; }; + 2D461B4823F325EE006C4C7A /* options.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AE423F325EE006C4C7A /* options.cc */; }; + 2D461B4923F325EE006C4C7A /* arena.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AE523F325EE006C4C7A /* arena.h */; }; + 2D461B4A23F325EE006C4C7A /* comparator.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AE623F325EE006C4C7A /* comparator.cc */; }; + 2D461B4B23F325EE006C4C7A /* env_posix_test_helper.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AE723F325EE006C4C7A /* env_posix_test_helper.h */; }; + 2D461B4C23F325EE006C4C7A /* coding.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AE823F325EE006C4C7A /* coding.cc */; }; + 2D461B4D23F325EE006C4C7A /* hash.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AE923F325EE006C4C7A /* hash.h */; }; + 2D461B4E23F325EE006C4C7A /* random.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AEA23F325EE006C4C7A /* random.h */; }; + 2D461B4F23F325EE006C4C7A /* env_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AEB23F325EE006C4C7A /* env_posix.cc */; }; + 2D461B5023F325EE006C4C7A /* testutil.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461AEC23F325EE006C4C7A /* testutil.h */; }; + 2D461B5123F325EE006C4C7A /* env.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AED23F325EE006C4C7A /* env.cc */; }; + 2D461B6023F325EE006C4C7A /* table.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461AFF23F325EE006C4C7A /* table.cc */; }; + 2D461B6123F325EE006C4C7A /* block.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B0023F325EE006C4C7A /* block.cc */; }; + 2D461B6223F325EE006C4C7A /* merger.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B0123F325EE006C4C7A /* merger.cc */; }; + 2D461B6323F325EE006C4C7A /* iterator.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B0223F325EE006C4C7A /* iterator.cc */; }; + 2D461B6423F325EE006C4C7A /* block_builder.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B0323F325EE006C4C7A /* block_builder.cc */; }; + 2D461B6523F325EE006C4C7A /* iterator_wrapper.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B0423F325EE006C4C7A /* iterator_wrapper.h */; }; + 2D461B6623F325EE006C4C7A /* merger.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B0523F325EE006C4C7A /* merger.h */; }; + 2D461B6723F325EE006C4C7A /* table_builder.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B0623F325EE006C4C7A /* table_builder.cc */; }; + 2D461B6823F325EE006C4C7A /* block_builder.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B0723F325EE006C4C7A /* block_builder.h */; }; + 2D461B6923F325EE006C4C7A /* filter_block.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B0823F325EE006C4C7A /* filter_block.cc */; }; + 2D461B6A23F325EE006C4C7A /* block.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B0923F325EE006C4C7A /* block.h */; }; + 2D461B6B23F325EE006C4C7A /* format.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B0A23F325EE006C4C7A /* format.cc */; }; + 2D461B6C23F325EE006C4C7A /* filter_block.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B0B23F325EE006C4C7A /* filter_block.h */; }; + 2D461B6D23F325EE006C4C7A /* two_level_iterator.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B0C23F325EE006C4C7A /* two_level_iterator.h */; }; + 2D461B6E23F325EE006C4C7A /* format.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B0D23F325EE006C4C7A /* format.h */; }; + 2D461B6F23F325EE006C4C7A /* two_level_iterator.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B0E23F325EE006C4C7A /* two_level_iterator.cc */; }; + 2D461B7023F325EE006C4C7A /* log_format.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B1023F325EE006C4C7A /* log_format.h */; }; + 2D461B7123F325EE006C4C7A /* log_writer.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1123F325EE006C4C7A /* log_writer.cc */; }; + 2D461B7223F325EE006C4C7A /* table_cache.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B1223F325EE006C4C7A /* table_cache.h */; }; + 2D461B7323F325EE006C4C7A /* db_impl.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1323F325EE006C4C7A /* db_impl.cc */; }; + 2D461B7423F325EE006C4C7A /* filename.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1423F325EE006C4C7A /* filename.cc */; }; + 2D461B7523F325EE006C4C7A /* skiplist.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B1523F325EE006C4C7A /* skiplist.h */; }; + 2D461B7623F325EE006C4C7A /* dbformat.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B1623F325EE006C4C7A /* dbformat.h */; }; + 2D461B7723F325EE006C4C7A /* table_cache.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1723F325EE006C4C7A /* table_cache.cc */; }; + 2D461B7823F325EE006C4C7A /* log_reader.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1823F325EE006C4C7A /* log_reader.cc */; }; + 2D461B7923F325EE006C4C7A /* write_batch.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1923F325EE006C4C7A /* write_batch.cc */; }; + 2D461B7A23F325EE006C4C7A /* c.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1A23F325EE006C4C7A /* c.cc */; }; + 2D461B7B23F325EE006C4C7A /* builder.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1B23F325EE006C4C7A /* builder.cc */; }; + 2D461B7C23F325EE006C4C7A /* log_writer.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B1C23F325EE006C4C7A /* log_writer.h */; }; + 2D461B7D23F325EE006C4C7A /* version_edit.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B1D23F325EE006C4C7A /* version_edit.cc */; }; + 2D461B7E23F325EE006C4C7A /* db_impl.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B1E23F325EE006C4C7A /* db_impl.h */; }; + 2D461B7F23F325EE006C4C7A /* db_iter.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B1F23F325EE006C4C7A /* db_iter.h */; }; + 2D461B8023F325EE006C4C7A /* dbformat.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B2023F325EE006C4C7A /* dbformat.cc */; }; + 2D461B8123F325EE006C4C7A /* memtable.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2123F325EE006C4C7A /* memtable.h */; }; + 2D461B8223F325EE006C4C7A /* version_edit.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2223F325EE006C4C7A /* version_edit.h */; }; + 2D461B8323F325EE006C4C7A /* version_set.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2323F325EE006C4C7A /* version_set.h */; }; + 2D461B8423F325EE006C4C7A /* builder.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2423F325EE006C4C7A /* builder.h */; }; + 2D461B8523F325EE006C4C7A /* version_set.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B2523F325EE006C4C7A /* version_set.cc */; }; + 2D461B8623F325EE006C4C7A /* dumpfile.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B2623F325EE006C4C7A /* dumpfile.cc */; }; + 2D461B8723F325EE006C4C7A /* log_reader.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2723F325EE006C4C7A /* log_reader.h */; }; + 2D461B8823F325EE006C4C7A /* write_batch_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2823F325EE006C4C7A /* write_batch_internal.h */; }; + 2D461B8923F325EE006C4C7A /* db_iter.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B2923F325EE006C4C7A /* db_iter.cc */; }; + 2D461B8A23F325EE006C4C7A /* repair.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B2A23F325EE006C4C7A /* repair.cc */; }; + 2D461B8B23F325EE006C4C7A /* filename.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2B23F325EE006C4C7A /* filename.h */; }; + 2D461B8C23F325EE006C4C7A /* snapshot.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2C23F325EE006C4C7A /* snapshot.h */; }; + 2D461B8D23F325EE006C4C7A /* memtable.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B2D23F325EE006C4C7A /* memtable.cc */; }; + 2D461B8E23F325EE006C4C7A /* atomic_pointer.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B2F23F325EE006C4C7A /* atomic_pointer.h */; }; + 2D461B8F23F325EE006C4C7A /* port.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B3023F325EE006C4C7A /* port.h */; }; + 2D461B9023F325EE006C4C7A /* port_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B3123F325EE006C4C7A /* port_posix.cc */; }; + 2D461B9123F325EE006C4C7A /* port_posix.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B3223F325EE006C4C7A /* port_posix.h */; }; + 2D461B9223F325EE006C4C7A /* port_example.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B3323F325EE006C4C7A /* port_example.h */; }; + 2D461B9323F325EE006C4C7A /* port_posix_sse.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2D461B3423F325EE006C4C7A /* port_posix_sse.cc */; }; + 2D461B9423F325EE006C4C7A /* thread_annotations.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B3523F325EE006C4C7A /* thread_annotations.h */; }; + 2D461BA423F327CF006C4C7A /* env.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9623F327CF006C4C7A /* env.h */; }; + 2D461BA523F327CF006C4C7A /* status.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9723F327CF006C4C7A /* status.h */; }; + 2D461BA623F327CF006C4C7A /* db.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9823F327CF006C4C7A /* db.h */; }; + 2D461BA723F327CF006C4C7A /* slice.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9923F327CF006C4C7A /* slice.h */; }; + 2D461BA823F327CF006C4C7A /* cache.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9A23F327CF006C4C7A /* cache.h */; }; + 2D461BA923F327CF006C4C7A /* write_batch.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9B23F327CF006C4C7A /* write_batch.h */; }; + 2D461BAA23F327CF006C4C7A /* options.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9C23F327CF006C4C7A /* options.h */; }; + 2D461BAB23F327CF006C4C7A /* dumpfile.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9D23F327CF006C4C7A /* dumpfile.h */; }; + 2D461BAC23F327CF006C4C7A /* c.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9E23F327CF006C4C7A /* c.h */; }; + 2D461BAD23F327CF006C4C7A /* filter_policy.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461B9F23F327CF006C4C7A /* filter_policy.h */; }; + 2D461BAE23F327CF006C4C7A /* comparator.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461BA023F327CF006C4C7A /* comparator.h */; }; + 2D461BAF23F327CF006C4C7A /* table.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461BA123F327CF006C4C7A /* table.h */; }; + 2D461BB023F327CF006C4C7A /* table_builder.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461BA223F327CF006C4C7A /* table_builder.h */; }; + 2D461BB123F327CF006C4C7A /* iterator.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D461BA323F327CF006C4C7A /* iterator.h */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 2D46163623F31910006C4C7A /* Objective_LevelDB.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Objective_LevelDB.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 2D46163923F31910006C4C7A /* Objective_LevelDB.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Objective_LevelDB.h; sourceTree = ""; }; + 2D46163A23F31910006C4C7A /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 2D46164223F31976006C4C7A /* LDBSnapshot.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = LDBSnapshot.mm; sourceTree = ""; }; + 2D46164323F31976006C4C7A /* LDBSnapshot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LDBSnapshot.h; sourceTree = ""; }; + 2D46164423F31976006C4C7A /* LevelDB.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = LevelDB.mm; sourceTree = ""; }; + 2D46164523F31976006C4C7A /* LevelDB.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LevelDB.h; sourceTree = ""; }; + 2D46164623F31976006C4C7A /* LDBCommon.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LDBCommon.h; sourceTree = ""; }; + 2D46164723F31976006C4C7A /* LDBWriteBatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LDBWriteBatch.h; sourceTree = ""; }; + 2D46164823F31976006C4C7A /* LDBWriteBatch.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = LDBWriteBatch.mm; sourceTree = ""; }; + 2D461AD223F325EE006C4C7A /* testutil.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = testutil.cc; sourceTree = ""; }; + 2D461AD323F325EE006C4C7A /* status.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = status.cc; sourceTree = ""; }; + 2D461AD423F325EE006C4C7A /* arena.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = arena.cc; sourceTree = ""; }; + 2D461AD523F325EE006C4C7A /* bloom.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bloom.cc; sourceTree = ""; }; + 2D461AD623F325EE006C4C7A /* posix_logger.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = posix_logger.h; sourceTree = ""; }; + 2D461AD723F325EE006C4C7A /* testharness.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = testharness.h; sourceTree = ""; }; + 2D461AD823F325EE006C4C7A /* hash.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hash.cc; sourceTree = ""; }; + 2D461AD923F325EE006C4C7A /* coding.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = coding.h; sourceTree = ""; }; + 2D461ADA23F325EE006C4C7A /* logging.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = logging.h; sourceTree = ""; }; + 2D461ADB23F325EE006C4C7A /* filter_policy.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filter_policy.cc; sourceTree = ""; }; + 2D461ADC23F325EE006C4C7A /* histogram.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = histogram.cc; sourceTree = ""; }; + 2D461ADD23F325EE006C4C7A /* crc32c.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = crc32c.cc; sourceTree = ""; }; + 2D461ADE23F325EE006C4C7A /* mutexlock.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mutexlock.h; sourceTree = ""; }; + 2D461ADF23F325EE006C4C7A /* cache.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = cache.cc; sourceTree = ""; }; + 2D461AE023F325EE006C4C7A /* testharness.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = testharness.cc; sourceTree = ""; }; + 2D461AE123F325EE006C4C7A /* histogram.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = histogram.h; sourceTree = ""; }; + 2D461AE223F325EE006C4C7A /* crc32c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = crc32c.h; sourceTree = ""; }; + 2D461AE323F325EE006C4C7A /* logging.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = logging.cc; sourceTree = ""; }; + 2D461AE423F325EE006C4C7A /* options.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = options.cc; sourceTree = ""; }; + 2D461AE523F325EE006C4C7A /* arena.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = arena.h; sourceTree = ""; }; + 2D461AE623F325EE006C4C7A /* comparator.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = comparator.cc; sourceTree = ""; }; + 2D461AE723F325EE006C4C7A /* env_posix_test_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = env_posix_test_helper.h; sourceTree = ""; }; + 2D461AE823F325EE006C4C7A /* coding.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = coding.cc; sourceTree = ""; }; + 2D461AE923F325EE006C4C7A /* hash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hash.h; sourceTree = ""; }; + 2D461AEA23F325EE006C4C7A /* random.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = random.h; sourceTree = ""; }; + 2D461AEB23F325EE006C4C7A /* env_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = env_posix.cc; sourceTree = ""; }; + 2D461AEC23F325EE006C4C7A /* testutil.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = testutil.h; sourceTree = ""; }; + 2D461AED23F325EE006C4C7A /* env.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = env.cc; sourceTree = ""; }; + 2D461AFF23F325EE006C4C7A /* table.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = table.cc; sourceTree = ""; }; + 2D461B0023F325EE006C4C7A /* block.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = block.cc; sourceTree = ""; }; + 2D461B0123F325EE006C4C7A /* merger.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = merger.cc; sourceTree = ""; }; + 2D461B0223F325EE006C4C7A /* iterator.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = iterator.cc; sourceTree = ""; }; + 2D461B0323F325EE006C4C7A /* block_builder.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = block_builder.cc; sourceTree = ""; }; + 2D461B0423F325EE006C4C7A /* iterator_wrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = iterator_wrapper.h; sourceTree = ""; }; + 2D461B0523F325EE006C4C7A /* merger.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = merger.h; sourceTree = ""; }; + 2D461B0623F325EE006C4C7A /* table_builder.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = table_builder.cc; sourceTree = ""; }; + 2D461B0723F325EE006C4C7A /* block_builder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = block_builder.h; sourceTree = ""; }; + 2D461B0823F325EE006C4C7A /* filter_block.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filter_block.cc; sourceTree = ""; }; + 2D461B0923F325EE006C4C7A /* block.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = block.h; sourceTree = ""; }; + 2D461B0A23F325EE006C4C7A /* format.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = format.cc; sourceTree = ""; }; + 2D461B0B23F325EE006C4C7A /* filter_block.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filter_block.h; sourceTree = ""; }; + 2D461B0C23F325EE006C4C7A /* two_level_iterator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = two_level_iterator.h; sourceTree = ""; }; + 2D461B0D23F325EE006C4C7A /* format.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = format.h; sourceTree = ""; }; + 2D461B0E23F325EE006C4C7A /* two_level_iterator.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = two_level_iterator.cc; sourceTree = ""; }; + 2D461B1023F325EE006C4C7A /* log_format.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = log_format.h; sourceTree = ""; }; + 2D461B1123F325EE006C4C7A /* log_writer.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = log_writer.cc; sourceTree = ""; }; + 2D461B1223F325EE006C4C7A /* table_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = table_cache.h; sourceTree = ""; }; + 2D461B1323F325EE006C4C7A /* db_impl.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = db_impl.cc; sourceTree = ""; }; + 2D461B1423F325EE006C4C7A /* filename.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filename.cc; sourceTree = ""; }; + 2D461B1523F325EE006C4C7A /* skiplist.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = skiplist.h; sourceTree = ""; }; + 2D461B1623F325EE006C4C7A /* dbformat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dbformat.h; sourceTree = ""; }; + 2D461B1723F325EE006C4C7A /* table_cache.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = table_cache.cc; sourceTree = ""; }; + 2D461B1823F325EE006C4C7A /* log_reader.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = log_reader.cc; sourceTree = ""; }; + 2D461B1923F325EE006C4C7A /* write_batch.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = write_batch.cc; sourceTree = ""; }; + 2D461B1A23F325EE006C4C7A /* c.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = c.cc; sourceTree = ""; }; + 2D461B1B23F325EE006C4C7A /* builder.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = builder.cc; sourceTree = ""; }; + 2D461B1C23F325EE006C4C7A /* log_writer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = log_writer.h; sourceTree = ""; }; + 2D461B1D23F325EE006C4C7A /* version_edit.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = version_edit.cc; sourceTree = ""; }; + 2D461B1E23F325EE006C4C7A /* db_impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = db_impl.h; sourceTree = ""; }; + 2D461B1F23F325EE006C4C7A /* db_iter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = db_iter.h; sourceTree = ""; }; + 2D461B2023F325EE006C4C7A /* dbformat.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = dbformat.cc; sourceTree = ""; }; + 2D461B2123F325EE006C4C7A /* memtable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memtable.h; sourceTree = ""; }; + 2D461B2223F325EE006C4C7A /* version_edit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = version_edit.h; sourceTree = ""; }; + 2D461B2323F325EE006C4C7A /* version_set.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = version_set.h; sourceTree = ""; }; + 2D461B2423F325EE006C4C7A /* builder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = builder.h; sourceTree = ""; }; + 2D461B2523F325EE006C4C7A /* version_set.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = version_set.cc; sourceTree = ""; }; + 2D461B2623F325EE006C4C7A /* dumpfile.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = dumpfile.cc; sourceTree = ""; }; + 2D461B2723F325EE006C4C7A /* log_reader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = log_reader.h; sourceTree = ""; }; + 2D461B2823F325EE006C4C7A /* write_batch_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = write_batch_internal.h; sourceTree = ""; }; + 2D461B2923F325EE006C4C7A /* db_iter.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = db_iter.cc; sourceTree = ""; }; + 2D461B2A23F325EE006C4C7A /* repair.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = repair.cc; sourceTree = ""; }; + 2D461B2B23F325EE006C4C7A /* filename.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filename.h; sourceTree = ""; }; + 2D461B2C23F325EE006C4C7A /* snapshot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = snapshot.h; sourceTree = ""; }; + 2D461B2D23F325EE006C4C7A /* memtable.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memtable.cc; sourceTree = ""; }; + 2D461B2F23F325EE006C4C7A /* atomic_pointer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = atomic_pointer.h; sourceTree = ""; }; + 2D461B3023F325EE006C4C7A /* port.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = port.h; sourceTree = ""; }; + 2D461B3123F325EE006C4C7A /* port_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = port_posix.cc; sourceTree = ""; }; + 2D461B3223F325EE006C4C7A /* port_posix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = port_posix.h; sourceTree = ""; }; + 2D461B3323F325EE006C4C7A /* port_example.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = port_example.h; sourceTree = ""; }; + 2D461B3423F325EE006C4C7A /* port_posix_sse.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = port_posix_sse.cc; sourceTree = ""; }; + 2D461B3523F325EE006C4C7A /* thread_annotations.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = thread_annotations.h; sourceTree = ""; }; + 2D461B9623F327CF006C4C7A /* env.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = env.h; sourceTree = ""; }; + 2D461B9723F327CF006C4C7A /* status.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = status.h; sourceTree = ""; }; + 2D461B9823F327CF006C4C7A /* db.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = db.h; sourceTree = ""; }; + 2D461B9923F327CF006C4C7A /* slice.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slice.h; sourceTree = ""; }; + 2D461B9A23F327CF006C4C7A /* cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cache.h; sourceTree = ""; }; + 2D461B9B23F327CF006C4C7A /* write_batch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = write_batch.h; sourceTree = ""; }; + 2D461B9C23F327CF006C4C7A /* options.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = options.h; sourceTree = ""; }; + 2D461B9D23F327CF006C4C7A /* dumpfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dumpfile.h; sourceTree = ""; }; + 2D461B9E23F327CF006C4C7A /* c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = c.h; sourceTree = ""; }; + 2D461B9F23F327CF006C4C7A /* filter_policy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filter_policy.h; sourceTree = ""; }; + 2D461BA023F327CF006C4C7A /* comparator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = comparator.h; sourceTree = ""; }; + 2D461BA123F327CF006C4C7A /* table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = table.h; sourceTree = ""; }; + 2D461BA223F327CF006C4C7A /* table_builder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = table_builder.h; sourceTree = ""; }; + 2D461BA323F327CF006C4C7A /* iterator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = iterator.h; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 2D46163323F31910006C4C7A /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 2D46162C23F31910006C4C7A = { + isa = PBXGroup; + children = ( + 2D46163823F31910006C4C7A /* Objective-LevelDB */, + 2D461AD023F325EE006C4C7A /* leveldb-library */, + 2D46163723F31910006C4C7A /* Products */, + ); + sourceTree = ""; + }; + 2D46163723F31910006C4C7A /* Products */ = { + isa = PBXGroup; + children = ( + 2D46163623F31910006C4C7A /* Objective_LevelDB.framework */, + ); + name = Products; + sourceTree = ""; + }; + 2D46163823F31910006C4C7A /* Objective-LevelDB */ = { + isa = PBXGroup; + children = ( + 2D46163923F31910006C4C7A /* Objective_LevelDB.h */, + 2D46163A23F31910006C4C7A /* Info.plist */, + 2D46164123F31976006C4C7A /* Classes */, + ); + path = "Objective-LevelDB"; + sourceTree = ""; + }; + 2D46164123F31976006C4C7A /* Classes */ = { + isa = PBXGroup; + children = ( + 2D46164223F31976006C4C7A /* LDBSnapshot.mm */, + 2D46164323F31976006C4C7A /* LDBSnapshot.h */, + 2D46164423F31976006C4C7A /* LevelDB.mm */, + 2D46164523F31976006C4C7A /* LevelDB.h */, + 2D46164623F31976006C4C7A /* LDBCommon.h */, + 2D46164723F31976006C4C7A /* LDBWriteBatch.h */, + 2D46164823F31976006C4C7A /* LDBWriteBatch.mm */, + ); + path = Classes; + sourceTree = SOURCE_ROOT; + }; + 2D461AD023F325EE006C4C7A /* leveldb-library */ = { + isa = PBXGroup; + children = ( + 2D461B9523F327CF006C4C7A /* leveldb */, + 2D461AD123F325EE006C4C7A /* util */, + 2D461AFE23F325EE006C4C7A /* table */, + 2D461B0F23F325EE006C4C7A /* db */, + 2D461B2E23F325EE006C4C7A /* port */, + ); + path = "leveldb-library"; + sourceTree = ""; + }; + 2D461AD123F325EE006C4C7A /* util */ = { + isa = PBXGroup; + children = ( + 2D461AD223F325EE006C4C7A /* testutil.cc */, + 2D461AD323F325EE006C4C7A /* status.cc */, + 2D461AD423F325EE006C4C7A /* arena.cc */, + 2D461AD523F325EE006C4C7A /* bloom.cc */, + 2D461AD623F325EE006C4C7A /* posix_logger.h */, + 2D461AD723F325EE006C4C7A /* testharness.h */, + 2D461AD823F325EE006C4C7A /* hash.cc */, + 2D461AD923F325EE006C4C7A /* coding.h */, + 2D461ADA23F325EE006C4C7A /* logging.h */, + 2D461ADB23F325EE006C4C7A /* filter_policy.cc */, + 2D461ADC23F325EE006C4C7A /* histogram.cc */, + 2D461ADD23F325EE006C4C7A /* crc32c.cc */, + 2D461ADE23F325EE006C4C7A /* mutexlock.h */, + 2D461ADF23F325EE006C4C7A /* cache.cc */, + 2D461AE023F325EE006C4C7A /* testharness.cc */, + 2D461AE123F325EE006C4C7A /* histogram.h */, + 2D461AE223F325EE006C4C7A /* crc32c.h */, + 2D461AE323F325EE006C4C7A /* logging.cc */, + 2D461AE423F325EE006C4C7A /* options.cc */, + 2D461AE523F325EE006C4C7A /* arena.h */, + 2D461AE623F325EE006C4C7A /* comparator.cc */, + 2D461AE723F325EE006C4C7A /* env_posix_test_helper.h */, + 2D461AE823F325EE006C4C7A /* coding.cc */, + 2D461AE923F325EE006C4C7A /* hash.h */, + 2D461AEA23F325EE006C4C7A /* random.h */, + 2D461AEB23F325EE006C4C7A /* env_posix.cc */, + 2D461AEC23F325EE006C4C7A /* testutil.h */, + 2D461AED23F325EE006C4C7A /* env.cc */, + ); + path = util; + sourceTree = ""; + }; + 2D461AFE23F325EE006C4C7A /* table */ = { + isa = PBXGroup; + children = ( + 2D461AFF23F325EE006C4C7A /* table.cc */, + 2D461B0023F325EE006C4C7A /* block.cc */, + 2D461B0123F325EE006C4C7A /* merger.cc */, + 2D461B0223F325EE006C4C7A /* iterator.cc */, + 2D461B0323F325EE006C4C7A /* block_builder.cc */, + 2D461B0423F325EE006C4C7A /* iterator_wrapper.h */, + 2D461B0523F325EE006C4C7A /* merger.h */, + 2D461B0623F325EE006C4C7A /* table_builder.cc */, + 2D461B0723F325EE006C4C7A /* block_builder.h */, + 2D461B0823F325EE006C4C7A /* filter_block.cc */, + 2D461B0923F325EE006C4C7A /* block.h */, + 2D461B0A23F325EE006C4C7A /* format.cc */, + 2D461B0B23F325EE006C4C7A /* filter_block.h */, + 2D461B0C23F325EE006C4C7A /* two_level_iterator.h */, + 2D461B0D23F325EE006C4C7A /* format.h */, + 2D461B0E23F325EE006C4C7A /* two_level_iterator.cc */, + ); + path = table; + sourceTree = ""; + }; + 2D461B0F23F325EE006C4C7A /* db */ = { + isa = PBXGroup; + children = ( + 2D461B1023F325EE006C4C7A /* log_format.h */, + 2D461B1123F325EE006C4C7A /* log_writer.cc */, + 2D461B1223F325EE006C4C7A /* table_cache.h */, + 2D461B1323F325EE006C4C7A /* db_impl.cc */, + 2D461B1423F325EE006C4C7A /* filename.cc */, + 2D461B1523F325EE006C4C7A /* skiplist.h */, + 2D461B1623F325EE006C4C7A /* dbformat.h */, + 2D461B1723F325EE006C4C7A /* table_cache.cc */, + 2D461B1823F325EE006C4C7A /* log_reader.cc */, + 2D461B1923F325EE006C4C7A /* write_batch.cc */, + 2D461B1A23F325EE006C4C7A /* c.cc */, + 2D461B1B23F325EE006C4C7A /* builder.cc */, + 2D461B1C23F325EE006C4C7A /* log_writer.h */, + 2D461B1D23F325EE006C4C7A /* version_edit.cc */, + 2D461B1E23F325EE006C4C7A /* db_impl.h */, + 2D461B1F23F325EE006C4C7A /* db_iter.h */, + 2D461B2023F325EE006C4C7A /* dbformat.cc */, + 2D461B2123F325EE006C4C7A /* memtable.h */, + 2D461B2223F325EE006C4C7A /* version_edit.h */, + 2D461B2323F325EE006C4C7A /* version_set.h */, + 2D461B2423F325EE006C4C7A /* builder.h */, + 2D461B2523F325EE006C4C7A /* version_set.cc */, + 2D461B2623F325EE006C4C7A /* dumpfile.cc */, + 2D461B2723F325EE006C4C7A /* log_reader.h */, + 2D461B2823F325EE006C4C7A /* write_batch_internal.h */, + 2D461B2923F325EE006C4C7A /* db_iter.cc */, + 2D461B2A23F325EE006C4C7A /* repair.cc */, + 2D461B2B23F325EE006C4C7A /* filename.h */, + 2D461B2C23F325EE006C4C7A /* snapshot.h */, + 2D461B2D23F325EE006C4C7A /* memtable.cc */, + ); + path = db; + sourceTree = ""; + }; + 2D461B2E23F325EE006C4C7A /* port */ = { + isa = PBXGroup; + children = ( + 2D461B2F23F325EE006C4C7A /* atomic_pointer.h */, + 2D461B3023F325EE006C4C7A /* port.h */, + 2D461B3123F325EE006C4C7A /* port_posix.cc */, + 2D461B3223F325EE006C4C7A /* port_posix.h */, + 2D461B3323F325EE006C4C7A /* port_example.h */, + 2D461B3423F325EE006C4C7A /* port_posix_sse.cc */, + 2D461B3523F325EE006C4C7A /* thread_annotations.h */, + ); + path = port; + sourceTree = ""; + }; + 2D461B9523F327CF006C4C7A /* leveldb */ = { + isa = PBXGroup; + children = ( + 2D461B9623F327CF006C4C7A /* env.h */, + 2D461B9723F327CF006C4C7A /* status.h */, + 2D461B9823F327CF006C4C7A /* db.h */, + 2D461B9923F327CF006C4C7A /* slice.h */, + 2D461B9A23F327CF006C4C7A /* cache.h */, + 2D461B9B23F327CF006C4C7A /* write_batch.h */, + 2D461B9C23F327CF006C4C7A /* options.h */, + 2D461B9D23F327CF006C4C7A /* dumpfile.h */, + 2D461B9E23F327CF006C4C7A /* c.h */, + 2D461B9F23F327CF006C4C7A /* filter_policy.h */, + 2D461BA023F327CF006C4C7A /* comparator.h */, + 2D461BA123F327CF006C4C7A /* table.h */, + 2D461BA223F327CF006C4C7A /* table_builder.h */, + 2D461BA323F327CF006C4C7A /* iterator.h */, + ); + path = leveldb; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 2D46163123F31910006C4C7A /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 2D461B3A23F325EE006C4C7A /* posix_logger.h in Headers */, + 2D461B8723F325EE006C4C7A /* log_reader.h in Headers */, + 2D461B4523F325EE006C4C7A /* histogram.h in Headers */, + 2D461B8223F325EE006C4C7A /* version_edit.h in Headers */, + 2D461B5023F325EE006C4C7A /* testutil.h in Headers */, + 2D461B4623F325EE006C4C7A /* crc32c.h in Headers */, + 2D461BAE23F327CF006C4C7A /* comparator.h in Headers */, + 2D461B4223F325EE006C4C7A /* mutexlock.h in Headers */, + 2D461BB023F327CF006C4C7A /* table_builder.h in Headers */, + 2D461BA623F327CF006C4C7A /* db.h in Headers */, + 2D46164E23F31977006C4C7A /* LDBWriteBatch.h in Headers */, + 2D461B8B23F325EE006C4C7A /* filename.h in Headers */, + 2D461BAD23F327CF006C4C7A /* filter_policy.h in Headers */, + 2D461B7523F325EE006C4C7A /* skiplist.h in Headers */, + 2D461B4D23F325EE006C4C7A /* hash.h in Headers */, + 2D461B9223F325EE006C4C7A /* port_example.h in Headers */, + 2D461BA423F327CF006C4C7A /* env.h in Headers */, + 2D461BAC23F327CF006C4C7A /* c.h in Headers */, + 2D461B3D23F325EE006C4C7A /* coding.h in Headers */, + 2D461BA523F327CF006C4C7A /* status.h in Headers */, + 2D461B8823F325EE006C4C7A /* write_batch_internal.h in Headers */, + 2D461B6D23F325EE006C4C7A /* two_level_iterator.h in Headers */, + 2D461B6823F325EE006C4C7A /* block_builder.h in Headers */, + 2D46164D23F31977006C4C7A /* LDBCommon.h in Headers */, + 2D461B6A23F325EE006C4C7A /* block.h in Headers */, + 2D461BA923F327CF006C4C7A /* write_batch.h in Headers */, + 2D461B8323F325EE006C4C7A /* version_set.h in Headers */, + 2D461B8123F325EE006C4C7A /* memtable.h in Headers */, + 2D461B3E23F325EE006C4C7A /* logging.h in Headers */, + 2D46164C23F31977006C4C7A /* LevelDB.h in Headers */, + 2D461B7023F325EE006C4C7A /* log_format.h in Headers */, + 2D46163B23F31910006C4C7A /* Objective_LevelDB.h in Headers */, + 2D461B8423F325EE006C4C7A /* builder.h in Headers */, + 2D461B7223F325EE006C4C7A /* table_cache.h in Headers */, + 2D461B6523F325EE006C4C7A /* iterator_wrapper.h in Headers */, + 2D461B8E23F325EE006C4C7A /* atomic_pointer.h in Headers */, + 2D461B7623F325EE006C4C7A /* dbformat.h in Headers */, + 2D461B3B23F325EE006C4C7A /* testharness.h in Headers */, + 2D461BAB23F327CF006C4C7A /* dumpfile.h in Headers */, + 2D461B6C23F325EE006C4C7A /* filter_block.h in Headers */, + 2D461B4923F325EE006C4C7A /* arena.h in Headers */, + 2D461B8F23F325EE006C4C7A /* port.h in Headers */, + 2D461B9423F325EE006C4C7A /* thread_annotations.h in Headers */, + 2D461B4B23F325EE006C4C7A /* env_posix_test_helper.h in Headers */, + 2D461B8C23F325EE006C4C7A /* snapshot.h in Headers */, + 2D461BAA23F327CF006C4C7A /* options.h in Headers */, + 2D461BAF23F327CF006C4C7A /* table.h in Headers */, + 2D46164A23F31977006C4C7A /* LDBSnapshot.h in Headers */, + 2D461BA823F327CF006C4C7A /* cache.h in Headers */, + 2D461B9123F325EE006C4C7A /* port_posix.h in Headers */, + 2D461B4E23F325EE006C4C7A /* random.h in Headers */, + 2D461B6623F325EE006C4C7A /* merger.h in Headers */, + 2D461B6E23F325EE006C4C7A /* format.h in Headers */, + 2D461B7E23F325EE006C4C7A /* db_impl.h in Headers */, + 2D461BB123F327CF006C4C7A /* iterator.h in Headers */, + 2D461BA723F327CF006C4C7A /* slice.h in Headers */, + 2D461B7F23F325EE006C4C7A /* db_iter.h in Headers */, + 2D461B7C23F325EE006C4C7A /* log_writer.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 2D46163523F31910006C4C7A /* Objective-LevelDB */ = { + isa = PBXNativeTarget; + buildConfigurationList = 2D46163E23F31910006C4C7A /* Build configuration list for PBXNativeTarget "Objective-LevelDB" */; + buildPhases = ( + 2D46163123F31910006C4C7A /* Headers */, + 2D46163223F31910006C4C7A /* Sources */, + 2D46163323F31910006C4C7A /* Frameworks */, + 2D46163423F31910006C4C7A /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "Objective-LevelDB"; + productName = "Objective-LevelDB"; + productReference = 2D46163623F31910006C4C7A /* Objective_LevelDB.framework */; + productType = "com.apple.product-type.framework"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 2D46162D23F31910006C4C7A /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 1130; + ORGANIZATIONNAME = AppUnite; + TargetAttributes = { + 2D46163523F31910006C4C7A = { + CreatedOnToolsVersion = 11.3.1; + LastSwiftMigration = 1130; + }; + }; + }; + buildConfigurationList = 2D46163023F31910006C4C7A /* Build configuration list for PBXProject "Objective-LevelDB" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 2D46162C23F31910006C4C7A; + productRefGroup = 2D46163723F31910006C4C7A /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 2D46163523F31910006C4C7A /* Objective-LevelDB */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 2D46163423F31910006C4C7A /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 2D46163223F31910006C4C7A /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 2D46164B23F31977006C4C7A /* LevelDB.mm in Sources */, + 2D461B5123F325EE006C4C7A /* env.cc in Sources */, + 2D461B4423F325EE006C4C7A /* testharness.cc in Sources */, + 2D461B7323F325EE006C4C7A /* db_impl.cc in Sources */, + 2D461B3823F325EE006C4C7A /* arena.cc in Sources */, + 2D461B8923F325EE006C4C7A /* db_iter.cc in Sources */, + 2D461B3723F325EE006C4C7A /* status.cc in Sources */, + 2D461B4823F325EE006C4C7A /* options.cc in Sources */, + 2D461B3623F325EE006C4C7A /* testutil.cc in Sources */, + 2D461B3923F325EE006C4C7A /* bloom.cc in Sources */, + 2D461B6723F325EE006C4C7A /* table_builder.cc in Sources */, + 2D461B6123F325EE006C4C7A /* block.cc in Sources */, + 2D461B7A23F325EE006C4C7A /* c.cc in Sources */, + 2D46164F23F31977006C4C7A /* LDBWriteBatch.mm in Sources */, + 2D461B7B23F325EE006C4C7A /* builder.cc in Sources */, + 2D461B3F23F325EE006C4C7A /* filter_policy.cc in Sources */, + 2D461B8A23F325EE006C4C7A /* repair.cc in Sources */, + 2D461B4F23F325EE006C4C7A /* env_posix.cc in Sources */, + 2D461B4A23F325EE006C4C7A /* comparator.cc in Sources */, + 2D461B7423F325EE006C4C7A /* filename.cc in Sources */, + 2D461B7D23F325EE006C4C7A /* version_edit.cc in Sources */, + 2D46164923F31977006C4C7A /* LDBSnapshot.mm in Sources */, + 2D461B8023F325EE006C4C7A /* dbformat.cc in Sources */, + 2D461B6F23F325EE006C4C7A /* two_level_iterator.cc in Sources */, + 2D461B4323F325EE006C4C7A /* cache.cc in Sources */, + 2D461B4C23F325EE006C4C7A /* coding.cc in Sources */, + 2D461B4023F325EE006C4C7A /* histogram.cc in Sources */, + 2D461B6323F325EE006C4C7A /* iterator.cc in Sources */, + 2D461B6223F325EE006C4C7A /* merger.cc in Sources */, + 2D461B6023F325EE006C4C7A /* table.cc in Sources */, + 2D461B7923F325EE006C4C7A /* write_batch.cc in Sources */, + 2D461B8623F325EE006C4C7A /* dumpfile.cc in Sources */, + 2D461B9023F325EE006C4C7A /* port_posix.cc in Sources */, + 2D461B7823F325EE006C4C7A /* log_reader.cc in Sources */, + 2D461B6B23F325EE006C4C7A /* format.cc in Sources */, + 2D461B4723F325EE006C4C7A /* logging.cc in Sources */, + 2D461B6423F325EE006C4C7A /* block_builder.cc in Sources */, + 2D461B7123F325EE006C4C7A /* log_writer.cc in Sources */, + 2D461B6923F325EE006C4C7A /* filter_block.cc in Sources */, + 2D461B7723F325EE006C4C7A /* table_cache.cc in Sources */, + 2D461B8D23F325EE006C4C7A /* memtable.cc in Sources */, + 2D461B8523F325EE006C4C7A /* version_set.cc in Sources */, + 2D461B4123F325EE006C4C7A /* crc32c.cc in Sources */, + 2D461B3C23F325EE006C4C7A /* hash.cc in Sources */, + 2D461B9323F325EE006C4C7A /* port_posix_sse.cc in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 2D46163C23F31910006C4C7A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + LEVELDB_PLATFORM_POSIX, + OS_MACOSX, + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_OBJC_BRIDGING_HEADER = "$(SRCROOT)/Objective-LevelDB/Objective_LevelDB.h"; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Debug; + }; + 2D46163D23F31910006C4C7A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_NO_COMMON_BLOCKS = YES; + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(inherited)", + LEVELDB_PLATFORM_POSIX, + OS_MACOSX, + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 8.0; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_OBJC_BRIDGING_HEADER = "$(SRCROOT)/Objective-LevelDB/Objective_LevelDB.h"; + VALIDATE_PRODUCT = YES; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Release; + }; + 2D46163F23F31910006C4C7A /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_STYLE = Automatic; + DEFINES_MODULE = YES; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + "$(SRCROOT)/leveldb-library", + ); + INFOPLIST_FILE = "Objective-LevelDB/Info.plist"; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "com.appunite.Objective-LevelDB"; + PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 2D46164023F31910006C4C7A /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_STYLE = Automatic; + DEFINES_MODULE = YES; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + HEADER_SEARCH_PATHS = ( + "$(inherited)", + "$(SRCROOT)/leveldb-library", + ); + INFOPLIST_FILE = "Objective-LevelDB/Info.plist"; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "com.appunite.Objective-LevelDB"; + PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 2D46163023F31910006C4C7A /* Build configuration list for PBXProject "Objective-LevelDB" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 2D46163C23F31910006C4C7A /* Debug */, + 2D46163D23F31910006C4C7A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 2D46163E23F31910006C4C7A /* Build configuration list for PBXNativeTarget "Objective-LevelDB" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 2D46163F23F31910006C4C7A /* Debug */, + 2D46164023F31910006C4C7A /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 2D46162D23F31910006C4C7A /* Project object */; +} diff --git a/Objective-LevelDB.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/Objective-LevelDB.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 0000000..635f889 --- /dev/null +++ b/Objective-LevelDB.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/Objective-LevelDB.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/Objective-LevelDB.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 0000000..18d9810 --- /dev/null +++ b/Objective-LevelDB.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/Objective-LevelDB.xcodeproj/xcshareddata/xcschemes/Objective-LevelDB.xcscheme b/Objective-LevelDB.xcodeproj/xcshareddata/xcschemes/Objective-LevelDB.xcscheme new file mode 100644 index 0000000..48f4396 --- /dev/null +++ b/Objective-LevelDB.xcodeproj/xcshareddata/xcschemes/Objective-LevelDB.xcscheme @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Objective-LevelDB/Info.plist b/Objective-LevelDB/Info.plist new file mode 100644 index 0000000..9bcb244 --- /dev/null +++ b/Objective-LevelDB/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + $(CURRENT_PROJECT_VERSION) + + diff --git a/Objective-LevelDB/Objective_LevelDB.h b/Objective-LevelDB/Objective_LevelDB.h new file mode 100644 index 0000000..3b916dc --- /dev/null +++ b/Objective-LevelDB/Objective_LevelDB.h @@ -0,0 +1,19 @@ +// +// Objective_LevelDB.h +// Objective-LevelDB +// +// Created by Damian Kolasiński on 11/02/2020. +// Copyright © 2020 AppUnite. All rights reserved. +// + +#import + +//! Project version number for Objective_LevelDB. +FOUNDATION_EXPORT double Objective_LevelDBVersionNumber; + +//! Project version string for Objective_LevelDB. +FOUNDATION_EXPORT const unsigned char Objective_LevelDBVersionString[]; + +// In this header, you should import all the public headers of your framework using statements like #import + +#import "LevelDB.h" diff --git a/Objective-LevelDB.podspec b/ObjectiveLevelDBappunite.podspec similarity index 90% rename from Objective-LevelDB.podspec rename to ObjectiveLevelDBappunite.podspec index dfc9048..5d42a51 100644 --- a/Objective-LevelDB.podspec +++ b/ObjectiveLevelDBappunite.podspec @@ -1,6 +1,6 @@ Pod::Spec.new do |s| - s.name = 'Objective-LevelDB' - s.version = '2.1.5' + s.name = 'ObjectiveLevelDBappunite' + s.version = '2.1.4' s.license = 'MIT' s.summary = 'A feature-complete wrapper for LevelDB in Objective-C.' s.description = 'This is a feature-complete wrapper for Google\'s LevelDB. LevelDB is a fast key-value store written by Google.' diff --git a/leveldb-library/db/builder.cc b/leveldb-library/db/builder.cc new file mode 100644 index 0000000..f419882 --- /dev/null +++ b/leveldb-library/db/builder.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/builder.h" + +#include "db/filename.h" +#include "db/dbformat.h" +#include "db/table_cache.h" +#include "db/version_edit.h" +#include "leveldb/db.h" +#include "leveldb/env.h" +#include "leveldb/iterator.h" + +namespace leveldb { + +Status BuildTable(const std::string& dbname, + Env* env, + const Options& options, + TableCache* table_cache, + Iterator* iter, + FileMetaData* meta) { + Status s; + meta->file_size = 0; + iter->SeekToFirst(); + + std::string fname = TableFileName(dbname, meta->number); + if (iter->Valid()) { + WritableFile* file; + s = env->NewWritableFile(fname, &file); + if (!s.ok()) { + return s; + } + + TableBuilder* builder = new TableBuilder(options, file); + meta->smallest.DecodeFrom(iter->key()); + for (; iter->Valid(); iter->Next()) { + Slice key = iter->key(); + meta->largest.DecodeFrom(key); + builder->Add(key, iter->value()); + } + + // Finish and check for builder errors + if (s.ok()) { + s = builder->Finish(); + if (s.ok()) { + meta->file_size = builder->FileSize(); + assert(meta->file_size > 0); + } + } else { + builder->Abandon(); + } + delete builder; + + // Finish and check for file errors + if (s.ok()) { + s = file->Sync(); + } + if (s.ok()) { + s = file->Close(); + } + delete file; + file = NULL; + + if (s.ok()) { + // Verify that the table is usable + Iterator* it = table_cache->NewIterator(ReadOptions(), + meta->number, + meta->file_size); + s = it->status(); + delete it; + } + } + + // Check for input iterator errors + if (!iter->status().ok()) { + s = iter->status(); + } + + if (s.ok() && meta->file_size > 0) { + // Keep it + } else { + env->DeleteFile(fname); + } + return s; +} + +} // namespace leveldb diff --git a/leveldb-library/db/builder.h b/leveldb-library/db/builder.h new file mode 100644 index 0000000..62431fc --- /dev/null +++ b/leveldb-library/db/builder.h @@ -0,0 +1,34 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_BUILDER_H_ +#define STORAGE_LEVELDB_DB_BUILDER_H_ + +#include "leveldb/status.h" + +namespace leveldb { + +struct Options; +struct FileMetaData; + +class Env; +class Iterator; +class TableCache; +class VersionEdit; + +// Build a Table file from the contents of *iter. The generated file +// will be named according to meta->number. On success, the rest of +// *meta will be filled with metadata about the generated table. +// If no data is present in *iter, meta->file_size will be set to +// zero, and no Table file will be produced. +extern Status BuildTable(const std::string& dbname, + Env* env, + const Options& options, + TableCache* table_cache, + Iterator* iter, + FileMetaData* meta); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_BUILDER_H_ diff --git a/leveldb-library/db/c.cc b/leveldb-library/db/c.cc new file mode 100644 index 0000000..08ff0ad --- /dev/null +++ b/leveldb-library/db/c.cc @@ -0,0 +1,595 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "leveldb/c.h" + +#include +#include +#include "leveldb/cache.h" +#include "leveldb/comparator.h" +#include "leveldb/db.h" +#include "leveldb/env.h" +#include "leveldb/filter_policy.h" +#include "leveldb/iterator.h" +#include "leveldb/options.h" +#include "leveldb/status.h" +#include "leveldb/write_batch.h" + +using leveldb::Cache; +using leveldb::Comparator; +using leveldb::CompressionType; +using leveldb::DB; +using leveldb::Env; +using leveldb::FileLock; +using leveldb::FilterPolicy; +using leveldb::Iterator; +using leveldb::kMajorVersion; +using leveldb::kMinorVersion; +using leveldb::Logger; +using leveldb::NewBloomFilterPolicy; +using leveldb::NewLRUCache; +using leveldb::Options; +using leveldb::RandomAccessFile; +using leveldb::Range; +using leveldb::ReadOptions; +using leveldb::SequentialFile; +using leveldb::Slice; +using leveldb::Snapshot; +using leveldb::Status; +using leveldb::WritableFile; +using leveldb::WriteBatch; +using leveldb::WriteOptions; + +extern "C" { + +struct leveldb_t { DB* rep; }; +struct leveldb_iterator_t { Iterator* rep; }; +struct leveldb_writebatch_t { WriteBatch rep; }; +struct leveldb_snapshot_t { const Snapshot* rep; }; +struct leveldb_readoptions_t { ReadOptions rep; }; +struct leveldb_writeoptions_t { WriteOptions rep; }; +struct leveldb_options_t { Options rep; }; +struct leveldb_cache_t { Cache* rep; }; +struct leveldb_seqfile_t { SequentialFile* rep; }; +struct leveldb_randomfile_t { RandomAccessFile* rep; }; +struct leveldb_writablefile_t { WritableFile* rep; }; +struct leveldb_logger_t { Logger* rep; }; +struct leveldb_filelock_t { FileLock* rep; }; + +struct leveldb_comparator_t : public Comparator { + void* state_; + void (*destructor_)(void*); + int (*compare_)( + void*, + const char* a, size_t alen, + const char* b, size_t blen); + const char* (*name_)(void*); + + virtual ~leveldb_comparator_t() { + (*destructor_)(state_); + } + + virtual int Compare(const Slice& a, const Slice& b) const { + return (*compare_)(state_, a.data(), a.size(), b.data(), b.size()); + } + + virtual const char* Name() const { + return (*name_)(state_); + } + + // No-ops since the C binding does not support key shortening methods. + virtual void FindShortestSeparator(std::string*, const Slice&) const { } + virtual void FindShortSuccessor(std::string* key) const { } +}; + +struct leveldb_filterpolicy_t : public FilterPolicy { + void* state_; + void (*destructor_)(void*); + const char* (*name_)(void*); + char* (*create_)( + void*, + const char* const* key_array, const size_t* key_length_array, + int num_keys, + size_t* filter_length); + unsigned char (*key_match_)( + void*, + const char* key, size_t length, + const char* filter, size_t filter_length); + + virtual ~leveldb_filterpolicy_t() { + (*destructor_)(state_); + } + + virtual const char* Name() const { + return (*name_)(state_); + } + + virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { + std::vector key_pointers(n); + std::vector key_sizes(n); + for (int i = 0; i < n; i++) { + key_pointers[i] = keys[i].data(); + key_sizes[i] = keys[i].size(); + } + size_t len; + char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len); + dst->append(filter, len); + free(filter); + } + + virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { + return (*key_match_)(state_, key.data(), key.size(), + filter.data(), filter.size()); + } +}; + +struct leveldb_env_t { + Env* rep; + bool is_default; +}; + +static bool SaveError(char** errptr, const Status& s) { + assert(errptr != NULL); + if (s.ok()) { + return false; + } else if (*errptr == NULL) { + *errptr = strdup(s.ToString().c_str()); + } else { + // TODO(sanjay): Merge with existing error? + free(*errptr); + *errptr = strdup(s.ToString().c_str()); + } + return true; +} + +static char* CopyString(const std::string& str) { + char* result = reinterpret_cast(malloc(sizeof(char) * str.size())); + memcpy(result, str.data(), sizeof(char) * str.size()); + return result; +} + +leveldb_t* leveldb_open( + const leveldb_options_t* options, + const char* name, + char** errptr) { + DB* db; + if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) { + return NULL; + } + leveldb_t* result = new leveldb_t; + result->rep = db; + return result; +} + +void leveldb_close(leveldb_t* db) { + delete db->rep; + delete db; +} + +void leveldb_put( + leveldb_t* db, + const leveldb_writeoptions_t* options, + const char* key, size_t keylen, + const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, + db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen))); +} + +void leveldb_delete( + leveldb_t* db, + const leveldb_writeoptions_t* options, + const char* key, size_t keylen, + char** errptr) { + SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen))); +} + + +void leveldb_write( + leveldb_t* db, + const leveldb_writeoptions_t* options, + leveldb_writebatch_t* batch, + char** errptr) { + SaveError(errptr, db->rep->Write(options->rep, &batch->rep)); +} + +char* leveldb_get( + leveldb_t* db, + const leveldb_readoptions_t* options, + const char* key, size_t keylen, + size_t* vallen, + char** errptr) { + char* result = NULL; + std::string tmp; + Status s = db->rep->Get(options->rep, Slice(key, keylen), &tmp); + if (s.ok()) { + *vallen = tmp.size(); + result = CopyString(tmp); + } else { + *vallen = 0; + if (!s.IsNotFound()) { + SaveError(errptr, s); + } + } + return result; +} + +leveldb_iterator_t* leveldb_create_iterator( + leveldb_t* db, + const leveldb_readoptions_t* options) { + leveldb_iterator_t* result = new leveldb_iterator_t; + result->rep = db->rep->NewIterator(options->rep); + return result; +} + +const leveldb_snapshot_t* leveldb_create_snapshot( + leveldb_t* db) { + leveldb_snapshot_t* result = new leveldb_snapshot_t; + result->rep = db->rep->GetSnapshot(); + return result; +} + +void leveldb_release_snapshot( + leveldb_t* db, + const leveldb_snapshot_t* snapshot) { + db->rep->ReleaseSnapshot(snapshot->rep); + delete snapshot; +} + +char* leveldb_property_value( + leveldb_t* db, + const char* propname) { + std::string tmp; + if (db->rep->GetProperty(Slice(propname), &tmp)) { + // We use strdup() since we expect human readable output. + return strdup(tmp.c_str()); + } else { + return NULL; + } +} + +void leveldb_approximate_sizes( + leveldb_t* db, + int num_ranges, + const char* const* range_start_key, const size_t* range_start_key_len, + const char* const* range_limit_key, const size_t* range_limit_key_len, + uint64_t* sizes) { + Range* ranges = new Range[num_ranges]; + for (int i = 0; i < num_ranges; i++) { + ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); + ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]); + } + db->rep->GetApproximateSizes(ranges, num_ranges, sizes); + delete[] ranges; +} + +void leveldb_compact_range( + leveldb_t* db, + const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len) { + Slice a, b; + db->rep->CompactRange( + // Pass NULL Slice if corresponding "const char*" is NULL + (start_key ? (a = Slice(start_key, start_key_len), &a) : NULL), + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL)); +} + +void leveldb_destroy_db( + const leveldb_options_t* options, + const char* name, + char** errptr) { + SaveError(errptr, DestroyDB(name, options->rep)); +} + +void leveldb_repair_db( + const leveldb_options_t* options, + const char* name, + char** errptr) { + SaveError(errptr, RepairDB(name, options->rep)); +} + +void leveldb_iter_destroy(leveldb_iterator_t* iter) { + delete iter->rep; + delete iter; +} + +unsigned char leveldb_iter_valid(const leveldb_iterator_t* iter) { + return iter->rep->Valid(); +} + +void leveldb_iter_seek_to_first(leveldb_iterator_t* iter) { + iter->rep->SeekToFirst(); +} + +void leveldb_iter_seek_to_last(leveldb_iterator_t* iter) { + iter->rep->SeekToLast(); +} + +void leveldb_iter_seek(leveldb_iterator_t* iter, const char* k, size_t klen) { + iter->rep->Seek(Slice(k, klen)); +} + +void leveldb_iter_next(leveldb_iterator_t* iter) { + iter->rep->Next(); +} + +void leveldb_iter_prev(leveldb_iterator_t* iter) { + iter->rep->Prev(); +} + +const char* leveldb_iter_key(const leveldb_iterator_t* iter, size_t* klen) { + Slice s = iter->rep->key(); + *klen = s.size(); + return s.data(); +} + +const char* leveldb_iter_value(const leveldb_iterator_t* iter, size_t* vlen) { + Slice s = iter->rep->value(); + *vlen = s.size(); + return s.data(); +} + +void leveldb_iter_get_error(const leveldb_iterator_t* iter, char** errptr) { + SaveError(errptr, iter->rep->status()); +} + +leveldb_writebatch_t* leveldb_writebatch_create() { + return new leveldb_writebatch_t; +} + +void leveldb_writebatch_destroy(leveldb_writebatch_t* b) { + delete b; +} + +void leveldb_writebatch_clear(leveldb_writebatch_t* b) { + b->rep.Clear(); +} + +void leveldb_writebatch_put( + leveldb_writebatch_t* b, + const char* key, size_t klen, + const char* val, size_t vlen) { + b->rep.Put(Slice(key, klen), Slice(val, vlen)); +} + +void leveldb_writebatch_delete( + leveldb_writebatch_t* b, + const char* key, size_t klen) { + b->rep.Delete(Slice(key, klen)); +} + +void leveldb_writebatch_iterate( + leveldb_writebatch_t* b, + void* state, + void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), + void (*deleted)(void*, const char* k, size_t klen)) { + class H : public WriteBatch::Handler { + public: + void* state_; + void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen); + void (*deleted_)(void*, const char* k, size_t klen); + virtual void Put(const Slice& key, const Slice& value) { + (*put_)(state_, key.data(), key.size(), value.data(), value.size()); + } + virtual void Delete(const Slice& key) { + (*deleted_)(state_, key.data(), key.size()); + } + }; + H handler; + handler.state_ = state; + handler.put_ = put; + handler.deleted_ = deleted; + b->rep.Iterate(&handler); +} + +leveldb_options_t* leveldb_options_create() { + return new leveldb_options_t; +} + +void leveldb_options_destroy(leveldb_options_t* options) { + delete options; +} + +void leveldb_options_set_comparator( + leveldb_options_t* opt, + leveldb_comparator_t* cmp) { + opt->rep.comparator = cmp; +} + +void leveldb_options_set_filter_policy( + leveldb_options_t* opt, + leveldb_filterpolicy_t* policy) { + opt->rep.filter_policy = policy; +} + +void leveldb_options_set_create_if_missing( + leveldb_options_t* opt, unsigned char v) { + opt->rep.create_if_missing = v; +} + +void leveldb_options_set_error_if_exists( + leveldb_options_t* opt, unsigned char v) { + opt->rep.error_if_exists = v; +} + +void leveldb_options_set_paranoid_checks( + leveldb_options_t* opt, unsigned char v) { + opt->rep.paranoid_checks = v; +} + +void leveldb_options_set_env(leveldb_options_t* opt, leveldb_env_t* env) { + opt->rep.env = (env ? env->rep : NULL); +} + +void leveldb_options_set_info_log(leveldb_options_t* opt, leveldb_logger_t* l) { + opt->rep.info_log = (l ? l->rep : NULL); +} + +void leveldb_options_set_write_buffer_size(leveldb_options_t* opt, size_t s) { + opt->rep.write_buffer_size = s; +} + +void leveldb_options_set_max_open_files(leveldb_options_t* opt, int n) { + opt->rep.max_open_files = n; +} + +void leveldb_options_set_cache(leveldb_options_t* opt, leveldb_cache_t* c) { + opt->rep.block_cache = c->rep; +} + +void leveldb_options_set_block_size(leveldb_options_t* opt, size_t s) { + opt->rep.block_size = s; +} + +void leveldb_options_set_block_restart_interval(leveldb_options_t* opt, int n) { + opt->rep.block_restart_interval = n; +} + +void leveldb_options_set_compression(leveldb_options_t* opt, int t) { + opt->rep.compression = static_cast(t); +} + +leveldb_comparator_t* leveldb_comparator_create( + void* state, + void (*destructor)(void*), + int (*compare)( + void*, + const char* a, size_t alen, + const char* b, size_t blen), + const char* (*name)(void*)) { + leveldb_comparator_t* result = new leveldb_comparator_t; + result->state_ = state; + result->destructor_ = destructor; + result->compare_ = compare; + result->name_ = name; + return result; +} + +void leveldb_comparator_destroy(leveldb_comparator_t* cmp) { + delete cmp; +} + +leveldb_filterpolicy_t* leveldb_filterpolicy_create( + void* state, + void (*destructor)(void*), + char* (*create_filter)( + void*, + const char* const* key_array, const size_t* key_length_array, + int num_keys, + size_t* filter_length), + unsigned char (*key_may_match)( + void*, + const char* key, size_t length, + const char* filter, size_t filter_length), + const char* (*name)(void*)) { + leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t; + result->state_ = state; + result->destructor_ = destructor; + result->create_ = create_filter; + result->key_match_ = key_may_match; + result->name_ = name; + return result; +} + +void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) { + delete filter; +} + +leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) { + // Make a leveldb_filterpolicy_t, but override all of its methods so + // they delegate to a NewBloomFilterPolicy() instead of user + // supplied C functions. + struct Wrapper : public leveldb_filterpolicy_t { + const FilterPolicy* rep_; + ~Wrapper() { delete rep_; } + const char* Name() const { return rep_->Name(); } + void CreateFilter(const Slice* keys, int n, std::string* dst) const { + return rep_->CreateFilter(keys, n, dst); + } + bool KeyMayMatch(const Slice& key, const Slice& filter) const { + return rep_->KeyMayMatch(key, filter); + } + static void DoNothing(void*) { } + }; + Wrapper* wrapper = new Wrapper; + wrapper->rep_ = NewBloomFilterPolicy(bits_per_key); + wrapper->state_ = NULL; + wrapper->destructor_ = &Wrapper::DoNothing; + return wrapper; +} + +leveldb_readoptions_t* leveldb_readoptions_create() { + return new leveldb_readoptions_t; +} + +void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) { + delete opt; +} + +void leveldb_readoptions_set_verify_checksums( + leveldb_readoptions_t* opt, + unsigned char v) { + opt->rep.verify_checksums = v; +} + +void leveldb_readoptions_set_fill_cache( + leveldb_readoptions_t* opt, unsigned char v) { + opt->rep.fill_cache = v; +} + +void leveldb_readoptions_set_snapshot( + leveldb_readoptions_t* opt, + const leveldb_snapshot_t* snap) { + opt->rep.snapshot = (snap ? snap->rep : NULL); +} + +leveldb_writeoptions_t* leveldb_writeoptions_create() { + return new leveldb_writeoptions_t; +} + +void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) { + delete opt; +} + +void leveldb_writeoptions_set_sync( + leveldb_writeoptions_t* opt, unsigned char v) { + opt->rep.sync = v; +} + +leveldb_cache_t* leveldb_cache_create_lru(size_t capacity) { + leveldb_cache_t* c = new leveldb_cache_t; + c->rep = NewLRUCache(capacity); + return c; +} + +void leveldb_cache_destroy(leveldb_cache_t* cache) { + delete cache->rep; + delete cache; +} + +leveldb_env_t* leveldb_create_default_env() { + leveldb_env_t* result = new leveldb_env_t; + result->rep = Env::Default(); + result->is_default = true; + return result; +} + +void leveldb_env_destroy(leveldb_env_t* env) { + if (!env->is_default) delete env->rep; + delete env; +} + +void leveldb_free(void* ptr) { + free(ptr); +} + +int leveldb_major_version() { + return kMajorVersion; +} + +int leveldb_minor_version() { + return kMinorVersion; +} + +} // end extern "C" diff --git a/leveldb-library/db/db_impl.cc b/leveldb-library/db/db_impl.cc new file mode 100644 index 0000000..f43ad76 --- /dev/null +++ b/leveldb-library/db/db_impl.cc @@ -0,0 +1,1568 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/db_impl.h" + +#include +#include +#include +#include +#include +#include +#include "db/builder.h" +#include "db/db_iter.h" +#include "db/dbformat.h" +#include "db/filename.h" +#include "db/log_reader.h" +#include "db/log_writer.h" +#include "db/memtable.h" +#include "db/table_cache.h" +#include "db/version_set.h" +#include "db/write_batch_internal.h" +#include "leveldb/db.h" +#include "leveldb/env.h" +#include "leveldb/status.h" +#include "leveldb/table.h" +#include "leveldb/table_builder.h" +#include "port/port.h" +#include "table/block.h" +#include "table/merger.h" +#include "table/two_level_iterator.h" +#include "util/coding.h" +#include "util/logging.h" +#include "util/mutexlock.h" + +namespace leveldb { + +const int kNumNonTableCacheFiles = 10; + +// Information kept for every waiting writer +struct DBImpl::Writer { + Status status; + WriteBatch* batch; + bool sync; + bool done; + port::CondVar cv; + + explicit Writer(port::Mutex* mu) : cv(mu) { } +}; + +struct DBImpl::CompactionState { + Compaction* const compaction; + + // Sequence numbers < smallest_snapshot are not significant since we + // will never have to service a snapshot below smallest_snapshot. + // Therefore if we have seen a sequence number S <= smallest_snapshot, + // we can drop all entries for the same key with sequence numbers < S. + SequenceNumber smallest_snapshot; + + // Files produced by compaction + struct Output { + uint64_t number; + uint64_t file_size; + InternalKey smallest, largest; + }; + std::vector outputs; + + // State kept for output being generated + WritableFile* outfile; + TableBuilder* builder; + + uint64_t total_bytes; + + Output* current_output() { return &outputs[outputs.size()-1]; } + + explicit CompactionState(Compaction* c) + : compaction(c), + outfile(NULL), + builder(NULL), + total_bytes(0) { + } +}; + +// Fix user-supplied options to be reasonable +template +static void ClipToRange(T* ptr, V minvalue, V maxvalue) { + if (static_cast(*ptr) > maxvalue) *ptr = maxvalue; + if (static_cast(*ptr) < minvalue) *ptr = minvalue; +} +Options SanitizeOptions(const std::string& dbname, + const InternalKeyComparator* icmp, + const InternalFilterPolicy* ipolicy, + const Options& src) { + Options result = src; + result.comparator = icmp; + result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL; + ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000); + ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); + ClipToRange(&result.max_file_size, 1<<20, 1<<30); + ClipToRange(&result.block_size, 1<<10, 4<<20); + if (result.info_log == NULL) { + // Open a log file in the same directory as the db + src.env->CreateDir(dbname); // In case it does not exist + src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname)); + Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log); + if (!s.ok()) { + // No place suitable for logging + result.info_log = NULL; + } + } + if (result.block_cache == NULL) { + result.block_cache = NewLRUCache(8 << 20); + } + return result; +} + +DBImpl::DBImpl(const Options& raw_options, const std::string& dbname) + : env_(raw_options.env), + internal_comparator_(raw_options.comparator), + internal_filter_policy_(raw_options.filter_policy), + options_(SanitizeOptions(dbname, &internal_comparator_, + &internal_filter_policy_, raw_options)), + owns_info_log_(options_.info_log != raw_options.info_log), + owns_cache_(options_.block_cache != raw_options.block_cache), + dbname_(dbname), + db_lock_(NULL), + shutting_down_(NULL), + bg_cv_(&mutex_), + mem_(NULL), + imm_(NULL), + logfile_(NULL), + logfile_number_(0), + log_(NULL), + seed_(0), + tmp_batch_(new WriteBatch), + bg_compaction_scheduled_(false), + manual_compaction_(NULL) { + has_imm_.Release_Store(NULL); + + // Reserve ten files or so for other uses and give the rest to TableCache. + const int table_cache_size = options_.max_open_files - kNumNonTableCacheFiles; + table_cache_ = new TableCache(dbname_, &options_, table_cache_size); + + versions_ = new VersionSet(dbname_, &options_, table_cache_, + &internal_comparator_); +} + +DBImpl::~DBImpl() { + // Wait for background work to finish + mutex_.Lock(); + shutting_down_.Release_Store(this); // Any non-NULL value is ok + while (bg_compaction_scheduled_) { + bg_cv_.Wait(); + } + mutex_.Unlock(); + + if (db_lock_ != NULL) { + env_->UnlockFile(db_lock_); + } + + delete versions_; + if (mem_ != NULL) mem_->Unref(); + if (imm_ != NULL) imm_->Unref(); + delete tmp_batch_; + delete log_; + delete logfile_; + delete table_cache_; + + if (owns_info_log_) { + delete options_.info_log; + } + if (owns_cache_) { + delete options_.block_cache; + } +} + +Status DBImpl::NewDB() { + VersionEdit new_db; + new_db.SetComparatorName(user_comparator()->Name()); + new_db.SetLogNumber(0); + new_db.SetNextFile(2); + new_db.SetLastSequence(0); + + const std::string manifest = DescriptorFileName(dbname_, 1); + WritableFile* file; + Status s = env_->NewWritableFile(manifest, &file); + if (!s.ok()) { + return s; + } + { + log::Writer log(file); + std::string record; + new_db.EncodeTo(&record); + s = log.AddRecord(record); + if (s.ok()) { + s = file->Close(); + } + } + delete file; + if (s.ok()) { + // Make "CURRENT" file that points to the new manifest file. + s = SetCurrentFile(env_, dbname_, 1); + } else { + env_->DeleteFile(manifest); + } + return s; +} + +void DBImpl::MaybeIgnoreError(Status* s) const { + if (s->ok() || options_.paranoid_checks) { + // No change needed + } else { + Log(options_.info_log, "Ignoring error %s", s->ToString().c_str()); + *s = Status::OK(); + } +} + +void DBImpl::DeleteObsoleteFiles() { + if (!bg_error_.ok()) { + // After a background error, we don't know whether a new version may + // or may not have been committed, so we cannot safely garbage collect. + return; + } + + // Make a set of all of the live files + std::set live = pending_outputs_; + versions_->AddLiveFiles(&live); + + std::vector filenames; + env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type)) { + bool keep = true; + switch (type) { + case kLogFile: + keep = ((number >= versions_->LogNumber()) || + (number == versions_->PrevLogNumber())); + break; + case kDescriptorFile: + // Keep my manifest file, and any newer incarnations' + // (in case there is a race that allows other incarnations) + keep = (number >= versions_->ManifestFileNumber()); + break; + case kTableFile: + keep = (live.find(number) != live.end()); + break; + case kTempFile: + // Any temp files that are currently being written to must + // be recorded in pending_outputs_, which is inserted into "live" + keep = (live.find(number) != live.end()); + break; + case kCurrentFile: + case kDBLockFile: + case kInfoLogFile: + keep = true; + break; + } + + if (!keep) { + if (type == kTableFile) { + table_cache_->Evict(number); + } + Log(options_.info_log, "Delete type=%d #%lld\n", + int(type), + static_cast(number)); + env_->DeleteFile(dbname_ + "/" + filenames[i]); + } + } + } +} + +Status DBImpl::Recover(VersionEdit* edit, bool *save_manifest) { + mutex_.AssertHeld(); + + // Ignore error from CreateDir since the creation of the DB is + // committed only when the descriptor is created, and this directory + // may already exist from a previous failed creation attempt. + env_->CreateDir(dbname_); + assert(db_lock_ == NULL); + Status s = env_->LockFile(LockFileName(dbname_), &db_lock_); + if (!s.ok()) { + return s; + } + + if (!env_->FileExists(CurrentFileName(dbname_))) { + if (options_.create_if_missing) { + s = NewDB(); + if (!s.ok()) { + return s; + } + } else { + return Status::InvalidArgument( + dbname_, "does not exist (create_if_missing is false)"); + } + } else { + if (options_.error_if_exists) { + return Status::InvalidArgument( + dbname_, "exists (error_if_exists is true)"); + } + } + + s = versions_->Recover(save_manifest); + if (!s.ok()) { + return s; + } + SequenceNumber max_sequence(0); + + // Recover from all newer log files than the ones named in the + // descriptor (new log files may have been added by the previous + // incarnation without registering them in the descriptor). + // + // Note that PrevLogNumber() is no longer used, but we pay + // attention to it in case we are recovering a database + // produced by an older version of leveldb. + const uint64_t min_log = versions_->LogNumber(); + const uint64_t prev_log = versions_->PrevLogNumber(); + std::vector filenames; + s = env_->GetChildren(dbname_, &filenames); + if (!s.ok()) { + return s; + } + std::set expected; + versions_->AddLiveFiles(&expected); + uint64_t number; + FileType type; + std::vector logs; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type)) { + expected.erase(number); + if (type == kLogFile && ((number >= min_log) || (number == prev_log))) + logs.push_back(number); + } + } + if (!expected.empty()) { + char buf[50]; + snprintf(buf, sizeof(buf), "%d missing files; e.g.", + static_cast(expected.size())); + return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin()))); + } + + // Recover in the order in which the logs were generated + std::sort(logs.begin(), logs.end()); + for (size_t i = 0; i < logs.size(); i++) { + s = RecoverLogFile(logs[i], (i == logs.size() - 1), save_manifest, edit, + &max_sequence); + if (!s.ok()) { + return s; + } + + // The previous incarnation may not have written any MANIFEST + // records after allocating this log number. So we manually + // update the file number allocation counter in VersionSet. + versions_->MarkFileNumberUsed(logs[i]); + } + + if (versions_->LastSequence() < max_sequence) { + versions_->SetLastSequence(max_sequence); + } + + return Status::OK(); +} + +Status DBImpl::RecoverLogFile(uint64_t log_number, bool last_log, + bool* save_manifest, VersionEdit* edit, + SequenceNumber* max_sequence) { + struct LogReporter : public log::Reader::Reporter { + Env* env; + Logger* info_log; + const char* fname; + Status* status; // NULL if options_.paranoid_checks==false + virtual void Corruption(size_t bytes, const Status& s) { + Log(info_log, "%s%s: dropping %d bytes; %s", + (this->status == NULL ? "(ignoring error) " : ""), + fname, static_cast(bytes), s.ToString().c_str()); + if (this->status != NULL && this->status->ok()) *this->status = s; + } + }; + + mutex_.AssertHeld(); + + // Open the log file + std::string fname = LogFileName(dbname_, log_number); + SequentialFile* file; + Status status = env_->NewSequentialFile(fname, &file); + if (!status.ok()) { + MaybeIgnoreError(&status); + return status; + } + + // Create the log reader. + LogReporter reporter; + reporter.env = env_; + reporter.info_log = options_.info_log; + reporter.fname = fname.c_str(); + reporter.status = (options_.paranoid_checks ? &status : NULL); + // We intentionally make log::Reader do checksumming even if + // paranoid_checks==false so that corruptions cause entire commits + // to be skipped instead of propagating bad information (like overly + // large sequence numbers). + log::Reader reader(file, &reporter, true/*checksum*/, + 0/*initial_offset*/); + Log(options_.info_log, "Recovering log #%llu", + (unsigned long long) log_number); + + // Read all the records and add to a memtable + std::string scratch; + Slice record; + WriteBatch batch; + int compactions = 0; + MemTable* mem = NULL; + while (reader.ReadRecord(&record, &scratch) && + status.ok()) { + if (record.size() < 12) { + reporter.Corruption( + record.size(), Status::Corruption("log record too small")); + continue; + } + WriteBatchInternal::SetContents(&batch, record); + + if (mem == NULL) { + mem = new MemTable(internal_comparator_); + mem->Ref(); + } + status = WriteBatchInternal::InsertInto(&batch, mem); + MaybeIgnoreError(&status); + if (!status.ok()) { + break; + } + const SequenceNumber last_seq = + WriteBatchInternal::Sequence(&batch) + + WriteBatchInternal::Count(&batch) - 1; + if (last_seq > *max_sequence) { + *max_sequence = last_seq; + } + + if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) { + compactions++; + *save_manifest = true; + status = WriteLevel0Table(mem, edit, NULL); + mem->Unref(); + mem = NULL; + if (!status.ok()) { + // Reflect errors immediately so that conditions like full + // file-systems cause the DB::Open() to fail. + break; + } + } + } + + delete file; + + // See if we should keep reusing the last log file. + if (status.ok() && options_.reuse_logs && last_log && compactions == 0) { + assert(logfile_ == NULL); + assert(log_ == NULL); + assert(mem_ == NULL); + uint64_t lfile_size; + if (env_->GetFileSize(fname, &lfile_size).ok() && + env_->NewAppendableFile(fname, &logfile_).ok()) { + Log(options_.info_log, "Reusing old log %s \n", fname.c_str()); + log_ = new log::Writer(logfile_, lfile_size); + logfile_number_ = log_number; + if (mem != NULL) { + mem_ = mem; + mem = NULL; + } else { + // mem can be NULL if lognum exists but was empty. + mem_ = new MemTable(internal_comparator_); + mem_->Ref(); + } + } + } + + if (mem != NULL) { + // mem did not get reused; compact it. + if (status.ok()) { + *save_manifest = true; + status = WriteLevel0Table(mem, edit, NULL); + } + mem->Unref(); + } + + return status; +} + +Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, + Version* base) { + mutex_.AssertHeld(); + const uint64_t start_micros = env_->NowMicros(); + FileMetaData meta; + meta.number = versions_->NewFileNumber(); + pending_outputs_.insert(meta.number); + Iterator* iter = mem->NewIterator(); + Log(options_.info_log, "Level-0 table #%llu: started", + (unsigned long long) meta.number); + + Status s; + { + mutex_.Unlock(); + s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta); + mutex_.Lock(); + } + + Log(options_.info_log, "Level-0 table #%llu: %lld bytes %s", + (unsigned long long) meta.number, + (unsigned long long) meta.file_size, + s.ToString().c_str()); + delete iter; + pending_outputs_.erase(meta.number); + + + // Note that if file_size is zero, the file has been deleted and + // should not be added to the manifest. + int level = 0; + if (s.ok() && meta.file_size > 0) { + const Slice min_user_key = meta.smallest.user_key(); + const Slice max_user_key = meta.largest.user_key(); + if (base != NULL) { + level = base->PickLevelForMemTableOutput(min_user_key, max_user_key); + } + edit->AddFile(level, meta.number, meta.file_size, + meta.smallest, meta.largest); + } + + CompactionStats stats; + stats.micros = env_->NowMicros() - start_micros; + stats.bytes_written = meta.file_size; + stats_[level].Add(stats); + return s; +} + +void DBImpl::CompactMemTable() { + mutex_.AssertHeld(); + assert(imm_ != NULL); + + // Save the contents of the memtable as a new Table + VersionEdit edit; + Version* base = versions_->current(); + base->Ref(); + Status s = WriteLevel0Table(imm_, &edit, base); + base->Unref(); + + if (s.ok() && shutting_down_.Acquire_Load()) { + s = Status::IOError("Deleting DB during memtable compaction"); + } + + // Replace immutable memtable with the generated Table + if (s.ok()) { + edit.SetPrevLogNumber(0); + edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed + s = versions_->LogAndApply(&edit, &mutex_); + } + + if (s.ok()) { + // Commit to the new state + imm_->Unref(); + imm_ = NULL; + has_imm_.Release_Store(NULL); + DeleteObsoleteFiles(); + } else { + RecordBackgroundError(s); + } +} + +void DBImpl::CompactRange(const Slice* begin, const Slice* end) { + int max_level_with_files = 1; + { + MutexLock l(&mutex_); + Version* base = versions_->current(); + for (int level = 1; level < config::kNumLevels; level++) { + if (base->OverlapInLevel(level, begin, end)) { + max_level_with_files = level; + } + } + } + TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap + for (int level = 0; level < max_level_with_files; level++) { + TEST_CompactRange(level, begin, end); + } +} + +void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) { + assert(level >= 0); + assert(level + 1 < config::kNumLevels); + + InternalKey begin_storage, end_storage; + + ManualCompaction manual; + manual.level = level; + manual.done = false; + if (begin == NULL) { + manual.begin = NULL; + } else { + begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek); + manual.begin = &begin_storage; + } + if (end == NULL) { + manual.end = NULL; + } else { + end_storage = InternalKey(*end, 0, static_cast(0)); + manual.end = &end_storage; + } + + MutexLock l(&mutex_); + while (!manual.done && !shutting_down_.Acquire_Load() && bg_error_.ok()) { + if (manual_compaction_ == NULL) { // Idle + manual_compaction_ = &manual; + MaybeScheduleCompaction(); + } else { // Running either my compaction or another compaction. + bg_cv_.Wait(); + } + } + if (manual_compaction_ == &manual) { + // Cancel my manual compaction since we aborted early for some reason. + manual_compaction_ = NULL; + } +} + +Status DBImpl::TEST_CompactMemTable() { + // NULL batch means just wait for earlier writes to be done + Status s = Write(WriteOptions(), NULL); + if (s.ok()) { + // Wait until the compaction completes + MutexLock l(&mutex_); + while (imm_ != NULL && bg_error_.ok()) { + bg_cv_.Wait(); + } + if (imm_ != NULL) { + s = bg_error_; + } + } + return s; +} + +void DBImpl::RecordBackgroundError(const Status& s) { + mutex_.AssertHeld(); + if (bg_error_.ok()) { + bg_error_ = s; + bg_cv_.SignalAll(); + } +} + +void DBImpl::MaybeScheduleCompaction() { + mutex_.AssertHeld(); + if (bg_compaction_scheduled_) { + // Already scheduled + } else if (shutting_down_.Acquire_Load()) { + // DB is being deleted; no more background compactions + } else if (!bg_error_.ok()) { + // Already got an error; no more changes + } else if (imm_ == NULL && + manual_compaction_ == NULL && + !versions_->NeedsCompaction()) { + // No work to be done + } else { + bg_compaction_scheduled_ = true; + env_->Schedule(&DBImpl::BGWork, this); + } +} + +void DBImpl::BGWork(void* db) { + reinterpret_cast(db)->BackgroundCall(); +} + +void DBImpl::BackgroundCall() { + MutexLock l(&mutex_); + assert(bg_compaction_scheduled_); + if (shutting_down_.Acquire_Load()) { + // No more background work when shutting down. + } else if (!bg_error_.ok()) { + // No more background work after a background error. + } else { + BackgroundCompaction(); + } + + bg_compaction_scheduled_ = false; + + // Previous compaction may have produced too many files in a level, + // so reschedule another compaction if needed. + MaybeScheduleCompaction(); + bg_cv_.SignalAll(); +} + +void DBImpl::BackgroundCompaction() { + mutex_.AssertHeld(); + + if (imm_ != NULL) { + CompactMemTable(); + return; + } + + Compaction* c; + bool is_manual = (manual_compaction_ != NULL); + InternalKey manual_end; + if (is_manual) { + ManualCompaction* m = manual_compaction_; + c = versions_->CompactRange(m->level, m->begin, m->end); + m->done = (c == NULL); + if (c != NULL) { + manual_end = c->input(0, c->num_input_files(0) - 1)->largest; + } + Log(options_.info_log, + "Manual compaction at level-%d from %s .. %s; will stop at %s\n", + m->level, + (m->begin ? m->begin->DebugString().c_str() : "(begin)"), + (m->end ? m->end->DebugString().c_str() : "(end)"), + (m->done ? "(end)" : manual_end.DebugString().c_str())); + } else { + c = versions_->PickCompaction(); + } + + Status status; + if (c == NULL) { + // Nothing to do + } else if (!is_manual && c->IsTrivialMove()) { + // Move file to next level + assert(c->num_input_files(0) == 1); + FileMetaData* f = c->input(0, 0); + c->edit()->DeleteFile(c->level(), f->number); + c->edit()->AddFile(c->level() + 1, f->number, f->file_size, + f->smallest, f->largest); + status = versions_->LogAndApply(c->edit(), &mutex_); + if (!status.ok()) { + RecordBackgroundError(status); + } + VersionSet::LevelSummaryStorage tmp; + Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n", + static_cast(f->number), + c->level() + 1, + static_cast(f->file_size), + status.ToString().c_str(), + versions_->LevelSummary(&tmp)); + } else { + CompactionState* compact = new CompactionState(c); + status = DoCompactionWork(compact); + if (!status.ok()) { + RecordBackgroundError(status); + } + CleanupCompaction(compact); + c->ReleaseInputs(); + DeleteObsoleteFiles(); + } + delete c; + + if (status.ok()) { + // Done + } else if (shutting_down_.Acquire_Load()) { + // Ignore compaction errors found during shutting down + } else { + Log(options_.info_log, + "Compaction error: %s", status.ToString().c_str()); + } + + if (is_manual) { + ManualCompaction* m = manual_compaction_; + if (!status.ok()) { + m->done = true; + } + if (!m->done) { + // We only compacted part of the requested range. Update *m + // to the range that is left to be compacted. + m->tmp_storage = manual_end; + m->begin = &m->tmp_storage; + } + manual_compaction_ = NULL; + } +} + +void DBImpl::CleanupCompaction(CompactionState* compact) { + mutex_.AssertHeld(); + if (compact->builder != NULL) { + // May happen if we get a shutdown call in the middle of compaction + compact->builder->Abandon(); + delete compact->builder; + } else { + assert(compact->outfile == NULL); + } + delete compact->outfile; + for (size_t i = 0; i < compact->outputs.size(); i++) { + const CompactionState::Output& out = compact->outputs[i]; + pending_outputs_.erase(out.number); + } + delete compact; +} + +Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { + assert(compact != NULL); + assert(compact->builder == NULL); + uint64_t file_number; + { + mutex_.Lock(); + file_number = versions_->NewFileNumber(); + pending_outputs_.insert(file_number); + CompactionState::Output out; + out.number = file_number; + out.smallest.Clear(); + out.largest.Clear(); + compact->outputs.push_back(out); + mutex_.Unlock(); + } + + // Make the output file + std::string fname = TableFileName(dbname_, file_number); + Status s = env_->NewWritableFile(fname, &compact->outfile); + if (s.ok()) { + compact->builder = new TableBuilder(options_, compact->outfile); + } + return s; +} + +Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, + Iterator* input) { + assert(compact != NULL); + assert(compact->outfile != NULL); + assert(compact->builder != NULL); + + const uint64_t output_number = compact->current_output()->number; + assert(output_number != 0); + + // Check for iterator errors + Status s = input->status(); + const uint64_t current_entries = compact->builder->NumEntries(); + if (s.ok()) { + s = compact->builder->Finish(); + } else { + compact->builder->Abandon(); + } + const uint64_t current_bytes = compact->builder->FileSize(); + compact->current_output()->file_size = current_bytes; + compact->total_bytes += current_bytes; + delete compact->builder; + compact->builder = NULL; + + // Finish and check for file errors + if (s.ok()) { + s = compact->outfile->Sync(); + } + if (s.ok()) { + s = compact->outfile->Close(); + } + delete compact->outfile; + compact->outfile = NULL; + + if (s.ok() && current_entries > 0) { + // Verify that the table is usable + Iterator* iter = table_cache_->NewIterator(ReadOptions(), + output_number, + current_bytes); + s = iter->status(); + delete iter; + if (s.ok()) { + Log(options_.info_log, + "Generated table #%llu@%d: %lld keys, %lld bytes", + (unsigned long long) output_number, + compact->compaction->level(), + (unsigned long long) current_entries, + (unsigned long long) current_bytes); + } + } + return s; +} + + +Status DBImpl::InstallCompactionResults(CompactionState* compact) { + mutex_.AssertHeld(); + Log(options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes", + compact->compaction->num_input_files(0), + compact->compaction->level(), + compact->compaction->num_input_files(1), + compact->compaction->level() + 1, + static_cast(compact->total_bytes)); + + // Add compaction outputs + compact->compaction->AddInputDeletions(compact->compaction->edit()); + const int level = compact->compaction->level(); + for (size_t i = 0; i < compact->outputs.size(); i++) { + const CompactionState::Output& out = compact->outputs[i]; + compact->compaction->edit()->AddFile( + level + 1, + out.number, out.file_size, out.smallest, out.largest); + } + return versions_->LogAndApply(compact->compaction->edit(), &mutex_); +} + +Status DBImpl::DoCompactionWork(CompactionState* compact) { + const uint64_t start_micros = env_->NowMicros(); + int64_t imm_micros = 0; // Micros spent doing imm_ compactions + + Log(options_.info_log, "Compacting %d@%d + %d@%d files", + compact->compaction->num_input_files(0), + compact->compaction->level(), + compact->compaction->num_input_files(1), + compact->compaction->level() + 1); + + assert(versions_->NumLevelFiles(compact->compaction->level()) > 0); + assert(compact->builder == NULL); + assert(compact->outfile == NULL); + if (snapshots_.empty()) { + compact->smallest_snapshot = versions_->LastSequence(); + } else { + compact->smallest_snapshot = snapshots_.oldest()->number_; + } + + // Release mutex while we're actually doing the compaction work + mutex_.Unlock(); + + Iterator* input = versions_->MakeInputIterator(compact->compaction); + input->SeekToFirst(); + Status status; + ParsedInternalKey ikey; + std::string current_user_key; + bool has_current_user_key = false; + SequenceNumber last_sequence_for_key = kMaxSequenceNumber; + for (; input->Valid() && !shutting_down_.Acquire_Load(); ) { + // Prioritize immutable compaction work + if (has_imm_.NoBarrier_Load() != NULL) { + const uint64_t imm_start = env_->NowMicros(); + mutex_.Lock(); + if (imm_ != NULL) { + CompactMemTable(); + bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary + } + mutex_.Unlock(); + imm_micros += (env_->NowMicros() - imm_start); + } + + Slice key = input->key(); + if (compact->compaction->ShouldStopBefore(key) && + compact->builder != NULL) { + status = FinishCompactionOutputFile(compact, input); + if (!status.ok()) { + break; + } + } + + // Handle key/value, add to state, etc. + bool drop = false; + if (!ParseInternalKey(key, &ikey)) { + // Do not hide error keys + current_user_key.clear(); + has_current_user_key = false; + last_sequence_for_key = kMaxSequenceNumber; + } else { + if (!has_current_user_key || + user_comparator()->Compare(ikey.user_key, + Slice(current_user_key)) != 0) { + // First occurrence of this user key + current_user_key.assign(ikey.user_key.data(), ikey.user_key.size()); + has_current_user_key = true; + last_sequence_for_key = kMaxSequenceNumber; + } + + if (last_sequence_for_key <= compact->smallest_snapshot) { + // Hidden by an newer entry for same user key + drop = true; // (A) + } else if (ikey.type == kTypeDeletion && + ikey.sequence <= compact->smallest_snapshot && + compact->compaction->IsBaseLevelForKey(ikey.user_key)) { + // For this user key: + // (1) there is no data in higher levels + // (2) data in lower levels will have larger sequence numbers + // (3) data in layers that are being compacted here and have + // smaller sequence numbers will be dropped in the next + // few iterations of this loop (by rule (A) above). + // Therefore this deletion marker is obsolete and can be dropped. + drop = true; + } + + last_sequence_for_key = ikey.sequence; + } +#if 0 + Log(options_.info_log, + " Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, " + "%d smallest_snapshot: %d", + ikey.user_key.ToString().c_str(), + (int)ikey.sequence, ikey.type, kTypeValue, drop, + compact->compaction->IsBaseLevelForKey(ikey.user_key), + (int)last_sequence_for_key, (int)compact->smallest_snapshot); +#endif + + if (!drop) { + // Open output file if necessary + if (compact->builder == NULL) { + status = OpenCompactionOutputFile(compact); + if (!status.ok()) { + break; + } + } + if (compact->builder->NumEntries() == 0) { + compact->current_output()->smallest.DecodeFrom(key); + } + compact->current_output()->largest.DecodeFrom(key); + compact->builder->Add(key, input->value()); + + // Close output file if it is big enough + if (compact->builder->FileSize() >= + compact->compaction->MaxOutputFileSize()) { + status = FinishCompactionOutputFile(compact, input); + if (!status.ok()) { + break; + } + } + } + + input->Next(); + } + + if (status.ok() && shutting_down_.Acquire_Load()) { + status = Status::IOError("Deleting DB during compaction"); + } + if (status.ok() && compact->builder != NULL) { + status = FinishCompactionOutputFile(compact, input); + } + if (status.ok()) { + status = input->status(); + } + delete input; + input = NULL; + + CompactionStats stats; + stats.micros = env_->NowMicros() - start_micros - imm_micros; + for (int which = 0; which < 2; which++) { + for (int i = 0; i < compact->compaction->num_input_files(which); i++) { + stats.bytes_read += compact->compaction->input(which, i)->file_size; + } + } + for (size_t i = 0; i < compact->outputs.size(); i++) { + stats.bytes_written += compact->outputs[i].file_size; + } + + mutex_.Lock(); + stats_[compact->compaction->level() + 1].Add(stats); + + if (status.ok()) { + status = InstallCompactionResults(compact); + } + if (!status.ok()) { + RecordBackgroundError(status); + } + VersionSet::LevelSummaryStorage tmp; + Log(options_.info_log, + "compacted to: %s", versions_->LevelSummary(&tmp)); + return status; +} + +namespace { +struct IterState { + port::Mutex* mu; + Version* version; + MemTable* mem; + MemTable* imm; +}; + +static void CleanupIteratorState(void* arg1, void* arg2) { + IterState* state = reinterpret_cast(arg1); + state->mu->Lock(); + state->mem->Unref(); + if (state->imm != NULL) state->imm->Unref(); + state->version->Unref(); + state->mu->Unlock(); + delete state; +} +} // namespace + +Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, + SequenceNumber* latest_snapshot, + uint32_t* seed) { + IterState* cleanup = new IterState; + mutex_.Lock(); + *latest_snapshot = versions_->LastSequence(); + + // Collect together all needed child iterators + std::vector list; + list.push_back(mem_->NewIterator()); + mem_->Ref(); + if (imm_ != NULL) { + list.push_back(imm_->NewIterator()); + imm_->Ref(); + } + versions_->current()->AddIterators(options, &list); + Iterator* internal_iter = + NewMergingIterator(&internal_comparator_, &list[0], list.size()); + versions_->current()->Ref(); + + cleanup->mu = &mutex_; + cleanup->mem = mem_; + cleanup->imm = imm_; + cleanup->version = versions_->current(); + internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, NULL); + + *seed = ++seed_; + mutex_.Unlock(); + return internal_iter; +} + +Iterator* DBImpl::TEST_NewInternalIterator() { + SequenceNumber ignored; + uint32_t ignored_seed; + return NewInternalIterator(ReadOptions(), &ignored, &ignored_seed); +} + +int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() { + MutexLock l(&mutex_); + return versions_->MaxNextLevelOverlappingBytes(); +} + +Status DBImpl::Get(const ReadOptions& options, + const Slice& key, + std::string* value) { + Status s; + MutexLock l(&mutex_); + SequenceNumber snapshot; + if (options.snapshot != NULL) { + snapshot = reinterpret_cast(options.snapshot)->number_; + } else { + snapshot = versions_->LastSequence(); + } + + MemTable* mem = mem_; + MemTable* imm = imm_; + Version* current = versions_->current(); + mem->Ref(); + if (imm != NULL) imm->Ref(); + current->Ref(); + + bool have_stat_update = false; + Version::GetStats stats; + + // Unlock while reading from files and memtables + { + mutex_.Unlock(); + // First look in the memtable, then in the immutable memtable (if any). + LookupKey lkey(key, snapshot); + if (mem->Get(lkey, value, &s)) { + // Done + } else if (imm != NULL && imm->Get(lkey, value, &s)) { + // Done + } else { + s = current->Get(options, lkey, value, &stats); + have_stat_update = true; + } + mutex_.Lock(); + } + + if (have_stat_update && current->UpdateStats(stats)) { + MaybeScheduleCompaction(); + } + mem->Unref(); + if (imm != NULL) imm->Unref(); + current->Unref(); + return s; +} + +Iterator* DBImpl::NewIterator(const ReadOptions& options) { + SequenceNumber latest_snapshot; + uint32_t seed; + Iterator* iter = NewInternalIterator(options, &latest_snapshot, &seed); + return NewDBIterator( + this, user_comparator(), iter, + (options.snapshot != NULL + ? reinterpret_cast(options.snapshot)->number_ + : latest_snapshot), + seed); +} + +void DBImpl::RecordReadSample(Slice key) { + MutexLock l(&mutex_); + if (versions_->current()->RecordReadSample(key)) { + MaybeScheduleCompaction(); + } +} + +const Snapshot* DBImpl::GetSnapshot() { + MutexLock l(&mutex_); + return snapshots_.New(versions_->LastSequence()); +} + +void DBImpl::ReleaseSnapshot(const Snapshot* s) { + MutexLock l(&mutex_); + snapshots_.Delete(reinterpret_cast(s)); +} + +// Convenience methods +Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) { + return DB::Put(o, key, val); +} + +Status DBImpl::Delete(const WriteOptions& options, const Slice& key) { + return DB::Delete(options, key); +} + +Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { + Writer w(&mutex_); + w.batch = my_batch; + w.sync = options.sync; + w.done = false; + + MutexLock l(&mutex_); + writers_.push_back(&w); + while (!w.done && &w != writers_.front()) { + w.cv.Wait(); + } + if (w.done) { + return w.status; + } + + // May temporarily unlock and wait. + Status status = MakeRoomForWrite(my_batch == NULL); + uint64_t last_sequence = versions_->LastSequence(); + Writer* last_writer = &w; + if (status.ok() && my_batch != NULL) { // NULL batch is for compactions + WriteBatch* updates = BuildBatchGroup(&last_writer); + WriteBatchInternal::SetSequence(updates, last_sequence + 1); + last_sequence += WriteBatchInternal::Count(updates); + + // Add to log and apply to memtable. We can release the lock + // during this phase since &w is currently responsible for logging + // and protects against concurrent loggers and concurrent writes + // into mem_. + { + mutex_.Unlock(); + status = log_->AddRecord(WriteBatchInternal::Contents(updates)); + bool sync_error = false; + if (status.ok() && options.sync) { + status = logfile_->Sync(); + if (!status.ok()) { + sync_error = true; + } + } + if (status.ok()) { + status = WriteBatchInternal::InsertInto(updates, mem_); + } + mutex_.Lock(); + if (sync_error) { + // The state of the log file is indeterminate: the log record we + // just added may or may not show up when the DB is re-opened. + // So we force the DB into a mode where all future writes fail. + RecordBackgroundError(status); + } + } + if (updates == tmp_batch_) tmp_batch_->Clear(); + + versions_->SetLastSequence(last_sequence); + } + + while (true) { + Writer* ready = writers_.front(); + writers_.pop_front(); + if (ready != &w) { + ready->status = status; + ready->done = true; + ready->cv.Signal(); + } + if (ready == last_writer) break; + } + + // Notify new head of write queue + if (!writers_.empty()) { + writers_.front()->cv.Signal(); + } + + return status; +} + +// REQUIRES: Writer list must be non-empty +// REQUIRES: First writer must have a non-NULL batch +WriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) { + assert(!writers_.empty()); + Writer* first = writers_.front(); + WriteBatch* result = first->batch; + assert(result != NULL); + + size_t size = WriteBatchInternal::ByteSize(first->batch); + + // Allow the group to grow up to a maximum size, but if the + // original write is small, limit the growth so we do not slow + // down the small write too much. + size_t max_size = 1 << 20; + if (size <= (128<<10)) { + max_size = size + (128<<10); + } + + *last_writer = first; + std::deque::iterator iter = writers_.begin(); + ++iter; // Advance past "first" + for (; iter != writers_.end(); ++iter) { + Writer* w = *iter; + if (w->sync && !first->sync) { + // Do not include a sync write into a batch handled by a non-sync write. + break; + } + + if (w->batch != NULL) { + size += WriteBatchInternal::ByteSize(w->batch); + if (size > max_size) { + // Do not make batch too big + break; + } + + // Append to *result + if (result == first->batch) { + // Switch to temporary batch instead of disturbing caller's batch + result = tmp_batch_; + assert(WriteBatchInternal::Count(result) == 0); + WriteBatchInternal::Append(result, first->batch); + } + WriteBatchInternal::Append(result, w->batch); + } + *last_writer = w; + } + return result; +} + +// REQUIRES: mutex_ is held +// REQUIRES: this thread is currently at the front of the writer queue +Status DBImpl::MakeRoomForWrite(bool force) { + mutex_.AssertHeld(); + assert(!writers_.empty()); + bool allow_delay = !force; + Status s; + while (true) { + if (!bg_error_.ok()) { + // Yield previous error + s = bg_error_; + break; + } else if ( + allow_delay && + versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) { + // We are getting close to hitting a hard limit on the number of + // L0 files. Rather than delaying a single write by several + // seconds when we hit the hard limit, start delaying each + // individual write by 1ms to reduce latency variance. Also, + // this delay hands over some CPU to the compaction thread in + // case it is sharing the same core as the writer. + mutex_.Unlock(); + env_->SleepForMicroseconds(1000); + allow_delay = false; // Do not delay a single write more than once + mutex_.Lock(); + } else if (!force && + (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) { + // There is room in current memtable + break; + } else if (imm_ != NULL) { + // We have filled up the current memtable, but the previous + // one is still being compacted, so we wait. + Log(options_.info_log, "Current memtable full; waiting...\n"); + bg_cv_.Wait(); + } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) { + // There are too many level-0 files. + Log(options_.info_log, "Too many L0 files; waiting...\n"); + bg_cv_.Wait(); + } else { + // Attempt to switch to a new memtable and trigger compaction of old + assert(versions_->PrevLogNumber() == 0); + uint64_t new_log_number = versions_->NewFileNumber(); + WritableFile* lfile = NULL; + s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile); + if (!s.ok()) { + // Avoid chewing through file number space in a tight loop. + versions_->ReuseFileNumber(new_log_number); + break; + } + delete log_; + delete logfile_; + logfile_ = lfile; + logfile_number_ = new_log_number; + log_ = new log::Writer(lfile); + imm_ = mem_; + has_imm_.Release_Store(imm_); + mem_ = new MemTable(internal_comparator_); + mem_->Ref(); + force = false; // Do not force another compaction if have room + MaybeScheduleCompaction(); + } + } + return s; +} + +bool DBImpl::GetProperty(const Slice& property, std::string* value) { + value->clear(); + + MutexLock l(&mutex_); + Slice in = property; + Slice prefix("leveldb."); + if (!in.starts_with(prefix)) return false; + in.remove_prefix(prefix.size()); + + if (in.starts_with("num-files-at-level")) { + in.remove_prefix(strlen("num-files-at-level")); + uint64_t level; + bool ok = ConsumeDecimalNumber(&in, &level) && in.empty(); + if (!ok || level >= config::kNumLevels) { + return false; + } else { + char buf[100]; + snprintf(buf, sizeof(buf), "%d", + versions_->NumLevelFiles(static_cast(level))); + *value = buf; + return true; + } + } else if (in == "stats") { + char buf[200]; + snprintf(buf, sizeof(buf), + " Compactions\n" + "Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n" + "--------------------------------------------------\n" + ); + value->append(buf); + for (int level = 0; level < config::kNumLevels; level++) { + int files = versions_->NumLevelFiles(level); + if (stats_[level].micros > 0 || files > 0) { + snprintf( + buf, sizeof(buf), + "%3d %8d %8.0f %9.0f %8.0f %9.0f\n", + level, + files, + versions_->NumLevelBytes(level) / 1048576.0, + stats_[level].micros / 1e6, + stats_[level].bytes_read / 1048576.0, + stats_[level].bytes_written / 1048576.0); + value->append(buf); + } + } + return true; + } else if (in == "sstables") { + *value = versions_->current()->DebugString(); + return true; + } else if (in == "approximate-memory-usage") { + size_t total_usage = options_.block_cache->TotalCharge(); + if (mem_) { + total_usage += mem_->ApproximateMemoryUsage(); + } + if (imm_) { + total_usage += imm_->ApproximateMemoryUsage(); + } + char buf[50]; + snprintf(buf, sizeof(buf), "%llu", + static_cast(total_usage)); + value->append(buf); + return true; + } + + return false; +} + +void DBImpl::GetApproximateSizes( + const Range* range, int n, + uint64_t* sizes) { + // TODO(opt): better implementation + Version* v; + { + MutexLock l(&mutex_); + versions_->current()->Ref(); + v = versions_->current(); + } + + for (int i = 0; i < n; i++) { + // Convert user_key into a corresponding internal key. + InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); + uint64_t start = versions_->ApproximateOffsetOf(v, k1); + uint64_t limit = versions_->ApproximateOffsetOf(v, k2); + sizes[i] = (limit >= start ? limit - start : 0); + } + + { + MutexLock l(&mutex_); + v->Unref(); + } +} + +// Default implementations of convenience methods that subclasses of DB +// can call if they wish +Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { + WriteBatch batch; + batch.Put(key, value); + return Write(opt, &batch); +} + +Status DB::Delete(const WriteOptions& opt, const Slice& key) { + WriteBatch batch; + batch.Delete(key); + return Write(opt, &batch); +} + +DB::~DB() { } + +Status DB::Open(const Options& options, const std::string& dbname, + DB** dbptr) { + *dbptr = NULL; + + DBImpl* impl = new DBImpl(options, dbname); + impl->mutex_.Lock(); + VersionEdit edit; + // Recover handles create_if_missing, error_if_exists + bool save_manifest = false; + Status s = impl->Recover(&edit, &save_manifest); + if (s.ok() && impl->mem_ == NULL) { + // Create new log and a corresponding memtable. + uint64_t new_log_number = impl->versions_->NewFileNumber(); + WritableFile* lfile; + s = options.env->NewWritableFile(LogFileName(dbname, new_log_number), + &lfile); + if (s.ok()) { + edit.SetLogNumber(new_log_number); + impl->logfile_ = lfile; + impl->logfile_number_ = new_log_number; + impl->log_ = new log::Writer(lfile); + impl->mem_ = new MemTable(impl->internal_comparator_); + impl->mem_->Ref(); + } + } + if (s.ok() && save_manifest) { + edit.SetPrevLogNumber(0); // No older logs needed after recovery. + edit.SetLogNumber(impl->logfile_number_); + s = impl->versions_->LogAndApply(&edit, &impl->mutex_); + } + if (s.ok()) { + impl->DeleteObsoleteFiles(); + impl->MaybeScheduleCompaction(); + } + impl->mutex_.Unlock(); + if (s.ok()) { + assert(impl->mem_ != NULL); + *dbptr = impl; + } else { + delete impl; + } + return s; +} + +Snapshot::~Snapshot() { +} + +Status DestroyDB(const std::string& dbname, const Options& options) { + Env* env = options.env; + std::vector filenames; + // Ignore error in case directory does not exist + env->GetChildren(dbname, &filenames); + if (filenames.empty()) { + return Status::OK(); + } + + FileLock* lock; + const std::string lockname = LockFileName(dbname); + Status result = env->LockFile(lockname, &lock); + if (result.ok()) { + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type) && + type != kDBLockFile) { // Lock file will be deleted at end + Status del = env->DeleteFile(dbname + "/" + filenames[i]); + if (result.ok() && !del.ok()) { + result = del; + } + } + } + env->UnlockFile(lock); // Ignore error since state is already gone + env->DeleteFile(lockname); + env->DeleteDir(dbname); // Ignore error in case dir contains other files + } + return result; +} + +} // namespace leveldb diff --git a/leveldb-library/db/db_impl.h b/leveldb-library/db/db_impl.h new file mode 100644 index 0000000..8ff323e --- /dev/null +++ b/leveldb-library/db/db_impl.h @@ -0,0 +1,211 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_ +#define STORAGE_LEVELDB_DB_DB_IMPL_H_ + +#include +#include +#include "db/dbformat.h" +#include "db/log_writer.h" +#include "db/snapshot.h" +#include "leveldb/db.h" +#include "leveldb/env.h" +#include "port/port.h" +#include "port/thread_annotations.h" + +namespace leveldb { + +class MemTable; +class TableCache; +class Version; +class VersionEdit; +class VersionSet; + +class DBImpl : public DB { + public: + DBImpl(const Options& options, const std::string& dbname); + virtual ~DBImpl(); + + // Implementations of the DB interface + virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value); + virtual Status Delete(const WriteOptions&, const Slice& key); + virtual Status Write(const WriteOptions& options, WriteBatch* updates); + virtual Status Get(const ReadOptions& options, + const Slice& key, + std::string* value); + virtual Iterator* NewIterator(const ReadOptions&); + virtual const Snapshot* GetSnapshot(); + virtual void ReleaseSnapshot(const Snapshot* snapshot); + virtual bool GetProperty(const Slice& property, std::string* value); + virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes); + virtual void CompactRange(const Slice* begin, const Slice* end); + + // Extra methods (for testing) that are not in the public DB interface + + // Compact any files in the named level that overlap [*begin,*end] + void TEST_CompactRange(int level, const Slice* begin, const Slice* end); + + // Force current memtable contents to be compacted. + Status TEST_CompactMemTable(); + + // Return an internal iterator over the current state of the database. + // The keys of this iterator are internal keys (see format.h). + // The returned iterator should be deleted when no longer needed. + Iterator* TEST_NewInternalIterator(); + + // Return the maximum overlapping data (in bytes) at next level for any + // file at a level >= 1. + int64_t TEST_MaxNextLevelOverlappingBytes(); + + // Record a sample of bytes read at the specified internal key. + // Samples are taken approximately once every config::kReadBytesPeriod + // bytes. + void RecordReadSample(Slice key); + + private: + friend class DB; + struct CompactionState; + struct Writer; + + Iterator* NewInternalIterator(const ReadOptions&, + SequenceNumber* latest_snapshot, + uint32_t* seed); + + Status NewDB(); + + // Recover the descriptor from persistent storage. May do a significant + // amount of work to recover recently logged updates. Any changes to + // be made to the descriptor are added to *edit. + Status Recover(VersionEdit* edit, bool* save_manifest) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + void MaybeIgnoreError(Status* s) const; + + // Delete any unneeded files and stale in-memory entries. + void DeleteObsoleteFiles(); + + // Compact the in-memory write buffer to disk. Switches to a new + // log-file/memtable and writes a new descriptor iff successful. + // Errors are recorded in bg_error_. + void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest, + VersionEdit* edit, SequenceNumber* max_sequence) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + Status MakeRoomForWrite(bool force /* compact even if there is room? */) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); + WriteBatch* BuildBatchGroup(Writer** last_writer); + + void RecordBackgroundError(const Status& s); + + void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); + static void BGWork(void* db); + void BackgroundCall(); + void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); + void CleanupCompaction(CompactionState* compact) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); + Status DoCompactionWork(CompactionState* compact) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + Status OpenCompactionOutputFile(CompactionState* compact); + Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); + Status InstallCompactionResults(CompactionState* compact) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Constant after construction + Env* const env_; + const InternalKeyComparator internal_comparator_; + const InternalFilterPolicy internal_filter_policy_; + const Options options_; // options_.comparator == &internal_comparator_ + bool owns_info_log_; + bool owns_cache_; + const std::string dbname_; + + // table_cache_ provides its own synchronization + TableCache* table_cache_; + + // Lock over the persistent DB state. Non-NULL iff successfully acquired. + FileLock* db_lock_; + + // State below is protected by mutex_ + port::Mutex mutex_; + port::AtomicPointer shutting_down_; + port::CondVar bg_cv_; // Signalled when background work finishes + MemTable* mem_; + MemTable* imm_; // Memtable being compacted + port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_ + WritableFile* logfile_; + uint64_t logfile_number_; + log::Writer* log_; + uint32_t seed_; // For sampling. + + // Queue of writers. + std::deque writers_; + WriteBatch* tmp_batch_; + + SnapshotList snapshots_; + + // Set of table files to protect from deletion because they are + // part of ongoing compactions. + std::set pending_outputs_; + + // Has a background compaction been scheduled or is running? + bool bg_compaction_scheduled_; + + // Information for a manual compaction + struct ManualCompaction { + int level; + bool done; + const InternalKey* begin; // NULL means beginning of key range + const InternalKey* end; // NULL means end of key range + InternalKey tmp_storage; // Used to keep track of compaction progress + }; + ManualCompaction* manual_compaction_; + + VersionSet* versions_; + + // Have we encountered a background error in paranoid mode? + Status bg_error_; + + // Per level compaction stats. stats_[level] stores the stats for + // compactions that produced data for the specified "level". + struct CompactionStats { + int64_t micros; + int64_t bytes_read; + int64_t bytes_written; + + CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { } + + void Add(const CompactionStats& c) { + this->micros += c.micros; + this->bytes_read += c.bytes_read; + this->bytes_written += c.bytes_written; + } + }; + CompactionStats stats_[config::kNumLevels]; + + // No copying allowed + DBImpl(const DBImpl&); + void operator=(const DBImpl&); + + const Comparator* user_comparator() const { + return internal_comparator_.user_comparator(); + } +}; + +// Sanitize db options. The caller should delete result.info_log if +// it is not equal to src.info_log. +extern Options SanitizeOptions(const std::string& db, + const InternalKeyComparator* icmp, + const InternalFilterPolicy* ipolicy, + const Options& src); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_DB_IMPL_H_ diff --git a/leveldb-library/db/db_iter.cc b/leveldb-library/db/db_iter.cc new file mode 100644 index 0000000..3b2035e --- /dev/null +++ b/leveldb-library/db/db_iter.cc @@ -0,0 +1,317 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/db_iter.h" + +#include "db/filename.h" +#include "db/db_impl.h" +#include "db/dbformat.h" +#include "leveldb/env.h" +#include "leveldb/iterator.h" +#include "port/port.h" +#include "util/logging.h" +#include "util/mutexlock.h" +#include "util/random.h" + +namespace leveldb { + +#if 0 +static void DumpInternalIter(Iterator* iter) { + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedInternalKey k; + if (!ParseInternalKey(iter->key(), &k)) { + fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str()); + } else { + fprintf(stderr, "@ '%s'\n", k.DebugString().c_str()); + } + } +} +#endif + +namespace { + +// Memtables and sstables that make the DB representation contain +// (userkey,seq,type) => uservalue entries. DBIter +// combines multiple entries for the same userkey found in the DB +// representation into a single entry while accounting for sequence +// numbers, deletion markers, overwrites, etc. +class DBIter: public Iterator { + public: + // Which direction is the iterator currently moving? + // (1) When moving forward, the internal iterator is positioned at + // the exact entry that yields this->key(), this->value() + // (2) When moving backwards, the internal iterator is positioned + // just before all entries whose user key == this->key(). + enum Direction { + kForward, + kReverse + }; + + DBIter(DBImpl* db, const Comparator* cmp, Iterator* iter, SequenceNumber s, + uint32_t seed) + : db_(db), + user_comparator_(cmp), + iter_(iter), + sequence_(s), + direction_(kForward), + valid_(false), + rnd_(seed), + bytes_counter_(RandomPeriod()) { + } + virtual ~DBIter() { + delete iter_; + } + virtual bool Valid() const { return valid_; } + virtual Slice key() const { + assert(valid_); + return (direction_ == kForward) ? ExtractUserKey(iter_->key()) : saved_key_; + } + virtual Slice value() const { + assert(valid_); + return (direction_ == kForward) ? iter_->value() : saved_value_; + } + virtual Status status() const { + if (status_.ok()) { + return iter_->status(); + } else { + return status_; + } + } + + virtual void Next(); + virtual void Prev(); + virtual void Seek(const Slice& target); + virtual void SeekToFirst(); + virtual void SeekToLast(); + + private: + void FindNextUserEntry(bool skipping, std::string* skip); + void FindPrevUserEntry(); + bool ParseKey(ParsedInternalKey* key); + + inline void SaveKey(const Slice& k, std::string* dst) { + dst->assign(k.data(), k.size()); + } + + inline void ClearSavedValue() { + if (saved_value_.capacity() > 1048576) { + std::string empty; + swap(empty, saved_value_); + } else { + saved_value_.clear(); + } + } + + // Pick next gap with average value of config::kReadBytesPeriod. + ssize_t RandomPeriod() { + return rnd_.Uniform(2*config::kReadBytesPeriod); + } + + DBImpl* db_; + const Comparator* const user_comparator_; + Iterator* const iter_; + SequenceNumber const sequence_; + + Status status_; + std::string saved_key_; // == current key when direction_==kReverse + std::string saved_value_; // == current raw value when direction_==kReverse + Direction direction_; + bool valid_; + + Random rnd_; + ssize_t bytes_counter_; + + // No copying allowed + DBIter(const DBIter&); + void operator=(const DBIter&); +}; + +inline bool DBIter::ParseKey(ParsedInternalKey* ikey) { + Slice k = iter_->key(); + ssize_t n = k.size() + iter_->value().size(); + bytes_counter_ -= n; + while (bytes_counter_ < 0) { + bytes_counter_ += RandomPeriod(); + db_->RecordReadSample(k); + } + if (!ParseInternalKey(k, ikey)) { + status_ = Status::Corruption("corrupted internal key in DBIter"); + return false; + } else { + return true; + } +} + +void DBIter::Next() { + assert(valid_); + + if (direction_ == kReverse) { // Switch directions? + direction_ = kForward; + // iter_ is pointing just before the entries for this->key(), + // so advance into the range of entries for this->key() and then + // use the normal skipping code below. + if (!iter_->Valid()) { + iter_->SeekToFirst(); + } else { + iter_->Next(); + } + if (!iter_->Valid()) { + valid_ = false; + saved_key_.clear(); + return; + } + // saved_key_ already contains the key to skip past. + } else { + // Store in saved_key_ the current key so we skip it below. + SaveKey(ExtractUserKey(iter_->key()), &saved_key_); + } + + FindNextUserEntry(true, &saved_key_); +} + +void DBIter::FindNextUserEntry(bool skipping, std::string* skip) { + // Loop until we hit an acceptable entry to yield + assert(iter_->Valid()); + assert(direction_ == kForward); + do { + ParsedInternalKey ikey; + if (ParseKey(&ikey) && ikey.sequence <= sequence_) { + switch (ikey.type) { + case kTypeDeletion: + // Arrange to skip all upcoming entries for this key since + // they are hidden by this deletion. + SaveKey(ikey.user_key, skip); + skipping = true; + break; + case kTypeValue: + if (skipping && + user_comparator_->Compare(ikey.user_key, *skip) <= 0) { + // Entry hidden + } else { + valid_ = true; + saved_key_.clear(); + return; + } + break; + } + } + iter_->Next(); + } while (iter_->Valid()); + saved_key_.clear(); + valid_ = false; +} + +void DBIter::Prev() { + assert(valid_); + + if (direction_ == kForward) { // Switch directions? + // iter_ is pointing at the current entry. Scan backwards until + // the key changes so we can use the normal reverse scanning code. + assert(iter_->Valid()); // Otherwise valid_ would have been false + SaveKey(ExtractUserKey(iter_->key()), &saved_key_); + while (true) { + iter_->Prev(); + if (!iter_->Valid()) { + valid_ = false; + saved_key_.clear(); + ClearSavedValue(); + return; + } + if (user_comparator_->Compare(ExtractUserKey(iter_->key()), + saved_key_) < 0) { + break; + } + } + direction_ = kReverse; + } + + FindPrevUserEntry(); +} + +void DBIter::FindPrevUserEntry() { + assert(direction_ == kReverse); + + ValueType value_type = kTypeDeletion; + if (iter_->Valid()) { + do { + ParsedInternalKey ikey; + if (ParseKey(&ikey) && ikey.sequence <= sequence_) { + if ((value_type != kTypeDeletion) && + user_comparator_->Compare(ikey.user_key, saved_key_) < 0) { + // We encountered a non-deleted value in entries for previous keys, + break; + } + value_type = ikey.type; + if (value_type == kTypeDeletion) { + saved_key_.clear(); + ClearSavedValue(); + } else { + Slice raw_value = iter_->value(); + if (saved_value_.capacity() > raw_value.size() + 1048576) { + std::string empty; + swap(empty, saved_value_); + } + SaveKey(ExtractUserKey(iter_->key()), &saved_key_); + saved_value_.assign(raw_value.data(), raw_value.size()); + } + } + iter_->Prev(); + } while (iter_->Valid()); + } + + if (value_type == kTypeDeletion) { + // End + valid_ = false; + saved_key_.clear(); + ClearSavedValue(); + direction_ = kForward; + } else { + valid_ = true; + } +} + +void DBIter::Seek(const Slice& target) { + direction_ = kForward; + ClearSavedValue(); + saved_key_.clear(); + AppendInternalKey( + &saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek)); + iter_->Seek(saved_key_); + if (iter_->Valid()) { + FindNextUserEntry(false, &saved_key_ /* temporary storage */); + } else { + valid_ = false; + } +} + +void DBIter::SeekToFirst() { + direction_ = kForward; + ClearSavedValue(); + iter_->SeekToFirst(); + if (iter_->Valid()) { + FindNextUserEntry(false, &saved_key_ /* temporary storage */); + } else { + valid_ = false; + } +} + +void DBIter::SeekToLast() { + direction_ = kReverse; + ClearSavedValue(); + iter_->SeekToLast(); + FindPrevUserEntry(); +} + +} // anonymous namespace + +Iterator* NewDBIterator( + DBImpl* db, + const Comparator* user_key_comparator, + Iterator* internal_iter, + SequenceNumber sequence, + uint32_t seed) { + return new DBIter(db, user_key_comparator, internal_iter, sequence, seed); +} + +} // namespace leveldb diff --git a/leveldb-library/db/db_iter.h b/leveldb-library/db/db_iter.h new file mode 100644 index 0000000..04927e9 --- /dev/null +++ b/leveldb-library/db/db_iter.h @@ -0,0 +1,28 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_DB_ITER_H_ +#define STORAGE_LEVELDB_DB_DB_ITER_H_ + +#include +#include "leveldb/db.h" +#include "db/dbformat.h" + +namespace leveldb { + +class DBImpl; + +// Return a new iterator that converts internal keys (yielded by +// "*internal_iter") that were live at the specified "sequence" number +// into appropriate user keys. +extern Iterator* NewDBIterator( + DBImpl* db, + const Comparator* user_key_comparator, + Iterator* internal_iter, + SequenceNumber sequence, + uint32_t seed); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_DB_ITER_H_ diff --git a/leveldb-library/db/dbformat.cc b/leveldb-library/db/dbformat.cc new file mode 100644 index 0000000..20a7ca4 --- /dev/null +++ b/leveldb-library/db/dbformat.cc @@ -0,0 +1,140 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include "db/dbformat.h" +#include "port/port.h" +#include "util/coding.h" + +namespace leveldb { + +static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) { + assert(seq <= kMaxSequenceNumber); + assert(t <= kValueTypeForSeek); + return (seq << 8) | t; +} + +void AppendInternalKey(std::string* result, const ParsedInternalKey& key) { + result->append(key.user_key.data(), key.user_key.size()); + PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); +} + +std::string ParsedInternalKey::DebugString() const { + char buf[50]; + snprintf(buf, sizeof(buf), "' @ %llu : %d", + (unsigned long long) sequence, + int(type)); + std::string result = "'"; + result += EscapeString(user_key.ToString()); + result += buf; + return result; +} + +std::string InternalKey::DebugString() const { + std::string result; + ParsedInternalKey parsed; + if (ParseInternalKey(rep_, &parsed)) { + result = parsed.DebugString(); + } else { + result = "(bad)"; + result.append(EscapeString(rep_)); + } + return result; +} + +const char* InternalKeyComparator::Name() const { + return "leveldb.InternalKeyComparator"; +} + +int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const { + // Order by: + // increasing user key (according to user-supplied comparator) + // decreasing sequence number + // decreasing type (though sequence# should be enough to disambiguate) + int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); + if (r == 0) { + const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8); + const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8); + if (anum > bnum) { + r = -1; + } else if (anum < bnum) { + r = +1; + } + } + return r; +} + +void InternalKeyComparator::FindShortestSeparator( + std::string* start, + const Slice& limit) const { + // Attempt to shorten the user portion of the key + Slice user_start = ExtractUserKey(*start); + Slice user_limit = ExtractUserKey(limit); + std::string tmp(user_start.data(), user_start.size()); + user_comparator_->FindShortestSeparator(&tmp, user_limit); + if (tmp.size() < user_start.size() && + user_comparator_->Compare(user_start, tmp) < 0) { + // User key has become shorter physically, but larger logically. + // Tack on the earliest possible number to the shortened user key. + PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek)); + assert(this->Compare(*start, tmp) < 0); + assert(this->Compare(tmp, limit) < 0); + start->swap(tmp); + } +} + +void InternalKeyComparator::FindShortSuccessor(std::string* key) const { + Slice user_key = ExtractUserKey(*key); + std::string tmp(user_key.data(), user_key.size()); + user_comparator_->FindShortSuccessor(&tmp); + if (tmp.size() < user_key.size() && + user_comparator_->Compare(user_key, tmp) < 0) { + // User key has become shorter physically, but larger logically. + // Tack on the earliest possible number to the shortened user key. + PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek)); + assert(this->Compare(*key, tmp) < 0); + key->swap(tmp); + } +} + +const char* InternalFilterPolicy::Name() const { + return user_policy_->Name(); +} + +void InternalFilterPolicy::CreateFilter(const Slice* keys, int n, + std::string* dst) const { + // We rely on the fact that the code in table.cc does not mind us + // adjusting keys[]. + Slice* mkey = const_cast(keys); + for (int i = 0; i < n; i++) { + mkey[i] = ExtractUserKey(keys[i]); + // TODO(sanjay): Suppress dups? + } + user_policy_->CreateFilter(keys, n, dst); +} + +bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const { + return user_policy_->KeyMayMatch(ExtractUserKey(key), f); +} + +LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) { + size_t usize = user_key.size(); + size_t needed = usize + 13; // A conservative estimate + char* dst; + if (needed <= sizeof(space_)) { + dst = space_; + } else { + dst = new char[needed]; + } + start_ = dst; + dst = EncodeVarint32(dst, usize + 8); + kstart_ = dst; + memcpy(dst, user_key.data(), usize); + dst += usize; + EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek)); + dst += 8; + end_ = dst; +} + +} // namespace leveldb diff --git a/leveldb-library/db/dbformat.h b/leveldb-library/db/dbformat.h new file mode 100644 index 0000000..ea897b1 --- /dev/null +++ b/leveldb-library/db/dbformat.h @@ -0,0 +1,230 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_DBFORMAT_H_ +#define STORAGE_LEVELDB_DB_DBFORMAT_H_ + +#include +#include "leveldb/comparator.h" +#include "leveldb/db.h" +#include "leveldb/filter_policy.h" +#include "leveldb/slice.h" +#include "leveldb/table_builder.h" +#include "util/coding.h" +#include "util/logging.h" + +namespace leveldb { + +// Grouping of constants. We may want to make some of these +// parameters set via options. +namespace config { +static const int kNumLevels = 7; + +// Level-0 compaction is started when we hit this many files. +static const int kL0_CompactionTrigger = 4; + +// Soft limit on number of level-0 files. We slow down writes at this point. +static const int kL0_SlowdownWritesTrigger = 8; + +// Maximum number of level-0 files. We stop writes at this point. +static const int kL0_StopWritesTrigger = 12; + +// Maximum level to which a new compacted memtable is pushed if it +// does not create overlap. We try to push to level 2 to avoid the +// relatively expensive level 0=>1 compactions and to avoid some +// expensive manifest file operations. We do not push all the way to +// the largest level since that can generate a lot of wasted disk +// space if the same key space is being repeatedly overwritten. +static const int kMaxMemCompactLevel = 2; + +// Approximate gap in bytes between samples of data read during iteration. +static const int kReadBytesPeriod = 1048576; + +} // namespace config + +class InternalKey; + +// Value types encoded as the last component of internal keys. +// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk +// data structures. +enum ValueType { + kTypeDeletion = 0x0, + kTypeValue = 0x1 +}; +// kValueTypeForSeek defines the ValueType that should be passed when +// constructing a ParsedInternalKey object for seeking to a particular +// sequence number (since we sort sequence numbers in decreasing order +// and the value type is embedded as the low 8 bits in the sequence +// number in internal keys, we need to use the highest-numbered +// ValueType, not the lowest). +static const ValueType kValueTypeForSeek = kTypeValue; + +typedef uint64_t SequenceNumber; + +// We leave eight bits empty at the bottom so a type and sequence# +// can be packed together into 64-bits. +static const SequenceNumber kMaxSequenceNumber = + ((0x1ull << 56) - 1); + +struct ParsedInternalKey { + Slice user_key; + SequenceNumber sequence; + ValueType type; + + ParsedInternalKey() { } // Intentionally left uninitialized (for speed) + ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t) + : user_key(u), sequence(seq), type(t) { } + std::string DebugString() const; +}; + +// Return the length of the encoding of "key". +inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { + return key.user_key.size() + 8; +} + +// Append the serialization of "key" to *result. +extern void AppendInternalKey(std::string* result, + const ParsedInternalKey& key); + +// Attempt to parse an internal key from "internal_key". On success, +// stores the parsed data in "*result", and returns true. +// +// On error, returns false, leaves "*result" in an undefined state. +extern bool ParseInternalKey(const Slice& internal_key, + ParsedInternalKey* result); + +// Returns the user key portion of an internal key. +inline Slice ExtractUserKey(const Slice& internal_key) { + assert(internal_key.size() >= 8); + return Slice(internal_key.data(), internal_key.size() - 8); +} + +inline ValueType ExtractValueType(const Slice& internal_key) { + assert(internal_key.size() >= 8); + const size_t n = internal_key.size(); + uint64_t num = DecodeFixed64(internal_key.data() + n - 8); + unsigned char c = num & 0xff; + return static_cast(c); +} + +// A comparator for internal keys that uses a specified comparator for +// the user key portion and breaks ties by decreasing sequence number. +class InternalKeyComparator : public Comparator { + private: + const Comparator* user_comparator_; + public: + explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { } + virtual const char* Name() const; + virtual int Compare(const Slice& a, const Slice& b) const; + virtual void FindShortestSeparator( + std::string* start, + const Slice& limit) const; + virtual void FindShortSuccessor(std::string* key) const; + + const Comparator* user_comparator() const { return user_comparator_; } + + int Compare(const InternalKey& a, const InternalKey& b) const; +}; + +// Filter policy wrapper that converts from internal keys to user keys +class InternalFilterPolicy : public FilterPolicy { + private: + const FilterPolicy* const user_policy_; + public: + explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { } + virtual const char* Name() const; + virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const; + virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const; +}; + +// Modules in this directory should keep internal keys wrapped inside +// the following class instead of plain strings so that we do not +// incorrectly use string comparisons instead of an InternalKeyComparator. +class InternalKey { + private: + std::string rep_; + public: + InternalKey() { } // Leave rep_ as empty to indicate it is invalid + InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) { + AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t)); + } + + void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); } + Slice Encode() const { + assert(!rep_.empty()); + return rep_; + } + + Slice user_key() const { return ExtractUserKey(rep_); } + + void SetFrom(const ParsedInternalKey& p) { + rep_.clear(); + AppendInternalKey(&rep_, p); + } + + void Clear() { rep_.clear(); } + + std::string DebugString() const; +}; + +inline int InternalKeyComparator::Compare( + const InternalKey& a, const InternalKey& b) const { + return Compare(a.Encode(), b.Encode()); +} + +inline bool ParseInternalKey(const Slice& internal_key, + ParsedInternalKey* result) { + const size_t n = internal_key.size(); + if (n < 8) return false; + uint64_t num = DecodeFixed64(internal_key.data() + n - 8); + unsigned char c = num & 0xff; + result->sequence = num >> 8; + result->type = static_cast(c); + result->user_key = Slice(internal_key.data(), n - 8); + return (c <= static_cast(kTypeValue)); +} + +// A helper class useful for DBImpl::Get() +class LookupKey { + public: + // Initialize *this for looking up user_key at a snapshot with + // the specified sequence number. + LookupKey(const Slice& user_key, SequenceNumber sequence); + + ~LookupKey(); + + // Return a key suitable for lookup in a MemTable. + Slice memtable_key() const { return Slice(start_, end_ - start_); } + + // Return an internal key (suitable for passing to an internal iterator) + Slice internal_key() const { return Slice(kstart_, end_ - kstart_); } + + // Return the user key + Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); } + + private: + // We construct a char array of the form: + // klength varint32 <-- start_ + // userkey char[klength] <-- kstart_ + // tag uint64 + // <-- end_ + // The array is a suitable MemTable key. + // The suffix starting with "userkey" can be used as an InternalKey. + const char* start_; + const char* kstart_; + const char* end_; + char space_[200]; // Avoid allocation for short keys + + // No copying allowed + LookupKey(const LookupKey&); + void operator=(const LookupKey&); +}; + +inline LookupKey::~LookupKey() { + if (start_ != space_) delete[] start_; +} + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_DBFORMAT_H_ diff --git a/leveldb-library/db/dumpfile.cc b/leveldb-library/db/dumpfile.cc new file mode 100644 index 0000000..61c47c2 --- /dev/null +++ b/leveldb-library/db/dumpfile.cc @@ -0,0 +1,225 @@ +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include "db/dbformat.h" +#include "db/filename.h" +#include "db/log_reader.h" +#include "db/version_edit.h" +#include "db/write_batch_internal.h" +#include "leveldb/env.h" +#include "leveldb/iterator.h" +#include "leveldb/options.h" +#include "leveldb/status.h" +#include "leveldb/table.h" +#include "leveldb/write_batch.h" +#include "util/logging.h" + +namespace leveldb { + +namespace { + +bool GuessType(const std::string& fname, FileType* type) { + size_t pos = fname.rfind('/'); + std::string basename; + if (pos == std::string::npos) { + basename = fname; + } else { + basename = std::string(fname.data() + pos + 1, fname.size() - pos - 1); + } + uint64_t ignored; + return ParseFileName(basename, &ignored, type); +} + +// Notified when log reader encounters corruption. +class CorruptionReporter : public log::Reader::Reporter { + public: + WritableFile* dst_; + virtual void Corruption(size_t bytes, const Status& status) { + std::string r = "corruption: "; + AppendNumberTo(&r, bytes); + r += " bytes; "; + r += status.ToString(); + r.push_back('\n'); + dst_->Append(r); + } +}; + +// Print contents of a log file. (*func)() is called on every record. +Status PrintLogContents(Env* env, const std::string& fname, + void (*func)(uint64_t, Slice, WritableFile*), + WritableFile* dst) { + SequentialFile* file; + Status s = env->NewSequentialFile(fname, &file); + if (!s.ok()) { + return s; + } + CorruptionReporter reporter; + reporter.dst_ = dst; + log::Reader reader(file, &reporter, true, 0); + Slice record; + std::string scratch; + while (reader.ReadRecord(&record, &scratch)) { + (*func)(reader.LastRecordOffset(), record, dst); + } + delete file; + return Status::OK(); +} + +// Called on every item found in a WriteBatch. +class WriteBatchItemPrinter : public WriteBatch::Handler { + public: + WritableFile* dst_; + virtual void Put(const Slice& key, const Slice& value) { + std::string r = " put '"; + AppendEscapedStringTo(&r, key); + r += "' '"; + AppendEscapedStringTo(&r, value); + r += "'\n"; + dst_->Append(r); + } + virtual void Delete(const Slice& key) { + std::string r = " del '"; + AppendEscapedStringTo(&r, key); + r += "'\n"; + dst_->Append(r); + } +}; + + +// Called on every log record (each one of which is a WriteBatch) +// found in a kLogFile. +static void WriteBatchPrinter(uint64_t pos, Slice record, WritableFile* dst) { + std::string r = "--- offset "; + AppendNumberTo(&r, pos); + r += "; "; + if (record.size() < 12) { + r += "log record length "; + AppendNumberTo(&r, record.size()); + r += " is too small\n"; + dst->Append(r); + return; + } + WriteBatch batch; + WriteBatchInternal::SetContents(&batch, record); + r += "sequence "; + AppendNumberTo(&r, WriteBatchInternal::Sequence(&batch)); + r.push_back('\n'); + dst->Append(r); + WriteBatchItemPrinter batch_item_printer; + batch_item_printer.dst_ = dst; + Status s = batch.Iterate(&batch_item_printer); + if (!s.ok()) { + dst->Append(" error: " + s.ToString() + "\n"); + } +} + +Status DumpLog(Env* env, const std::string& fname, WritableFile* dst) { + return PrintLogContents(env, fname, WriteBatchPrinter, dst); +} + +// Called on every log record (each one of which is a WriteBatch) +// found in a kDescriptorFile. +static void VersionEditPrinter(uint64_t pos, Slice record, WritableFile* dst) { + std::string r = "--- offset "; + AppendNumberTo(&r, pos); + r += "; "; + VersionEdit edit; + Status s = edit.DecodeFrom(record); + if (!s.ok()) { + r += s.ToString(); + r.push_back('\n'); + } else { + r += edit.DebugString(); + } + dst->Append(r); +} + +Status DumpDescriptor(Env* env, const std::string& fname, WritableFile* dst) { + return PrintLogContents(env, fname, VersionEditPrinter, dst); +} + +Status DumpTable(Env* env, const std::string& fname, WritableFile* dst) { + uint64_t file_size; + RandomAccessFile* file = NULL; + Table* table = NULL; + Status s = env->GetFileSize(fname, &file_size); + if (s.ok()) { + s = env->NewRandomAccessFile(fname, &file); + } + if (s.ok()) { + // We use the default comparator, which may or may not match the + // comparator used in this database. However this should not cause + // problems since we only use Table operations that do not require + // any comparisons. In particular, we do not call Seek or Prev. + s = Table::Open(Options(), file, file_size, &table); + } + if (!s.ok()) { + delete table; + delete file; + return s; + } + + ReadOptions ro; + ro.fill_cache = false; + Iterator* iter = table->NewIterator(ro); + std::string r; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + r.clear(); + ParsedInternalKey key; + if (!ParseInternalKey(iter->key(), &key)) { + r = "badkey '"; + AppendEscapedStringTo(&r, iter->key()); + r += "' => '"; + AppendEscapedStringTo(&r, iter->value()); + r += "'\n"; + dst->Append(r); + } else { + r = "'"; + AppendEscapedStringTo(&r, key.user_key); + r += "' @ "; + AppendNumberTo(&r, key.sequence); + r += " : "; + if (key.type == kTypeDeletion) { + r += "del"; + } else if (key.type == kTypeValue) { + r += "val"; + } else { + AppendNumberTo(&r, key.type); + } + r += " => '"; + AppendEscapedStringTo(&r, iter->value()); + r += "'\n"; + dst->Append(r); + } + } + s = iter->status(); + if (!s.ok()) { + dst->Append("iterator error: " + s.ToString() + "\n"); + } + + delete iter; + delete table; + delete file; + return Status::OK(); +} + +} // namespace + +Status DumpFile(Env* env, const std::string& fname, WritableFile* dst) { + FileType ftype; + if (!GuessType(fname, &ftype)) { + return Status::InvalidArgument(fname + ": unknown file type"); + } + switch (ftype) { + case kLogFile: return DumpLog(env, fname, dst); + case kDescriptorFile: return DumpDescriptor(env, fname, dst); + case kTableFile: return DumpTable(env, fname, dst); + default: + break; + } + return Status::InvalidArgument(fname + ": not a dump-able file type"); +} + +} // namespace leveldb diff --git a/leveldb-library/db/filename.cc b/leveldb-library/db/filename.cc new file mode 100644 index 0000000..da32946 --- /dev/null +++ b/leveldb-library/db/filename.cc @@ -0,0 +1,144 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include +#include "db/filename.h" +#include "db/dbformat.h" +#include "leveldb/env.h" +#include "util/logging.h" + +namespace leveldb { + +// A utility routine: write "data" to the named file and Sync() it. +extern Status WriteStringToFileSync(Env* env, const Slice& data, + const std::string& fname); + +static std::string MakeFileName(const std::string& name, uint64_t number, + const char* suffix) { + char buf[100]; + snprintf(buf, sizeof(buf), "/%06llu.%s", + static_cast(number), + suffix); + return name + buf; +} + +std::string LogFileName(const std::string& name, uint64_t number) { + assert(number > 0); + return MakeFileName(name, number, "log"); +} + +std::string TableFileName(const std::string& name, uint64_t number) { + assert(number > 0); + return MakeFileName(name, number, "ldb"); +} + +std::string SSTTableFileName(const std::string& name, uint64_t number) { + assert(number > 0); + return MakeFileName(name, number, "sst"); +} + +std::string DescriptorFileName(const std::string& dbname, uint64_t number) { + assert(number > 0); + char buf[100]; + snprintf(buf, sizeof(buf), "/MANIFEST-%06llu", + static_cast(number)); + return dbname + buf; +} + +std::string CurrentFileName(const std::string& dbname) { + return dbname + "/CURRENT"; +} + +std::string LockFileName(const std::string& dbname) { + return dbname + "/LOCK"; +} + +std::string TempFileName(const std::string& dbname, uint64_t number) { + assert(number > 0); + return MakeFileName(dbname, number, "dbtmp"); +} + +std::string InfoLogFileName(const std::string& dbname) { + return dbname + "/LOG"; +} + +// Return the name of the old info log file for "dbname". +std::string OldInfoLogFileName(const std::string& dbname) { + return dbname + "/LOG.old"; +} + + +// Owned filenames have the form: +// dbname/CURRENT +// dbname/LOCK +// dbname/LOG +// dbname/LOG.old +// dbname/MANIFEST-[0-9]+ +// dbname/[0-9]+.(log|sst|ldb) +bool ParseFileName(const std::string& fname, + uint64_t* number, + FileType* type) { + Slice rest(fname); + if (rest == "CURRENT") { + *number = 0; + *type = kCurrentFile; + } else if (rest == "LOCK") { + *number = 0; + *type = kDBLockFile; + } else if (rest == "LOG" || rest == "LOG.old") { + *number = 0; + *type = kInfoLogFile; + } else if (rest.starts_with("MANIFEST-")) { + rest.remove_prefix(strlen("MANIFEST-")); + uint64_t num; + if (!ConsumeDecimalNumber(&rest, &num)) { + return false; + } + if (!rest.empty()) { + return false; + } + *type = kDescriptorFile; + *number = num; + } else { + // Avoid strtoull() to keep filename format independent of the + // current locale + uint64_t num; + if (!ConsumeDecimalNumber(&rest, &num)) { + return false; + } + Slice suffix = rest; + if (suffix == Slice(".log")) { + *type = kLogFile; + } else if (suffix == Slice(".sst") || suffix == Slice(".ldb")) { + *type = kTableFile; + } else if (suffix == Slice(".dbtmp")) { + *type = kTempFile; + } else { + return false; + } + *number = num; + } + return true; +} + +Status SetCurrentFile(Env* env, const std::string& dbname, + uint64_t descriptor_number) { + // Remove leading "dbname/" and add newline to manifest file name + std::string manifest = DescriptorFileName(dbname, descriptor_number); + Slice contents = manifest; + assert(contents.starts_with(dbname + "/")); + contents.remove_prefix(dbname.size() + 1); + std::string tmp = TempFileName(dbname, descriptor_number); + Status s = WriteStringToFileSync(env, contents.ToString() + "\n", tmp); + if (s.ok()) { + s = env->RenameFile(tmp, CurrentFileName(dbname)); + } + if (!s.ok()) { + env->DeleteFile(tmp); + } + return s; +} + +} // namespace leveldb diff --git a/leveldb-library/db/filename.h b/leveldb-library/db/filename.h new file mode 100644 index 0000000..87a7526 --- /dev/null +++ b/leveldb-library/db/filename.h @@ -0,0 +1,85 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// File names used by DB code + +#ifndef STORAGE_LEVELDB_DB_FILENAME_H_ +#define STORAGE_LEVELDB_DB_FILENAME_H_ + +#include +#include +#include "leveldb/slice.h" +#include "leveldb/status.h" +#include "port/port.h" + +namespace leveldb { + +class Env; + +enum FileType { + kLogFile, + kDBLockFile, + kTableFile, + kDescriptorFile, + kCurrentFile, + kTempFile, + kInfoLogFile // Either the current one, or an old one +}; + +// Return the name of the log file with the specified number +// in the db named by "dbname". The result will be prefixed with +// "dbname". +extern std::string LogFileName(const std::string& dbname, uint64_t number); + +// Return the name of the sstable with the specified number +// in the db named by "dbname". The result will be prefixed with +// "dbname". +extern std::string TableFileName(const std::string& dbname, uint64_t number); + +// Return the legacy file name for an sstable with the specified number +// in the db named by "dbname". The result will be prefixed with +// "dbname". +extern std::string SSTTableFileName(const std::string& dbname, uint64_t number); + +// Return the name of the descriptor file for the db named by +// "dbname" and the specified incarnation number. The result will be +// prefixed with "dbname". +extern std::string DescriptorFileName(const std::string& dbname, + uint64_t number); + +// Return the name of the current file. This file contains the name +// of the current manifest file. The result will be prefixed with +// "dbname". +extern std::string CurrentFileName(const std::string& dbname); + +// Return the name of the lock file for the db named by +// "dbname". The result will be prefixed with "dbname". +extern std::string LockFileName(const std::string& dbname); + +// Return the name of a temporary file owned by the db named "dbname". +// The result will be prefixed with "dbname". +extern std::string TempFileName(const std::string& dbname, uint64_t number); + +// Return the name of the info log file for "dbname". +extern std::string InfoLogFileName(const std::string& dbname); + +// Return the name of the old info log file for "dbname". +extern std::string OldInfoLogFileName(const std::string& dbname); + +// If filename is a leveldb file, store the type of the file in *type. +// The number encoded in the filename is stored in *number. If the +// filename was successfully parsed, returns true. Else return false. +extern bool ParseFileName(const std::string& filename, + uint64_t* number, + FileType* type); + +// Make the CURRENT file point to the descriptor file with the +// specified number. +extern Status SetCurrentFile(Env* env, const std::string& dbname, + uint64_t descriptor_number); + + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_FILENAME_H_ diff --git a/leveldb-library/db/log_format.h b/leveldb-library/db/log_format.h new file mode 100644 index 0000000..356e69f --- /dev/null +++ b/leveldb-library/db/log_format.h @@ -0,0 +1,35 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Log format information shared by reader and writer. +// See ../doc/log_format.md for more detail. + +#ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_ +#define STORAGE_LEVELDB_DB_LOG_FORMAT_H_ + +namespace leveldb { +namespace log { + +enum RecordType { + // Zero is reserved for preallocated files + kZeroType = 0, + + kFullType = 1, + + // For fragments + kFirstType = 2, + kMiddleType = 3, + kLastType = 4 +}; +static const int kMaxRecordType = kLastType; + +static const int kBlockSize = 32768; + +// Header is checksum (4 bytes), length (2 bytes), type (1 byte). +static const int kHeaderSize = 4 + 2 + 1; + +} // namespace log +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_LOG_FORMAT_H_ diff --git a/leveldb-library/db/log_reader.cc b/leveldb-library/db/log_reader.cc new file mode 100644 index 0000000..a6d3045 --- /dev/null +++ b/leveldb-library/db/log_reader.cc @@ -0,0 +1,284 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/log_reader.h" + +#include +#include "leveldb/env.h" +#include "util/coding.h" +#include "util/crc32c.h" + +namespace leveldb { +namespace log { + +Reader::Reporter::~Reporter() { +} + +Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum, + uint64_t initial_offset) + : file_(file), + reporter_(reporter), + checksum_(checksum), + backing_store_(new char[kBlockSize]), + buffer_(), + eof_(false), + last_record_offset_(0), + end_of_buffer_offset_(0), + initial_offset_(initial_offset), + resyncing_(initial_offset > 0) { +} + +Reader::~Reader() { + delete[] backing_store_; +} + +bool Reader::SkipToInitialBlock() { + size_t offset_in_block = initial_offset_ % kBlockSize; + uint64_t block_start_location = initial_offset_ - offset_in_block; + + // Don't search a block if we'd be in the trailer + if (offset_in_block > kBlockSize - 6) { + offset_in_block = 0; + block_start_location += kBlockSize; + } + + end_of_buffer_offset_ = block_start_location; + + // Skip to start of first block that can contain the initial record + if (block_start_location > 0) { + Status skip_status = file_->Skip(block_start_location); + if (!skip_status.ok()) { + ReportDrop(block_start_location, skip_status); + return false; + } + } + + return true; +} + +bool Reader::ReadRecord(Slice* record, std::string* scratch) { + if (last_record_offset_ < initial_offset_) { + if (!SkipToInitialBlock()) { + return false; + } + } + + scratch->clear(); + record->clear(); + bool in_fragmented_record = false; + // Record offset of the logical record that we're reading + // 0 is a dummy value to make compilers happy + uint64_t prospective_record_offset = 0; + + Slice fragment; + while (true) { + const unsigned int record_type = ReadPhysicalRecord(&fragment); + + // ReadPhysicalRecord may have only had an empty trailer remaining in its + // internal buffer. Calculate the offset of the next physical record now + // that it has returned, properly accounting for its header size. + uint64_t physical_record_offset = + end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size(); + + if (resyncing_) { + if (record_type == kMiddleType) { + continue; + } else if (record_type == kLastType) { + resyncing_ = false; + continue; + } else { + resyncing_ = false; + } + } + + switch (record_type) { + case kFullType: + if (in_fragmented_record) { + // Handle bug in earlier versions of log::Writer where + // it could emit an empty kFirstType record at the tail end + // of a block followed by a kFullType or kFirstType record + // at the beginning of the next block. + if (scratch->empty()) { + in_fragmented_record = false; + } else { + ReportCorruption(scratch->size(), "partial record without end(1)"); + } + } + prospective_record_offset = physical_record_offset; + scratch->clear(); + *record = fragment; + last_record_offset_ = prospective_record_offset; + return true; + + case kFirstType: + if (in_fragmented_record) { + // Handle bug in earlier versions of log::Writer where + // it could emit an empty kFirstType record at the tail end + // of a block followed by a kFullType or kFirstType record + // at the beginning of the next block. + if (scratch->empty()) { + in_fragmented_record = false; + } else { + ReportCorruption(scratch->size(), "partial record without end(2)"); + } + } + prospective_record_offset = physical_record_offset; + scratch->assign(fragment.data(), fragment.size()); + in_fragmented_record = true; + break; + + case kMiddleType: + if (!in_fragmented_record) { + ReportCorruption(fragment.size(), + "missing start of fragmented record(1)"); + } else { + scratch->append(fragment.data(), fragment.size()); + } + break; + + case kLastType: + if (!in_fragmented_record) { + ReportCorruption(fragment.size(), + "missing start of fragmented record(2)"); + } else { + scratch->append(fragment.data(), fragment.size()); + *record = Slice(*scratch); + last_record_offset_ = prospective_record_offset; + return true; + } + break; + + case kEof: + if (in_fragmented_record) { + // This can be caused by the writer dying immediately after + // writing a physical record but before completing the next; don't + // treat it as a corruption, just ignore the entire logical record. + scratch->clear(); + } + return false; + + case kBadRecord: + if (in_fragmented_record) { + ReportCorruption(scratch->size(), "error in middle of record"); + in_fragmented_record = false; + scratch->clear(); + } + break; + + default: { + char buf[40]; + snprintf(buf, sizeof(buf), "unknown record type %u", record_type); + ReportCorruption( + (fragment.size() + (in_fragmented_record ? scratch->size() : 0)), + buf); + in_fragmented_record = false; + scratch->clear(); + break; + } + } + } + return false; +} + +uint64_t Reader::LastRecordOffset() { + return last_record_offset_; +} + +void Reader::ReportCorruption(uint64_t bytes, const char* reason) { + ReportDrop(bytes, Status::Corruption(reason)); +} + +void Reader::ReportDrop(uint64_t bytes, const Status& reason) { + if (reporter_ != NULL && + end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) { + reporter_->Corruption(static_cast(bytes), reason); + } +} + +unsigned int Reader::ReadPhysicalRecord(Slice* result) { + while (true) { + if (buffer_.size() < kHeaderSize) { + if (!eof_) { + // Last read was a full read, so this is a trailer to skip + buffer_.clear(); + Status status = file_->Read(kBlockSize, &buffer_, backing_store_); + end_of_buffer_offset_ += buffer_.size(); + if (!status.ok()) { + buffer_.clear(); + ReportDrop(kBlockSize, status); + eof_ = true; + return kEof; + } else if (buffer_.size() < kBlockSize) { + eof_ = true; + } + continue; + } else { + // Note that if buffer_ is non-empty, we have a truncated header at the + // end of the file, which can be caused by the writer crashing in the + // middle of writing the header. Instead of considering this an error, + // just report EOF. + buffer_.clear(); + return kEof; + } + } + + // Parse the header + const char* header = buffer_.data(); + const uint32_t a = static_cast(header[4]) & 0xff; + const uint32_t b = static_cast(header[5]) & 0xff; + const unsigned int type = header[6]; + const uint32_t length = a | (b << 8); + if (kHeaderSize + length > buffer_.size()) { + size_t drop_size = buffer_.size(); + buffer_.clear(); + if (!eof_) { + ReportCorruption(drop_size, "bad record length"); + return kBadRecord; + } + // If the end of the file has been reached without reading |length| bytes + // of payload, assume the writer died in the middle of writing the record. + // Don't report a corruption. + return kEof; + } + + if (type == kZeroType && length == 0) { + // Skip zero length record without reporting any drops since + // such records are produced by the mmap based writing code in + // env_posix.cc that preallocates file regions. + buffer_.clear(); + return kBadRecord; + } + + // Check crc + if (checksum_) { + uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header)); + uint32_t actual_crc = crc32c::Value(header + 6, 1 + length); + if (actual_crc != expected_crc) { + // Drop the rest of the buffer since "length" itself may have + // been corrupted and if we trust it, we could find some + // fragment of a real log record that just happens to look + // like a valid log record. + size_t drop_size = buffer_.size(); + buffer_.clear(); + ReportCorruption(drop_size, "checksum mismatch"); + return kBadRecord; + } + } + + buffer_.remove_prefix(kHeaderSize + length); + + // Skip physical record that started before initial_offset_ + if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length < + initial_offset_) { + result->clear(); + return kBadRecord; + } + + *result = Slice(header + kHeaderSize, length); + return type; + } +} + +} // namespace log +} // namespace leveldb diff --git a/leveldb-library/db/log_reader.h b/leveldb-library/db/log_reader.h new file mode 100644 index 0000000..8389d61 --- /dev/null +++ b/leveldb-library/db/log_reader.h @@ -0,0 +1,113 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_LOG_READER_H_ +#define STORAGE_LEVELDB_DB_LOG_READER_H_ + +#include + +#include "db/log_format.h" +#include "leveldb/slice.h" +#include "leveldb/status.h" + +namespace leveldb { + +class SequentialFile; + +namespace log { + +class Reader { + public: + // Interface for reporting errors. + class Reporter { + public: + virtual ~Reporter(); + + // Some corruption was detected. "size" is the approximate number + // of bytes dropped due to the corruption. + virtual void Corruption(size_t bytes, const Status& status) = 0; + }; + + // Create a reader that will return log records from "*file". + // "*file" must remain live while this Reader is in use. + // + // If "reporter" is non-NULL, it is notified whenever some data is + // dropped due to a detected corruption. "*reporter" must remain + // live while this Reader is in use. + // + // If "checksum" is true, verify checksums if available. + // + // The Reader will start reading at the first record located at physical + // position >= initial_offset within the file. + Reader(SequentialFile* file, Reporter* reporter, bool checksum, + uint64_t initial_offset); + + ~Reader(); + + // Read the next record into *record. Returns true if read + // successfully, false if we hit end of the input. May use + // "*scratch" as temporary storage. The contents filled in *record + // will only be valid until the next mutating operation on this + // reader or the next mutation to *scratch. + bool ReadRecord(Slice* record, std::string* scratch); + + // Returns the physical offset of the last record returned by ReadRecord. + // + // Undefined before the first call to ReadRecord. + uint64_t LastRecordOffset(); + + private: + SequentialFile* const file_; + Reporter* const reporter_; + bool const checksum_; + char* const backing_store_; + Slice buffer_; + bool eof_; // Last Read() indicated EOF by returning < kBlockSize + + // Offset of the last record returned by ReadRecord. + uint64_t last_record_offset_; + // Offset of the first location past the end of buffer_. + uint64_t end_of_buffer_offset_; + + // Offset at which to start looking for the first record to return + uint64_t const initial_offset_; + + // True if we are resynchronizing after a seek (initial_offset_ > 0). In + // particular, a run of kMiddleType and kLastType records can be silently + // skipped in this mode + bool resyncing_; + + // Extend record types with the following special values + enum { + kEof = kMaxRecordType + 1, + // Returned whenever we find an invalid physical record. + // Currently there are three situations in which this happens: + // * The record has an invalid CRC (ReadPhysicalRecord reports a drop) + // * The record is a 0-length record (No drop is reported) + // * The record is below constructor's initial_offset (No drop is reported) + kBadRecord = kMaxRecordType + 2 + }; + + // Skips all blocks that are completely before "initial_offset_". + // + // Returns true on success. Handles reporting. + bool SkipToInitialBlock(); + + // Return type, or one of the preceding special values + unsigned int ReadPhysicalRecord(Slice* result); + + // Reports dropped bytes to the reporter. + // buffer_ must be updated to remove the dropped bytes prior to invocation. + void ReportCorruption(uint64_t bytes, const char* reason); + void ReportDrop(uint64_t bytes, const Status& reason); + + // No copying allowed + Reader(const Reader&); + void operator=(const Reader&); +}; + +} // namespace log +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_LOG_READER_H_ diff --git a/leveldb-library/db/log_writer.cc b/leveldb-library/db/log_writer.cc new file mode 100644 index 0000000..74a0327 --- /dev/null +++ b/leveldb-library/db/log_writer.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/log_writer.h" + +#include +#include "leveldb/env.h" +#include "util/coding.h" +#include "util/crc32c.h" + +namespace leveldb { +namespace log { + +static void InitTypeCrc(uint32_t* type_crc) { + for (int i = 0; i <= kMaxRecordType; i++) { + char t = static_cast(i); + type_crc[i] = crc32c::Value(&t, 1); + } +} + +Writer::Writer(WritableFile* dest) + : dest_(dest), + block_offset_(0) { + InitTypeCrc(type_crc_); +} + +Writer::Writer(WritableFile* dest, uint64_t dest_length) + : dest_(dest), block_offset_(dest_length % kBlockSize) { + InitTypeCrc(type_crc_); +} + +Writer::~Writer() { +} + +Status Writer::AddRecord(const Slice& slice) { + const char* ptr = slice.data(); + size_t left = slice.size(); + + // Fragment the record if necessary and emit it. Note that if slice + // is empty, we still want to iterate once to emit a single + // zero-length record + Status s; + bool begin = true; + do { + const int leftover = kBlockSize - block_offset_; + assert(leftover >= 0); + if (leftover < kHeaderSize) { + // Switch to a new block + if (leftover > 0) { + // Fill the trailer (literal below relies on kHeaderSize being 7) + assert(kHeaderSize == 7); + dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover)); + } + block_offset_ = 0; + } + + // Invariant: we never leave < kHeaderSize bytes in a block. + assert(kBlockSize - block_offset_ - kHeaderSize >= 0); + + const size_t avail = kBlockSize - block_offset_ - kHeaderSize; + const size_t fragment_length = (left < avail) ? left : avail; + + RecordType type; + const bool end = (left == fragment_length); + if (begin && end) { + type = kFullType; + } else if (begin) { + type = kFirstType; + } else if (end) { + type = kLastType; + } else { + type = kMiddleType; + } + + s = EmitPhysicalRecord(type, ptr, fragment_length); + ptr += fragment_length; + left -= fragment_length; + begin = false; + } while (s.ok() && left > 0); + return s; +} + +Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) { + assert(n <= 0xffff); // Must fit in two bytes + assert(block_offset_ + kHeaderSize + n <= kBlockSize); + + // Format the header + char buf[kHeaderSize]; + buf[4] = static_cast(n & 0xff); + buf[5] = static_cast(n >> 8); + buf[6] = static_cast(t); + + // Compute the crc of the record type and the payload. + uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n); + crc = crc32c::Mask(crc); // Adjust for storage + EncodeFixed32(buf, crc); + + // Write the header and the payload + Status s = dest_->Append(Slice(buf, kHeaderSize)); + if (s.ok()) { + s = dest_->Append(Slice(ptr, n)); + if (s.ok()) { + s = dest_->Flush(); + } + } + block_offset_ += kHeaderSize + n; + return s; +} + +} // namespace log +} // namespace leveldb diff --git a/leveldb-library/db/log_writer.h b/leveldb-library/db/log_writer.h new file mode 100644 index 0000000..9e7cc47 --- /dev/null +++ b/leveldb-library/db/log_writer.h @@ -0,0 +1,54 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_ +#define STORAGE_LEVELDB_DB_LOG_WRITER_H_ + +#include +#include "db/log_format.h" +#include "leveldb/slice.h" +#include "leveldb/status.h" + +namespace leveldb { + +class WritableFile; + +namespace log { + +class Writer { + public: + // Create a writer that will append data to "*dest". + // "*dest" must be initially empty. + // "*dest" must remain live while this Writer is in use. + explicit Writer(WritableFile* dest); + + // Create a writer that will append data to "*dest". + // "*dest" must have initial length "dest_length". + // "*dest" must remain live while this Writer is in use. + Writer(WritableFile* dest, uint64_t dest_length); + + ~Writer(); + + Status AddRecord(const Slice& slice); + + private: + WritableFile* dest_; + int block_offset_; // Current offset in block + + // crc32c values for all supported record types. These are + // pre-computed to reduce the overhead of computing the crc of the + // record type stored in the header. + uint32_t type_crc_[kMaxRecordType + 1]; + + Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length); + + // No copying allowed + Writer(const Writer&); + void operator=(const Writer&); +}; + +} // namespace log +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_ diff --git a/leveldb-library/db/memtable.cc b/leveldb-library/db/memtable.cc new file mode 100644 index 0000000..bfec0a7 --- /dev/null +++ b/leveldb-library/db/memtable.cc @@ -0,0 +1,145 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/memtable.h" +#include "db/dbformat.h" +#include "leveldb/comparator.h" +#include "leveldb/env.h" +#include "leveldb/iterator.h" +#include "util/coding.h" + +namespace leveldb { + +static Slice GetLengthPrefixedSlice(const char* data) { + uint32_t len; + const char* p = data; + p = GetVarint32Ptr(p, p + 5, &len); // +5: we assume "p" is not corrupted + return Slice(p, len); +} + +MemTable::MemTable(const InternalKeyComparator& cmp) + : comparator_(cmp), + refs_(0), + table_(comparator_, &arena_) { +} + +MemTable::~MemTable() { + assert(refs_ == 0); +} + +size_t MemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); } + +int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr) + const { + // Internal keys are encoded as length-prefixed strings. + Slice a = GetLengthPrefixedSlice(aptr); + Slice b = GetLengthPrefixedSlice(bptr); + return comparator.Compare(a, b); +} + +// Encode a suitable internal key target for "target" and return it. +// Uses *scratch as scratch space, and the returned pointer will point +// into this scratch space. +static const char* EncodeKey(std::string* scratch, const Slice& target) { + scratch->clear(); + PutVarint32(scratch, target.size()); + scratch->append(target.data(), target.size()); + return scratch->data(); +} + +class MemTableIterator: public Iterator { + public: + explicit MemTableIterator(MemTable::Table* table) : iter_(table) { } + + virtual bool Valid() const { return iter_.Valid(); } + virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); } + virtual void SeekToFirst() { iter_.SeekToFirst(); } + virtual void SeekToLast() { iter_.SeekToLast(); } + virtual void Next() { iter_.Next(); } + virtual void Prev() { iter_.Prev(); } + virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); } + virtual Slice value() const { + Slice key_slice = GetLengthPrefixedSlice(iter_.key()); + return GetLengthPrefixedSlice(key_slice.data() + key_slice.size()); + } + + virtual Status status() const { return Status::OK(); } + + private: + MemTable::Table::Iterator iter_; + std::string tmp_; // For passing to EncodeKey + + // No copying allowed + MemTableIterator(const MemTableIterator&); + void operator=(const MemTableIterator&); +}; + +Iterator* MemTable::NewIterator() { + return new MemTableIterator(&table_); +} + +void MemTable::Add(SequenceNumber s, ValueType type, + const Slice& key, + const Slice& value) { + // Format of an entry is concatenation of: + // key_size : varint32 of internal_key.size() + // key bytes : char[internal_key.size()] + // value_size : varint32 of value.size() + // value bytes : char[value.size()] + size_t key_size = key.size(); + size_t val_size = value.size(); + size_t internal_key_size = key_size + 8; + const size_t encoded_len = + VarintLength(internal_key_size) + internal_key_size + + VarintLength(val_size) + val_size; + char* buf = arena_.Allocate(encoded_len); + char* p = EncodeVarint32(buf, internal_key_size); + memcpy(p, key.data(), key_size); + p += key_size; + EncodeFixed64(p, (s << 8) | type); + p += 8; + p = EncodeVarint32(p, val_size); + memcpy(p, value.data(), val_size); + assert((p + val_size) - buf == encoded_len); + table_.Insert(buf); +} + +bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) { + Slice memkey = key.memtable_key(); + Table::Iterator iter(&table_); + iter.Seek(memkey.data()); + if (iter.Valid()) { + // entry format is: + // klength varint32 + // userkey char[klength] + // tag uint64 + // vlength varint32 + // value char[vlength] + // Check that it belongs to same user key. We do not check the + // sequence number since the Seek() call above should have skipped + // all entries with overly large sequence numbers. + const char* entry = iter.key(); + uint32_t key_length; + const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length); + if (comparator_.comparator.user_comparator()->Compare( + Slice(key_ptr, key_length - 8), + key.user_key()) == 0) { + // Correct user key + const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); + switch (static_cast(tag & 0xff)) { + case kTypeValue: { + Slice v = GetLengthPrefixedSlice(key_ptr + key_length); + value->assign(v.data(), v.size()); + return true; + } + case kTypeDeletion: + *s = Status::NotFound(Slice()); + return true; + } + } + } + return false; +} + +} // namespace leveldb diff --git a/leveldb-library/db/memtable.h b/leveldb-library/db/memtable.h new file mode 100644 index 0000000..9f41567 --- /dev/null +++ b/leveldb-library/db/memtable.h @@ -0,0 +1,88 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_MEMTABLE_H_ +#define STORAGE_LEVELDB_DB_MEMTABLE_H_ + +#include +#include "leveldb/db.h" +#include "db/dbformat.h" +#include "db/skiplist.h" +#include "util/arena.h" + +namespace leveldb { + +class InternalKeyComparator; +class Mutex; +class MemTableIterator; + +class MemTable { + public: + // MemTables are reference counted. The initial reference count + // is zero and the caller must call Ref() at least once. + explicit MemTable(const InternalKeyComparator& comparator); + + // Increase reference count. + void Ref() { ++refs_; } + + // Drop reference count. Delete if no more references exist. + void Unref() { + --refs_; + assert(refs_ >= 0); + if (refs_ <= 0) { + delete this; + } + } + + // Returns an estimate of the number of bytes of data in use by this + // data structure. It is safe to call when MemTable is being modified. + size_t ApproximateMemoryUsage(); + + // Return an iterator that yields the contents of the memtable. + // + // The caller must ensure that the underlying MemTable remains live + // while the returned iterator is live. The keys returned by this + // iterator are internal keys encoded by AppendInternalKey in the + // db/format.{h,cc} module. + Iterator* NewIterator(); + + // Add an entry into memtable that maps key to value at the + // specified sequence number and with the specified type. + // Typically value will be empty if type==kTypeDeletion. + void Add(SequenceNumber seq, ValueType type, + const Slice& key, + const Slice& value); + + // If memtable contains a value for key, store it in *value and return true. + // If memtable contains a deletion for key, store a NotFound() error + // in *status and return true. + // Else, return false. + bool Get(const LookupKey& key, std::string* value, Status* s); + + private: + ~MemTable(); // Private since only Unref() should be used to delete it + + struct KeyComparator { + const InternalKeyComparator comparator; + explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { } + int operator()(const char* a, const char* b) const; + }; + friend class MemTableIterator; + friend class MemTableBackwardIterator; + + typedef SkipList Table; + + KeyComparator comparator_; + int refs_; + Arena arena_; + Table table_; + + // No copying allowed + MemTable(const MemTable&); + void operator=(const MemTable&); +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_MEMTABLE_H_ diff --git a/leveldb-library/db/repair.cc b/leveldb-library/db/repair.cc new file mode 100644 index 0000000..4cd4bb0 --- /dev/null +++ b/leveldb-library/db/repair.cc @@ -0,0 +1,461 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// We recover the contents of the descriptor from the other files we find. +// (1) Any log files are first converted to tables +// (2) We scan every table to compute +// (a) smallest/largest for the table +// (b) largest sequence number in the table +// (3) We generate descriptor contents: +// - log number is set to zero +// - next-file-number is set to 1 + largest file number we found +// - last-sequence-number is set to largest sequence# found across +// all tables (see 2c) +// - compaction pointers are cleared +// - every table file is added at level 0 +// +// Possible optimization 1: +// (a) Compute total size and use to pick appropriate max-level M +// (b) Sort tables by largest sequence# in the table +// (c) For each table: if it overlaps earlier table, place in level-0, +// else place in level-M. +// Possible optimization 2: +// Store per-table metadata (smallest, largest, largest-seq#, ...) +// in the table's meta section to speed up ScanTable. + +#include "db/builder.h" +#include "db/db_impl.h" +#include "db/dbformat.h" +#include "db/filename.h" +#include "db/log_reader.h" +#include "db/log_writer.h" +#include "db/memtable.h" +#include "db/table_cache.h" +#include "db/version_edit.h" +#include "db/write_batch_internal.h" +#include "leveldb/comparator.h" +#include "leveldb/db.h" +#include "leveldb/env.h" + +namespace leveldb { + +namespace { + +class Repairer { + public: + Repairer(const std::string& dbname, const Options& options) + : dbname_(dbname), + env_(options.env), + icmp_(options.comparator), + ipolicy_(options.filter_policy), + options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)), + owns_info_log_(options_.info_log != options.info_log), + owns_cache_(options_.block_cache != options.block_cache), + next_file_number_(1) { + // TableCache can be small since we expect each table to be opened once. + table_cache_ = new TableCache(dbname_, &options_, 10); + } + + ~Repairer() { + delete table_cache_; + if (owns_info_log_) { + delete options_.info_log; + } + if (owns_cache_) { + delete options_.block_cache; + } + } + + Status Run() { + Status status = FindFiles(); + if (status.ok()) { + ConvertLogFilesToTables(); + ExtractMetaData(); + status = WriteDescriptor(); + } + if (status.ok()) { + unsigned long long bytes = 0; + for (size_t i = 0; i < tables_.size(); i++) { + bytes += tables_[i].meta.file_size; + } + Log(options_.info_log, + "**** Repaired leveldb %s; " + "recovered %d files; %llu bytes. " + "Some data may have been lost. " + "****", + dbname_.c_str(), + static_cast(tables_.size()), + bytes); + } + return status; + } + + private: + struct TableInfo { + FileMetaData meta; + SequenceNumber max_sequence; + }; + + std::string const dbname_; + Env* const env_; + InternalKeyComparator const icmp_; + InternalFilterPolicy const ipolicy_; + Options const options_; + bool owns_info_log_; + bool owns_cache_; + TableCache* table_cache_; + VersionEdit edit_; + + std::vector manifests_; + std::vector table_numbers_; + std::vector logs_; + std::vector tables_; + uint64_t next_file_number_; + + Status FindFiles() { + std::vector filenames; + Status status = env_->GetChildren(dbname_, &filenames); + if (!status.ok()) { + return status; + } + if (filenames.empty()) { + return Status::IOError(dbname_, "repair found no files"); + } + + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type)) { + if (type == kDescriptorFile) { + manifests_.push_back(filenames[i]); + } else { + if (number + 1 > next_file_number_) { + next_file_number_ = number + 1; + } + if (type == kLogFile) { + logs_.push_back(number); + } else if (type == kTableFile) { + table_numbers_.push_back(number); + } else { + // Ignore other files + } + } + } + } + return status; + } + + void ConvertLogFilesToTables() { + for (size_t i = 0; i < logs_.size(); i++) { + std::string logname = LogFileName(dbname_, logs_[i]); + Status status = ConvertLogToTable(logs_[i]); + if (!status.ok()) { + Log(options_.info_log, "Log #%llu: ignoring conversion error: %s", + (unsigned long long) logs_[i], + status.ToString().c_str()); + } + ArchiveFile(logname); + } + } + + Status ConvertLogToTable(uint64_t log) { + struct LogReporter : public log::Reader::Reporter { + Env* env; + Logger* info_log; + uint64_t lognum; + virtual void Corruption(size_t bytes, const Status& s) { + // We print error messages for corruption, but continue repairing. + Log(info_log, "Log #%llu: dropping %d bytes; %s", + (unsigned long long) lognum, + static_cast(bytes), + s.ToString().c_str()); + } + }; + + // Open the log file + std::string logname = LogFileName(dbname_, log); + SequentialFile* lfile; + Status status = env_->NewSequentialFile(logname, &lfile); + if (!status.ok()) { + return status; + } + + // Create the log reader. + LogReporter reporter; + reporter.env = env_; + reporter.info_log = options_.info_log; + reporter.lognum = log; + // We intentionally make log::Reader do checksumming so that + // corruptions cause entire commits to be skipped instead of + // propagating bad information (like overly large sequence + // numbers). + log::Reader reader(lfile, &reporter, false/*do not checksum*/, + 0/*initial_offset*/); + + // Read all the records and add to a memtable + std::string scratch; + Slice record; + WriteBatch batch; + MemTable* mem = new MemTable(icmp_); + mem->Ref(); + int counter = 0; + while (reader.ReadRecord(&record, &scratch)) { + if (record.size() < 12) { + reporter.Corruption( + record.size(), Status::Corruption("log record too small")); + continue; + } + WriteBatchInternal::SetContents(&batch, record); + status = WriteBatchInternal::InsertInto(&batch, mem); + if (status.ok()) { + counter += WriteBatchInternal::Count(&batch); + } else { + Log(options_.info_log, "Log #%llu: ignoring %s", + (unsigned long long) log, + status.ToString().c_str()); + status = Status::OK(); // Keep going with rest of file + } + } + delete lfile; + + // Do not record a version edit for this conversion to a Table + // since ExtractMetaData() will also generate edits. + FileMetaData meta; + meta.number = next_file_number_++; + Iterator* iter = mem->NewIterator(); + status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta); + delete iter; + mem->Unref(); + mem = NULL; + if (status.ok()) { + if (meta.file_size > 0) { + table_numbers_.push_back(meta.number); + } + } + Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s", + (unsigned long long) log, + counter, + (unsigned long long) meta.number, + status.ToString().c_str()); + return status; + } + + void ExtractMetaData() { + for (size_t i = 0; i < table_numbers_.size(); i++) { + ScanTable(table_numbers_[i]); + } + } + + Iterator* NewTableIterator(const FileMetaData& meta) { + // Same as compaction iterators: if paranoid_checks are on, turn + // on checksum verification. + ReadOptions r; + r.verify_checksums = options_.paranoid_checks; + return table_cache_->NewIterator(r, meta.number, meta.file_size); + } + + void ScanTable(uint64_t number) { + TableInfo t; + t.meta.number = number; + std::string fname = TableFileName(dbname_, number); + Status status = env_->GetFileSize(fname, &t.meta.file_size); + if (!status.ok()) { + // Try alternate file name. + fname = SSTTableFileName(dbname_, number); + Status s2 = env_->GetFileSize(fname, &t.meta.file_size); + if (s2.ok()) { + status = Status::OK(); + } + } + if (!status.ok()) { + ArchiveFile(TableFileName(dbname_, number)); + ArchiveFile(SSTTableFileName(dbname_, number)); + Log(options_.info_log, "Table #%llu: dropped: %s", + (unsigned long long) t.meta.number, + status.ToString().c_str()); + return; + } + + // Extract metadata by scanning through table. + int counter = 0; + Iterator* iter = NewTableIterator(t.meta); + bool empty = true; + ParsedInternalKey parsed; + t.max_sequence = 0; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + Slice key = iter->key(); + if (!ParseInternalKey(key, &parsed)) { + Log(options_.info_log, "Table #%llu: unparsable key %s", + (unsigned long long) t.meta.number, + EscapeString(key).c_str()); + continue; + } + + counter++; + if (empty) { + empty = false; + t.meta.smallest.DecodeFrom(key); + } + t.meta.largest.DecodeFrom(key); + if (parsed.sequence > t.max_sequence) { + t.max_sequence = parsed.sequence; + } + } + if (!iter->status().ok()) { + status = iter->status(); + } + delete iter; + Log(options_.info_log, "Table #%llu: %d entries %s", + (unsigned long long) t.meta.number, + counter, + status.ToString().c_str()); + + if (status.ok()) { + tables_.push_back(t); + } else { + RepairTable(fname, t); // RepairTable archives input file. + } + } + + void RepairTable(const std::string& src, TableInfo t) { + // We will copy src contents to a new table and then rename the + // new table over the source. + + // Create builder. + std::string copy = TableFileName(dbname_, next_file_number_++); + WritableFile* file; + Status s = env_->NewWritableFile(copy, &file); + if (!s.ok()) { + return; + } + TableBuilder* builder = new TableBuilder(options_, file); + + // Copy data. + Iterator* iter = NewTableIterator(t.meta); + int counter = 0; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + builder->Add(iter->key(), iter->value()); + counter++; + } + delete iter; + + ArchiveFile(src); + if (counter == 0) { + builder->Abandon(); // Nothing to save + } else { + s = builder->Finish(); + if (s.ok()) { + t.meta.file_size = builder->FileSize(); + } + } + delete builder; + builder = NULL; + + if (s.ok()) { + s = file->Close(); + } + delete file; + file = NULL; + + if (counter > 0 && s.ok()) { + std::string orig = TableFileName(dbname_, t.meta.number); + s = env_->RenameFile(copy, orig); + if (s.ok()) { + Log(options_.info_log, "Table #%llu: %d entries repaired", + (unsigned long long) t.meta.number, counter); + tables_.push_back(t); + } + } + if (!s.ok()) { + env_->DeleteFile(copy); + } + } + + Status WriteDescriptor() { + std::string tmp = TempFileName(dbname_, 1); + WritableFile* file; + Status status = env_->NewWritableFile(tmp, &file); + if (!status.ok()) { + return status; + } + + SequenceNumber max_sequence = 0; + for (size_t i = 0; i < tables_.size(); i++) { + if (max_sequence < tables_[i].max_sequence) { + max_sequence = tables_[i].max_sequence; + } + } + + edit_.SetComparatorName(icmp_.user_comparator()->Name()); + edit_.SetLogNumber(0); + edit_.SetNextFile(next_file_number_); + edit_.SetLastSequence(max_sequence); + + for (size_t i = 0; i < tables_.size(); i++) { + // TODO(opt): separate out into multiple levels + const TableInfo& t = tables_[i]; + edit_.AddFile(0, t.meta.number, t.meta.file_size, + t.meta.smallest, t.meta.largest); + } + + //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str()); + { + log::Writer log(file); + std::string record; + edit_.EncodeTo(&record); + status = log.AddRecord(record); + } + if (status.ok()) { + status = file->Close(); + } + delete file; + file = NULL; + + if (!status.ok()) { + env_->DeleteFile(tmp); + } else { + // Discard older manifests + for (size_t i = 0; i < manifests_.size(); i++) { + ArchiveFile(dbname_ + "/" + manifests_[i]); + } + + // Install new manifest + status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1)); + if (status.ok()) { + status = SetCurrentFile(env_, dbname_, 1); + } else { + env_->DeleteFile(tmp); + } + } + return status; + } + + void ArchiveFile(const std::string& fname) { + // Move into another directory. E.g., for + // dir/foo + // rename to + // dir/lost/foo + const char* slash = strrchr(fname.c_str(), '/'); + std::string new_dir; + if (slash != NULL) { + new_dir.assign(fname.data(), slash - fname.data()); + } + new_dir.append("/lost"); + env_->CreateDir(new_dir); // Ignore error + std::string new_file = new_dir; + new_file.append("/"); + new_file.append((slash == NULL) ? fname.c_str() : slash + 1); + Status s = env_->RenameFile(fname, new_file); + Log(options_.info_log, "Archiving %s: %s\n", + fname.c_str(), s.ToString().c_str()); + } +}; +} // namespace + +Status RepairDB(const std::string& dbname, const Options& options) { + Repairer repairer(dbname, options); + return repairer.Run(); +} + +} // namespace leveldb diff --git a/leveldb-library/db/skiplist.h b/leveldb-library/db/skiplist.h new file mode 100644 index 0000000..8bd7776 --- /dev/null +++ b/leveldb-library/db/skiplist.h @@ -0,0 +1,384 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_SKIPLIST_H_ +#define STORAGE_LEVELDB_DB_SKIPLIST_H_ + +// Thread safety +// ------------- +// +// Writes require external synchronization, most likely a mutex. +// Reads require a guarantee that the SkipList will not be destroyed +// while the read is in progress. Apart from that, reads progress +// without any internal locking or synchronization. +// +// Invariants: +// +// (1) Allocated nodes are never deleted until the SkipList is +// destroyed. This is trivially guaranteed by the code since we +// never delete any skip list nodes. +// +// (2) The contents of a Node except for the next/prev pointers are +// immutable after the Node has been linked into the SkipList. +// Only Insert() modifies the list, and it is careful to initialize +// a node and use release-stores to publish the nodes in one or +// more lists. +// +// ... prev vs. next pointer ordering ... + +#include +#include +#include "port/port.h" +#include "util/arena.h" +#include "util/random.h" + +namespace leveldb { + +class Arena; + +template +class SkipList { + private: + struct Node; + + public: + // Create a new SkipList object that will use "cmp" for comparing keys, + // and will allocate memory using "*arena". Objects allocated in the arena + // must remain allocated for the lifetime of the skiplist object. + explicit SkipList(Comparator cmp, Arena* arena); + + // Insert key into the list. + // REQUIRES: nothing that compares equal to key is currently in the list. + void Insert(const Key& key); + + // Returns true iff an entry that compares equal to key is in the list. + bool Contains(const Key& key) const; + + // Iteration over the contents of a skip list + class Iterator { + public: + // Initialize an iterator over the specified list. + // The returned iterator is not valid. + explicit Iterator(const SkipList* list); + + // Returns true iff the iterator is positioned at a valid node. + bool Valid() const; + + // Returns the key at the current position. + // REQUIRES: Valid() + const Key& key() const; + + // Advances to the next position. + // REQUIRES: Valid() + void Next(); + + // Advances to the previous position. + // REQUIRES: Valid() + void Prev(); + + // Advance to the first entry with a key >= target + void Seek(const Key& target); + + // Position at the first entry in list. + // Final state of iterator is Valid() iff list is not empty. + void SeekToFirst(); + + // Position at the last entry in list. + // Final state of iterator is Valid() iff list is not empty. + void SeekToLast(); + + private: + const SkipList* list_; + Node* node_; + // Intentionally copyable + }; + + private: + enum { kMaxHeight = 12 }; + + // Immutable after construction + Comparator const compare_; + Arena* const arena_; // Arena used for allocations of nodes + + Node* const head_; + + // Modified only by Insert(). Read racily by readers, but stale + // values are ok. + port::AtomicPointer max_height_; // Height of the entire list + + inline int GetMaxHeight() const { + return static_cast( + reinterpret_cast(max_height_.NoBarrier_Load())); + } + + // Read/written only by Insert(). + Random rnd_; + + Node* NewNode(const Key& key, int height); + int RandomHeight(); + bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } + + // Return true if key is greater than the data stored in "n" + bool KeyIsAfterNode(const Key& key, Node* n) const; + + // Return the earliest node that comes at or after key. + // Return NULL if there is no such node. + // + // If prev is non-NULL, fills prev[level] with pointer to previous + // node at "level" for every level in [0..max_height_-1]. + Node* FindGreaterOrEqual(const Key& key, Node** prev) const; + + // Return the latest node with a key < key. + // Return head_ if there is no such node. + Node* FindLessThan(const Key& key) const; + + // Return the last node in the list. + // Return head_ if list is empty. + Node* FindLast() const; + + // No copying allowed + SkipList(const SkipList&); + void operator=(const SkipList&); +}; + +// Implementation details follow +template +struct SkipList::Node { + explicit Node(const Key& k) : key(k) { } + + Key const key; + + // Accessors/mutators for links. Wrapped in methods so we can + // add the appropriate barriers as necessary. + Node* Next(int n) { + assert(n >= 0); + // Use an 'acquire load' so that we observe a fully initialized + // version of the returned Node. + return reinterpret_cast(next_[n].Acquire_Load()); + } + void SetNext(int n, Node* x) { + assert(n >= 0); + // Use a 'release store' so that anybody who reads through this + // pointer observes a fully initialized version of the inserted node. + next_[n].Release_Store(x); + } + + // No-barrier variants that can be safely used in a few locations. + Node* NoBarrier_Next(int n) { + assert(n >= 0); + return reinterpret_cast(next_[n].NoBarrier_Load()); + } + void NoBarrier_SetNext(int n, Node* x) { + assert(n >= 0); + next_[n].NoBarrier_Store(x); + } + + private: + // Array of length equal to the node height. next_[0] is lowest level link. + port::AtomicPointer next_[1]; +}; + +template +typename SkipList::Node* +SkipList::NewNode(const Key& key, int height) { + char* mem = arena_->AllocateAligned( + sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1)); + return new (mem) Node(key); +} + +template +inline SkipList::Iterator::Iterator(const SkipList* list) { + list_ = list; + node_ = NULL; +} + +template +inline bool SkipList::Iterator::Valid() const { + return node_ != NULL; +} + +template +inline const Key& SkipList::Iterator::key() const { + assert(Valid()); + return node_->key; +} + +template +inline void SkipList::Iterator::Next() { + assert(Valid()); + node_ = node_->Next(0); +} + +template +inline void SkipList::Iterator::Prev() { + // Instead of using explicit "prev" links, we just search for the + // last node that falls before key. + assert(Valid()); + node_ = list_->FindLessThan(node_->key); + if (node_ == list_->head_) { + node_ = NULL; + } +} + +template +inline void SkipList::Iterator::Seek(const Key& target) { + node_ = list_->FindGreaterOrEqual(target, NULL); +} + +template +inline void SkipList::Iterator::SeekToFirst() { + node_ = list_->head_->Next(0); +} + +template +inline void SkipList::Iterator::SeekToLast() { + node_ = list_->FindLast(); + if (node_ == list_->head_) { + node_ = NULL; + } +} + +template +int SkipList::RandomHeight() { + // Increase height with probability 1 in kBranching + static const unsigned int kBranching = 4; + int height = 1; + while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) { + height++; + } + assert(height > 0); + assert(height <= kMaxHeight); + return height; +} + +template +bool SkipList::KeyIsAfterNode(const Key& key, Node* n) const { + // NULL n is considered infinite + return (n != NULL) && (compare_(n->key, key) < 0); +} + +template +typename SkipList::Node* SkipList::FindGreaterOrEqual(const Key& key, Node** prev) + const { + Node* x = head_; + int level = GetMaxHeight() - 1; + while (true) { + Node* next = x->Next(level); + if (KeyIsAfterNode(key, next)) { + // Keep searching in this list + x = next; + } else { + if (prev != NULL) prev[level] = x; + if (level == 0) { + return next; + } else { + // Switch to next list + level--; + } + } + } +} + +template +typename SkipList::Node* +SkipList::FindLessThan(const Key& key) const { + Node* x = head_; + int level = GetMaxHeight() - 1; + while (true) { + assert(x == head_ || compare_(x->key, key) < 0); + Node* next = x->Next(level); + if (next == NULL || compare_(next->key, key) >= 0) { + if (level == 0) { + return x; + } else { + // Switch to next list + level--; + } + } else { + x = next; + } + } +} + +template +typename SkipList::Node* SkipList::FindLast() + const { + Node* x = head_; + int level = GetMaxHeight() - 1; + while (true) { + Node* next = x->Next(level); + if (next == NULL) { + if (level == 0) { + return x; + } else { + // Switch to next list + level--; + } + } else { + x = next; + } + } +} + +template +SkipList::SkipList(Comparator cmp, Arena* arena) + : compare_(cmp), + arena_(arena), + head_(NewNode(0 /* any key will do */, kMaxHeight)), + max_height_(reinterpret_cast(1)), + rnd_(0xdeadbeef) { + for (int i = 0; i < kMaxHeight; i++) { + head_->SetNext(i, NULL); + } +} + +template +void SkipList::Insert(const Key& key) { + // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual() + // here since Insert() is externally synchronized. + Node* prev[kMaxHeight]; + Node* x = FindGreaterOrEqual(key, prev); + + // Our data structure does not allow duplicate insertion + assert(x == NULL || !Equal(key, x->key)); + + int height = RandomHeight(); + if (height > GetMaxHeight()) { + for (int i = GetMaxHeight(); i < height; i++) { + prev[i] = head_; + } + //fprintf(stderr, "Change height from %d to %d\n", max_height_, height); + + // It is ok to mutate max_height_ without any synchronization + // with concurrent readers. A concurrent reader that observes + // the new value of max_height_ will see either the old value of + // new level pointers from head_ (NULL), or a new value set in + // the loop below. In the former case the reader will + // immediately drop to the next level since NULL sorts after all + // keys. In the latter case the reader will use the new node. + max_height_.NoBarrier_Store(reinterpret_cast(height)); + } + + x = NewNode(key, height); + for (int i = 0; i < height; i++) { + // NoBarrier_SetNext() suffices since we will add a barrier when + // we publish a pointer to "x" in prev[i]. + x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i)); + prev[i]->SetNext(i, x); + } +} + +template +bool SkipList::Contains(const Key& key) const { + Node* x = FindGreaterOrEqual(key, NULL); + if (x != NULL && Equal(key, x->key)) { + return true; + } else { + return false; + } +} + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_SKIPLIST_H_ diff --git a/leveldb-library/db/snapshot.h b/leveldb-library/db/snapshot.h new file mode 100644 index 0000000..6ed413c --- /dev/null +++ b/leveldb-library/db/snapshot.h @@ -0,0 +1,67 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_ +#define STORAGE_LEVELDB_DB_SNAPSHOT_H_ + +#include "db/dbformat.h" +#include "leveldb/db.h" + +namespace leveldb { + +class SnapshotList; + +// Snapshots are kept in a doubly-linked list in the DB. +// Each SnapshotImpl corresponds to a particular sequence number. +class SnapshotImpl : public Snapshot { + public: + SequenceNumber number_; // const after creation + + private: + friend class SnapshotList; + + // SnapshotImpl is kept in a doubly-linked circular list + SnapshotImpl* prev_; + SnapshotImpl* next_; + + SnapshotList* list_; // just for sanity checks +}; + +class SnapshotList { + public: + SnapshotList() { + list_.prev_ = &list_; + list_.next_ = &list_; + } + + bool empty() const { return list_.next_ == &list_; } + SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; } + SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; } + + const SnapshotImpl* New(SequenceNumber seq) { + SnapshotImpl* s = new SnapshotImpl; + s->number_ = seq; + s->list_ = this; + s->next_ = &list_; + s->prev_ = list_.prev_; + s->prev_->next_ = s; + s->next_->prev_ = s; + return s; + } + + void Delete(const SnapshotImpl* s) { + assert(s->list_ == this); + s->prev_->next_ = s->next_; + s->next_->prev_ = s->prev_; + delete s; + } + + private: + // Dummy head of doubly-linked list of snapshots + SnapshotImpl list_; +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_SNAPSHOT_H_ diff --git a/leveldb-library/db/table_cache.cc b/leveldb-library/db/table_cache.cc new file mode 100644 index 0000000..e3d82cd --- /dev/null +++ b/leveldb-library/db/table_cache.cc @@ -0,0 +1,127 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/table_cache.h" + +#include "db/filename.h" +#include "leveldb/env.h" +#include "leveldb/table.h" +#include "util/coding.h" + +namespace leveldb { + +struct TableAndFile { + RandomAccessFile* file; + Table* table; +}; + +static void DeleteEntry(const Slice& key, void* value) { + TableAndFile* tf = reinterpret_cast(value); + delete tf->table; + delete tf->file; + delete tf; +} + +static void UnrefEntry(void* arg1, void* arg2) { + Cache* cache = reinterpret_cast(arg1); + Cache::Handle* h = reinterpret_cast(arg2); + cache->Release(h); +} + +TableCache::TableCache(const std::string& dbname, + const Options* options, + int entries) + : env_(options->env), + dbname_(dbname), + options_(options), + cache_(NewLRUCache(entries)) { +} + +TableCache::~TableCache() { + delete cache_; +} + +Status TableCache::FindTable(uint64_t file_number, uint64_t file_size, + Cache::Handle** handle) { + Status s; + char buf[sizeof(file_number)]; + EncodeFixed64(buf, file_number); + Slice key(buf, sizeof(buf)); + *handle = cache_->Lookup(key); + if (*handle == NULL) { + std::string fname = TableFileName(dbname_, file_number); + RandomAccessFile* file = NULL; + Table* table = NULL; + s = env_->NewRandomAccessFile(fname, &file); + if (!s.ok()) { + std::string old_fname = SSTTableFileName(dbname_, file_number); + if (env_->NewRandomAccessFile(old_fname, &file).ok()) { + s = Status::OK(); + } + } + if (s.ok()) { + s = Table::Open(*options_, file, file_size, &table); + } + + if (!s.ok()) { + assert(table == NULL); + delete file; + // We do not cache error results so that if the error is transient, + // or somebody repairs the file, we recover automatically. + } else { + TableAndFile* tf = new TableAndFile; + tf->file = file; + tf->table = table; + *handle = cache_->Insert(key, tf, 1, &DeleteEntry); + } + } + return s; +} + +Iterator* TableCache::NewIterator(const ReadOptions& options, + uint64_t file_number, + uint64_t file_size, + Table** tableptr) { + if (tableptr != NULL) { + *tableptr = NULL; + } + + Cache::Handle* handle = NULL; + Status s = FindTable(file_number, file_size, &handle); + if (!s.ok()) { + return NewErrorIterator(s); + } + + Table* table = reinterpret_cast(cache_->Value(handle))->table; + Iterator* result = table->NewIterator(options); + result->RegisterCleanup(&UnrefEntry, cache_, handle); + if (tableptr != NULL) { + *tableptr = table; + } + return result; +} + +Status TableCache::Get(const ReadOptions& options, + uint64_t file_number, + uint64_t file_size, + const Slice& k, + void* arg, + void (*saver)(void*, const Slice&, const Slice&)) { + Cache::Handle* handle = NULL; + Status s = FindTable(file_number, file_size, &handle); + if (s.ok()) { + Table* t = reinterpret_cast(cache_->Value(handle))->table; + s = t->InternalGet(options, k, arg, saver); + cache_->Release(handle); + } + return s; +} + +void TableCache::Evict(uint64_t file_number) { + char buf[sizeof(file_number)]; + EncodeFixed64(buf, file_number); + cache_->Erase(Slice(buf, sizeof(buf))); +} + +} // namespace leveldb diff --git a/leveldb-library/db/table_cache.h b/leveldb-library/db/table_cache.h new file mode 100644 index 0000000..8cf4aaf --- /dev/null +++ b/leveldb-library/db/table_cache.h @@ -0,0 +1,61 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Thread-safe (provides internal synchronization) + +#ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_ +#define STORAGE_LEVELDB_DB_TABLE_CACHE_H_ + +#include +#include +#include "db/dbformat.h" +#include "leveldb/cache.h" +#include "leveldb/table.h" +#include "port/port.h" + +namespace leveldb { + +class Env; + +class TableCache { + public: + TableCache(const std::string& dbname, const Options* options, int entries); + ~TableCache(); + + // Return an iterator for the specified file number (the corresponding + // file length must be exactly "file_size" bytes). If "tableptr" is + // non-NULL, also sets "*tableptr" to point to the Table object + // underlying the returned iterator, or NULL if no Table object underlies + // the returned iterator. The returned "*tableptr" object is owned by + // the cache and should not be deleted, and is valid for as long as the + // returned iterator is live. + Iterator* NewIterator(const ReadOptions& options, + uint64_t file_number, + uint64_t file_size, + Table** tableptr = NULL); + + // If a seek to internal key "k" in specified file finds an entry, + // call (*handle_result)(arg, found_key, found_value). + Status Get(const ReadOptions& options, + uint64_t file_number, + uint64_t file_size, + const Slice& k, + void* arg, + void (*handle_result)(void*, const Slice&, const Slice&)); + + // Evict any entry for the specified file number + void Evict(uint64_t file_number); + + private: + Env* const env_; + const std::string dbname_; + const Options* options_; + Cache* cache_; + + Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**); +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_TABLE_CACHE_H_ diff --git a/leveldb-library/db/version_edit.cc b/leveldb-library/db/version_edit.cc new file mode 100644 index 0000000..f10a2d5 --- /dev/null +++ b/leveldb-library/db/version_edit.cc @@ -0,0 +1,266 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/version_edit.h" + +#include "db/version_set.h" +#include "util/coding.h" + +namespace leveldb { + +// Tag numbers for serialized VersionEdit. These numbers are written to +// disk and should not be changed. +enum Tag { + kComparator = 1, + kLogNumber = 2, + kNextFileNumber = 3, + kLastSequence = 4, + kCompactPointer = 5, + kDeletedFile = 6, + kNewFile = 7, + // 8 was used for large value refs + kPrevLogNumber = 9 +}; + +void VersionEdit::Clear() { + comparator_.clear(); + log_number_ = 0; + prev_log_number_ = 0; + last_sequence_ = 0; + next_file_number_ = 0; + has_comparator_ = false; + has_log_number_ = false; + has_prev_log_number_ = false; + has_next_file_number_ = false; + has_last_sequence_ = false; + deleted_files_.clear(); + new_files_.clear(); +} + +void VersionEdit::EncodeTo(std::string* dst) const { + if (has_comparator_) { + PutVarint32(dst, kComparator); + PutLengthPrefixedSlice(dst, comparator_); + } + if (has_log_number_) { + PutVarint32(dst, kLogNumber); + PutVarint64(dst, log_number_); + } + if (has_prev_log_number_) { + PutVarint32(dst, kPrevLogNumber); + PutVarint64(dst, prev_log_number_); + } + if (has_next_file_number_) { + PutVarint32(dst, kNextFileNumber); + PutVarint64(dst, next_file_number_); + } + if (has_last_sequence_) { + PutVarint32(dst, kLastSequence); + PutVarint64(dst, last_sequence_); + } + + for (size_t i = 0; i < compact_pointers_.size(); i++) { + PutVarint32(dst, kCompactPointer); + PutVarint32(dst, compact_pointers_[i].first); // level + PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode()); + } + + for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); + iter != deleted_files_.end(); + ++iter) { + PutVarint32(dst, kDeletedFile); + PutVarint32(dst, iter->first); // level + PutVarint64(dst, iter->second); // file number + } + + for (size_t i = 0; i < new_files_.size(); i++) { + const FileMetaData& f = new_files_[i].second; + PutVarint32(dst, kNewFile); + PutVarint32(dst, new_files_[i].first); // level + PutVarint64(dst, f.number); + PutVarint64(dst, f.file_size); + PutLengthPrefixedSlice(dst, f.smallest.Encode()); + PutLengthPrefixedSlice(dst, f.largest.Encode()); + } +} + +static bool GetInternalKey(Slice* input, InternalKey* dst) { + Slice str; + if (GetLengthPrefixedSlice(input, &str)) { + dst->DecodeFrom(str); + return true; + } else { + return false; + } +} + +static bool GetLevel(Slice* input, int* level) { + uint32_t v; + if (GetVarint32(input, &v) && + v < config::kNumLevels) { + *level = v; + return true; + } else { + return false; + } +} + +Status VersionEdit::DecodeFrom(const Slice& src) { + Clear(); + Slice input = src; + const char* msg = NULL; + uint32_t tag; + + // Temporary storage for parsing + int level; + uint64_t number; + FileMetaData f; + Slice str; + InternalKey key; + + while (msg == NULL && GetVarint32(&input, &tag)) { + switch (tag) { + case kComparator: + if (GetLengthPrefixedSlice(&input, &str)) { + comparator_ = str.ToString(); + has_comparator_ = true; + } else { + msg = "comparator name"; + } + break; + + case kLogNumber: + if (GetVarint64(&input, &log_number_)) { + has_log_number_ = true; + } else { + msg = "log number"; + } + break; + + case kPrevLogNumber: + if (GetVarint64(&input, &prev_log_number_)) { + has_prev_log_number_ = true; + } else { + msg = "previous log number"; + } + break; + + case kNextFileNumber: + if (GetVarint64(&input, &next_file_number_)) { + has_next_file_number_ = true; + } else { + msg = "next file number"; + } + break; + + case kLastSequence: + if (GetVarint64(&input, &last_sequence_)) { + has_last_sequence_ = true; + } else { + msg = "last sequence number"; + } + break; + + case kCompactPointer: + if (GetLevel(&input, &level) && + GetInternalKey(&input, &key)) { + compact_pointers_.push_back(std::make_pair(level, key)); + } else { + msg = "compaction pointer"; + } + break; + + case kDeletedFile: + if (GetLevel(&input, &level) && + GetVarint64(&input, &number)) { + deleted_files_.insert(std::make_pair(level, number)); + } else { + msg = "deleted file"; + } + break; + + case kNewFile: + if (GetLevel(&input, &level) && + GetVarint64(&input, &f.number) && + GetVarint64(&input, &f.file_size) && + GetInternalKey(&input, &f.smallest) && + GetInternalKey(&input, &f.largest)) { + new_files_.push_back(std::make_pair(level, f)); + } else { + msg = "new-file entry"; + } + break; + + default: + msg = "unknown tag"; + break; + } + } + + if (msg == NULL && !input.empty()) { + msg = "invalid tag"; + } + + Status result; + if (msg != NULL) { + result = Status::Corruption("VersionEdit", msg); + } + return result; +} + +std::string VersionEdit::DebugString() const { + std::string r; + r.append("VersionEdit {"); + if (has_comparator_) { + r.append("\n Comparator: "); + r.append(comparator_); + } + if (has_log_number_) { + r.append("\n LogNumber: "); + AppendNumberTo(&r, log_number_); + } + if (has_prev_log_number_) { + r.append("\n PrevLogNumber: "); + AppendNumberTo(&r, prev_log_number_); + } + if (has_next_file_number_) { + r.append("\n NextFile: "); + AppendNumberTo(&r, next_file_number_); + } + if (has_last_sequence_) { + r.append("\n LastSeq: "); + AppendNumberTo(&r, last_sequence_); + } + for (size_t i = 0; i < compact_pointers_.size(); i++) { + r.append("\n CompactPointer: "); + AppendNumberTo(&r, compact_pointers_[i].first); + r.append(" "); + r.append(compact_pointers_[i].second.DebugString()); + } + for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); + iter != deleted_files_.end(); + ++iter) { + r.append("\n DeleteFile: "); + AppendNumberTo(&r, iter->first); + r.append(" "); + AppendNumberTo(&r, iter->second); + } + for (size_t i = 0; i < new_files_.size(); i++) { + const FileMetaData& f = new_files_[i].second; + r.append("\n AddFile: "); + AppendNumberTo(&r, new_files_[i].first); + r.append(" "); + AppendNumberTo(&r, f.number); + r.append(" "); + AppendNumberTo(&r, f.file_size); + r.append(" "); + r.append(f.smallest.DebugString()); + r.append(" .. "); + r.append(f.largest.DebugString()); + } + r.append("\n}\n"); + return r; +} + +} // namespace leveldb diff --git a/leveldb-library/db/version_edit.h b/leveldb-library/db/version_edit.h new file mode 100644 index 0000000..eaef77b --- /dev/null +++ b/leveldb-library/db/version_edit.h @@ -0,0 +1,107 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_VERSION_EDIT_H_ +#define STORAGE_LEVELDB_DB_VERSION_EDIT_H_ + +#include +#include +#include +#include "db/dbformat.h" + +namespace leveldb { + +class VersionSet; + +struct FileMetaData { + int refs; + int allowed_seeks; // Seeks allowed until compaction + uint64_t number; + uint64_t file_size; // File size in bytes + InternalKey smallest; // Smallest internal key served by table + InternalKey largest; // Largest internal key served by table + + FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { } +}; + +class VersionEdit { + public: + VersionEdit() { Clear(); } + ~VersionEdit() { } + + void Clear(); + + void SetComparatorName(const Slice& name) { + has_comparator_ = true; + comparator_ = name.ToString(); + } + void SetLogNumber(uint64_t num) { + has_log_number_ = true; + log_number_ = num; + } + void SetPrevLogNumber(uint64_t num) { + has_prev_log_number_ = true; + prev_log_number_ = num; + } + void SetNextFile(uint64_t num) { + has_next_file_number_ = true; + next_file_number_ = num; + } + void SetLastSequence(SequenceNumber seq) { + has_last_sequence_ = true; + last_sequence_ = seq; + } + void SetCompactPointer(int level, const InternalKey& key) { + compact_pointers_.push_back(std::make_pair(level, key)); + } + + // Add the specified file at the specified number. + // REQUIRES: This version has not been saved (see VersionSet::SaveTo) + // REQUIRES: "smallest" and "largest" are smallest and largest keys in file + void AddFile(int level, uint64_t file, + uint64_t file_size, + const InternalKey& smallest, + const InternalKey& largest) { + FileMetaData f; + f.number = file; + f.file_size = file_size; + f.smallest = smallest; + f.largest = largest; + new_files_.push_back(std::make_pair(level, f)); + } + + // Delete the specified "file" from the specified "level". + void DeleteFile(int level, uint64_t file) { + deleted_files_.insert(std::make_pair(level, file)); + } + + void EncodeTo(std::string* dst) const; + Status DecodeFrom(const Slice& src); + + std::string DebugString() const; + + private: + friend class VersionSet; + + typedef std::set< std::pair > DeletedFileSet; + + std::string comparator_; + uint64_t log_number_; + uint64_t prev_log_number_; + uint64_t next_file_number_; + SequenceNumber last_sequence_; + bool has_comparator_; + bool has_log_number_; + bool has_prev_log_number_; + bool has_next_file_number_; + bool has_last_sequence_; + + std::vector< std::pair > compact_pointers_; + DeletedFileSet deleted_files_; + std::vector< std::pair > new_files_; +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_VERSION_EDIT_H_ diff --git a/leveldb-library/db/version_set.cc b/leveldb-library/db/version_set.cc new file mode 100644 index 0000000..b1256f9 --- /dev/null +++ b/leveldb-library/db/version_set.cc @@ -0,0 +1,1535 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/version_set.h" + +#include +#include +#include "db/filename.h" +#include "db/log_reader.h" +#include "db/log_writer.h" +#include "db/memtable.h" +#include "db/table_cache.h" +#include "leveldb/env.h" +#include "leveldb/table_builder.h" +#include "table/merger.h" +#include "table/two_level_iterator.h" +#include "util/coding.h" +#include "util/logging.h" + +namespace leveldb { + +static int TargetFileSize(const Options* options) { + return options->max_file_size; +} + +// Maximum bytes of overlaps in grandparent (i.e., level+2) before we +// stop building a single file in a level->level+1 compaction. +static int64_t MaxGrandParentOverlapBytes(const Options* options) { + return 10 * TargetFileSize(options); +} + +// Maximum number of bytes in all compacted files. We avoid expanding +// the lower level file set of a compaction if it would make the +// total compaction cover more than this many bytes. +static int64_t ExpandedCompactionByteSizeLimit(const Options* options) { + return 25 * TargetFileSize(options); +} + +static double MaxBytesForLevel(const Options* options, int level) { + // Note: the result for level zero is not really used since we set + // the level-0 compaction threshold based on number of files. + + // Result for both level-0 and level-1 + double result = 10. * 1048576.0; + while (level > 1) { + result *= 10; + level--; + } + return result; +} + +static uint64_t MaxFileSizeForLevel(const Options* options, int level) { + // We could vary per level to reduce number of files? + return TargetFileSize(options); +} + +static int64_t TotalFileSize(const std::vector& files) { + int64_t sum = 0; + for (size_t i = 0; i < files.size(); i++) { + sum += files[i]->file_size; + } + return sum; +} + +Version::~Version() { + assert(refs_ == 0); + + // Remove from linked list + prev_->next_ = next_; + next_->prev_ = prev_; + + // Drop references to files + for (int level = 0; level < config::kNumLevels; level++) { + for (size_t i = 0; i < files_[level].size(); i++) { + FileMetaData* f = files_[level][i]; + assert(f->refs > 0); + f->refs--; + if (f->refs <= 0) { + delete f; + } + } + } +} + +int FindFile(const InternalKeyComparator& icmp, + const std::vector& files, + const Slice& key) { + uint32_t left = 0; + uint32_t right = files.size(); + while (left < right) { + uint32_t mid = (left + right) / 2; + const FileMetaData* f = files[mid]; + if (icmp.InternalKeyComparator::Compare(f->largest.Encode(), key) < 0) { + // Key at "mid.largest" is < "target". Therefore all + // files at or before "mid" are uninteresting. + left = mid + 1; + } else { + // Key at "mid.largest" is >= "target". Therefore all files + // after "mid" are uninteresting. + right = mid; + } + } + return right; +} + +static bool AfterFile(const Comparator* ucmp, + const Slice* user_key, const FileMetaData* f) { + // NULL user_key occurs before all keys and is therefore never after *f + return (user_key != NULL && + ucmp->Compare(*user_key, f->largest.user_key()) > 0); +} + +static bool BeforeFile(const Comparator* ucmp, + const Slice* user_key, const FileMetaData* f) { + // NULL user_key occurs after all keys and is therefore never before *f + return (user_key != NULL && + ucmp->Compare(*user_key, f->smallest.user_key()) < 0); +} + +bool SomeFileOverlapsRange( + const InternalKeyComparator& icmp, + bool disjoint_sorted_files, + const std::vector& files, + const Slice* smallest_user_key, + const Slice* largest_user_key) { + const Comparator* ucmp = icmp.user_comparator(); + if (!disjoint_sorted_files) { + // Need to check against all files + for (size_t i = 0; i < files.size(); i++) { + const FileMetaData* f = files[i]; + if (AfterFile(ucmp, smallest_user_key, f) || + BeforeFile(ucmp, largest_user_key, f)) { + // No overlap + } else { + return true; // Overlap + } + } + return false; + } + + // Binary search over file list + uint32_t index = 0; + if (smallest_user_key != NULL) { + // Find the earliest possible internal key for smallest_user_key + InternalKey small(*smallest_user_key, kMaxSequenceNumber,kValueTypeForSeek); + index = FindFile(icmp, files, small.Encode()); + } + + if (index >= files.size()) { + // beginning of range is after all files, so no overlap. + return false; + } + + return !BeforeFile(ucmp, largest_user_key, files[index]); +} + +// An internal iterator. For a given version/level pair, yields +// information about the files in the level. For a given entry, key() +// is the largest key that occurs in the file, and value() is an +// 16-byte value containing the file number and file size, both +// encoded using EncodeFixed64. +class Version::LevelFileNumIterator : public Iterator { + public: + LevelFileNumIterator(const InternalKeyComparator& icmp, + const std::vector* flist) + : icmp_(icmp), + flist_(flist), + index_(flist->size()) { // Marks as invalid + } + virtual bool Valid() const { + return index_ < flist_->size(); + } + virtual void Seek(const Slice& target) { + index_ = FindFile(icmp_, *flist_, target); + } + virtual void SeekToFirst() { index_ = 0; } + virtual void SeekToLast() { + index_ = flist_->empty() ? 0 : flist_->size() - 1; + } + virtual void Next() { + assert(Valid()); + index_++; + } + virtual void Prev() { + assert(Valid()); + if (index_ == 0) { + index_ = flist_->size(); // Marks as invalid + } else { + index_--; + } + } + Slice key() const { + assert(Valid()); + return (*flist_)[index_]->largest.Encode(); + } + Slice value() const { + assert(Valid()); + EncodeFixed64(value_buf_, (*flist_)[index_]->number); + EncodeFixed64(value_buf_+8, (*flist_)[index_]->file_size); + return Slice(value_buf_, sizeof(value_buf_)); + } + virtual Status status() const { return Status::OK(); } + private: + const InternalKeyComparator icmp_; + const std::vector* const flist_; + uint32_t index_; + + // Backing store for value(). Holds the file number and size. + mutable char value_buf_[16]; +}; + +static Iterator* GetFileIterator(void* arg, + const ReadOptions& options, + const Slice& file_value) { + TableCache* cache = reinterpret_cast(arg); + if (file_value.size() != 16) { + return NewErrorIterator( + Status::Corruption("FileReader invoked with unexpected value")); + } else { + return cache->NewIterator(options, + DecodeFixed64(file_value.data()), + DecodeFixed64(file_value.data() + 8)); + } +} + +Iterator* Version::NewConcatenatingIterator(const ReadOptions& options, + int level) const { + return NewTwoLevelIterator( + new LevelFileNumIterator(vset_->icmp_, &files_[level]), + &GetFileIterator, vset_->table_cache_, options); +} + +void Version::AddIterators(const ReadOptions& options, + std::vector* iters) { + // Merge all level zero files together since they may overlap + for (size_t i = 0; i < files_[0].size(); i++) { + iters->push_back( + vset_->table_cache_->NewIterator( + options, files_[0][i]->number, files_[0][i]->file_size)); + } + + // For levels > 0, we can use a concatenating iterator that sequentially + // walks through the non-overlapping files in the level, opening them + // lazily. + for (int level = 1; level < config::kNumLevels; level++) { + if (!files_[level].empty()) { + iters->push_back(NewConcatenatingIterator(options, level)); + } + } +} + +// Callback from TableCache::Get() +namespace { +enum SaverState { + kNotFound, + kFound, + kDeleted, + kCorrupt, +}; +struct Saver { + SaverState state; + const Comparator* ucmp; + Slice user_key; + std::string* value; +}; +} +static void SaveValue(void* arg, const Slice& ikey, const Slice& v) { + Saver* s = reinterpret_cast(arg); + ParsedInternalKey parsed_key; + if (!ParseInternalKey(ikey, &parsed_key)) { + s->state = kCorrupt; + } else { + if (s->ucmp->Compare(parsed_key.user_key, s->user_key) == 0) { + s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted; + if (s->state == kFound) { + s->value->assign(v.data(), v.size()); + } + } + } +} + +static bool NewestFirst(FileMetaData* a, FileMetaData* b) { + return a->number > b->number; +} + +void Version::ForEachOverlapping(Slice user_key, Slice internal_key, + void* arg, + bool (*func)(void*, int, FileMetaData*)) { + // TODO(sanjay): Change Version::Get() to use this function. + const Comparator* ucmp = vset_->icmp_.user_comparator(); + + // Search level-0 in order from newest to oldest. + std::vector tmp; + tmp.reserve(files_[0].size()); + for (uint32_t i = 0; i < files_[0].size(); i++) { + FileMetaData* f = files_[0][i]; + if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 && + ucmp->Compare(user_key, f->largest.user_key()) <= 0) { + tmp.push_back(f); + } + } + if (!tmp.empty()) { + std::sort(tmp.begin(), tmp.end(), NewestFirst); + for (uint32_t i = 0; i < tmp.size(); i++) { + if (!(*func)(arg, 0, tmp[i])) { + return; + } + } + } + + // Search other levels. + for (int level = 1; level < config::kNumLevels; level++) { + size_t num_files = files_[level].size(); + if (num_files == 0) continue; + + // Binary search to find earliest index whose largest key >= internal_key. + uint32_t index = FindFile(vset_->icmp_, files_[level], internal_key); + if (index < num_files) { + FileMetaData* f = files_[level][index]; + if (ucmp->Compare(user_key, f->smallest.user_key()) < 0) { + // All of "f" is past any data for user_key + } else { + if (!(*func)(arg, level, f)) { + return; + } + } + } + } +} + +Status Version::Get(const ReadOptions& options, + const LookupKey& k, + std::string* value, + GetStats* stats) { + Slice ikey = k.internal_key(); + Slice user_key = k.user_key(); + const Comparator* ucmp = vset_->icmp_.user_comparator(); + Status s; + + stats->seek_file = NULL; + stats->seek_file_level = -1; + FileMetaData* last_file_read = NULL; + int last_file_read_level = -1; + + // We can search level-by-level since entries never hop across + // levels. Therefore we are guaranteed that if we find data + // in an smaller level, later levels are irrelevant. + std::vector tmp; + FileMetaData* tmp2; + for (int level = 0; level < config::kNumLevels; level++) { + size_t num_files = files_[level].size(); + if (num_files == 0) continue; + + // Get the list of files to search in this level + FileMetaData* const* files = &files_[level][0]; + if (level == 0) { + // Level-0 files may overlap each other. Find all files that + // overlap user_key and process them in order from newest to oldest. + tmp.reserve(num_files); + for (uint32_t i = 0; i < num_files; i++) { + FileMetaData* f = files[i]; + if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 && + ucmp->Compare(user_key, f->largest.user_key()) <= 0) { + tmp.push_back(f); + } + } + if (tmp.empty()) continue; + + std::sort(tmp.begin(), tmp.end(), NewestFirst); + files = &tmp[0]; + num_files = tmp.size(); + } else { + // Binary search to find earliest index whose largest key >= ikey. + uint32_t index = FindFile(vset_->icmp_, files_[level], ikey); + if (index >= num_files) { + files = NULL; + num_files = 0; + } else { + tmp2 = files[index]; + if (ucmp->Compare(user_key, tmp2->smallest.user_key()) < 0) { + // All of "tmp2" is past any data for user_key + files = NULL; + num_files = 0; + } else { + files = &tmp2; + num_files = 1; + } + } + } + + for (uint32_t i = 0; i < num_files; ++i) { + if (last_file_read != NULL && stats->seek_file == NULL) { + // We have had more than one seek for this read. Charge the 1st file. + stats->seek_file = last_file_read; + stats->seek_file_level = last_file_read_level; + } + + FileMetaData* f = files[i]; + last_file_read = f; + last_file_read_level = level; + + Saver saver; + saver.state = kNotFound; + saver.ucmp = ucmp; + saver.user_key = user_key; + saver.value = value; + s = vset_->table_cache_->Get(options, f->number, f->file_size, + ikey, &saver, SaveValue); + if (!s.ok()) { + return s; + } + switch (saver.state) { + case kNotFound: + break; // Keep searching in other files + case kFound: + return s; + case kDeleted: + s = Status::NotFound(Slice()); // Use empty error message for speed + return s; + case kCorrupt: + s = Status::Corruption("corrupted key for ", user_key); + return s; + } + } + } + + return Status::NotFound(Slice()); // Use an empty error message for speed +} + +bool Version::UpdateStats(const GetStats& stats) { + FileMetaData* f = stats.seek_file; + if (f != NULL) { + f->allowed_seeks--; + if (f->allowed_seeks <= 0 && file_to_compact_ == NULL) { + file_to_compact_ = f; + file_to_compact_level_ = stats.seek_file_level; + return true; + } + } + return false; +} + +bool Version::RecordReadSample(Slice internal_key) { + ParsedInternalKey ikey; + if (!ParseInternalKey(internal_key, &ikey)) { + return false; + } + + struct State { + GetStats stats; // Holds first matching file + int matches; + + static bool Match(void* arg, int level, FileMetaData* f) { + State* state = reinterpret_cast(arg); + state->matches++; + if (state->matches == 1) { + // Remember first match. + state->stats.seek_file = f; + state->stats.seek_file_level = level; + } + // We can stop iterating once we have a second match. + return state->matches < 2; + } + }; + + State state; + state.matches = 0; + ForEachOverlapping(ikey.user_key, internal_key, &state, &State::Match); + + // Must have at least two matches since we want to merge across + // files. But what if we have a single file that contains many + // overwrites and deletions? Should we have another mechanism for + // finding such files? + if (state.matches >= 2) { + // 1MB cost is about 1 seek (see comment in Builder::Apply). + return UpdateStats(state.stats); + } + return false; +} + +void Version::Ref() { + ++refs_; +} + +void Version::Unref() { + assert(this != &vset_->dummy_versions_); + assert(refs_ >= 1); + --refs_; + if (refs_ == 0) { + delete this; + } +} + +bool Version::OverlapInLevel(int level, + const Slice* smallest_user_key, + const Slice* largest_user_key) { + return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level], + smallest_user_key, largest_user_key); +} + +int Version::PickLevelForMemTableOutput( + const Slice& smallest_user_key, + const Slice& largest_user_key) { + int level = 0; + if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) { + // Push to next level if there is no overlap in next level, + // and the #bytes overlapping in the level after that are limited. + InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey limit(largest_user_key, 0, static_cast(0)); + std::vector overlaps; + while (level < config::kMaxMemCompactLevel) { + if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) { + break; + } + if (level + 2 < config::kNumLevels) { + // Check that file does not overlap too many grandparent bytes. + GetOverlappingInputs(level + 2, &start, &limit, &overlaps); + const int64_t sum = TotalFileSize(overlaps); + if (sum > MaxGrandParentOverlapBytes(vset_->options_)) { + break; + } + } + level++; + } + } + return level; +} + +// Store in "*inputs" all files in "level" that overlap [begin,end] +void Version::GetOverlappingInputs( + int level, + const InternalKey* begin, + const InternalKey* end, + std::vector* inputs) { + assert(level >= 0); + assert(level < config::kNumLevels); + inputs->clear(); + Slice user_begin, user_end; + if (begin != NULL) { + user_begin = begin->user_key(); + } + if (end != NULL) { + user_end = end->user_key(); + } + const Comparator* user_cmp = vset_->icmp_.user_comparator(); + for (size_t i = 0; i < files_[level].size(); ) { + FileMetaData* f = files_[level][i++]; + const Slice file_start = f->smallest.user_key(); + const Slice file_limit = f->largest.user_key(); + if (begin != NULL && user_cmp->Compare(file_limit, user_begin) < 0) { + // "f" is completely before specified range; skip it + } else if (end != NULL && user_cmp->Compare(file_start, user_end) > 0) { + // "f" is completely after specified range; skip it + } else { + inputs->push_back(f); + if (level == 0) { + // Level-0 files may overlap each other. So check if the newly + // added file has expanded the range. If so, restart search. + if (begin != NULL && user_cmp->Compare(file_start, user_begin) < 0) { + user_begin = file_start; + inputs->clear(); + i = 0; + } else if (end != NULL && user_cmp->Compare(file_limit, user_end) > 0) { + user_end = file_limit; + inputs->clear(); + i = 0; + } + } + } + } +} + +std::string Version::DebugString() const { + std::string r; + for (int level = 0; level < config::kNumLevels; level++) { + // E.g., + // --- level 1 --- + // 17:123['a' .. 'd'] + // 20:43['e' .. 'g'] + r.append("--- level "); + AppendNumberTo(&r, level); + r.append(" ---\n"); + const std::vector& files = files_[level]; + for (size_t i = 0; i < files.size(); i++) { + r.push_back(' '); + AppendNumberTo(&r, files[i]->number); + r.push_back(':'); + AppendNumberTo(&r, files[i]->file_size); + r.append("["); + r.append(files[i]->smallest.DebugString()); + r.append(" .. "); + r.append(files[i]->largest.DebugString()); + r.append("]\n"); + } + } + return r; +} + +// A helper class so we can efficiently apply a whole sequence +// of edits to a particular state without creating intermediate +// Versions that contain full copies of the intermediate state. +class VersionSet::Builder { + private: + // Helper to sort by v->files_[file_number].smallest + struct BySmallestKey { + const InternalKeyComparator* internal_comparator; + + bool operator()(FileMetaData* f1, FileMetaData* f2) const { + int r = internal_comparator->Compare(f1->smallest, f2->smallest); + if (r != 0) { + return (r < 0); + } else { + // Break ties by file number + return (f1->number < f2->number); + } + } + }; + + typedef std::set FileSet; + struct LevelState { + std::set deleted_files; + FileSet* added_files; + }; + + VersionSet* vset_; + Version* base_; + LevelState levels_[config::kNumLevels]; + + public: + // Initialize a builder with the files from *base and other info from *vset + Builder(VersionSet* vset, Version* base) + : vset_(vset), + base_(base) { + base_->Ref(); + BySmallestKey cmp; + cmp.internal_comparator = &vset_->icmp_; + for (int level = 0; level < config::kNumLevels; level++) { + levels_[level].added_files = new FileSet(cmp); + } + } + + ~Builder() { + for (int level = 0; level < config::kNumLevels; level++) { + const FileSet* added = levels_[level].added_files; + std::vector to_unref; + to_unref.reserve(added->size()); + for (FileSet::const_iterator it = added->begin(); + it != added->end(); ++it) { + to_unref.push_back(*it); + } + delete added; + for (uint32_t i = 0; i < to_unref.size(); i++) { + FileMetaData* f = to_unref[i]; + f->refs--; + if (f->refs <= 0) { + delete f; + } + } + } + base_->Unref(); + } + + // Apply all of the edits in *edit to the current state. + void Apply(VersionEdit* edit) { + // Update compaction pointers + for (size_t i = 0; i < edit->compact_pointers_.size(); i++) { + const int level = edit->compact_pointers_[i].first; + vset_->compact_pointer_[level] = + edit->compact_pointers_[i].second.Encode().ToString(); + } + + // Delete files + const VersionEdit::DeletedFileSet& del = edit->deleted_files_; + for (VersionEdit::DeletedFileSet::const_iterator iter = del.begin(); + iter != del.end(); + ++iter) { + const int level = iter->first; + const uint64_t number = iter->second; + levels_[level].deleted_files.insert(number); + } + + // Add new files + for (size_t i = 0; i < edit->new_files_.size(); i++) { + const int level = edit->new_files_[i].first; + FileMetaData* f = new FileMetaData(edit->new_files_[i].second); + f->refs = 1; + + // We arrange to automatically compact this file after + // a certain number of seeks. Let's assume: + // (1) One seek costs 10ms + // (2) Writing or reading 1MB costs 10ms (100MB/s) + // (3) A compaction of 1MB does 25MB of IO: + // 1MB read from this level + // 10-12MB read from next level (boundaries may be misaligned) + // 10-12MB written to next level + // This implies that 25 seeks cost the same as the compaction + // of 1MB of data. I.e., one seek costs approximately the + // same as the compaction of 40KB of data. We are a little + // conservative and allow approximately one seek for every 16KB + // of data before triggering a compaction. + f->allowed_seeks = (f->file_size / 16384); + if (f->allowed_seeks < 100) f->allowed_seeks = 100; + + levels_[level].deleted_files.erase(f->number); + levels_[level].added_files->insert(f); + } + } + + // Save the current state in *v. + void SaveTo(Version* v) { + BySmallestKey cmp; + cmp.internal_comparator = &vset_->icmp_; + for (int level = 0; level < config::kNumLevels; level++) { + // Merge the set of added files with the set of pre-existing files. + // Drop any deleted files. Store the result in *v. + const std::vector& base_files = base_->files_[level]; + std::vector::const_iterator base_iter = base_files.begin(); + std::vector::const_iterator base_end = base_files.end(); + const FileSet* added = levels_[level].added_files; + v->files_[level].reserve(base_files.size() + added->size()); + for (FileSet::const_iterator added_iter = added->begin(); + added_iter != added->end(); + ++added_iter) { + // Add all smaller files listed in base_ + for (std::vector::const_iterator bpos + = std::upper_bound(base_iter, base_end, *added_iter, cmp); + base_iter != bpos; + ++base_iter) { + MaybeAddFile(v, level, *base_iter); + } + + MaybeAddFile(v, level, *added_iter); + } + + // Add remaining base files + for (; base_iter != base_end; ++base_iter) { + MaybeAddFile(v, level, *base_iter); + } + +#ifndef NDEBUG + // Make sure there is no overlap in levels > 0 + if (level > 0) { + for (uint32_t i = 1; i < v->files_[level].size(); i++) { + const InternalKey& prev_end = v->files_[level][i-1]->largest; + const InternalKey& this_begin = v->files_[level][i]->smallest; + if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) { + fprintf(stderr, "overlapping ranges in same level %s vs. %s\n", + prev_end.DebugString().c_str(), + this_begin.DebugString().c_str()); + abort(); + } + } + } +#endif + } + } + + void MaybeAddFile(Version* v, int level, FileMetaData* f) { + if (levels_[level].deleted_files.count(f->number) > 0) { + // File is deleted: do nothing + } else { + std::vector* files = &v->files_[level]; + if (level > 0 && !files->empty()) { + // Must not overlap + assert(vset_->icmp_.Compare((*files)[files->size()-1]->largest, + f->smallest) < 0); + } + f->refs++; + files->push_back(f); + } + } +}; + +VersionSet::VersionSet(const std::string& dbname, + const Options* options, + TableCache* table_cache, + const InternalKeyComparator* cmp) + : env_(options->env), + dbname_(dbname), + options_(options), + table_cache_(table_cache), + icmp_(*cmp), + next_file_number_(2), + manifest_file_number_(0), // Filled by Recover() + last_sequence_(0), + log_number_(0), + prev_log_number_(0), + descriptor_file_(NULL), + descriptor_log_(NULL), + dummy_versions_(this), + current_(NULL) { + AppendVersion(new Version(this)); +} + +VersionSet::~VersionSet() { + current_->Unref(); + assert(dummy_versions_.next_ == &dummy_versions_); // List must be empty + delete descriptor_log_; + delete descriptor_file_; +} + +void VersionSet::AppendVersion(Version* v) { + // Make "v" current + assert(v->refs_ == 0); + assert(v != current_); + if (current_ != NULL) { + current_->Unref(); + } + current_ = v; + v->Ref(); + + // Append to linked list + v->prev_ = dummy_versions_.prev_; + v->next_ = &dummy_versions_; + v->prev_->next_ = v; + v->next_->prev_ = v; +} + +Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { + if (edit->has_log_number_) { + assert(edit->log_number_ >= log_number_); + assert(edit->log_number_ < next_file_number_); + } else { + edit->SetLogNumber(log_number_); + } + + if (!edit->has_prev_log_number_) { + edit->SetPrevLogNumber(prev_log_number_); + } + + edit->SetNextFile(next_file_number_); + edit->SetLastSequence(last_sequence_); + + Version* v = new Version(this); + { + Builder builder(this, current_); + builder.Apply(edit); + builder.SaveTo(v); + } + Finalize(v); + + // Initialize new descriptor log file if necessary by creating + // a temporary file that contains a snapshot of the current version. + std::string new_manifest_file; + Status s; + if (descriptor_log_ == NULL) { + // No reason to unlock *mu here since we only hit this path in the + // first call to LogAndApply (when opening the database). + assert(descriptor_file_ == NULL); + new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_); + edit->SetNextFile(next_file_number_); + s = env_->NewWritableFile(new_manifest_file, &descriptor_file_); + if (s.ok()) { + descriptor_log_ = new log::Writer(descriptor_file_); + s = WriteSnapshot(descriptor_log_); + } + } + + // Unlock during expensive MANIFEST log write + { + mu->Unlock(); + + // Write new record to MANIFEST log + if (s.ok()) { + std::string record; + edit->EncodeTo(&record); + s = descriptor_log_->AddRecord(record); + if (s.ok()) { + s = descriptor_file_->Sync(); + } + if (!s.ok()) { + Log(options_->info_log, "MANIFEST write: %s\n", s.ToString().c_str()); + } + } + + // If we just created a new descriptor file, install it by writing a + // new CURRENT file that points to it. + if (s.ok() && !new_manifest_file.empty()) { + s = SetCurrentFile(env_, dbname_, manifest_file_number_); + } + + mu->Lock(); + } + + // Install the new version + if (s.ok()) { + AppendVersion(v); + log_number_ = edit->log_number_; + prev_log_number_ = edit->prev_log_number_; + } else { + delete v; + if (!new_manifest_file.empty()) { + delete descriptor_log_; + delete descriptor_file_; + descriptor_log_ = NULL; + descriptor_file_ = NULL; + env_->DeleteFile(new_manifest_file); + } + } + + return s; +} + +Status VersionSet::Recover(bool *save_manifest) { + struct LogReporter : public log::Reader::Reporter { + Status* status; + virtual void Corruption(size_t bytes, const Status& s) { + if (this->status->ok()) *this->status = s; + } + }; + + // Read "CURRENT" file, which contains a pointer to the current manifest file + std::string current; + Status s = ReadFileToString(env_, CurrentFileName(dbname_), ¤t); + if (!s.ok()) { + return s; + } + if (current.empty() || current[current.size()-1] != '\n') { + return Status::Corruption("CURRENT file does not end with newline"); + } + current.resize(current.size() - 1); + + std::string dscname = dbname_ + "/" + current; + SequentialFile* file; + s = env_->NewSequentialFile(dscname, &file); + if (!s.ok()) { + return s; + } + + bool have_log_number = false; + bool have_prev_log_number = false; + bool have_next_file = false; + bool have_last_sequence = false; + uint64_t next_file = 0; + uint64_t last_sequence = 0; + uint64_t log_number = 0; + uint64_t prev_log_number = 0; + Builder builder(this, current_); + + { + LogReporter reporter; + reporter.status = &s; + log::Reader reader(file, &reporter, true/*checksum*/, 0/*initial_offset*/); + Slice record; + std::string scratch; + while (reader.ReadRecord(&record, &scratch) && s.ok()) { + VersionEdit edit; + s = edit.DecodeFrom(record); + if (s.ok()) { + if (edit.has_comparator_ && + edit.comparator_ != icmp_.user_comparator()->Name()) { + s = Status::InvalidArgument( + edit.comparator_ + " does not match existing comparator ", + icmp_.user_comparator()->Name()); + } + } + + if (s.ok()) { + builder.Apply(&edit); + } + + if (edit.has_log_number_) { + log_number = edit.log_number_; + have_log_number = true; + } + + if (edit.has_prev_log_number_) { + prev_log_number = edit.prev_log_number_; + have_prev_log_number = true; + } + + if (edit.has_next_file_number_) { + next_file = edit.next_file_number_; + have_next_file = true; + } + + if (edit.has_last_sequence_) { + last_sequence = edit.last_sequence_; + have_last_sequence = true; + } + } + } + delete file; + file = NULL; + + if (s.ok()) { + if (!have_next_file) { + s = Status::Corruption("no meta-nextfile entry in descriptor"); + } else if (!have_log_number) { + s = Status::Corruption("no meta-lognumber entry in descriptor"); + } else if (!have_last_sequence) { + s = Status::Corruption("no last-sequence-number entry in descriptor"); + } + + if (!have_prev_log_number) { + prev_log_number = 0; + } + + MarkFileNumberUsed(prev_log_number); + MarkFileNumberUsed(log_number); + } + + if (s.ok()) { + Version* v = new Version(this); + builder.SaveTo(v); + // Install recovered version + Finalize(v); + AppendVersion(v); + manifest_file_number_ = next_file; + next_file_number_ = next_file + 1; + last_sequence_ = last_sequence; + log_number_ = log_number; + prev_log_number_ = prev_log_number; + + // See if we can reuse the existing MANIFEST file. + if (ReuseManifest(dscname, current)) { + // No need to save new manifest + } else { + *save_manifest = true; + } + } + + return s; +} + +bool VersionSet::ReuseManifest(const std::string& dscname, + const std::string& dscbase) { + if (!options_->reuse_logs) { + return false; + } + FileType manifest_type; + uint64_t manifest_number; + uint64_t manifest_size; + if (!ParseFileName(dscbase, &manifest_number, &manifest_type) || + manifest_type != kDescriptorFile || + !env_->GetFileSize(dscname, &manifest_size).ok() || + // Make new compacted MANIFEST if old one is too big + manifest_size >= TargetFileSize(options_)) { + return false; + } + + assert(descriptor_file_ == NULL); + assert(descriptor_log_ == NULL); + Status r = env_->NewAppendableFile(dscname, &descriptor_file_); + if (!r.ok()) { + Log(options_->info_log, "Reuse MANIFEST: %s\n", r.ToString().c_str()); + assert(descriptor_file_ == NULL); + return false; + } + + Log(options_->info_log, "Reusing MANIFEST %s\n", dscname.c_str()); + descriptor_log_ = new log::Writer(descriptor_file_, manifest_size); + manifest_file_number_ = manifest_number; + return true; +} + +void VersionSet::MarkFileNumberUsed(uint64_t number) { + if (next_file_number_ <= number) { + next_file_number_ = number + 1; + } +} + +void VersionSet::Finalize(Version* v) { + // Precomputed best level for next compaction + int best_level = -1; + double best_score = -1; + + for (int level = 0; level < config::kNumLevels-1; level++) { + double score; + if (level == 0) { + // We treat level-0 specially by bounding the number of files + // instead of number of bytes for two reasons: + // + // (1) With larger write-buffer sizes, it is nice not to do too + // many level-0 compactions. + // + // (2) The files in level-0 are merged on every read and + // therefore we wish to avoid too many files when the individual + // file size is small (perhaps because of a small write-buffer + // setting, or very high compression ratios, or lots of + // overwrites/deletions). + score = v->files_[level].size() / + static_cast(config::kL0_CompactionTrigger); + } else { + // Compute the ratio of current size to size limit. + const uint64_t level_bytes = TotalFileSize(v->files_[level]); + score = + static_cast(level_bytes) / MaxBytesForLevel(options_, level); + } + + if (score > best_score) { + best_level = level; + best_score = score; + } + } + + v->compaction_level_ = best_level; + v->compaction_score_ = best_score; +} + +Status VersionSet::WriteSnapshot(log::Writer* log) { + // TODO: Break up into multiple records to reduce memory usage on recovery? + + // Save metadata + VersionEdit edit; + edit.SetComparatorName(icmp_.user_comparator()->Name()); + + // Save compaction pointers + for (int level = 0; level < config::kNumLevels; level++) { + if (!compact_pointer_[level].empty()) { + InternalKey key; + key.DecodeFrom(compact_pointer_[level]); + edit.SetCompactPointer(level, key); + } + } + + // Save files + for (int level = 0; level < config::kNumLevels; level++) { + const std::vector& files = current_->files_[level]; + for (size_t i = 0; i < files.size(); i++) { + const FileMetaData* f = files[i]; + edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest); + } + } + + std::string record; + edit.EncodeTo(&record); + return log->AddRecord(record); +} + +int VersionSet::NumLevelFiles(int level) const { + assert(level >= 0); + assert(level < config::kNumLevels); + return current_->files_[level].size(); +} + +const char* VersionSet::LevelSummary(LevelSummaryStorage* scratch) const { + // Update code if kNumLevels changes + assert(config::kNumLevels == 7); + snprintf(scratch->buffer, sizeof(scratch->buffer), + "files[ %d %d %d %d %d %d %d ]", + int(current_->files_[0].size()), + int(current_->files_[1].size()), + int(current_->files_[2].size()), + int(current_->files_[3].size()), + int(current_->files_[4].size()), + int(current_->files_[5].size()), + int(current_->files_[6].size())); + return scratch->buffer; +} + +uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) { + uint64_t result = 0; + for (int level = 0; level < config::kNumLevels; level++) { + const std::vector& files = v->files_[level]; + for (size_t i = 0; i < files.size(); i++) { + if (icmp_.Compare(files[i]->largest, ikey) <= 0) { + // Entire file is before "ikey", so just add the file size + result += files[i]->file_size; + } else if (icmp_.Compare(files[i]->smallest, ikey) > 0) { + // Entire file is after "ikey", so ignore + if (level > 0) { + // Files other than level 0 are sorted by meta->smallest, so + // no further files in this level will contain data for + // "ikey". + break; + } + } else { + // "ikey" falls in the range for this table. Add the + // approximate offset of "ikey" within the table. + Table* tableptr; + Iterator* iter = table_cache_->NewIterator( + ReadOptions(), files[i]->number, files[i]->file_size, &tableptr); + if (tableptr != NULL) { + result += tableptr->ApproximateOffsetOf(ikey.Encode()); + } + delete iter; + } + } + } + return result; +} + +void VersionSet::AddLiveFiles(std::set* live) { + for (Version* v = dummy_versions_.next_; + v != &dummy_versions_; + v = v->next_) { + for (int level = 0; level < config::kNumLevels; level++) { + const std::vector& files = v->files_[level]; + for (size_t i = 0; i < files.size(); i++) { + live->insert(files[i]->number); + } + } + } +} + +int64_t VersionSet::NumLevelBytes(int level) const { + assert(level >= 0); + assert(level < config::kNumLevels); + return TotalFileSize(current_->files_[level]); +} + +int64_t VersionSet::MaxNextLevelOverlappingBytes() { + int64_t result = 0; + std::vector overlaps; + for (int level = 1; level < config::kNumLevels - 1; level++) { + for (size_t i = 0; i < current_->files_[level].size(); i++) { + const FileMetaData* f = current_->files_[level][i]; + current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest, + &overlaps); + const int64_t sum = TotalFileSize(overlaps); + if (sum > result) { + result = sum; + } + } + } + return result; +} + +// Stores the minimal range that covers all entries in inputs in +// *smallest, *largest. +// REQUIRES: inputs is not empty +void VersionSet::GetRange(const std::vector& inputs, + InternalKey* smallest, + InternalKey* largest) { + assert(!inputs.empty()); + smallest->Clear(); + largest->Clear(); + for (size_t i = 0; i < inputs.size(); i++) { + FileMetaData* f = inputs[i]; + if (i == 0) { + *smallest = f->smallest; + *largest = f->largest; + } else { + if (icmp_.Compare(f->smallest, *smallest) < 0) { + *smallest = f->smallest; + } + if (icmp_.Compare(f->largest, *largest) > 0) { + *largest = f->largest; + } + } + } +} + +// Stores the minimal range that covers all entries in inputs1 and inputs2 +// in *smallest, *largest. +// REQUIRES: inputs is not empty +void VersionSet::GetRange2(const std::vector& inputs1, + const std::vector& inputs2, + InternalKey* smallest, + InternalKey* largest) { + std::vector all = inputs1; + all.insert(all.end(), inputs2.begin(), inputs2.end()); + GetRange(all, smallest, largest); +} + +Iterator* VersionSet::MakeInputIterator(Compaction* c) { + ReadOptions options; + options.verify_checksums = options_->paranoid_checks; + options.fill_cache = false; + + // Level-0 files have to be merged together. For other levels, + // we will make a concatenating iterator per level. + // TODO(opt): use concatenating iterator for level-0 if there is no overlap + const int space = (c->level() == 0 ? c->inputs_[0].size() + 1 : 2); + Iterator** list = new Iterator*[space]; + int num = 0; + for (int which = 0; which < 2; which++) { + if (!c->inputs_[which].empty()) { + if (c->level() + which == 0) { + const std::vector& files = c->inputs_[which]; + for (size_t i = 0; i < files.size(); i++) { + list[num++] = table_cache_->NewIterator( + options, files[i]->number, files[i]->file_size); + } + } else { + // Create concatenating iterator for the files from this level + list[num++] = NewTwoLevelIterator( + new Version::LevelFileNumIterator(icmp_, &c->inputs_[which]), + &GetFileIterator, table_cache_, options); + } + } + } + assert(num <= space); + Iterator* result = NewMergingIterator(&icmp_, list, num); + delete[] list; + return result; +} + +Compaction* VersionSet::PickCompaction() { + Compaction* c; + int level; + + // We prefer compactions triggered by too much data in a level over + // the compactions triggered by seeks. + const bool size_compaction = (current_->compaction_score_ >= 1); + const bool seek_compaction = (current_->file_to_compact_ != NULL); + if (size_compaction) { + level = current_->compaction_level_; + assert(level >= 0); + assert(level+1 < config::kNumLevels); + c = new Compaction(options_, level); + + // Pick the first file that comes after compact_pointer_[level] + for (size_t i = 0; i < current_->files_[level].size(); i++) { + FileMetaData* f = current_->files_[level][i]; + if (compact_pointer_[level].empty() || + icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) { + c->inputs_[0].push_back(f); + break; + } + } + if (c->inputs_[0].empty()) { + // Wrap-around to the beginning of the key space + c->inputs_[0].push_back(current_->files_[level][0]); + } + } else if (seek_compaction) { + level = current_->file_to_compact_level_; + c = new Compaction(options_, level); + c->inputs_[0].push_back(current_->file_to_compact_); + } else { + return NULL; + } + + c->input_version_ = current_; + c->input_version_->Ref(); + + // Files in level 0 may overlap each other, so pick up all overlapping ones + if (level == 0) { + InternalKey smallest, largest; + GetRange(c->inputs_[0], &smallest, &largest); + // Note that the next call will discard the file we placed in + // c->inputs_[0] earlier and replace it with an overlapping set + // which will include the picked file. + current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]); + assert(!c->inputs_[0].empty()); + } + + SetupOtherInputs(c); + + return c; +} + +void VersionSet::SetupOtherInputs(Compaction* c) { + const int level = c->level(); + InternalKey smallest, largest; + GetRange(c->inputs_[0], &smallest, &largest); + + current_->GetOverlappingInputs(level+1, &smallest, &largest, &c->inputs_[1]); + + // Get entire range covered by compaction + InternalKey all_start, all_limit; + GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit); + + // See if we can grow the number of inputs in "level" without + // changing the number of "level+1" files we pick up. + if (!c->inputs_[1].empty()) { + std::vector expanded0; + current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0); + const int64_t inputs0_size = TotalFileSize(c->inputs_[0]); + const int64_t inputs1_size = TotalFileSize(c->inputs_[1]); + const int64_t expanded0_size = TotalFileSize(expanded0); + if (expanded0.size() > c->inputs_[0].size() && + inputs1_size + expanded0_size < + ExpandedCompactionByteSizeLimit(options_)) { + InternalKey new_start, new_limit; + GetRange(expanded0, &new_start, &new_limit); + std::vector expanded1; + current_->GetOverlappingInputs(level+1, &new_start, &new_limit, + &expanded1); + if (expanded1.size() == c->inputs_[1].size()) { + Log(options_->info_log, + "Expanding@%d %d+%d (%ld+%ld bytes) to %d+%d (%ld+%ld bytes)\n", + level, + int(c->inputs_[0].size()), + int(c->inputs_[1].size()), + long(inputs0_size), long(inputs1_size), + int(expanded0.size()), + int(expanded1.size()), + long(expanded0_size), long(inputs1_size)); + smallest = new_start; + largest = new_limit; + c->inputs_[0] = expanded0; + c->inputs_[1] = expanded1; + GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit); + } + } + } + + // Compute the set of grandparent files that overlap this compaction + // (parent == level+1; grandparent == level+2) + if (level + 2 < config::kNumLevels) { + current_->GetOverlappingInputs(level + 2, &all_start, &all_limit, + &c->grandparents_); + } + + if (false) { + Log(options_->info_log, "Compacting %d '%s' .. '%s'", + level, + smallest.DebugString().c_str(), + largest.DebugString().c_str()); + } + + // Update the place where we will do the next compaction for this level. + // We update this immediately instead of waiting for the VersionEdit + // to be applied so that if the compaction fails, we will try a different + // key range next time. + compact_pointer_[level] = largest.Encode().ToString(); + c->edit_.SetCompactPointer(level, largest); +} + +Compaction* VersionSet::CompactRange( + int level, + const InternalKey* begin, + const InternalKey* end) { + std::vector inputs; + current_->GetOverlappingInputs(level, begin, end, &inputs); + if (inputs.empty()) { + return NULL; + } + + // Avoid compacting too much in one shot in case the range is large. + // But we cannot do this for level-0 since level-0 files can overlap + // and we must not pick one file and drop another older file if the + // two files overlap. + if (level > 0) { + const uint64_t limit = MaxFileSizeForLevel(options_, level); + uint64_t total = 0; + for (size_t i = 0; i < inputs.size(); i++) { + uint64_t s = inputs[i]->file_size; + total += s; + if (total >= limit) { + inputs.resize(i + 1); + break; + } + } + } + + Compaction* c = new Compaction(options_, level); + c->input_version_ = current_; + c->input_version_->Ref(); + c->inputs_[0] = inputs; + SetupOtherInputs(c); + return c; +} + +Compaction::Compaction(const Options* options, int level) + : level_(level), + max_output_file_size_(MaxFileSizeForLevel(options, level)), + input_version_(NULL), + grandparent_index_(0), + seen_key_(false), + overlapped_bytes_(0) { + for (int i = 0; i < config::kNumLevels; i++) { + level_ptrs_[i] = 0; + } +} + +Compaction::~Compaction() { + if (input_version_ != NULL) { + input_version_->Unref(); + } +} + +bool Compaction::IsTrivialMove() const { + const VersionSet* vset = input_version_->vset_; + // Avoid a move if there is lots of overlapping grandparent data. + // Otherwise, the move could create a parent file that will require + // a very expensive merge later on. + return (num_input_files(0) == 1 && num_input_files(1) == 0 && + TotalFileSize(grandparents_) <= + MaxGrandParentOverlapBytes(vset->options_)); +} + +void Compaction::AddInputDeletions(VersionEdit* edit) { + for (int which = 0; which < 2; which++) { + for (size_t i = 0; i < inputs_[which].size(); i++) { + edit->DeleteFile(level_ + which, inputs_[which][i]->number); + } + } +} + +bool Compaction::IsBaseLevelForKey(const Slice& user_key) { + // Maybe use binary search to find right entry instead of linear search? + const Comparator* user_cmp = input_version_->vset_->icmp_.user_comparator(); + for (int lvl = level_ + 2; lvl < config::kNumLevels; lvl++) { + const std::vector& files = input_version_->files_[lvl]; + for (; level_ptrs_[lvl] < files.size(); ) { + FileMetaData* f = files[level_ptrs_[lvl]]; + if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) { + // We've advanced far enough + if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) { + // Key falls in this file's range, so definitely not base level + return false; + } + break; + } + level_ptrs_[lvl]++; + } + } + return true; +} + +bool Compaction::ShouldStopBefore(const Slice& internal_key) { + const VersionSet* vset = input_version_->vset_; + // Scan to find earliest grandparent file that contains key. + const InternalKeyComparator* icmp = &vset->icmp_; + while (grandparent_index_ < grandparents_.size() && + icmp->Compare(internal_key, + grandparents_[grandparent_index_]->largest.Encode()) > 0) { + if (seen_key_) { + overlapped_bytes_ += grandparents_[grandparent_index_]->file_size; + } + grandparent_index_++; + } + seen_key_ = true; + + if (overlapped_bytes_ > MaxGrandParentOverlapBytes(vset->options_)) { + // Too much overlap for current output; start new output + overlapped_bytes_ = 0; + return true; + } else { + return false; + } +} + +void Compaction::ReleaseInputs() { + if (input_version_ != NULL) { + input_version_->Unref(); + input_version_ = NULL; + } +} + +} // namespace leveldb diff --git a/leveldb-library/db/version_set.h b/leveldb-library/db/version_set.h new file mode 100644 index 0000000..c4e7ac3 --- /dev/null +++ b/leveldb-library/db/version_set.h @@ -0,0 +1,398 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// The representation of a DBImpl consists of a set of Versions. The +// newest version is called "current". Older versions may be kept +// around to provide a consistent view to live iterators. +// +// Each Version keeps track of a set of Table files per level. The +// entire set of versions is maintained in a VersionSet. +// +// Version,VersionSet are thread-compatible, but require external +// synchronization on all accesses. + +#ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ +#define STORAGE_LEVELDB_DB_VERSION_SET_H_ + +#include +#include +#include +#include "db/dbformat.h" +#include "db/version_edit.h" +#include "port/port.h" +#include "port/thread_annotations.h" + +namespace leveldb { + +namespace log { class Writer; } + +class Compaction; +class Iterator; +class MemTable; +class TableBuilder; +class TableCache; +class Version; +class VersionSet; +class WritableFile; + +// Return the smallest index i such that files[i]->largest >= key. +// Return files.size() if there is no such file. +// REQUIRES: "files" contains a sorted list of non-overlapping files. +extern int FindFile(const InternalKeyComparator& icmp, + const std::vector& files, + const Slice& key); + +// Returns true iff some file in "files" overlaps the user key range +// [*smallest,*largest]. +// smallest==NULL represents a key smaller than all keys in the DB. +// largest==NULL represents a key largest than all keys in the DB. +// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges +// in sorted order. +extern bool SomeFileOverlapsRange( + const InternalKeyComparator& icmp, + bool disjoint_sorted_files, + const std::vector& files, + const Slice* smallest_user_key, + const Slice* largest_user_key); + +class Version { + public: + // Append to *iters a sequence of iterators that will + // yield the contents of this Version when merged together. + // REQUIRES: This version has been saved (see VersionSet::SaveTo) + void AddIterators(const ReadOptions&, std::vector* iters); + + // Lookup the value for key. If found, store it in *val and + // return OK. Else return a non-OK status. Fills *stats. + // REQUIRES: lock is not held + struct GetStats { + FileMetaData* seek_file; + int seek_file_level; + }; + Status Get(const ReadOptions&, const LookupKey& key, std::string* val, + GetStats* stats); + + // Adds "stats" into the current state. Returns true if a new + // compaction may need to be triggered, false otherwise. + // REQUIRES: lock is held + bool UpdateStats(const GetStats& stats); + + // Record a sample of bytes read at the specified internal key. + // Samples are taken approximately once every config::kReadBytesPeriod + // bytes. Returns true if a new compaction may need to be triggered. + // REQUIRES: lock is held + bool RecordReadSample(Slice key); + + // Reference count management (so Versions do not disappear out from + // under live iterators) + void Ref(); + void Unref(); + + void GetOverlappingInputs( + int level, + const InternalKey* begin, // NULL means before all keys + const InternalKey* end, // NULL means after all keys + std::vector* inputs); + + // Returns true iff some file in the specified level overlaps + // some part of [*smallest_user_key,*largest_user_key]. + // smallest_user_key==NULL represents a key smaller than all keys in the DB. + // largest_user_key==NULL represents a key largest than all keys in the DB. + bool OverlapInLevel(int level, + const Slice* smallest_user_key, + const Slice* largest_user_key); + + // Return the level at which we should place a new memtable compaction + // result that covers the range [smallest_user_key,largest_user_key]. + int PickLevelForMemTableOutput(const Slice& smallest_user_key, + const Slice& largest_user_key); + + int NumFiles(int level) const { return files_[level].size(); } + + // Return a human readable string that describes this version's contents. + std::string DebugString() const; + + private: + friend class Compaction; + friend class VersionSet; + + class LevelFileNumIterator; + Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; + + // Call func(arg, level, f) for every file that overlaps user_key in + // order from newest to oldest. If an invocation of func returns + // false, makes no more calls. + // + // REQUIRES: user portion of internal_key == user_key. + void ForEachOverlapping(Slice user_key, Slice internal_key, + void* arg, + bool (*func)(void*, int, FileMetaData*)); + + VersionSet* vset_; // VersionSet to which this Version belongs + Version* next_; // Next version in linked list + Version* prev_; // Previous version in linked list + int refs_; // Number of live refs to this version + + // List of files per level + std::vector files_[config::kNumLevels]; + + // Next file to compact based on seek stats. + FileMetaData* file_to_compact_; + int file_to_compact_level_; + + // Level that should be compacted next and its compaction score. + // Score < 1 means compaction is not strictly needed. These fields + // are initialized by Finalize(). + double compaction_score_; + int compaction_level_; + + explicit Version(VersionSet* vset) + : vset_(vset), next_(this), prev_(this), refs_(0), + file_to_compact_(NULL), + file_to_compact_level_(-1), + compaction_score_(-1), + compaction_level_(-1) { + } + + ~Version(); + + // No copying allowed + Version(const Version&); + void operator=(const Version&); +}; + +class VersionSet { + public: + VersionSet(const std::string& dbname, + const Options* options, + TableCache* table_cache, + const InternalKeyComparator*); + ~VersionSet(); + + // Apply *edit to the current version to form a new descriptor that + // is both saved to persistent state and installed as the new + // current version. Will release *mu while actually writing to the file. + // REQUIRES: *mu is held on entry. + // REQUIRES: no other thread concurrently calls LogAndApply() + Status LogAndApply(VersionEdit* edit, port::Mutex* mu) + EXCLUSIVE_LOCKS_REQUIRED(mu); + + // Recover the last saved descriptor from persistent storage. + Status Recover(bool *save_manifest); + + // Return the current version. + Version* current() const { return current_; } + + // Return the current manifest file number + uint64_t ManifestFileNumber() const { return manifest_file_number_; } + + // Allocate and return a new file number + uint64_t NewFileNumber() { return next_file_number_++; } + + // Arrange to reuse "file_number" unless a newer file number has + // already been allocated. + // REQUIRES: "file_number" was returned by a call to NewFileNumber(). + void ReuseFileNumber(uint64_t file_number) { + if (next_file_number_ == file_number + 1) { + next_file_number_ = file_number; + } + } + + // Return the number of Table files at the specified level. + int NumLevelFiles(int level) const; + + // Return the combined file size of all files at the specified level. + int64_t NumLevelBytes(int level) const; + + // Return the last sequence number. + uint64_t LastSequence() const { return last_sequence_; } + + // Set the last sequence number to s. + void SetLastSequence(uint64_t s) { + assert(s >= last_sequence_); + last_sequence_ = s; + } + + // Mark the specified file number as used. + void MarkFileNumberUsed(uint64_t number); + + // Return the current log file number. + uint64_t LogNumber() const { return log_number_; } + + // Return the log file number for the log file that is currently + // being compacted, or zero if there is no such log file. + uint64_t PrevLogNumber() const { return prev_log_number_; } + + // Pick level and inputs for a new compaction. + // Returns NULL if there is no compaction to be done. + // Otherwise returns a pointer to a heap-allocated object that + // describes the compaction. Caller should delete the result. + Compaction* PickCompaction(); + + // Return a compaction object for compacting the range [begin,end] in + // the specified level. Returns NULL if there is nothing in that + // level that overlaps the specified range. Caller should delete + // the result. + Compaction* CompactRange( + int level, + const InternalKey* begin, + const InternalKey* end); + + // Return the maximum overlapping data (in bytes) at next level for any + // file at a level >= 1. + int64_t MaxNextLevelOverlappingBytes(); + + // Create an iterator that reads over the compaction inputs for "*c". + // The caller should delete the iterator when no longer needed. + Iterator* MakeInputIterator(Compaction* c); + + // Returns true iff some level needs a compaction. + bool NeedsCompaction() const { + Version* v = current_; + return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL); + } + + // Add all files listed in any live version to *live. + // May also mutate some internal state. + void AddLiveFiles(std::set* live); + + // Return the approximate offset in the database of the data for + // "key" as of version "v". + uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); + + // Return a human-readable short (single-line) summary of the number + // of files per level. Uses *scratch as backing store. + struct LevelSummaryStorage { + char buffer[100]; + }; + const char* LevelSummary(LevelSummaryStorage* scratch) const; + + private: + class Builder; + + friend class Compaction; + friend class Version; + + bool ReuseManifest(const std::string& dscname, const std::string& dscbase); + + void Finalize(Version* v); + + void GetRange(const std::vector& inputs, + InternalKey* smallest, + InternalKey* largest); + + void GetRange2(const std::vector& inputs1, + const std::vector& inputs2, + InternalKey* smallest, + InternalKey* largest); + + void SetupOtherInputs(Compaction* c); + + // Save current contents to *log + Status WriteSnapshot(log::Writer* log); + + void AppendVersion(Version* v); + + Env* const env_; + const std::string dbname_; + const Options* const options_; + TableCache* const table_cache_; + const InternalKeyComparator icmp_; + uint64_t next_file_number_; + uint64_t manifest_file_number_; + uint64_t last_sequence_; + uint64_t log_number_; + uint64_t prev_log_number_; // 0 or backing store for memtable being compacted + + // Opened lazily + WritableFile* descriptor_file_; + log::Writer* descriptor_log_; + Version dummy_versions_; // Head of circular doubly-linked list of versions. + Version* current_; // == dummy_versions_.prev_ + + // Per-level key at which the next compaction at that level should start. + // Either an empty string, or a valid InternalKey. + std::string compact_pointer_[config::kNumLevels]; + + // No copying allowed + VersionSet(const VersionSet&); + void operator=(const VersionSet&); +}; + +// A Compaction encapsulates information about a compaction. +class Compaction { + public: + ~Compaction(); + + // Return the level that is being compacted. Inputs from "level" + // and "level+1" will be merged to produce a set of "level+1" files. + int level() const { return level_; } + + // Return the object that holds the edits to the descriptor done + // by this compaction. + VersionEdit* edit() { return &edit_; } + + // "which" must be either 0 or 1 + int num_input_files(int which) const { return inputs_[which].size(); } + + // Return the ith input file at "level()+which" ("which" must be 0 or 1). + FileMetaData* input(int which, int i) const { return inputs_[which][i]; } + + // Maximum size of files to build during this compaction. + uint64_t MaxOutputFileSize() const { return max_output_file_size_; } + + // Is this a trivial compaction that can be implemented by just + // moving a single input file to the next level (no merging or splitting) + bool IsTrivialMove() const; + + // Add all inputs to this compaction as delete operations to *edit. + void AddInputDeletions(VersionEdit* edit); + + // Returns true if the information we have available guarantees that + // the compaction is producing data in "level+1" for which no data exists + // in levels greater than "level+1". + bool IsBaseLevelForKey(const Slice& user_key); + + // Returns true iff we should stop building the current output + // before processing "internal_key". + bool ShouldStopBefore(const Slice& internal_key); + + // Release the input version for the compaction, once the compaction + // is successful. + void ReleaseInputs(); + + private: + friend class Version; + friend class VersionSet; + + Compaction(const Options* options, int level); + + int level_; + uint64_t max_output_file_size_; + Version* input_version_; + VersionEdit edit_; + + // Each compaction reads inputs from "level_" and "level_+1" + std::vector inputs_[2]; // The two sets of inputs + + // State used to check for number of of overlapping grandparent files + // (parent == level_ + 1, grandparent == level_ + 2) + std::vector grandparents_; + size_t grandparent_index_; // Index in grandparent_starts_ + bool seen_key_; // Some output key has been seen + int64_t overlapped_bytes_; // Bytes of overlap between current output + // and grandparent files + + // State for implementing IsBaseLevelForKey + + // level_ptrs_ holds indices into input_version_->levels_: our state + // is that we are positioned at one of the file ranges for each + // higher level than the ones involved in this compaction (i.e. for + // all L >= level_ + 2). + size_t level_ptrs_[config::kNumLevels]; +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_DB_VERSION_SET_H_ diff --git a/leveldb-library/db/write_batch.cc b/leveldb-library/db/write_batch.cc new file mode 100644 index 0000000..33f4a42 --- /dev/null +++ b/leveldb-library/db/write_batch.cc @@ -0,0 +1,147 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// WriteBatch::rep_ := +// sequence: fixed64 +// count: fixed32 +// data: record[count] +// record := +// kTypeValue varstring varstring | +// kTypeDeletion varstring +// varstring := +// len: varint32 +// data: uint8[len] + +#include "leveldb/write_batch.h" + +#include "leveldb/db.h" +#include "db/dbformat.h" +#include "db/memtable.h" +#include "db/write_batch_internal.h" +#include "util/coding.h" + +namespace leveldb { + +// WriteBatch header has an 8-byte sequence number followed by a 4-byte count. +static const size_t kHeader = 12; + +WriteBatch::WriteBatch() { + Clear(); +} + +WriteBatch::~WriteBatch() { } + +WriteBatch::Handler::~Handler() { } + +void WriteBatch::Clear() { + rep_.clear(); + rep_.resize(kHeader); +} + +Status WriteBatch::Iterate(Handler* handler) const { + Slice input(rep_); + if (input.size() < kHeader) { + return Status::Corruption("malformed WriteBatch (too small)"); + } + + input.remove_prefix(kHeader); + Slice key, value; + int found = 0; + while (!input.empty()) { + found++; + char tag = input[0]; + input.remove_prefix(1); + switch (tag) { + case kTypeValue: + if (GetLengthPrefixedSlice(&input, &key) && + GetLengthPrefixedSlice(&input, &value)) { + handler->Put(key, value); + } else { + return Status::Corruption("bad WriteBatch Put"); + } + break; + case kTypeDeletion: + if (GetLengthPrefixedSlice(&input, &key)) { + handler->Delete(key); + } else { + return Status::Corruption("bad WriteBatch Delete"); + } + break; + default: + return Status::Corruption("unknown WriteBatch tag"); + } + } + if (found != WriteBatchInternal::Count(this)) { + return Status::Corruption("WriteBatch has wrong count"); + } else { + return Status::OK(); + } +} + +int WriteBatchInternal::Count(const WriteBatch* b) { + return DecodeFixed32(b->rep_.data() + 8); +} + +void WriteBatchInternal::SetCount(WriteBatch* b, int n) { + EncodeFixed32(&b->rep_[8], n); +} + +SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) { + return SequenceNumber(DecodeFixed64(b->rep_.data())); +} + +void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) { + EncodeFixed64(&b->rep_[0], seq); +} + +void WriteBatch::Put(const Slice& key, const Slice& value) { + WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1); + rep_.push_back(static_cast(kTypeValue)); + PutLengthPrefixedSlice(&rep_, key); + PutLengthPrefixedSlice(&rep_, value); +} + +void WriteBatch::Delete(const Slice& key) { + WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1); + rep_.push_back(static_cast(kTypeDeletion)); + PutLengthPrefixedSlice(&rep_, key); +} + +namespace { +class MemTableInserter : public WriteBatch::Handler { + public: + SequenceNumber sequence_; + MemTable* mem_; + + virtual void Put(const Slice& key, const Slice& value) { + mem_->Add(sequence_, kTypeValue, key, value); + sequence_++; + } + virtual void Delete(const Slice& key) { + mem_->Add(sequence_, kTypeDeletion, key, Slice()); + sequence_++; + } +}; +} // namespace + +Status WriteBatchInternal::InsertInto(const WriteBatch* b, + MemTable* memtable) { + MemTableInserter inserter; + inserter.sequence_ = WriteBatchInternal::Sequence(b); + inserter.mem_ = memtable; + return b->Iterate(&inserter); +} + +void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) { + assert(contents.size() >= kHeader); + b->rep_.assign(contents.data(), contents.size()); +} + +void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) { + SetCount(dst, Count(dst) + Count(src)); + assert(src->rep_.size() >= kHeader); + dst->rep_.append(src->rep_.data() + kHeader, src->rep_.size() - kHeader); +} + +} // namespace leveldb diff --git a/leveldb-library/db/write_batch_internal.h b/leveldb-library/db/write_batch_internal.h new file mode 100644 index 0000000..9448ef7 --- /dev/null +++ b/leveldb-library/db/write_batch_internal.h @@ -0,0 +1,50 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ +#define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ + +#include "db/dbformat.h" +#include "leveldb/write_batch.h" + +namespace leveldb { + +class MemTable; + +// WriteBatchInternal provides static methods for manipulating a +// WriteBatch that we don't want in the public WriteBatch interface. +class WriteBatchInternal { + public: + // Return the number of entries in the batch. + static int Count(const WriteBatch* batch); + + // Set the count for the number of entries in the batch. + static void SetCount(WriteBatch* batch, int n); + + // Return the sequence number for the start of this batch. + static SequenceNumber Sequence(const WriteBatch* batch); + + // Store the specified number as the sequence number for the start of + // this batch. + static void SetSequence(WriteBatch* batch, SequenceNumber seq); + + static Slice Contents(const WriteBatch* batch) { + return Slice(batch->rep_); + } + + static size_t ByteSize(const WriteBatch* batch) { + return batch->rep_.size(); + } + + static void SetContents(WriteBatch* batch, const Slice& contents); + + static Status InsertInto(const WriteBatch* batch, MemTable* memtable); + + static void Append(WriteBatch* dst, const WriteBatch* src); +}; + +} // namespace leveldb + + +#endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ diff --git a/leveldb-library/leveldb/c.h b/leveldb-library/leveldb/c.h new file mode 100644 index 0000000..1048fe3 --- /dev/null +++ b/leveldb-library/leveldb/c.h @@ -0,0 +1,290 @@ +/* Copyright (c) 2011 The LevelDB Authors. All rights reserved. + Use of this source code is governed by a BSD-style license that can be + found in the LICENSE file. See the AUTHORS file for names of contributors. + + C bindings for leveldb. May be useful as a stable ABI that can be + used by programs that keep leveldb in a shared library, or for + a JNI api. + + Does not support: + . getters for the option types + . custom comparators that implement key shortening + . custom iter, db, env, cache implementations using just the C bindings + + Some conventions: + + (1) We expose just opaque struct pointers and functions to clients. + This allows us to change internal representations without having to + recompile clients. + + (2) For simplicity, there is no equivalent to the Slice type. Instead, + the caller has to pass the pointer and length as separate + arguments. + + (3) Errors are represented by a null-terminated c string. NULL + means no error. All operations that can raise an error are passed + a "char** errptr" as the last argument. One of the following must + be true on entry: + *errptr == NULL + *errptr points to a malloc()ed null-terminated error message + (On Windows, *errptr must have been malloc()-ed by this library.) + On success, a leveldb routine leaves *errptr unchanged. + On failure, leveldb frees the old value of *errptr and + set *errptr to a malloc()ed error message. + + (4) Bools have the type unsigned char (0 == false; rest == true) + + (5) All of the pointer arguments must be non-NULL. +*/ + +#ifndef STORAGE_LEVELDB_INCLUDE_C_H_ +#define STORAGE_LEVELDB_INCLUDE_C_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/* Exported types */ + +typedef struct leveldb_t leveldb_t; +typedef struct leveldb_cache_t leveldb_cache_t; +typedef struct leveldb_comparator_t leveldb_comparator_t; +typedef struct leveldb_env_t leveldb_env_t; +typedef struct leveldb_filelock_t leveldb_filelock_t; +typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t; +typedef struct leveldb_iterator_t leveldb_iterator_t; +typedef struct leveldb_logger_t leveldb_logger_t; +typedef struct leveldb_options_t leveldb_options_t; +typedef struct leveldb_randomfile_t leveldb_randomfile_t; +typedef struct leveldb_readoptions_t leveldb_readoptions_t; +typedef struct leveldb_seqfile_t leveldb_seqfile_t; +typedef struct leveldb_snapshot_t leveldb_snapshot_t; +typedef struct leveldb_writablefile_t leveldb_writablefile_t; +typedef struct leveldb_writebatch_t leveldb_writebatch_t; +typedef struct leveldb_writeoptions_t leveldb_writeoptions_t; + +/* DB operations */ + +extern leveldb_t* leveldb_open( + const leveldb_options_t* options, + const char* name, + char** errptr); + +extern void leveldb_close(leveldb_t* db); + +extern void leveldb_put( + leveldb_t* db, + const leveldb_writeoptions_t* options, + const char* key, size_t keylen, + const char* val, size_t vallen, + char** errptr); + +extern void leveldb_delete( + leveldb_t* db, + const leveldb_writeoptions_t* options, + const char* key, size_t keylen, + char** errptr); + +extern void leveldb_write( + leveldb_t* db, + const leveldb_writeoptions_t* options, + leveldb_writebatch_t* batch, + char** errptr); + +/* Returns NULL if not found. A malloc()ed array otherwise. + Stores the length of the array in *vallen. */ +extern char* leveldb_get( + leveldb_t* db, + const leveldb_readoptions_t* options, + const char* key, size_t keylen, + size_t* vallen, + char** errptr); + +extern leveldb_iterator_t* leveldb_create_iterator( + leveldb_t* db, + const leveldb_readoptions_t* options); + +extern const leveldb_snapshot_t* leveldb_create_snapshot( + leveldb_t* db); + +extern void leveldb_release_snapshot( + leveldb_t* db, + const leveldb_snapshot_t* snapshot); + +/* Returns NULL if property name is unknown. + Else returns a pointer to a malloc()-ed null-terminated value. */ +extern char* leveldb_property_value( + leveldb_t* db, + const char* propname); + +extern void leveldb_approximate_sizes( + leveldb_t* db, + int num_ranges, + const char* const* range_start_key, const size_t* range_start_key_len, + const char* const* range_limit_key, const size_t* range_limit_key_len, + uint64_t* sizes); + +extern void leveldb_compact_range( + leveldb_t* db, + const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len); + +/* Management operations */ + +extern void leveldb_destroy_db( + const leveldb_options_t* options, + const char* name, + char** errptr); + +extern void leveldb_repair_db( + const leveldb_options_t* options, + const char* name, + char** errptr); + +/* Iterator */ + +extern void leveldb_iter_destroy(leveldb_iterator_t*); +extern unsigned char leveldb_iter_valid(const leveldb_iterator_t*); +extern void leveldb_iter_seek_to_first(leveldb_iterator_t*); +extern void leveldb_iter_seek_to_last(leveldb_iterator_t*); +extern void leveldb_iter_seek(leveldb_iterator_t*, const char* k, size_t klen); +extern void leveldb_iter_next(leveldb_iterator_t*); +extern void leveldb_iter_prev(leveldb_iterator_t*); +extern const char* leveldb_iter_key(const leveldb_iterator_t*, size_t* klen); +extern const char* leveldb_iter_value(const leveldb_iterator_t*, size_t* vlen); +extern void leveldb_iter_get_error(const leveldb_iterator_t*, char** errptr); + +/* Write batch */ + +extern leveldb_writebatch_t* leveldb_writebatch_create(); +extern void leveldb_writebatch_destroy(leveldb_writebatch_t*); +extern void leveldb_writebatch_clear(leveldb_writebatch_t*); +extern void leveldb_writebatch_put( + leveldb_writebatch_t*, + const char* key, size_t klen, + const char* val, size_t vlen); +extern void leveldb_writebatch_delete( + leveldb_writebatch_t*, + const char* key, size_t klen); +extern void leveldb_writebatch_iterate( + leveldb_writebatch_t*, + void* state, + void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), + void (*deleted)(void*, const char* k, size_t klen)); + +/* Options */ + +extern leveldb_options_t* leveldb_options_create(); +extern void leveldb_options_destroy(leveldb_options_t*); +extern void leveldb_options_set_comparator( + leveldb_options_t*, + leveldb_comparator_t*); +extern void leveldb_options_set_filter_policy( + leveldb_options_t*, + leveldb_filterpolicy_t*); +extern void leveldb_options_set_create_if_missing( + leveldb_options_t*, unsigned char); +extern void leveldb_options_set_error_if_exists( + leveldb_options_t*, unsigned char); +extern void leveldb_options_set_paranoid_checks( + leveldb_options_t*, unsigned char); +extern void leveldb_options_set_env(leveldb_options_t*, leveldb_env_t*); +extern void leveldb_options_set_info_log(leveldb_options_t*, leveldb_logger_t*); +extern void leveldb_options_set_write_buffer_size(leveldb_options_t*, size_t); +extern void leveldb_options_set_max_open_files(leveldb_options_t*, int); +extern void leveldb_options_set_cache(leveldb_options_t*, leveldb_cache_t*); +extern void leveldb_options_set_block_size(leveldb_options_t*, size_t); +extern void leveldb_options_set_block_restart_interval(leveldb_options_t*, int); + +enum { + leveldb_no_compression = 0, + leveldb_snappy_compression = 1 +}; +extern void leveldb_options_set_compression(leveldb_options_t*, int); + +/* Comparator */ + +extern leveldb_comparator_t* leveldb_comparator_create( + void* state, + void (*destructor)(void*), + int (*compare)( + void*, + const char* a, size_t alen, + const char* b, size_t blen), + const char* (*name)(void*)); +extern void leveldb_comparator_destroy(leveldb_comparator_t*); + +/* Filter policy */ + +extern leveldb_filterpolicy_t* leveldb_filterpolicy_create( + void* state, + void (*destructor)(void*), + char* (*create_filter)( + void*, + const char* const* key_array, const size_t* key_length_array, + int num_keys, + size_t* filter_length), + unsigned char (*key_may_match)( + void*, + const char* key, size_t length, + const char* filter, size_t filter_length), + const char* (*name)(void*)); +extern void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*); + +extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom( + int bits_per_key); + +/* Read options */ + +extern leveldb_readoptions_t* leveldb_readoptions_create(); +extern void leveldb_readoptions_destroy(leveldb_readoptions_t*); +extern void leveldb_readoptions_set_verify_checksums( + leveldb_readoptions_t*, + unsigned char); +extern void leveldb_readoptions_set_fill_cache( + leveldb_readoptions_t*, unsigned char); +extern void leveldb_readoptions_set_snapshot( + leveldb_readoptions_t*, + const leveldb_snapshot_t*); + +/* Write options */ + +extern leveldb_writeoptions_t* leveldb_writeoptions_create(); +extern void leveldb_writeoptions_destroy(leveldb_writeoptions_t*); +extern void leveldb_writeoptions_set_sync( + leveldb_writeoptions_t*, unsigned char); + +/* Cache */ + +extern leveldb_cache_t* leveldb_cache_create_lru(size_t capacity); +extern void leveldb_cache_destroy(leveldb_cache_t* cache); + +/* Env */ + +extern leveldb_env_t* leveldb_create_default_env(); +extern void leveldb_env_destroy(leveldb_env_t*); + +/* Utility */ + +/* Calls free(ptr). + REQUIRES: ptr was malloc()-ed and returned by one of the routines + in this file. Note that in certain cases (typically on Windows), you + may need to call this routine instead of free(ptr) to dispose of + malloc()-ed memory returned by this library. */ +extern void leveldb_free(void* ptr); + +/* Return the major version number for this release. */ +extern int leveldb_major_version(); + +/* Return the minor version number for this release. */ +extern int leveldb_minor_version(); + +#ifdef __cplusplus +} /* end extern "C" */ +#endif + +#endif /* STORAGE_LEVELDB_INCLUDE_C_H_ */ diff --git a/leveldb-library/leveldb/cache.h b/leveldb-library/leveldb/cache.h new file mode 100644 index 0000000..6819d5b --- /dev/null +++ b/leveldb-library/leveldb/cache.h @@ -0,0 +1,110 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A Cache is an interface that maps keys to values. It has internal +// synchronization and may be safely accessed concurrently from +// multiple threads. It may automatically evict entries to make room +// for new entries. Values have a specified charge against the cache +// capacity. For example, a cache where the values are variable +// length strings, may use the length of the string as the charge for +// the string. +// +// A builtin cache implementation with a least-recently-used eviction +// policy is provided. Clients may use their own implementations if +// they want something more sophisticated (like scan-resistance, a +// custom eviction policy, variable cache sizing, etc.) + +#ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_ +#define STORAGE_LEVELDB_INCLUDE_CACHE_H_ + +#include +#include "leveldb/slice.h" + +namespace leveldb { + +class Cache; + +// Create a new cache with a fixed size capacity. This implementation +// of Cache uses a least-recently-used eviction policy. +extern Cache* NewLRUCache(size_t capacity); + +class Cache { + public: + Cache() { } + + // Destroys all existing entries by calling the "deleter" + // function that was passed to the constructor. + virtual ~Cache(); + + // Opaque handle to an entry stored in the cache. + struct Handle { }; + + // Insert a mapping from key->value into the cache and assign it + // the specified charge against the total cache capacity. + // + // Returns a handle that corresponds to the mapping. The caller + // must call this->Release(handle) when the returned mapping is no + // longer needed. + // + // When the inserted entry is no longer needed, the key and + // value will be passed to "deleter". + virtual Handle* Insert(const Slice& key, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value)) = 0; + + // If the cache has no mapping for "key", returns NULL. + // + // Else return a handle that corresponds to the mapping. The caller + // must call this->Release(handle) when the returned mapping is no + // longer needed. + virtual Handle* Lookup(const Slice& key) = 0; + + // Release a mapping returned by a previous Lookup(). + // REQUIRES: handle must not have been released yet. + // REQUIRES: handle must have been returned by a method on *this. + virtual void Release(Handle* handle) = 0; + + // Return the value encapsulated in a handle returned by a + // successful Lookup(). + // REQUIRES: handle must not have been released yet. + // REQUIRES: handle must have been returned by a method on *this. + virtual void* Value(Handle* handle) = 0; + + // If the cache contains entry for key, erase it. Note that the + // underlying entry will be kept around until all existing handles + // to it have been released. + virtual void Erase(const Slice& key) = 0; + + // Return a new numeric id. May be used by multiple clients who are + // sharing the same cache to partition the key space. Typically the + // client will allocate a new id at startup and prepend the id to + // its cache keys. + virtual uint64_t NewId() = 0; + + // Remove all cache entries that are not actively in use. Memory-constrained + // applications may wish to call this method to reduce memory usage. + // Default implementation of Prune() does nothing. Subclasses are strongly + // encouraged to override the default implementation. A future release of + // leveldb may change Prune() to a pure abstract method. + virtual void Prune() {} + + // Return an estimate of the combined charges of all elements stored in the + // cache. + virtual size_t TotalCharge() const = 0; + + private: + void LRU_Remove(Handle* e); + void LRU_Append(Handle* e); + void Unref(Handle* e); + + struct Rep; + Rep* rep_; + + // No copying allowed + Cache(const Cache&); + void operator=(const Cache&); +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_CACHE_H_ diff --git a/leveldb-library/leveldb/comparator.h b/leveldb-library/leveldb/comparator.h new file mode 100644 index 0000000..556b984 --- /dev/null +++ b/leveldb-library/leveldb/comparator.h @@ -0,0 +1,63 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ +#define STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ + +#include + +namespace leveldb { + +class Slice; + +// A Comparator object provides a total order across slices that are +// used as keys in an sstable or a database. A Comparator implementation +// must be thread-safe since leveldb may invoke its methods concurrently +// from multiple threads. +class Comparator { + public: + virtual ~Comparator(); + + // Three-way comparison. Returns value: + // < 0 iff "a" < "b", + // == 0 iff "a" == "b", + // > 0 iff "a" > "b" + virtual int Compare(const Slice& a, const Slice& b) const = 0; + + // The name of the comparator. Used to check for comparator + // mismatches (i.e., a DB created with one comparator is + // accessed using a different comparator. + // + // The client of this package should switch to a new name whenever + // the comparator implementation changes in a way that will cause + // the relative ordering of any two keys to change. + // + // Names starting with "leveldb." are reserved and should not be used + // by any clients of this package. + virtual const char* Name() const = 0; + + // Advanced functions: these are used to reduce the space requirements + // for internal data structures like index blocks. + + // If *start < limit, changes *start to a short string in [start,limit). + // Simple comparator implementations may return with *start unchanged, + // i.e., an implementation of this method that does nothing is correct. + virtual void FindShortestSeparator( + std::string* start, + const Slice& limit) const = 0; + + // Changes *key to a short string >= *key. + // Simple comparator implementations may return with *key unchanged, + // i.e., an implementation of this method that does nothing is correct. + virtual void FindShortSuccessor(std::string* key) const = 0; +}; + +// Return a builtin comparator that uses lexicographic byte-wise +// ordering. The result remains the property of this module and +// must not be deleted. +extern const Comparator* BytewiseComparator(); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ diff --git a/leveldb-library/leveldb/db.h b/leveldb-library/leveldb/db.h new file mode 100644 index 0000000..bfab10a --- /dev/null +++ b/leveldb-library/leveldb/db.h @@ -0,0 +1,163 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_INCLUDE_DB_H_ +#define STORAGE_LEVELDB_INCLUDE_DB_H_ + +#include +#include +#include "leveldb/iterator.h" +#include "leveldb/options.h" + +namespace leveldb { + +// Update Makefile if you change these +static const int kMajorVersion = 1; +static const int kMinorVersion = 20; + +struct Options; +struct ReadOptions; +struct WriteOptions; +class WriteBatch; + +// Abstract handle to particular state of a DB. +// A Snapshot is an immutable object and can therefore be safely +// accessed from multiple threads without any external synchronization. +class Snapshot { + protected: + virtual ~Snapshot(); +}; + +// A range of keys +struct Range { + Slice start; // Included in the range + Slice limit; // Not included in the range + + Range() { } + Range(const Slice& s, const Slice& l) : start(s), limit(l) { } +}; + +// A DB is a persistent ordered map from keys to values. +// A DB is safe for concurrent access from multiple threads without +// any external synchronization. +class DB { + public: + // Open the database with the specified "name". + // Stores a pointer to a heap-allocated database in *dbptr and returns + // OK on success. + // Stores NULL in *dbptr and returns a non-OK status on error. + // Caller should delete *dbptr when it is no longer needed. + static Status Open(const Options& options, + const std::string& name, + DB** dbptr); + + DB() { } + virtual ~DB(); + + // Set the database entry for "key" to "value". Returns OK on success, + // and a non-OK status on error. + // Note: consider setting options.sync = true. + virtual Status Put(const WriteOptions& options, + const Slice& key, + const Slice& value) = 0; + + // Remove the database entry (if any) for "key". Returns OK on + // success, and a non-OK status on error. It is not an error if "key" + // did not exist in the database. + // Note: consider setting options.sync = true. + virtual Status Delete(const WriteOptions& options, const Slice& key) = 0; + + // Apply the specified updates to the database. + // Returns OK on success, non-OK on failure. + // Note: consider setting options.sync = true. + virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0; + + // If the database contains an entry for "key" store the + // corresponding value in *value and return OK. + // + // If there is no entry for "key" leave *value unchanged and return + // a status for which Status::IsNotFound() returns true. + // + // May return some other Status on an error. + virtual Status Get(const ReadOptions& options, + const Slice& key, std::string* value) = 0; + + // Return a heap-allocated iterator over the contents of the database. + // The result of NewIterator() is initially invalid (caller must + // call one of the Seek methods on the iterator before using it). + // + // Caller should delete the iterator when it is no longer needed. + // The returned iterator should be deleted before this db is deleted. + virtual Iterator* NewIterator(const ReadOptions& options) = 0; + + // Return a handle to the current DB state. Iterators created with + // this handle will all observe a stable snapshot of the current DB + // state. The caller must call ReleaseSnapshot(result) when the + // snapshot is no longer needed. + virtual const Snapshot* GetSnapshot() = 0; + + // Release a previously acquired snapshot. The caller must not + // use "snapshot" after this call. + virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0; + + // DB implementations can export properties about their state + // via this method. If "property" is a valid property understood by this + // DB implementation, fills "*value" with its current value and returns + // true. Otherwise returns false. + // + // + // Valid property names include: + // + // "leveldb.num-files-at-level" - return the number of files at level , + // where is an ASCII representation of a level number (e.g. "0"). + // "leveldb.stats" - returns a multi-line string that describes statistics + // about the internal operation of the DB. + // "leveldb.sstables" - returns a multi-line string that describes all + // of the sstables that make up the db contents. + // "leveldb.approximate-memory-usage" - returns the approximate number of + // bytes of memory in use by the DB. + virtual bool GetProperty(const Slice& property, std::string* value) = 0; + + // For each i in [0,n-1], store in "sizes[i]", the approximate + // file system space used by keys in "[range[i].start .. range[i].limit)". + // + // Note that the returned sizes measure file system space usage, so + // if the user data compresses by a factor of ten, the returned + // sizes will be one-tenth the size of the corresponding user data size. + // + // The results may not include the sizes of recently written data. + virtual void GetApproximateSizes(const Range* range, int n, + uint64_t* sizes) = 0; + + // Compact the underlying storage for the key range [*begin,*end]. + // In particular, deleted and overwritten versions are discarded, + // and the data is rearranged to reduce the cost of operations + // needed to access the data. This operation should typically only + // be invoked by users who understand the underlying implementation. + // + // begin==NULL is treated as a key before all keys in the database. + // end==NULL is treated as a key after all keys in the database. + // Therefore the following call will compact the entire database: + // db->CompactRange(NULL, NULL); + virtual void CompactRange(const Slice* begin, const Slice* end) = 0; + + private: + // No copying allowed + DB(const DB&); + void operator=(const DB&); +}; + +// Destroy the contents of the specified database. +// Be very careful using this method. +Status DestroyDB(const std::string& name, const Options& options); + +// If a DB cannot be opened, you may attempt to call this method to +// resurrect as much of the contents of the database as possible. +// Some data may be lost, so be careful when calling this function +// on a database that contains important information. +Status RepairDB(const std::string& dbname, const Options& options); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_DB_H_ diff --git a/leveldb-library/leveldb/dumpfile.h b/leveldb-library/leveldb/dumpfile.h new file mode 100644 index 0000000..3f97fda --- /dev/null +++ b/leveldb-library/leveldb/dumpfile.h @@ -0,0 +1,25 @@ +// Copyright (c) 2014 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_INCLUDE_DUMPFILE_H_ +#define STORAGE_LEVELDB_INCLUDE_DUMPFILE_H_ + +#include +#include "leveldb/env.h" +#include "leveldb/status.h" + +namespace leveldb { + +// Dump the contents of the file named by fname in text format to +// *dst. Makes a sequence of dst->Append() calls; each call is passed +// the newline-terminated text corresponding to a single item found +// in the file. +// +// Returns a non-OK result if fname does not name a leveldb storage +// file, or if the file cannot be read. +Status DumpFile(Env* env, const std::string& fname, WritableFile* dst); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_DUMPFILE_H_ diff --git a/leveldb-library/leveldb/env.h b/leveldb-library/leveldb/env.h new file mode 100644 index 0000000..99b6c21 --- /dev/null +++ b/leveldb-library/leveldb/env.h @@ -0,0 +1,351 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// An Env is an interface used by the leveldb implementation to access +// operating system functionality like the filesystem etc. Callers +// may wish to provide a custom Env object when opening a database to +// get fine gain control; e.g., to rate limit file system operations. +// +// All Env implementations are safe for concurrent access from +// multiple threads without any external synchronization. + +#ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_ +#define STORAGE_LEVELDB_INCLUDE_ENV_H_ + +#include +#include +#include +#include +#include "leveldb/status.h" + +namespace leveldb { + +class FileLock; +class Logger; +class RandomAccessFile; +class SequentialFile; +class Slice; +class WritableFile; + +class Env { + public: + Env() { } + virtual ~Env(); + + // Return a default environment suitable for the current operating + // system. Sophisticated users may wish to provide their own Env + // implementation instead of relying on this default environment. + // + // The result of Default() belongs to leveldb and must never be deleted. + static Env* Default(); + + // Create a brand new sequentially-readable file with the specified name. + // On success, stores a pointer to the new file in *result and returns OK. + // On failure stores NULL in *result and returns non-OK. If the file does + // not exist, returns a non-OK status. + // + // The returned file will only be accessed by one thread at a time. + virtual Status NewSequentialFile(const std::string& fname, + SequentialFile** result) = 0; + + // Create a brand new random access read-only file with the + // specified name. On success, stores a pointer to the new file in + // *result and returns OK. On failure stores NULL in *result and + // returns non-OK. If the file does not exist, returns a non-OK + // status. + // + // The returned file may be concurrently accessed by multiple threads. + virtual Status NewRandomAccessFile(const std::string& fname, + RandomAccessFile** result) = 0; + + // Create an object that writes to a new file with the specified + // name. Deletes any existing file with the same name and creates a + // new file. On success, stores a pointer to the new file in + // *result and returns OK. On failure stores NULL in *result and + // returns non-OK. + // + // The returned file will only be accessed by one thread at a time. + virtual Status NewWritableFile(const std::string& fname, + WritableFile** result) = 0; + + // Create an object that either appends to an existing file, or + // writes to a new file (if the file does not exist to begin with). + // On success, stores a pointer to the new file in *result and + // returns OK. On failure stores NULL in *result and returns + // non-OK. + // + // The returned file will only be accessed by one thread at a time. + // + // May return an IsNotSupportedError error if this Env does + // not allow appending to an existing file. Users of Env (including + // the leveldb implementation) must be prepared to deal with + // an Env that does not support appending. + virtual Status NewAppendableFile(const std::string& fname, + WritableFile** result); + + // Returns true iff the named file exists. + virtual bool FileExists(const std::string& fname) = 0; + + // Store in *result the names of the children of the specified directory. + // The names are relative to "dir". + // Original contents of *results are dropped. + virtual Status GetChildren(const std::string& dir, + std::vector* result) = 0; + + // Delete the named file. + virtual Status DeleteFile(const std::string& fname) = 0; + + // Create the specified directory. + virtual Status CreateDir(const std::string& dirname) = 0; + + // Delete the specified directory. + virtual Status DeleteDir(const std::string& dirname) = 0; + + // Store the size of fname in *file_size. + virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0; + + // Rename file src to target. + virtual Status RenameFile(const std::string& src, + const std::string& target) = 0; + + // Lock the specified file. Used to prevent concurrent access to + // the same db by multiple processes. On failure, stores NULL in + // *lock and returns non-OK. + // + // On success, stores a pointer to the object that represents the + // acquired lock in *lock and returns OK. The caller should call + // UnlockFile(*lock) to release the lock. If the process exits, + // the lock will be automatically released. + // + // If somebody else already holds the lock, finishes immediately + // with a failure. I.e., this call does not wait for existing locks + // to go away. + // + // May create the named file if it does not already exist. + virtual Status LockFile(const std::string& fname, FileLock** lock) = 0; + + // Release the lock acquired by a previous successful call to LockFile. + // REQUIRES: lock was returned by a successful LockFile() call + // REQUIRES: lock has not already been unlocked. + virtual Status UnlockFile(FileLock* lock) = 0; + + // Arrange to run "(*function)(arg)" once in a background thread. + // + // "function" may run in an unspecified thread. Multiple functions + // added to the same Env may run concurrently in different threads. + // I.e., the caller may not assume that background work items are + // serialized. + virtual void Schedule( + void (*function)(void* arg), + void* arg) = 0; + + // Start a new thread, invoking "function(arg)" within the new thread. + // When "function(arg)" returns, the thread will be destroyed. + virtual void StartThread(void (*function)(void* arg), void* arg) = 0; + + // *path is set to a temporary directory that can be used for testing. It may + // or many not have just been created. The directory may or may not differ + // between runs of the same process, but subsequent calls will return the + // same directory. + virtual Status GetTestDirectory(std::string* path) = 0; + + // Create and return a log file for storing informational messages. + virtual Status NewLogger(const std::string& fname, Logger** result) = 0; + + // Returns the number of micro-seconds since some fixed point in time. Only + // useful for computing deltas of time. + virtual uint64_t NowMicros() = 0; + + // Sleep/delay the thread for the prescribed number of micro-seconds. + virtual void SleepForMicroseconds(int micros) = 0; + + private: + // No copying allowed + Env(const Env&); + void operator=(const Env&); +}; + +// A file abstraction for reading sequentially through a file +class SequentialFile { + public: + SequentialFile() { } + virtual ~SequentialFile(); + + // Read up to "n" bytes from the file. "scratch[0..n-1]" may be + // written by this routine. Sets "*result" to the data that was + // read (including if fewer than "n" bytes were successfully read). + // May set "*result" to point at data in "scratch[0..n-1]", so + // "scratch[0..n-1]" must be live when "*result" is used. + // If an error was encountered, returns a non-OK status. + // + // REQUIRES: External synchronization + virtual Status Read(size_t n, Slice* result, char* scratch) = 0; + + // Skip "n" bytes from the file. This is guaranteed to be no + // slower that reading the same data, but may be faster. + // + // If end of file is reached, skipping will stop at the end of the + // file, and Skip will return OK. + // + // REQUIRES: External synchronization + virtual Status Skip(uint64_t n) = 0; + + private: + // No copying allowed + SequentialFile(const SequentialFile&); + void operator=(const SequentialFile&); +}; + +// A file abstraction for randomly reading the contents of a file. +class RandomAccessFile { + public: + RandomAccessFile() { } + virtual ~RandomAccessFile(); + + // Read up to "n" bytes from the file starting at "offset". + // "scratch[0..n-1]" may be written by this routine. Sets "*result" + // to the data that was read (including if fewer than "n" bytes were + // successfully read). May set "*result" to point at data in + // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when + // "*result" is used. If an error was encountered, returns a non-OK + // status. + // + // Safe for concurrent use by multiple threads. + virtual Status Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const = 0; + + private: + // No copying allowed + RandomAccessFile(const RandomAccessFile&); + void operator=(const RandomAccessFile&); +}; + +// A file abstraction for sequential writing. The implementation +// must provide buffering since callers may append small fragments +// at a time to the file. +class WritableFile { + public: + WritableFile() { } + virtual ~WritableFile(); + + virtual Status Append(const Slice& data) = 0; + virtual Status Close() = 0; + virtual Status Flush() = 0; + virtual Status Sync() = 0; + + private: + // No copying allowed + WritableFile(const WritableFile&); + void operator=(const WritableFile&); +}; + +// An interface for writing log messages. +class Logger { + public: + Logger() { } + virtual ~Logger(); + + // Write an entry to the log file with the specified format. + virtual void Logv(const char* format, va_list ap) = 0; + + private: + // No copying allowed + Logger(const Logger&); + void operator=(const Logger&); +}; + + +// Identifies a locked file. +class FileLock { + public: + FileLock() { } + virtual ~FileLock(); + private: + // No copying allowed + FileLock(const FileLock&); + void operator=(const FileLock&); +}; + +// Log the specified data to *info_log if info_log is non-NULL. +extern void Log(Logger* info_log, const char* format, ...) +# if defined(__GNUC__) || defined(__clang__) + __attribute__((__format__ (__printf__, 2, 3))) +# endif + ; + +// A utility routine: write "data" to the named file. +extern Status WriteStringToFile(Env* env, const Slice& data, + const std::string& fname); + +// A utility routine: read contents of named file into *data +extern Status ReadFileToString(Env* env, const std::string& fname, + std::string* data); + +// An implementation of Env that forwards all calls to another Env. +// May be useful to clients who wish to override just part of the +// functionality of another Env. +class EnvWrapper : public Env { + public: + // Initialize an EnvWrapper that delegates all calls to *t + explicit EnvWrapper(Env* t) : target_(t) { } + virtual ~EnvWrapper(); + + // Return the target to which this Env forwards all calls + Env* target() const { return target_; } + + // The following text is boilerplate that forwards all methods to target() + Status NewSequentialFile(const std::string& f, SequentialFile** r) { + return target_->NewSequentialFile(f, r); + } + Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) { + return target_->NewRandomAccessFile(f, r); + } + Status NewWritableFile(const std::string& f, WritableFile** r) { + return target_->NewWritableFile(f, r); + } + Status NewAppendableFile(const std::string& f, WritableFile** r) { + return target_->NewAppendableFile(f, r); + } + bool FileExists(const std::string& f) { return target_->FileExists(f); } + Status GetChildren(const std::string& dir, std::vector* r) { + return target_->GetChildren(dir, r); + } + Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); } + Status CreateDir(const std::string& d) { return target_->CreateDir(d); } + Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); } + Status GetFileSize(const std::string& f, uint64_t* s) { + return target_->GetFileSize(f, s); + } + Status RenameFile(const std::string& s, const std::string& t) { + return target_->RenameFile(s, t); + } + Status LockFile(const std::string& f, FileLock** l) { + return target_->LockFile(f, l); + } + Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); } + void Schedule(void (*f)(void*), void* a) { + return target_->Schedule(f, a); + } + void StartThread(void (*f)(void*), void* a) { + return target_->StartThread(f, a); + } + virtual Status GetTestDirectory(std::string* path) { + return target_->GetTestDirectory(path); + } + virtual Status NewLogger(const std::string& fname, Logger** result) { + return target_->NewLogger(fname, result); + } + uint64_t NowMicros() { + return target_->NowMicros(); + } + void SleepForMicroseconds(int micros) { + target_->SleepForMicroseconds(micros); + } + private: + Env* target_; +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_ENV_H_ diff --git a/leveldb-library/leveldb/filter_policy.h b/leveldb-library/leveldb/filter_policy.h new file mode 100644 index 0000000..1fba080 --- /dev/null +++ b/leveldb-library/leveldb/filter_policy.h @@ -0,0 +1,70 @@ +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A database can be configured with a custom FilterPolicy object. +// This object is responsible for creating a small filter from a set +// of keys. These filters are stored in leveldb and are consulted +// automatically by leveldb to decide whether or not to read some +// information from disk. In many cases, a filter can cut down the +// number of disk seeks form a handful to a single disk seek per +// DB::Get() call. +// +// Most people will want to use the builtin bloom filter support (see +// NewBloomFilterPolicy() below). + +#ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ +#define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ + +#include + +namespace leveldb { + +class Slice; + +class FilterPolicy { + public: + virtual ~FilterPolicy(); + + // Return the name of this policy. Note that if the filter encoding + // changes in an incompatible way, the name returned by this method + // must be changed. Otherwise, old incompatible filters may be + // passed to methods of this type. + virtual const char* Name() const = 0; + + // keys[0,n-1] contains a list of keys (potentially with duplicates) + // that are ordered according to the user supplied comparator. + // Append a filter that summarizes keys[0,n-1] to *dst. + // + // Warning: do not change the initial contents of *dst. Instead, + // append the newly constructed filter to *dst. + virtual void CreateFilter(const Slice* keys, int n, std::string* dst) + const = 0; + + // "filter" contains the data appended by a preceding call to + // CreateFilter() on this class. This method must return true if + // the key was in the list of keys passed to CreateFilter(). + // This method may return true or false if the key was not on the + // list, but it should aim to return false with a high probability. + virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; +}; + +// Return a new filter policy that uses a bloom filter with approximately +// the specified number of bits per key. A good value for bits_per_key +// is 10, which yields a filter with ~ 1% false positive rate. +// +// Callers must delete the result after any database that is using the +// result has been closed. +// +// Note: if you are using a custom comparator that ignores some parts +// of the keys being compared, you must not use NewBloomFilterPolicy() +// and must provide your own FilterPolicy that also ignores the +// corresponding parts of the keys. For example, if the comparator +// ignores trailing spaces, it would be incorrect to use a +// FilterPolicy (like NewBloomFilterPolicy) that does not ignore +// trailing spaces in keys. +extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key); + +} + +#endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ diff --git a/leveldb-library/leveldb/iterator.h b/leveldb-library/leveldb/iterator.h new file mode 100644 index 0000000..da631ed --- /dev/null +++ b/leveldb-library/leveldb/iterator.h @@ -0,0 +1,100 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// An iterator yields a sequence of key/value pairs from a source. +// The following class defines the interface. Multiple implementations +// are provided by this library. In particular, iterators are provided +// to access the contents of a Table or a DB. +// +// Multiple threads can invoke const methods on an Iterator without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same Iterator must use +// external synchronization. + +#ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ +#define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ + +#include "leveldb/slice.h" +#include "leveldb/status.h" + +namespace leveldb { + +class Iterator { + public: + Iterator(); + virtual ~Iterator(); + + // An iterator is either positioned at a key/value pair, or + // not valid. This method returns true iff the iterator is valid. + virtual bool Valid() const = 0; + + // Position at the first key in the source. The iterator is Valid() + // after this call iff the source is not empty. + virtual void SeekToFirst() = 0; + + // Position at the last key in the source. The iterator is + // Valid() after this call iff the source is not empty. + virtual void SeekToLast() = 0; + + // Position at the first key in the source that is at or past target. + // The iterator is Valid() after this call iff the source contains + // an entry that comes at or past target. + virtual void Seek(const Slice& target) = 0; + + // Moves to the next entry in the source. After this call, Valid() is + // true iff the iterator was not positioned at the last entry in the source. + // REQUIRES: Valid() + virtual void Next() = 0; + + // Moves to the previous entry in the source. After this call, Valid() is + // true iff the iterator was not positioned at the first entry in source. + // REQUIRES: Valid() + virtual void Prev() = 0; + + // Return the key for the current entry. The underlying storage for + // the returned slice is valid only until the next modification of + // the iterator. + // REQUIRES: Valid() + virtual Slice key() const = 0; + + // Return the value for the current entry. The underlying storage for + // the returned slice is valid only until the next modification of + // the iterator. + // REQUIRES: Valid() + virtual Slice value() const = 0; + + // If an error has occurred, return it. Else return an ok status. + virtual Status status() const = 0; + + // Clients are allowed to register function/arg1/arg2 triples that + // will be invoked when this iterator is destroyed. + // + // Note that unlike all of the preceding methods, this method is + // not abstract and therefore clients should not override it. + typedef void (*CleanupFunction)(void* arg1, void* arg2); + void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); + + private: + struct Cleanup { + CleanupFunction function; + void* arg1; + void* arg2; + Cleanup* next; + }; + Cleanup cleanup_; + + // No copying allowed + Iterator(const Iterator&); + void operator=(const Iterator&); +}; + +// Return an empty iterator (yields nothing). +extern Iterator* NewEmptyIterator(); + +// Return an empty iterator with the specified status. +extern Iterator* NewErrorIterator(const Status& status); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ diff --git a/leveldb-library/leveldb/options.h b/leveldb-library/leveldb/options.h new file mode 100644 index 0000000..976e381 --- /dev/null +++ b/leveldb-library/leveldb/options.h @@ -0,0 +1,213 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ +#define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ + +#include + +namespace leveldb { + +class Cache; +class Comparator; +class Env; +class FilterPolicy; +class Logger; +class Snapshot; + +// DB contents are stored in a set of blocks, each of which holds a +// sequence of key,value pairs. Each block may be compressed before +// being stored in a file. The following enum describes which +// compression method (if any) is used to compress a block. +enum CompressionType { + // NOTE: do not change the values of existing entries, as these are + // part of the persistent format on disk. + kNoCompression = 0x0, + kSnappyCompression = 0x1 +}; + +// Options to control the behavior of a database (passed to DB::Open) +struct Options { + // ------------------- + // Parameters that affect behavior + + // Comparator used to define the order of keys in the table. + // Default: a comparator that uses lexicographic byte-wise ordering + // + // REQUIRES: The client must ensure that the comparator supplied + // here has the same name and orders keys *exactly* the same as the + // comparator provided to previous open calls on the same DB. + const Comparator* comparator; + + // If true, the database will be created if it is missing. + // Default: false + bool create_if_missing; + + // If true, an error is raised if the database already exists. + // Default: false + bool error_if_exists; + + // If true, the implementation will do aggressive checking of the + // data it is processing and will stop early if it detects any + // errors. This may have unforeseen ramifications: for example, a + // corruption of one DB entry may cause a large number of entries to + // become unreadable or for the entire DB to become unopenable. + // Default: false + bool paranoid_checks; + + // Use the specified object to interact with the environment, + // e.g. to read/write files, schedule background work, etc. + // Default: Env::Default() + Env* env; + + // Any internal progress/error information generated by the db will + // be written to info_log if it is non-NULL, or to a file stored + // in the same directory as the DB contents if info_log is NULL. + // Default: NULL + Logger* info_log; + + // ------------------- + // Parameters that affect performance + + // Amount of data to build up in memory (backed by an unsorted log + // on disk) before converting to a sorted on-disk file. + // + // Larger values increase performance, especially during bulk loads. + // Up to two write buffers may be held in memory at the same time, + // so you may wish to adjust this parameter to control memory usage. + // Also, a larger write buffer will result in a longer recovery time + // the next time the database is opened. + // + // Default: 4MB + size_t write_buffer_size; + + // Number of open files that can be used by the DB. You may need to + // increase this if your database has a large working set (budget + // one open file per 2MB of working set). + // + // Default: 1000 + int max_open_files; + + // Control over blocks (user data is stored in a set of blocks, and + // a block is the unit of reading from disk). + + // If non-NULL, use the specified cache for blocks. + // If NULL, leveldb will automatically create and use an 8MB internal cache. + // Default: NULL + Cache* block_cache; + + // Approximate size of user data packed per block. Note that the + // block size specified here corresponds to uncompressed data. The + // actual size of the unit read from disk may be smaller if + // compression is enabled. This parameter can be changed dynamically. + // + // Default: 4K + size_t block_size; + + // Number of keys between restart points for delta encoding of keys. + // This parameter can be changed dynamically. Most clients should + // leave this parameter alone. + // + // Default: 16 + int block_restart_interval; + + // Leveldb will write up to this amount of bytes to a file before + // switching to a new one. + // Most clients should leave this parameter alone. However if your + // filesystem is more efficient with larger files, you could + // consider increasing the value. The downside will be longer + // compactions and hence longer latency/performance hiccups. + // Another reason to increase this parameter might be when you are + // initially populating a large database. + // + // Default: 2MB + size_t max_file_size; + + // Compress blocks using the specified compression algorithm. This + // parameter can be changed dynamically. + // + // Default: kSnappyCompression, which gives lightweight but fast + // compression. + // + // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz: + // ~200-500MB/s compression + // ~400-800MB/s decompression + // Note that these speeds are significantly faster than most + // persistent storage speeds, and therefore it is typically never + // worth switching to kNoCompression. Even if the input data is + // incompressible, the kSnappyCompression implementation will + // efficiently detect that and will switch to uncompressed mode. + CompressionType compression; + + // EXPERIMENTAL: If true, append to existing MANIFEST and log files + // when a database is opened. This can significantly speed up open. + // + // Default: currently false, but may become true later. + bool reuse_logs; + + // If non-NULL, use the specified filter policy to reduce disk reads. + // Many applications will benefit from passing the result of + // NewBloomFilterPolicy() here. + // + // Default: NULL + const FilterPolicy* filter_policy; + + // Create an Options object with default values for all fields. + Options(); +}; + +// Options that control read operations +struct ReadOptions { + // If true, all data read from underlying storage will be + // verified against corresponding checksums. + // Default: false + bool verify_checksums; + + // Should the data read for this iteration be cached in memory? + // Callers may wish to set this field to false for bulk scans. + // Default: true + bool fill_cache; + + // If "snapshot" is non-NULL, read as of the supplied snapshot + // (which must belong to the DB that is being read and which must + // not have been released). If "snapshot" is NULL, use an implicit + // snapshot of the state at the beginning of this read operation. + // Default: NULL + const Snapshot* snapshot; + + ReadOptions() + : verify_checksums(false), + fill_cache(true), + snapshot(NULL) { + } +}; + +// Options that control write operations +struct WriteOptions { + // If true, the write will be flushed from the operating system + // buffer cache (by calling WritableFile::Sync()) before the write + // is considered complete. If this flag is true, writes will be + // slower. + // + // If this flag is false, and the machine crashes, some recent + // writes may be lost. Note that if it is just the process that + // crashes (i.e., the machine does not reboot), no writes will be + // lost even if sync==false. + // + // In other words, a DB write with sync==false has similar + // crash semantics as the "write()" system call. A DB write + // with sync==true has similar crash semantics to a "write()" + // system call followed by "fsync()". + // + // Default: false + bool sync; + + WriteOptions() + : sync(false) { + } +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ diff --git a/leveldb-library/leveldb/slice.h b/leveldb-library/leveldb/slice.h new file mode 100644 index 0000000..bc36798 --- /dev/null +++ b/leveldb-library/leveldb/slice.h @@ -0,0 +1,109 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Slice is a simple structure containing a pointer into some external +// storage and a size. The user of a Slice must ensure that the slice +// is not used after the corresponding external storage has been +// deallocated. +// +// Multiple threads can invoke const methods on a Slice without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same Slice must use +// external synchronization. + +#ifndef STORAGE_LEVELDB_INCLUDE_SLICE_H_ +#define STORAGE_LEVELDB_INCLUDE_SLICE_H_ + +#include +#include +#include +#include + +namespace leveldb { + +class Slice { + public: + // Create an empty slice. + Slice() : data_(""), size_(0) { } + + // Create a slice that refers to d[0,n-1]. + Slice(const char* d, size_t n) : data_(d), size_(n) { } + + // Create a slice that refers to the contents of "s" + Slice(const std::string& s) : data_(s.data()), size_(s.size()) { } + + // Create a slice that refers to s[0,strlen(s)-1] + Slice(const char* s) : data_(s), size_(strlen(s)) { } + + // Return a pointer to the beginning of the referenced data + const char* data() const { return data_; } + + // Return the length (in bytes) of the referenced data + size_t size() const { return size_; } + + // Return true iff the length of the referenced data is zero + bool empty() const { return size_ == 0; } + + // Return the ith byte in the referenced data. + // REQUIRES: n < size() + char operator[](size_t n) const { + assert(n < size()); + return data_[n]; + } + + // Change this slice to refer to an empty array + void clear() { data_ = ""; size_ = 0; } + + // Drop the first "n" bytes from this slice. + void remove_prefix(size_t n) { + assert(n <= size()); + data_ += n; + size_ -= n; + } + + // Return a string that contains the copy of the referenced data. + std::string ToString() const { return std::string(data_, size_); } + + // Three-way comparison. Returns value: + // < 0 iff "*this" < "b", + // == 0 iff "*this" == "b", + // > 0 iff "*this" > "b" + int compare(const Slice& b) const; + + // Return true iff "x" is a prefix of "*this" + bool starts_with(const Slice& x) const { + return ((size_ >= x.size_) && + (memcmp(data_, x.data_, x.size_) == 0)); + } + + private: + const char* data_; + size_t size_; + + // Intentionally copyable +}; + +inline bool operator==(const Slice& x, const Slice& y) { + return ((x.size() == y.size()) && + (memcmp(x.data(), y.data(), x.size()) == 0)); +} + +inline bool operator!=(const Slice& x, const Slice& y) { + return !(x == y); +} + +inline int Slice::compare(const Slice& b) const { + const size_t min_len = (size_ < b.size_) ? size_ : b.size_; + int r = memcmp(data_, b.data_, min_len); + if (r == 0) { + if (size_ < b.size_) r = -1; + else if (size_ > b.size_) r = +1; + } + return r; +} + +} // namespace leveldb + + +#endif // STORAGE_LEVELDB_INCLUDE_SLICE_H_ diff --git a/leveldb-library/leveldb/status.h b/leveldb-library/leveldb/status.h new file mode 100644 index 0000000..d9575f9 --- /dev/null +++ b/leveldb-library/leveldb/status.h @@ -0,0 +1,112 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A Status encapsulates the result of an operation. It may indicate success, +// or it may indicate an error with an associated error message. +// +// Multiple threads can invoke const methods on a Status without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same Status must use +// external synchronization. + +#ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_ +#define STORAGE_LEVELDB_INCLUDE_STATUS_H_ + +#include +#include "leveldb/slice.h" + +namespace leveldb { + +class Status { + public: + // Create a success status. + Status() : state_(NULL) { } + ~Status() { delete[] state_; } + + // Copy the specified status. + Status(const Status& s); + void operator=(const Status& s); + + // Return a success status. + static Status OK() { return Status(); } + + // Return error status of an appropriate type. + static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kNotFound, msg, msg2); + } + static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kCorruption, msg, msg2); + } + static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kNotSupported, msg, msg2); + } + static Status InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kInvalidArgument, msg, msg2); + } + static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kIOError, msg, msg2); + } + + // Returns true iff the status indicates success. + bool ok() const { return (state_ == NULL); } + + // Returns true iff the status indicates a NotFound error. + bool IsNotFound() const { return code() == kNotFound; } + + // Returns true iff the status indicates a Corruption error. + bool IsCorruption() const { return code() == kCorruption; } + + // Returns true iff the status indicates an IOError. + bool IsIOError() const { return code() == kIOError; } + + // Returns true iff the status indicates a NotSupportedError. + bool IsNotSupportedError() const { return code() == kNotSupported; } + + // Returns true iff the status indicates an InvalidArgument. + bool IsInvalidArgument() const { return code() == kInvalidArgument; } + + // Return a string representation of this status suitable for printing. + // Returns the string "OK" for success. + std::string ToString() const; + + private: + // OK status has a NULL state_. Otherwise, state_ is a new[] array + // of the following form: + // state_[0..3] == length of message + // state_[4] == code + // state_[5..] == message + const char* state_; + + enum Code { + kOk = 0, + kNotFound = 1, + kCorruption = 2, + kNotSupported = 3, + kInvalidArgument = 4, + kIOError = 5 + }; + + Code code() const { + return (state_ == NULL) ? kOk : static_cast(state_[4]); + } + + Status(Code code, const Slice& msg, const Slice& msg2); + static const char* CopyState(const char* s); +}; + +inline Status::Status(const Status& s) { + state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); +} +inline void Status::operator=(const Status& s) { + // The following condition catches both aliasing (when this == &s), + // and the common case where both s and *this are ok. + if (state_ != s.state_) { + delete[] state_; + state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); + } +} + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_STATUS_H_ diff --git a/leveldb-library/leveldb/table.h b/leveldb-library/leveldb/table.h new file mode 100644 index 0000000..a9746c3 --- /dev/null +++ b/leveldb-library/leveldb/table.h @@ -0,0 +1,85 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_INCLUDE_TABLE_H_ +#define STORAGE_LEVELDB_INCLUDE_TABLE_H_ + +#include +#include "leveldb/iterator.h" + +namespace leveldb { + +class Block; +class BlockHandle; +class Footer; +struct Options; +class RandomAccessFile; +struct ReadOptions; +class TableCache; + +// A Table is a sorted map from strings to strings. Tables are +// immutable and persistent. A Table may be safely accessed from +// multiple threads without external synchronization. +class Table { + public: + // Attempt to open the table that is stored in bytes [0..file_size) + // of "file", and read the metadata entries necessary to allow + // retrieving data from the table. + // + // If successful, returns ok and sets "*table" to the newly opened + // table. The client should delete "*table" when no longer needed. + // If there was an error while initializing the table, sets "*table" + // to NULL and returns a non-ok status. Does not take ownership of + // "*source", but the client must ensure that "source" remains live + // for the duration of the returned table's lifetime. + // + // *file must remain live while this Table is in use. + static Status Open(const Options& options, + RandomAccessFile* file, + uint64_t file_size, + Table** table); + + ~Table(); + + // Returns a new iterator over the table contents. + // The result of NewIterator() is initially invalid (caller must + // call one of the Seek methods on the iterator before using it). + Iterator* NewIterator(const ReadOptions&) const; + + // Given a key, return an approximate byte offset in the file where + // the data for that key begins (or would begin if the key were + // present in the file). The returned value is in terms of file + // bytes, and so includes effects like compression of the underlying data. + // E.g., the approximate offset of the last key in the table will + // be close to the file length. + uint64_t ApproximateOffsetOf(const Slice& key) const; + + private: + struct Rep; + Rep* rep_; + + explicit Table(Rep* rep) { rep_ = rep; } + static Iterator* BlockReader(void*, const ReadOptions&, const Slice&); + + // Calls (*handle_result)(arg, ...) with the entry found after a call + // to Seek(key). May not make such a call if filter policy says + // that key is not present. + friend class TableCache; + Status InternalGet( + const ReadOptions&, const Slice& key, + void* arg, + void (*handle_result)(void* arg, const Slice& k, const Slice& v)); + + + void ReadMeta(const Footer& footer); + void ReadFilter(const Slice& filter_handle_value); + + // No copying allowed + Table(const Table&); + void operator=(const Table&); +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_TABLE_H_ diff --git a/leveldb-library/leveldb/table_builder.h b/leveldb-library/leveldb/table_builder.h new file mode 100644 index 0000000..5fd1dc7 --- /dev/null +++ b/leveldb-library/leveldb/table_builder.h @@ -0,0 +1,92 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// TableBuilder provides the interface used to build a Table +// (an immutable and sorted map from keys to values). +// +// Multiple threads can invoke const methods on a TableBuilder without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same TableBuilder must use +// external synchronization. + +#ifndef STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ +#define STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ + +#include +#include "leveldb/options.h" +#include "leveldb/status.h" + +namespace leveldb { + +class BlockBuilder; +class BlockHandle; +class WritableFile; + +class TableBuilder { + public: + // Create a builder that will store the contents of the table it is + // building in *file. Does not close the file. It is up to the + // caller to close the file after calling Finish(). + TableBuilder(const Options& options, WritableFile* file); + + // REQUIRES: Either Finish() or Abandon() has been called. + ~TableBuilder(); + + // Change the options used by this builder. Note: only some of the + // option fields can be changed after construction. If a field is + // not allowed to change dynamically and its value in the structure + // passed to the constructor is different from its value in the + // structure passed to this method, this method will return an error + // without changing any fields. + Status ChangeOptions(const Options& options); + + // Add key,value to the table being constructed. + // REQUIRES: key is after any previously added key according to comparator. + // REQUIRES: Finish(), Abandon() have not been called + void Add(const Slice& key, const Slice& value); + + // Advanced operation: flush any buffered key/value pairs to file. + // Can be used to ensure that two adjacent entries never live in + // the same data block. Most clients should not need to use this method. + // REQUIRES: Finish(), Abandon() have not been called + void Flush(); + + // Return non-ok iff some error has been detected. + Status status() const; + + // Finish building the table. Stops using the file passed to the + // constructor after this function returns. + // REQUIRES: Finish(), Abandon() have not been called + Status Finish(); + + // Indicate that the contents of this builder should be abandoned. Stops + // using the file passed to the constructor after this function returns. + // If the caller is not going to call Finish(), it must call Abandon() + // before destroying this builder. + // REQUIRES: Finish(), Abandon() have not been called + void Abandon(); + + // Number of calls to Add() so far. + uint64_t NumEntries() const; + + // Size of the file generated so far. If invoked after a successful + // Finish() call, returns the size of the final generated file. + uint64_t FileSize() const; + + private: + bool ok() const { return status().ok(); } + void WriteBlock(BlockBuilder* block, BlockHandle* handle); + void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle); + + struct Rep; + Rep* rep_; + + // No copying allowed + TableBuilder(const TableBuilder&); + void operator=(const TableBuilder&); +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ diff --git a/leveldb-library/leveldb/write_batch.h b/leveldb-library/leveldb/write_batch.h new file mode 100644 index 0000000..ee9aab6 --- /dev/null +++ b/leveldb-library/leveldb/write_batch.h @@ -0,0 +1,64 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// WriteBatch holds a collection of updates to apply atomically to a DB. +// +// The updates are applied in the order in which they are added +// to the WriteBatch. For example, the value of "key" will be "v3" +// after the following batch is written: +// +// batch.Put("key", "v1"); +// batch.Delete("key"); +// batch.Put("key", "v2"); +// batch.Put("key", "v3"); +// +// Multiple threads can invoke const methods on a WriteBatch without +// external synchronization, but if any of the threads may call a +// non-const method, all threads accessing the same WriteBatch must use +// external synchronization. + +#ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ +#define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ + +#include +#include "leveldb/status.h" + +namespace leveldb { + +class Slice; + +class WriteBatch { + public: + WriteBatch(); + ~WriteBatch(); + + // Store the mapping "key->value" in the database. + void Put(const Slice& key, const Slice& value); + + // If the database contains a mapping for "key", erase it. Else do nothing. + void Delete(const Slice& key); + + // Clear all updates buffered in this batch. + void Clear(); + + // Support for iterating over the contents of a batch. + class Handler { + public: + virtual ~Handler(); + virtual void Put(const Slice& key, const Slice& value) = 0; + virtual void Delete(const Slice& key) = 0; + }; + Status Iterate(Handler* handler) const; + + private: + friend class WriteBatchInternal; + + std::string rep_; // See comment in write_batch.cc for the format of rep_ + + // Intentionally copyable +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ diff --git a/leveldb-library/port/atomic_pointer.h b/leveldb-library/port/atomic_pointer.h new file mode 100644 index 0000000..1c4c7aa --- /dev/null +++ b/leveldb-library/port/atomic_pointer.h @@ -0,0 +1,242 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +// AtomicPointer provides storage for a lock-free pointer. +// Platform-dependent implementation of AtomicPointer: +// - If the platform provides a cheap barrier, we use it with raw pointers +// - If is present (on newer versions of gcc, it is), we use +// a -based AtomicPointer. However we prefer the memory +// barrier based version, because at least on a gcc 4.4 32-bit build +// on linux, we have encountered a buggy implementation. +// Also, some implementations are much slower than a memory-barrier +// based implementation (~16ns for based acquire-load vs. ~1ns for +// a barrier based acquire-load). +// This code is based on atomicops-internals-* in Google's perftools: +// http://code.google.com/p/google-perftools/source/browse/#svn%2Ftrunk%2Fsrc%2Fbase + +#ifndef PORT_ATOMIC_POINTER_H_ +#define PORT_ATOMIC_POINTER_H_ + +#include +#ifdef LEVELDB_ATOMIC_PRESENT +#include +#endif +#ifdef OS_WIN +#include +#endif +#ifdef OS_MACOSX +#include +#endif + +#if defined(_M_X64) || defined(__x86_64__) +#define ARCH_CPU_X86_FAMILY 1 +#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) +#define ARCH_CPU_X86_FAMILY 1 +#elif defined(__ARMEL__) +#define ARCH_CPU_ARM_FAMILY 1 +#elif defined(__aarch64__) +#define ARCH_CPU_ARM64_FAMILY 1 +#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__) +#define ARCH_CPU_PPC_FAMILY 1 +#elif defined(__mips__) +#define ARCH_CPU_MIPS_FAMILY 1 +#endif + +namespace leveldb { +namespace port { + +// Define MemoryBarrier() if available +// Windows on x86 +#if defined(OS_WIN) && defined(COMPILER_MSVC) && defined(ARCH_CPU_X86_FAMILY) +// windows.h already provides a MemoryBarrier(void) macro +// http://msdn.microsoft.com/en-us/library/ms684208(v=vs.85).aspx +#define LEVELDB_HAVE_MEMORY_BARRIER + +// Mac OS +#elif defined(OS_MACOSX) +inline void MemoryBarrier() { + OSMemoryBarrier(); +} +#define LEVELDB_HAVE_MEMORY_BARRIER + +// Gcc on x86 +#elif defined(ARCH_CPU_X86_FAMILY) && defined(__GNUC__) +inline void MemoryBarrier() { + // See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on + // this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering. + __asm__ __volatile__("" : : : "memory"); +} +#define LEVELDB_HAVE_MEMORY_BARRIER + +// Sun Studio +#elif defined(ARCH_CPU_X86_FAMILY) && defined(__SUNPRO_CC) +inline void MemoryBarrier() { + // See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on + // this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering. + asm volatile("" : : : "memory"); +} +#define LEVELDB_HAVE_MEMORY_BARRIER + +// ARM Linux +#elif defined(ARCH_CPU_ARM_FAMILY) && defined(__linux__) +typedef void (*LinuxKernelMemoryBarrierFunc)(void); +// The Linux ARM kernel provides a highly optimized device-specific memory +// barrier function at a fixed memory address that is mapped in every +// user-level process. +// +// This beats using CPU-specific instructions which are, on single-core +// devices, un-necessary and very costly (e.g. ARMv7-A "dmb" takes more +// than 180ns on a Cortex-A8 like the one on a Nexus One). Benchmarking +// shows that the extra function call cost is completely negligible on +// multi-core devices. +// +inline void MemoryBarrier() { + (*(LinuxKernelMemoryBarrierFunc)0xffff0fa0)(); +} +#define LEVELDB_HAVE_MEMORY_BARRIER + +// ARM64 +#elif defined(ARCH_CPU_ARM64_FAMILY) +inline void MemoryBarrier() { + asm volatile("dmb sy" : : : "memory"); +} +#define LEVELDB_HAVE_MEMORY_BARRIER + +// PPC +#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__) +inline void MemoryBarrier() { + // TODO for some powerpc expert: is there a cheaper suitable variant? + // Perhaps by having separate barriers for acquire and release ops. + asm volatile("sync" : : : "memory"); +} +#define LEVELDB_HAVE_MEMORY_BARRIER + +// MIPS +#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(__GNUC__) +inline void MemoryBarrier() { + __asm__ __volatile__("sync" : : : "memory"); +} +#define LEVELDB_HAVE_MEMORY_BARRIER + +#endif + +// AtomicPointer built using platform-specific MemoryBarrier() +#if defined(LEVELDB_HAVE_MEMORY_BARRIER) +class AtomicPointer { + private: + void* rep_; + public: + AtomicPointer() { } + explicit AtomicPointer(void* p) : rep_(p) {} + inline void* NoBarrier_Load() const { return rep_; } + inline void NoBarrier_Store(void* v) { rep_ = v; } + inline void* Acquire_Load() const { + void* result = rep_; + MemoryBarrier(); + return result; + } + inline void Release_Store(void* v) { + MemoryBarrier(); + rep_ = v; + } +}; + +// AtomicPointer based on +#elif defined(LEVELDB_ATOMIC_PRESENT) +class AtomicPointer { + private: + std::atomic rep_; + public: + AtomicPointer() { } + explicit AtomicPointer(void* v) : rep_(v) { } + inline void* Acquire_Load() const { + return rep_.load(std::memory_order_acquire); + } + inline void Release_Store(void* v) { + rep_.store(v, std::memory_order_release); + } + inline void* NoBarrier_Load() const { + return rep_.load(std::memory_order_relaxed); + } + inline void NoBarrier_Store(void* v) { + rep_.store(v, std::memory_order_relaxed); + } +}; + +// Atomic pointer based on sparc memory barriers +#elif defined(__sparcv9) && defined(__GNUC__) +class AtomicPointer { + private: + void* rep_; + public: + AtomicPointer() { } + explicit AtomicPointer(void* v) : rep_(v) { } + inline void* Acquire_Load() const { + void* val; + __asm__ __volatile__ ( + "ldx [%[rep_]], %[val] \n\t" + "membar #LoadLoad|#LoadStore \n\t" + : [val] "=r" (val) + : [rep_] "r" (&rep_) + : "memory"); + return val; + } + inline void Release_Store(void* v) { + __asm__ __volatile__ ( + "membar #LoadStore|#StoreStore \n\t" + "stx %[v], [%[rep_]] \n\t" + : + : [rep_] "r" (&rep_), [v] "r" (v) + : "memory"); + } + inline void* NoBarrier_Load() const { return rep_; } + inline void NoBarrier_Store(void* v) { rep_ = v; } +}; + +// Atomic pointer based on ia64 acq/rel +#elif defined(__ia64) && defined(__GNUC__) +class AtomicPointer { + private: + void* rep_; + public: + AtomicPointer() { } + explicit AtomicPointer(void* v) : rep_(v) { } + inline void* Acquire_Load() const { + void* val ; + __asm__ __volatile__ ( + "ld8.acq %[val] = [%[rep_]] \n\t" + : [val] "=r" (val) + : [rep_] "r" (&rep_) + : "memory" + ); + return val; + } + inline void Release_Store(void* v) { + __asm__ __volatile__ ( + "st8.rel [%[rep_]] = %[v] \n\t" + : + : [rep_] "r" (&rep_), [v] "r" (v) + : "memory" + ); + } + inline void* NoBarrier_Load() const { return rep_; } + inline void NoBarrier_Store(void* v) { rep_ = v; } +}; + +// We have neither MemoryBarrier(), nor +#else +#error Please implement AtomicPointer for this platform. + +#endif + +#undef LEVELDB_HAVE_MEMORY_BARRIER +#undef ARCH_CPU_X86_FAMILY +#undef ARCH_CPU_ARM_FAMILY +#undef ARCH_CPU_ARM64_FAMILY +#undef ARCH_CPU_PPC_FAMILY + +} // namespace port +} // namespace leveldb + +#endif // PORT_ATOMIC_POINTER_H_ diff --git a/leveldb-library/port/port.h b/leveldb-library/port/port.h new file mode 100644 index 0000000..e667db4 --- /dev/null +++ b/leveldb-library/port/port.h @@ -0,0 +1,19 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_PORT_PORT_H_ +#define STORAGE_LEVELDB_PORT_PORT_H_ + +#include + +// Include the appropriate platform specific file below. If you are +// porting to a new platform, see "port_example.h" for documentation +// of what the new port_.h file must provide. +#if defined(LEVELDB_PLATFORM_POSIX) +# include "port/port_posix.h" +#elif defined(LEVELDB_PLATFORM_CHROMIUM) +# include "port/port_chromium.h" +#endif + +#endif // STORAGE_LEVELDB_PORT_PORT_H_ diff --git a/leveldb-library/port/port_example.h b/leveldb-library/port/port_example.h new file mode 100644 index 0000000..97bd669 --- /dev/null +++ b/leveldb-library/port/port_example.h @@ -0,0 +1,141 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// This file contains the specification, but not the implementations, +// of the types/operations/etc. that should be defined by a platform +// specific port_.h file. Use this file as a reference for +// how to port this package to a new platform. + +#ifndef STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ +#define STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ + +namespace leveldb { +namespace port { + +// TODO(jorlow): Many of these belong more in the environment class rather than +// here. We should try moving them and see if it affects perf. + +// The following boolean constant must be true on a little-endian machine +// and false otherwise. +static const bool kLittleEndian = true /* or some other expression */; + +// ------------------ Threading ------------------- + +// A Mutex represents an exclusive lock. +class Mutex { + public: + Mutex(); + ~Mutex(); + + // Lock the mutex. Waits until other lockers have exited. + // Will deadlock if the mutex is already locked by this thread. + void Lock(); + + // Unlock the mutex. + // REQUIRES: This mutex was locked by this thread. + void Unlock(); + + // Optionally crash if this thread does not hold this mutex. + // The implementation must be fast, especially if NDEBUG is + // defined. The implementation is allowed to skip all checks. + void AssertHeld(); +}; + +class CondVar { + public: + explicit CondVar(Mutex* mu); + ~CondVar(); + + // Atomically release *mu and block on this condition variable until + // either a call to SignalAll(), or a call to Signal() that picks + // this thread to wakeup. + // REQUIRES: this thread holds *mu + void Wait(); + + // If there are some threads waiting, wake up at least one of them. + void Signal(); + + // Wake up all waiting threads. + void SignallAll(); +}; + +// Thread-safe initialization. +// Used as follows: +// static port::OnceType init_control = LEVELDB_ONCE_INIT; +// static void Initializer() { ... do something ...; } +// ... +// port::InitOnce(&init_control, &Initializer); +typedef intptr_t OnceType; +#define LEVELDB_ONCE_INIT 0 +extern void InitOnce(port::OnceType*, void (*initializer)()); + +// A type that holds a pointer that can be read or written atomically +// (i.e., without word-tearing.) +class AtomicPointer { + private: + intptr_t rep_; + public: + // Initialize to arbitrary value + AtomicPointer(); + + // Initialize to hold v + explicit AtomicPointer(void* v) : rep_(v) { } + + // Read and return the stored pointer with the guarantee that no + // later memory access (read or write) by this thread can be + // reordered ahead of this read. + void* Acquire_Load() const; + + // Set v as the stored pointer with the guarantee that no earlier + // memory access (read or write) by this thread can be reordered + // after this store. + void Release_Store(void* v); + + // Read the stored pointer with no ordering guarantees. + void* NoBarrier_Load() const; + + // Set va as the stored pointer with no ordering guarantees. + void NoBarrier_Store(void* v); +}; + +// ------------------ Compression ------------------- + +// Store the snappy compression of "input[0,input_length-1]" in *output. +// Returns false if snappy is not supported by this port. +extern bool Snappy_Compress(const char* input, size_t input_length, + std::string* output); + +// If input[0,input_length-1] looks like a valid snappy compressed +// buffer, store the size of the uncompressed data in *result and +// return true. Else return false. +extern bool Snappy_GetUncompressedLength(const char* input, size_t length, + size_t* result); + +// Attempt to snappy uncompress input[0,input_length-1] into *output. +// Returns true if successful, false if the input is invalid lightweight +// compressed data. +// +// REQUIRES: at least the first "n" bytes of output[] must be writable +// where "n" is the result of a successful call to +// Snappy_GetUncompressedLength. +extern bool Snappy_Uncompress(const char* input_data, size_t input_length, + char* output); + +// ------------------ Miscellaneous ------------------- + +// If heap profiling is not supported, returns false. +// Else repeatedly calls (*func)(arg, data, n) and then returns true. +// The concatenation of all "data[0,n-1]" fragments is the heap profile. +extern bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg); + +// Extend the CRC to include the first n bytes of buf. +// +// Returns zero if the CRC cannot be extended using acceleration, else returns +// the newly extended CRC value (which may also be zero). +uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size); + +} // namespace port +} // namespace leveldb + +#endif // STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ diff --git a/leveldb-library/port/port_posix.cc b/leveldb-library/port/port_posix.cc new file mode 100644 index 0000000..30e8007 --- /dev/null +++ b/leveldb-library/port/port_posix.cc @@ -0,0 +1,53 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "port/port_posix.h" + +#include +#include +#include + +namespace leveldb { +namespace port { + +static void PthreadCall(const char* label, int result) { + if (result != 0) { + fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); + abort(); + } +} + +Mutex::Mutex() { PthreadCall("init mutex", pthread_mutex_init(&mu_, NULL)); } + +Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); } + +void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); } + +void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); } + +CondVar::CondVar(Mutex* mu) + : mu_(mu) { + PthreadCall("init cv", pthread_cond_init(&cv_, NULL)); +} + +CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); } + +void CondVar::Wait() { + PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_)); +} + +void CondVar::Signal() { + PthreadCall("signal", pthread_cond_signal(&cv_)); +} + +void CondVar::SignalAll() { + PthreadCall("broadcast", pthread_cond_broadcast(&cv_)); +} + +void InitOnce(OnceType* once, void (*initializer)()) { + PthreadCall("once", pthread_once(once, initializer)); +} + +} // namespace port +} // namespace leveldb diff --git a/leveldb-library/port/port_posix.h b/leveldb-library/port/port_posix.h new file mode 100644 index 0000000..d67ab68 --- /dev/null +++ b/leveldb-library/port/port_posix.h @@ -0,0 +1,156 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// See port_example.h for documentation for the following types/functions. + +#ifndef STORAGE_LEVELDB_PORT_PORT_POSIX_H_ +#define STORAGE_LEVELDB_PORT_PORT_POSIX_H_ + +#undef PLATFORM_IS_LITTLE_ENDIAN +#if defined(OS_MACOSX) + #include + #if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER) + #define PLATFORM_IS_LITTLE_ENDIAN \ + (__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN) + #endif +#elif defined(OS_SOLARIS) + #include + #ifdef _LITTLE_ENDIAN + #define PLATFORM_IS_LITTLE_ENDIAN true + #else + #define PLATFORM_IS_LITTLE_ENDIAN false + #endif +#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) ||\ + defined(OS_NETBSD) || defined(OS_DRAGONFLYBSD) + #include + #include + #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) +#elif defined(OS_HPUX) + #define PLATFORM_IS_LITTLE_ENDIAN false +#elif defined(OS_ANDROID) + // Due to a bug in the NDK x86 definition, + // _BYTE_ORDER must be used instead of __BYTE_ORDER on Android. + // See http://code.google.com/p/android/issues/detail?id=39824 + #include + #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) +#else + #include +#endif + +#include +#ifdef SNAPPY +#include +#endif +#include +#include +#include "port/atomic_pointer.h" + +#ifndef PLATFORM_IS_LITTLE_ENDIAN +#define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) +#endif + +#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\ + defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\ + defined(OS_ANDROID) || defined(OS_HPUX) || defined(CYGWIN) +// Use fread/fwrite/fflush on platforms without _unlocked variants +#define fread_unlocked fread +#define fwrite_unlocked fwrite +#define fflush_unlocked fflush +#endif + +#if defined(OS_MACOSX) || defined(OS_FREEBSD) ||\ + defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) +// Use fsync() on platforms without fdatasync() +#define fdatasync fsync +#endif + +#if defined(OS_ANDROID) && __ANDROID_API__ < 9 +// fdatasync() was only introduced in API level 9 on Android. Use fsync() +// when targetting older platforms. +#define fdatasync fsync +#endif + +namespace leveldb { +namespace port { + +static const bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN; +#undef PLATFORM_IS_LITTLE_ENDIAN + +class CondVar; + +class Mutex { + public: + Mutex(); + ~Mutex(); + + void Lock(); + void Unlock(); + void AssertHeld() { } + + private: + friend class CondVar; + pthread_mutex_t mu_; + + // No copying + Mutex(const Mutex&); + void operator=(const Mutex&); +}; + +class CondVar { + public: + explicit CondVar(Mutex* mu); + ~CondVar(); + void Wait(); + void Signal(); + void SignalAll(); + private: + pthread_cond_t cv_; + Mutex* mu_; +}; + +typedef pthread_once_t OnceType; +#define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT +extern void InitOnce(OnceType* once, void (*initializer)()); + +inline bool Snappy_Compress(const char* input, size_t length, + ::std::string* output) { +#ifdef SNAPPY + output->resize(snappy::MaxCompressedLength(length)); + size_t outlen; + snappy::RawCompress(input, length, &(*output)[0], &outlen); + output->resize(outlen); + return true; +#endif + + return false; +} + +inline bool Snappy_GetUncompressedLength(const char* input, size_t length, + size_t* result) { +#ifdef SNAPPY + return snappy::GetUncompressedLength(input, length, result); +#else + return false; +#endif +} + +inline bool Snappy_Uncompress(const char* input, size_t length, + char* output) { +#ifdef SNAPPY + return snappy::RawUncompress(input, length, output); +#else + return false; +#endif +} + +inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) { + return false; +} + +uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size); + +} // namespace port +} // namespace leveldb + +#endif // STORAGE_LEVELDB_PORT_PORT_POSIX_H_ diff --git a/leveldb-library/port/port_posix_sse.cc b/leveldb-library/port/port_posix_sse.cc new file mode 100644 index 0000000..1e519ba --- /dev/null +++ b/leveldb-library/port/port_posix_sse.cc @@ -0,0 +1,129 @@ +// Copyright 2016 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A portable implementation of crc32c, optimized to handle +// four bytes at a time. +// +// In a separate source file to allow this accelerated CRC32C function to be +// compiled with the appropriate compiler flags to enable x86 SSE 4.2 +// instructions. + +#include +#include +#include "port/port.h" + +#if defined(LEVELDB_PLATFORM_POSIX_SSE) + +#if defined(_MSC_VER) +#include +#elif defined(__GNUC__) && defined(__SSE4_2__) +#include +#include +#endif + +#endif // defined(LEVELDB_PLATFORM_POSIX_SSE) + +namespace leveldb { +namespace port { + +#if defined(LEVELDB_PLATFORM_POSIX_SSE) + +// Used to fetch a naturally-aligned 32-bit word in little endian byte-order +static inline uint32_t LE_LOAD32(const uint8_t *p) { + // SSE is x86 only, so ensured that |p| is always little-endian. + uint32_t word; + memcpy(&word, p, sizeof(word)); + return word; +} + +#if defined(_M_X64) || defined(__x86_64__) // LE_LOAD64 is only used on x64. + +// Used to fetch a naturally-aligned 64-bit word in little endian byte-order +static inline uint64_t LE_LOAD64(const uint8_t *p) { + uint64_t dword; + memcpy(&dword, p, sizeof(dword)); + return dword; +} + +#endif // defined(_M_X64) || defined(__x86_64__) + +static inline bool HaveSSE42() { +#if defined(_MSC_VER) + int cpu_info[4]; + __cpuid(cpu_info, 1); + return (cpu_info[2] & (1 << 20)) != 0; +#elif defined(__GNUC__) + unsigned int eax, ebx, ecx, edx; + __get_cpuid(1, &eax, &ebx, &ecx, &edx); + return (ecx & (1 << 20)) != 0; +#else + return false; +#endif +} + +#endif // defined(LEVELDB_PLATFORM_POSIX_SSE) + +// For further improvements see Intel publication at: +// http://download.intel.com/design/intarch/papers/323405.pdf +uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) { +#if !defined(LEVELDB_PLATFORM_POSIX_SSE) + return 0; +#else + static bool have = HaveSSE42(); + if (!have) { + return 0; + } + + const uint8_t *p = reinterpret_cast(buf); + const uint8_t *e = p + size; + uint32_t l = crc ^ 0xffffffffu; + +#define STEP1 do { \ + l = _mm_crc32_u8(l, *p++); \ +} while (0) +#define STEP4 do { \ + l = _mm_crc32_u32(l, LE_LOAD32(p)); \ + p += 4; \ +} while (0) +#define STEP8 do { \ + l = _mm_crc32_u64(l, LE_LOAD64(p)); \ + p += 8; \ +} while (0) + + if (size > 16) { + // Process unaligned bytes + for (unsigned int i = reinterpret_cast(p) % 8; i; --i) { + STEP1; + } + + // _mm_crc32_u64 is only available on x64. +#if defined(_M_X64) || defined(__x86_64__) + // Process 8 bytes at a time + while ((e-p) >= 8) { + STEP8; + } + // Process 4 bytes at a time + if ((e-p) >= 4) { + STEP4; + } +#else // !(defined(_M_X64) || defined(__x86_64__)) + // Process 4 bytes at a time + while ((e-p) >= 4) { + STEP4; + } +#endif // defined(_M_X64) || defined(__x86_64__) + } + // Process the last few bytes + while (p != e) { + STEP1; + } +#undef STEP8 +#undef STEP4 +#undef STEP1 + return l ^ 0xffffffffu; +#endif // defined(LEVELDB_PLATFORM_POSIX_SSE) +} + +} // namespace port +} // namespace leveldb diff --git a/leveldb-library/port/thread_annotations.h b/leveldb-library/port/thread_annotations.h new file mode 100644 index 0000000..9470ef5 --- /dev/null +++ b/leveldb-library/port/thread_annotations.h @@ -0,0 +1,60 @@ +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_ +#define STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_ + +// Some environments provide custom macros to aid in static thread-safety +// analysis. Provide empty definitions of such macros unless they are already +// defined. + +#ifndef EXCLUSIVE_LOCKS_REQUIRED +#define EXCLUSIVE_LOCKS_REQUIRED(...) +#endif + +#ifndef SHARED_LOCKS_REQUIRED +#define SHARED_LOCKS_REQUIRED(...) +#endif + +#ifndef LOCKS_EXCLUDED +#define LOCKS_EXCLUDED(...) +#endif + +#ifndef LOCK_RETURNED +#define LOCK_RETURNED(x) +#endif + +#ifndef LOCKABLE +#define LOCKABLE +#endif + +#ifndef SCOPED_LOCKABLE +#define SCOPED_LOCKABLE +#endif + +#ifndef EXCLUSIVE_LOCK_FUNCTION +#define EXCLUSIVE_LOCK_FUNCTION(...) +#endif + +#ifndef SHARED_LOCK_FUNCTION +#define SHARED_LOCK_FUNCTION(...) +#endif + +#ifndef EXCLUSIVE_TRYLOCK_FUNCTION +#define EXCLUSIVE_TRYLOCK_FUNCTION(...) +#endif + +#ifndef SHARED_TRYLOCK_FUNCTION +#define SHARED_TRYLOCK_FUNCTION(...) +#endif + +#ifndef UNLOCK_FUNCTION +#define UNLOCK_FUNCTION(...) +#endif + +#ifndef NO_THREAD_SAFETY_ANALYSIS +#define NO_THREAD_SAFETY_ANALYSIS +#endif + +#endif // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H_ diff --git a/leveldb-library/table/block.cc b/leveldb-library/table/block.cc new file mode 100644 index 0000000..43e402c --- /dev/null +++ b/leveldb-library/table/block.cc @@ -0,0 +1,268 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Decodes the blocks generated by block_builder.cc. + +#include "table/block.h" + +#include +#include +#include "leveldb/comparator.h" +#include "table/format.h" +#include "util/coding.h" +#include "util/logging.h" + +namespace leveldb { + +inline uint32_t Block::NumRestarts() const { + assert(size_ >= sizeof(uint32_t)); + return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); +} + +Block::Block(const BlockContents& contents) + : data_(contents.data.data()), + size_(contents.data.size()), + owned_(contents.heap_allocated) { + if (size_ < sizeof(uint32_t)) { + size_ = 0; // Error marker + } else { + size_t max_restarts_allowed = (size_-sizeof(uint32_t)) / sizeof(uint32_t); + if (NumRestarts() > max_restarts_allowed) { + // The size is too small for NumRestarts() + size_ = 0; + } else { + restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); + } + } +} + +Block::~Block() { + if (owned_) { + delete[] data_; + } +} + +// Helper routine: decode the next block entry starting at "p", +// storing the number of shared key bytes, non_shared key bytes, +// and the length of the value in "*shared", "*non_shared", and +// "*value_length", respectively. Will not dereference past "limit". +// +// If any errors are detected, returns NULL. Otherwise, returns a +// pointer to the key delta (just past the three decoded values). +static inline const char* DecodeEntry(const char* p, const char* limit, + uint32_t* shared, + uint32_t* non_shared, + uint32_t* value_length) { + if (limit - p < 3) return NULL; + *shared = reinterpret_cast(p)[0]; + *non_shared = reinterpret_cast(p)[1]; + *value_length = reinterpret_cast(p)[2]; + if ((*shared | *non_shared | *value_length) < 128) { + // Fast path: all three values are encoded in one byte each + p += 3; + } else { + if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL; + if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL; + if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL; + } + + if (static_cast(limit - p) < (*non_shared + *value_length)) { + return NULL; + } + return p; +} + +class Block::Iter : public Iterator { + private: + const Comparator* const comparator_; + const char* const data_; // underlying block contents + uint32_t const restarts_; // Offset of restart array (list of fixed32) + uint32_t const num_restarts_; // Number of uint32_t entries in restart array + + // current_ is offset in data_ of current entry. >= restarts_ if !Valid + uint32_t current_; + uint32_t restart_index_; // Index of restart block in which current_ falls + std::string key_; + Slice value_; + Status status_; + + inline int Compare(const Slice& a, const Slice& b) const { + return comparator_->Compare(a, b); + } + + // Return the offset in data_ just past the end of the current entry. + inline uint32_t NextEntryOffset() const { + return (value_.data() + value_.size()) - data_; + } + + uint32_t GetRestartPoint(uint32_t index) { + assert(index < num_restarts_); + return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); + } + + void SeekToRestartPoint(uint32_t index) { + key_.clear(); + restart_index_ = index; + // current_ will be fixed by ParseNextKey(); + + // ParseNextKey() starts at the end of value_, so set value_ accordingly + uint32_t offset = GetRestartPoint(index); + value_ = Slice(data_ + offset, 0); + } + + public: + Iter(const Comparator* comparator, + const char* data, + uint32_t restarts, + uint32_t num_restarts) + : comparator_(comparator), + data_(data), + restarts_(restarts), + num_restarts_(num_restarts), + current_(restarts_), + restart_index_(num_restarts_) { + assert(num_restarts_ > 0); + } + + virtual bool Valid() const { return current_ < restarts_; } + virtual Status status() const { return status_; } + virtual Slice key() const { + assert(Valid()); + return key_; + } + virtual Slice value() const { + assert(Valid()); + return value_; + } + + virtual void Next() { + assert(Valid()); + ParseNextKey(); + } + + virtual void Prev() { + assert(Valid()); + + // Scan backwards to a restart point before current_ + const uint32_t original = current_; + while (GetRestartPoint(restart_index_) >= original) { + if (restart_index_ == 0) { + // No more entries + current_ = restarts_; + restart_index_ = num_restarts_; + return; + } + restart_index_--; + } + + SeekToRestartPoint(restart_index_); + do { + // Loop until end of current entry hits the start of original entry + } while (ParseNextKey() && NextEntryOffset() < original); + } + + virtual void Seek(const Slice& target) { + // Binary search in restart array to find the last restart point + // with a key < target + uint32_t left = 0; + uint32_t right = num_restarts_ - 1; + while (left < right) { + uint32_t mid = (left + right + 1) / 2; + uint32_t region_offset = GetRestartPoint(mid); + uint32_t shared, non_shared, value_length; + const char* key_ptr = DecodeEntry(data_ + region_offset, + data_ + restarts_, + &shared, &non_shared, &value_length); + if (key_ptr == NULL || (shared != 0)) { + CorruptionError(); + return; + } + Slice mid_key(key_ptr, non_shared); + if (Compare(mid_key, target) < 0) { + // Key at "mid" is smaller than "target". Therefore all + // blocks before "mid" are uninteresting. + left = mid; + } else { + // Key at "mid" is >= "target". Therefore all blocks at or + // after "mid" are uninteresting. + right = mid - 1; + } + } + + // Linear search (within restart block) for first key >= target + SeekToRestartPoint(left); + while (true) { + if (!ParseNextKey()) { + return; + } + if (Compare(key_, target) >= 0) { + return; + } + } + } + + virtual void SeekToFirst() { + SeekToRestartPoint(0); + ParseNextKey(); + } + + virtual void SeekToLast() { + SeekToRestartPoint(num_restarts_ - 1); + while (ParseNextKey() && NextEntryOffset() < restarts_) { + // Keep skipping + } + } + + private: + void CorruptionError() { + current_ = restarts_; + restart_index_ = num_restarts_; + status_ = Status::Corruption("bad entry in block"); + key_.clear(); + value_.clear(); + } + + bool ParseNextKey() { + current_ = NextEntryOffset(); + const char* p = data_ + current_; + const char* limit = data_ + restarts_; // Restarts come right after data + if (p >= limit) { + // No more entries to return. Mark as invalid. + current_ = restarts_; + restart_index_ = num_restarts_; + return false; + } + + // Decode next entry + uint32_t shared, non_shared, value_length; + p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); + if (p == NULL || key_.size() < shared) { + CorruptionError(); + return false; + } else { + key_.resize(shared); + key_.append(p, non_shared); + value_ = Slice(p + non_shared, value_length); + while (restart_index_ + 1 < num_restarts_ && + GetRestartPoint(restart_index_ + 1) < current_) { + ++restart_index_; + } + return true; + } + } +}; + +Iterator* Block::NewIterator(const Comparator* cmp) { + if (size_ < sizeof(uint32_t)) { + return NewErrorIterator(Status::Corruption("bad block contents")); + } + const uint32_t num_restarts = NumRestarts(); + if (num_restarts == 0) { + return NewEmptyIterator(); + } else { + return new Iter(cmp, data_, restart_offset_, num_restarts); + } +} + +} // namespace leveldb diff --git a/leveldb-library/table/block.h b/leveldb-library/table/block.h new file mode 100644 index 0000000..2493eb9 --- /dev/null +++ b/leveldb-library/table/block.h @@ -0,0 +1,44 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_TABLE_BLOCK_H_ +#define STORAGE_LEVELDB_TABLE_BLOCK_H_ + +#include +#include +#include "leveldb/iterator.h" + +namespace leveldb { + +struct BlockContents; +class Comparator; + +class Block { + public: + // Initialize the block with the specified contents. + explicit Block(const BlockContents& contents); + + ~Block(); + + size_t size() const { return size_; } + Iterator* NewIterator(const Comparator* comparator); + + private: + uint32_t NumRestarts() const; + + const char* data_; + size_t size_; + uint32_t restart_offset_; // Offset in data_ of restart array + bool owned_; // Block owns data_[] + + // No copying allowed + Block(const Block&); + void operator=(const Block&); + + class Iter; +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_TABLE_BLOCK_H_ diff --git a/leveldb-library/table/block_builder.cc b/leveldb-library/table/block_builder.cc new file mode 100644 index 0000000..db660cd --- /dev/null +++ b/leveldb-library/table/block_builder.cc @@ -0,0 +1,109 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// BlockBuilder generates blocks where keys are prefix-compressed: +// +// When we store a key, we drop the prefix shared with the previous +// string. This helps reduce the space requirement significantly. +// Furthermore, once every K keys, we do not apply the prefix +// compression and store the entire key. We call this a "restart +// point". The tail end of the block stores the offsets of all of the +// restart points, and can be used to do a binary search when looking +// for a particular key. Values are stored as-is (without compression) +// immediately following the corresponding key. +// +// An entry for a particular key-value pair has the form: +// shared_bytes: varint32 +// unshared_bytes: varint32 +// value_length: varint32 +// key_delta: char[unshared_bytes] +// value: char[value_length] +// shared_bytes == 0 for restart points. +// +// The trailer of the block has the form: +// restarts: uint32[num_restarts] +// num_restarts: uint32 +// restarts[i] contains the offset within the block of the ith restart point. + +#include "table/block_builder.h" + +#include +#include +#include "leveldb/comparator.h" +#include "leveldb/table_builder.h" +#include "util/coding.h" + +namespace leveldb { + +BlockBuilder::BlockBuilder(const Options* options) + : options_(options), + restarts_(), + counter_(0), + finished_(false) { + assert(options->block_restart_interval >= 1); + restarts_.push_back(0); // First restart point is at offset 0 +} + +void BlockBuilder::Reset() { + buffer_.clear(); + restarts_.clear(); + restarts_.push_back(0); // First restart point is at offset 0 + counter_ = 0; + finished_ = false; + last_key_.clear(); +} + +size_t BlockBuilder::CurrentSizeEstimate() const { + return (buffer_.size() + // Raw data buffer + restarts_.size() * sizeof(uint32_t) + // Restart array + sizeof(uint32_t)); // Restart array length +} + +Slice BlockBuilder::Finish() { + // Append restart array + for (size_t i = 0; i < restarts_.size(); i++) { + PutFixed32(&buffer_, restarts_[i]); + } + PutFixed32(&buffer_, restarts_.size()); + finished_ = true; + return Slice(buffer_); +} + +void BlockBuilder::Add(const Slice& key, const Slice& value) { + Slice last_key_piece(last_key_); + assert(!finished_); + assert(counter_ <= options_->block_restart_interval); + assert(buffer_.empty() // No values yet? + || options_->comparator->Compare(key, last_key_piece) > 0); + size_t shared = 0; + if (counter_ < options_->block_restart_interval) { + // See how much sharing to do with previous string + const size_t min_length = std::min(last_key_piece.size(), key.size()); + while ((shared < min_length) && (last_key_piece[shared] == key[shared])) { + shared++; + } + } else { + // Restart compression + restarts_.push_back(buffer_.size()); + counter_ = 0; + } + const size_t non_shared = key.size() - shared; + + // Add "" to buffer_ + PutVarint32(&buffer_, shared); + PutVarint32(&buffer_, non_shared); + PutVarint32(&buffer_, value.size()); + + // Add string delta to buffer_ followed by value + buffer_.append(key.data() + shared, non_shared); + buffer_.append(value.data(), value.size()); + + // Update state + last_key_.resize(shared); + last_key_.append(key.data() + shared, non_shared); + assert(Slice(last_key_) == key); + counter_++; +} + +} // namespace leveldb diff --git a/leveldb-library/table/block_builder.h b/leveldb-library/table/block_builder.h new file mode 100644 index 0000000..4fbcb33 --- /dev/null +++ b/leveldb-library/table/block_builder.h @@ -0,0 +1,57 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ +#define STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ + +#include + +#include +#include "leveldb/slice.h" + +namespace leveldb { + +struct Options; + +class BlockBuilder { + public: + explicit BlockBuilder(const Options* options); + + // Reset the contents as if the BlockBuilder was just constructed. + void Reset(); + + // REQUIRES: Finish() has not been called since the last call to Reset(). + // REQUIRES: key is larger than any previously added key + void Add(const Slice& key, const Slice& value); + + // Finish building the block and return a slice that refers to the + // block contents. The returned slice will remain valid for the + // lifetime of this builder or until Reset() is called. + Slice Finish(); + + // Returns an estimate of the current (uncompressed) size of the block + // we are building. + size_t CurrentSizeEstimate() const; + + // Return true iff no entries have been added since the last Reset() + bool empty() const { + return buffer_.empty(); + } + + private: + const Options* options_; + std::string buffer_; // Destination buffer + std::vector restarts_; // Restart points + int counter_; // Number of entries emitted since restart + bool finished_; // Has Finish() been called? + std::string last_key_; + + // No copying allowed + BlockBuilder(const BlockBuilder&); + void operator=(const BlockBuilder&); +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ diff --git a/leveldb-library/table/filter_block.cc b/leveldb-library/table/filter_block.cc new file mode 100644 index 0000000..1ed5134 --- /dev/null +++ b/leveldb-library/table/filter_block.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "table/filter_block.h" + +#include "leveldb/filter_policy.h" +#include "util/coding.h" + +namespace leveldb { + +// See doc/table_format.md for an explanation of the filter block format. + +// Generate new filter every 2KB of data +static const size_t kFilterBaseLg = 11; +static const size_t kFilterBase = 1 << kFilterBaseLg; + +FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy) + : policy_(policy) { +} + +void FilterBlockBuilder::StartBlock(uint64_t block_offset) { + uint64_t filter_index = (block_offset / kFilterBase); + assert(filter_index >= filter_offsets_.size()); + while (filter_index > filter_offsets_.size()) { + GenerateFilter(); + } +} + +void FilterBlockBuilder::AddKey(const Slice& key) { + Slice k = key; + start_.push_back(keys_.size()); + keys_.append(k.data(), k.size()); +} + +Slice FilterBlockBuilder::Finish() { + if (!start_.empty()) { + GenerateFilter(); + } + + // Append array of per-filter offsets + const uint32_t array_offset = result_.size(); + for (size_t i = 0; i < filter_offsets_.size(); i++) { + PutFixed32(&result_, filter_offsets_[i]); + } + + PutFixed32(&result_, array_offset); + result_.push_back(kFilterBaseLg); // Save encoding parameter in result + return Slice(result_); +} + +void FilterBlockBuilder::GenerateFilter() { + const size_t num_keys = start_.size(); + if (num_keys == 0) { + // Fast path if there are no keys for this filter + filter_offsets_.push_back(result_.size()); + return; + } + + // Make list of keys from flattened key structure + start_.push_back(keys_.size()); // Simplify length computation + tmp_keys_.resize(num_keys); + for (size_t i = 0; i < num_keys; i++) { + const char* base = keys_.data() + start_[i]; + size_t length = start_[i+1] - start_[i]; + tmp_keys_[i] = Slice(base, length); + } + + // Generate filter for current set of keys and append to result_. + filter_offsets_.push_back(result_.size()); + policy_->CreateFilter(&tmp_keys_[0], static_cast(num_keys), &result_); + + tmp_keys_.clear(); + keys_.clear(); + start_.clear(); +} + +FilterBlockReader::FilterBlockReader(const FilterPolicy* policy, + const Slice& contents) + : policy_(policy), + data_(NULL), + offset_(NULL), + num_(0), + base_lg_(0) { + size_t n = contents.size(); + if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array + base_lg_ = contents[n-1]; + uint32_t last_word = DecodeFixed32(contents.data() + n - 5); + if (last_word > n - 5) return; + data_ = contents.data(); + offset_ = data_ + last_word; + num_ = (n - 5 - last_word) / 4; +} + +bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) { + uint64_t index = block_offset >> base_lg_; + if (index < num_) { + uint32_t start = DecodeFixed32(offset_ + index*4); + uint32_t limit = DecodeFixed32(offset_ + index*4 + 4); + if (start <= limit && limit <= static_cast(offset_ - data_)) { + Slice filter = Slice(data_ + start, limit - start); + return policy_->KeyMayMatch(key, filter); + } else if (start == limit) { + // Empty filters do not match any keys + return false; + } + } + return true; // Errors are treated as potential matches +} + +} diff --git a/leveldb-library/table/filter_block.h b/leveldb-library/table/filter_block.h new file mode 100644 index 0000000..c67d010 --- /dev/null +++ b/leveldb-library/table/filter_block.h @@ -0,0 +1,68 @@ +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A filter block is stored near the end of a Table file. It contains +// filters (e.g., bloom filters) for all data blocks in the table combined +// into a single filter block. + +#ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ +#define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ + +#include +#include +#include +#include +#include "leveldb/slice.h" +#include "util/hash.h" + +namespace leveldb { + +class FilterPolicy; + +// A FilterBlockBuilder is used to construct all of the filters for a +// particular Table. It generates a single string which is stored as +// a special block in the Table. +// +// The sequence of calls to FilterBlockBuilder must match the regexp: +// (StartBlock AddKey*)* Finish +class FilterBlockBuilder { + public: + explicit FilterBlockBuilder(const FilterPolicy*); + + void StartBlock(uint64_t block_offset); + void AddKey(const Slice& key); + Slice Finish(); + + private: + void GenerateFilter(); + + const FilterPolicy* policy_; + std::string keys_; // Flattened key contents + std::vector start_; // Starting index in keys_ of each key + std::string result_; // Filter data computed so far + std::vector tmp_keys_; // policy_->CreateFilter() argument + std::vector filter_offsets_; + + // No copying allowed + FilterBlockBuilder(const FilterBlockBuilder&); + void operator=(const FilterBlockBuilder&); +}; + +class FilterBlockReader { + public: + // REQUIRES: "contents" and *policy must stay live while *this is live. + FilterBlockReader(const FilterPolicy* policy, const Slice& contents); + bool KeyMayMatch(uint64_t block_offset, const Slice& key); + + private: + const FilterPolicy* policy_; + const char* data_; // Pointer to filter data (at block-start) + const char* offset_; // Pointer to beginning of offset array (at block-end) + size_t num_; // Number of entries in offset array + size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file) +}; + +} + +#endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ diff --git a/leveldb-library/table/format.cc b/leveldb-library/table/format.cc new file mode 100644 index 0000000..24e4e02 --- /dev/null +++ b/leveldb-library/table/format.cc @@ -0,0 +1,144 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "table/format.h" + +#include "leveldb/env.h" +#include "port/port.h" +#include "table/block.h" +#include "util/coding.h" +#include "util/crc32c.h" + +namespace leveldb { + +void BlockHandle::EncodeTo(std::string* dst) const { + // Sanity check that all fields have been set + assert(offset_ != ~static_cast(0)); + assert(size_ != ~static_cast(0)); + PutVarint64(dst, offset_); + PutVarint64(dst, size_); +} + +Status BlockHandle::DecodeFrom(Slice* input) { + if (GetVarint64(input, &offset_) && + GetVarint64(input, &size_)) { + return Status::OK(); + } else { + return Status::Corruption("bad block handle"); + } +} + +void Footer::EncodeTo(std::string* dst) const { + const size_t original_size = dst->size(); + metaindex_handle_.EncodeTo(dst); + index_handle_.EncodeTo(dst); + dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding + PutFixed32(dst, static_cast(kTableMagicNumber & 0xffffffffu)); + PutFixed32(dst, static_cast(kTableMagicNumber >> 32)); + assert(dst->size() == original_size + kEncodedLength); + (void)original_size; // Disable unused variable warning. +} + +Status Footer::DecodeFrom(Slice* input) { + const char* magic_ptr = input->data() + kEncodedLength - 8; + const uint32_t magic_lo = DecodeFixed32(magic_ptr); + const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4); + const uint64_t magic = ((static_cast(magic_hi) << 32) | + (static_cast(magic_lo))); + if (magic != kTableMagicNumber) { + return Status::Corruption("not an sstable (bad magic number)"); + } + + Status result = metaindex_handle_.DecodeFrom(input); + if (result.ok()) { + result = index_handle_.DecodeFrom(input); + } + if (result.ok()) { + // We skip over any leftover data (just padding for now) in "input" + const char* end = magic_ptr + 8; + *input = Slice(end, input->data() + input->size() - end); + } + return result; +} + +Status ReadBlock(RandomAccessFile* file, + const ReadOptions& options, + const BlockHandle& handle, + BlockContents* result) { + result->data = Slice(); + result->cachable = false; + result->heap_allocated = false; + + // Read the block contents as well as the type/crc footer. + // See table_builder.cc for the code that built this structure. + size_t n = static_cast(handle.size()); + char* buf = new char[n + kBlockTrailerSize]; + Slice contents; + Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf); + if (!s.ok()) { + delete[] buf; + return s; + } + if (contents.size() != n + kBlockTrailerSize) { + delete[] buf; + return Status::Corruption("truncated block read"); + } + + // Check the crc of the type and the block contents + const char* data = contents.data(); // Pointer to where Read put the data + if (options.verify_checksums) { + const uint32_t crc = crc32c::Unmask(DecodeFixed32(data + n + 1)); + const uint32_t actual = crc32c::Value(data, n + 1); + if (actual != crc) { + delete[] buf; + s = Status::Corruption("block checksum mismatch"); + return s; + } + } + + switch (data[n]) { + case kNoCompression: + if (data != buf) { + // File implementation gave us pointer to some other data. + // Use it directly under the assumption that it will be live + // while the file is open. + delete[] buf; + result->data = Slice(data, n); + result->heap_allocated = false; + result->cachable = false; // Do not double-cache + } else { + result->data = Slice(buf, n); + result->heap_allocated = true; + result->cachable = true; + } + + // Ok + break; + case kSnappyCompression: { + size_t ulength = 0; + if (!port::Snappy_GetUncompressedLength(data, n, &ulength)) { + delete[] buf; + return Status::Corruption("corrupted compressed block contents"); + } + char* ubuf = new char[ulength]; + if (!port::Snappy_Uncompress(data, n, ubuf)) { + delete[] buf; + delete[] ubuf; + return Status::Corruption("corrupted compressed block contents"); + } + delete[] buf; + result->data = Slice(ubuf, ulength); + result->heap_allocated = true; + result->cachable = true; + break; + } + default: + delete[] buf; + return Status::Corruption("bad block type"); + } + + return Status::OK(); +} + +} // namespace leveldb diff --git a/leveldb-library/table/format.h b/leveldb-library/table/format.h new file mode 100644 index 0000000..6c0b80c --- /dev/null +++ b/leveldb-library/table/format.h @@ -0,0 +1,108 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_TABLE_FORMAT_H_ +#define STORAGE_LEVELDB_TABLE_FORMAT_H_ + +#include +#include +#include "leveldb/slice.h" +#include "leveldb/status.h" +#include "leveldb/table_builder.h" + +namespace leveldb { + +class Block; +class RandomAccessFile; +struct ReadOptions; + +// BlockHandle is a pointer to the extent of a file that stores a data +// block or a meta block. +class BlockHandle { + public: + BlockHandle(); + + // The offset of the block in the file. + uint64_t offset() const { return offset_; } + void set_offset(uint64_t offset) { offset_ = offset; } + + // The size of the stored block + uint64_t size() const { return size_; } + void set_size(uint64_t size) { size_ = size; } + + void EncodeTo(std::string* dst) const; + Status DecodeFrom(Slice* input); + + // Maximum encoding length of a BlockHandle + enum { kMaxEncodedLength = 10 + 10 }; + + private: + uint64_t offset_; + uint64_t size_; +}; + +// Footer encapsulates the fixed information stored at the tail +// end of every table file. +class Footer { + public: + Footer() { } + + // The block handle for the metaindex block of the table + const BlockHandle& metaindex_handle() const { return metaindex_handle_; } + void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; } + + // The block handle for the index block of the table + const BlockHandle& index_handle() const { + return index_handle_; + } + void set_index_handle(const BlockHandle& h) { + index_handle_ = h; + } + + void EncodeTo(std::string* dst) const; + Status DecodeFrom(Slice* input); + + // Encoded length of a Footer. Note that the serialization of a + // Footer will always occupy exactly this many bytes. It consists + // of two block handles and a magic number. + enum { + kEncodedLength = 2*BlockHandle::kMaxEncodedLength + 8 + }; + + private: + BlockHandle metaindex_handle_; + BlockHandle index_handle_; +}; + +// kTableMagicNumber was picked by running +// echo http://code.google.com/p/leveldb/ | sha1sum +// and taking the leading 64 bits. +static const uint64_t kTableMagicNumber = 0xdb4775248b80fb57ull; + +// 1-byte type + 32-bit crc +static const size_t kBlockTrailerSize = 5; + +struct BlockContents { + Slice data; // Actual contents of data + bool cachable; // True iff data can be cached + bool heap_allocated; // True iff caller should delete[] data.data() +}; + +// Read the block identified by "handle" from "file". On failure +// return non-OK. On success fill *result and return OK. +extern Status ReadBlock(RandomAccessFile* file, + const ReadOptions& options, + const BlockHandle& handle, + BlockContents* result); + +// Implementation details follow. Clients should ignore, + +inline BlockHandle::BlockHandle() + : offset_(~static_cast(0)), + size_(~static_cast(0)) { +} + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_TABLE_FORMAT_H_ diff --git a/leveldb-library/table/iterator.cc b/leveldb-library/table/iterator.cc new file mode 100644 index 0000000..3d1c87f --- /dev/null +++ b/leveldb-library/table/iterator.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "leveldb/iterator.h" + +namespace leveldb { + +Iterator::Iterator() { + cleanup_.function = NULL; + cleanup_.next = NULL; +} + +Iterator::~Iterator() { + if (cleanup_.function != NULL) { + (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); + for (Cleanup* c = cleanup_.next; c != NULL; ) { + (*c->function)(c->arg1, c->arg2); + Cleanup* next = c->next; + delete c; + c = next; + } + } +} + +void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { + assert(func != NULL); + Cleanup* c; + if (cleanup_.function == NULL) { + c = &cleanup_; + } else { + c = new Cleanup; + c->next = cleanup_.next; + cleanup_.next = c; + } + c->function = func; + c->arg1 = arg1; + c->arg2 = arg2; +} + +namespace { +class EmptyIterator : public Iterator { + public: + EmptyIterator(const Status& s) : status_(s) { } + virtual bool Valid() const { return false; } + virtual void Seek(const Slice& target) { } + virtual void SeekToFirst() { } + virtual void SeekToLast() { } + virtual void Next() { assert(false); } + virtual void Prev() { assert(false); } + Slice key() const { assert(false); return Slice(); } + Slice value() const { assert(false); return Slice(); } + virtual Status status() const { return status_; } + private: + Status status_; +}; +} // namespace + +Iterator* NewEmptyIterator() { + return new EmptyIterator(Status::OK()); +} + +Iterator* NewErrorIterator(const Status& status) { + return new EmptyIterator(status); +} + +} // namespace leveldb diff --git a/leveldb-library/table/iterator_wrapper.h b/leveldb-library/table/iterator_wrapper.h new file mode 100644 index 0000000..f410c3f --- /dev/null +++ b/leveldb-library/table/iterator_wrapper.h @@ -0,0 +1,66 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ +#define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ + +#include "leveldb/iterator.h" +#include "leveldb/slice.h" + +namespace leveldb { + +// A internal wrapper class with an interface similar to Iterator that +// caches the valid() and key() results for an underlying iterator. +// This can help avoid virtual function calls and also gives better +// cache locality. +class IteratorWrapper { + public: + IteratorWrapper(): iter_(NULL), valid_(false) { } + explicit IteratorWrapper(Iterator* iter): iter_(NULL) { + Set(iter); + } + ~IteratorWrapper() { delete iter_; } + Iterator* iter() const { return iter_; } + + // Takes ownership of "iter" and will delete it when destroyed, or + // when Set() is invoked again. + void Set(Iterator* iter) { + delete iter_; + iter_ = iter; + if (iter_ == NULL) { + valid_ = false; + } else { + Update(); + } + } + + + // Iterator interface methods + bool Valid() const { return valid_; } + Slice key() const { assert(Valid()); return key_; } + Slice value() const { assert(Valid()); return iter_->value(); } + // Methods below require iter() != NULL + Status status() const { assert(iter_); return iter_->status(); } + void Next() { assert(iter_); iter_->Next(); Update(); } + void Prev() { assert(iter_); iter_->Prev(); Update(); } + void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } + void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } + void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } + + private: + void Update() { + valid_ = iter_->Valid(); + if (valid_) { + key_ = iter_->key(); + } + } + + Iterator* iter_; + bool valid_; + Slice key_; +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ diff --git a/leveldb-library/table/merger.cc b/leveldb-library/table/merger.cc new file mode 100644 index 0000000..2dde4dc --- /dev/null +++ b/leveldb-library/table/merger.cc @@ -0,0 +1,197 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "table/merger.h" + +#include "leveldb/comparator.h" +#include "leveldb/iterator.h" +#include "table/iterator_wrapper.h" + +namespace leveldb { + +namespace { +class MergingIterator : public Iterator { + public: + MergingIterator(const Comparator* comparator, Iterator** children, int n) + : comparator_(comparator), + children_(new IteratorWrapper[n]), + n_(n), + current_(NULL), + direction_(kForward) { + for (int i = 0; i < n; i++) { + children_[i].Set(children[i]); + } + } + + virtual ~MergingIterator() { + delete[] children_; + } + + virtual bool Valid() const { + return (current_ != NULL); + } + + virtual void SeekToFirst() { + for (int i = 0; i < n_; i++) { + children_[i].SeekToFirst(); + } + FindSmallest(); + direction_ = kForward; + } + + virtual void SeekToLast() { + for (int i = 0; i < n_; i++) { + children_[i].SeekToLast(); + } + FindLargest(); + direction_ = kReverse; + } + + virtual void Seek(const Slice& target) { + for (int i = 0; i < n_; i++) { + children_[i].Seek(target); + } + FindSmallest(); + direction_ = kForward; + } + + virtual void Next() { + assert(Valid()); + + // Ensure that all children are positioned after key(). + // If we are moving in the forward direction, it is already + // true for all of the non-current_ children since current_ is + // the smallest child and key() == current_->key(). Otherwise, + // we explicitly position the non-current_ children. + if (direction_ != kForward) { + for (int i = 0; i < n_; i++) { + IteratorWrapper* child = &children_[i]; + if (child != current_) { + child->Seek(key()); + if (child->Valid() && + comparator_->Compare(key(), child->key()) == 0) { + child->Next(); + } + } + } + direction_ = kForward; + } + + current_->Next(); + FindSmallest(); + } + + virtual void Prev() { + assert(Valid()); + + // Ensure that all children are positioned before key(). + // If we are moving in the reverse direction, it is already + // true for all of the non-current_ children since current_ is + // the largest child and key() == current_->key(). Otherwise, + // we explicitly position the non-current_ children. + if (direction_ != kReverse) { + for (int i = 0; i < n_; i++) { + IteratorWrapper* child = &children_[i]; + if (child != current_) { + child->Seek(key()); + if (child->Valid()) { + // Child is at first entry >= key(). Step back one to be < key() + child->Prev(); + } else { + // Child has no entries >= key(). Position at last entry. + child->SeekToLast(); + } + } + } + direction_ = kReverse; + } + + current_->Prev(); + FindLargest(); + } + + virtual Slice key() const { + assert(Valid()); + return current_->key(); + } + + virtual Slice value() const { + assert(Valid()); + return current_->value(); + } + + virtual Status status() const { + Status status; + for (int i = 0; i < n_; i++) { + status = children_[i].status(); + if (!status.ok()) { + break; + } + } + return status; + } + + private: + void FindSmallest(); + void FindLargest(); + + // We might want to use a heap in case there are lots of children. + // For now we use a simple array since we expect a very small number + // of children in leveldb. + const Comparator* comparator_; + IteratorWrapper* children_; + int n_; + IteratorWrapper* current_; + + // Which direction is the iterator moving? + enum Direction { + kForward, + kReverse + }; + Direction direction_; +}; + +void MergingIterator::FindSmallest() { + IteratorWrapper* smallest = NULL; + for (int i = 0; i < n_; i++) { + IteratorWrapper* child = &children_[i]; + if (child->Valid()) { + if (smallest == NULL) { + smallest = child; + } else if (comparator_->Compare(child->key(), smallest->key()) < 0) { + smallest = child; + } + } + } + current_ = smallest; +} + +void MergingIterator::FindLargest() { + IteratorWrapper* largest = NULL; + for (int i = n_-1; i >= 0; i--) { + IteratorWrapper* child = &children_[i]; + if (child->Valid()) { + if (largest == NULL) { + largest = child; + } else if (comparator_->Compare(child->key(), largest->key()) > 0) { + largest = child; + } + } + } + current_ = largest; +} +} // namespace + +Iterator* NewMergingIterator(const Comparator* cmp, Iterator** list, int n) { + assert(n >= 0); + if (n == 0) { + return NewEmptyIterator(); + } else if (n == 1) { + return list[0]; + } else { + return new MergingIterator(cmp, list, n); + } +} + +} // namespace leveldb diff --git a/leveldb-library/table/merger.h b/leveldb-library/table/merger.h new file mode 100644 index 0000000..91ddd80 --- /dev/null +++ b/leveldb-library/table/merger.h @@ -0,0 +1,26 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_TABLE_MERGER_H_ +#define STORAGE_LEVELDB_TABLE_MERGER_H_ + +namespace leveldb { + +class Comparator; +class Iterator; + +// Return an iterator that provided the union of the data in +// children[0,n-1]. Takes ownership of the child iterators and +// will delete them when the result iterator is deleted. +// +// The result does no duplicate suppression. I.e., if a particular +// key is present in K child iterators, it will be yielded K times. +// +// REQUIRES: n >= 0 +extern Iterator* NewMergingIterator( + const Comparator* comparator, Iterator** children, int n); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_TABLE_MERGER_H_ diff --git a/leveldb-library/table/table.cc b/leveldb-library/table/table.cc new file mode 100644 index 0000000..decf808 --- /dev/null +++ b/leveldb-library/table/table.cc @@ -0,0 +1,285 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "leveldb/table.h" + +#include "leveldb/cache.h" +#include "leveldb/comparator.h" +#include "leveldb/env.h" +#include "leveldb/filter_policy.h" +#include "leveldb/options.h" +#include "table/block.h" +#include "table/filter_block.h" +#include "table/format.h" +#include "table/two_level_iterator.h" +#include "util/coding.h" + +namespace leveldb { + +struct Table::Rep { + ~Rep() { + delete filter; + delete [] filter_data; + delete index_block; + } + + Options options; + Status status; + RandomAccessFile* file; + uint64_t cache_id; + FilterBlockReader* filter; + const char* filter_data; + + BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer + Block* index_block; +}; + +Status Table::Open(const Options& options, + RandomAccessFile* file, + uint64_t size, + Table** table) { + *table = NULL; + if (size < Footer::kEncodedLength) { + return Status::Corruption("file is too short to be an sstable"); + } + + char footer_space[Footer::kEncodedLength]; + Slice footer_input; + Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength, + &footer_input, footer_space); + if (!s.ok()) return s; + + Footer footer; + s = footer.DecodeFrom(&footer_input); + if (!s.ok()) return s; + + // Read the index block + BlockContents contents; + Block* index_block = NULL; + if (s.ok()) { + ReadOptions opt; + if (options.paranoid_checks) { + opt.verify_checksums = true; + } + s = ReadBlock(file, opt, footer.index_handle(), &contents); + if (s.ok()) { + index_block = new Block(contents); + } + } + + if (s.ok()) { + // We've successfully read the footer and the index block: we're + // ready to serve requests. + Rep* rep = new Table::Rep; + rep->options = options; + rep->file = file; + rep->metaindex_handle = footer.metaindex_handle(); + rep->index_block = index_block; + rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0); + rep->filter_data = NULL; + rep->filter = NULL; + *table = new Table(rep); + (*table)->ReadMeta(footer); + } else { + delete index_block; + } + + return s; +} + +void Table::ReadMeta(const Footer& footer) { + if (rep_->options.filter_policy == NULL) { + return; // Do not need any metadata + } + + // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates + // it is an empty block. + ReadOptions opt; + if (rep_->options.paranoid_checks) { + opt.verify_checksums = true; + } + BlockContents contents; + if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) { + // Do not propagate errors since meta info is not needed for operation + return; + } + Block* meta = new Block(contents); + + Iterator* iter = meta->NewIterator(BytewiseComparator()); + std::string key = "filter."; + key.append(rep_->options.filter_policy->Name()); + iter->Seek(key); + if (iter->Valid() && iter->key() == Slice(key)) { + ReadFilter(iter->value()); + } + delete iter; + delete meta; +} + +void Table::ReadFilter(const Slice& filter_handle_value) { + Slice v = filter_handle_value; + BlockHandle filter_handle; + if (!filter_handle.DecodeFrom(&v).ok()) { + return; + } + + // We might want to unify with ReadBlock() if we start + // requiring checksum verification in Table::Open. + ReadOptions opt; + if (rep_->options.paranoid_checks) { + opt.verify_checksums = true; + } + BlockContents block; + if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) { + return; + } + if (block.heap_allocated) { + rep_->filter_data = block.data.data(); // Will need to delete later + } + rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data); +} + +Table::~Table() { + delete rep_; +} + +static void DeleteBlock(void* arg, void* ignored) { + delete reinterpret_cast(arg); +} + +static void DeleteCachedBlock(const Slice& key, void* value) { + Block* block = reinterpret_cast(value); + delete block; +} + +static void ReleaseBlock(void* arg, void* h) { + Cache* cache = reinterpret_cast(arg); + Cache::Handle* handle = reinterpret_cast(h); + cache->Release(handle); +} + +// Convert an index iterator value (i.e., an encoded BlockHandle) +// into an iterator over the contents of the corresponding block. +Iterator* Table::BlockReader(void* arg, + const ReadOptions& options, + const Slice& index_value) { + Table* table = reinterpret_cast(arg); + Cache* block_cache = table->rep_->options.block_cache; + Block* block = NULL; + Cache::Handle* cache_handle = NULL; + + BlockHandle handle; + Slice input = index_value; + Status s = handle.DecodeFrom(&input); + // We intentionally allow extra stuff in index_value so that we + // can add more features in the future. + + if (s.ok()) { + BlockContents contents; + if (block_cache != NULL) { + char cache_key_buffer[16]; + EncodeFixed64(cache_key_buffer, table->rep_->cache_id); + EncodeFixed64(cache_key_buffer+8, handle.offset()); + Slice key(cache_key_buffer, sizeof(cache_key_buffer)); + cache_handle = block_cache->Lookup(key); + if (cache_handle != NULL) { + block = reinterpret_cast(block_cache->Value(cache_handle)); + } else { + s = ReadBlock(table->rep_->file, options, handle, &contents); + if (s.ok()) { + block = new Block(contents); + if (contents.cachable && options.fill_cache) { + cache_handle = block_cache->Insert( + key, block, block->size(), &DeleteCachedBlock); + } + } + } + } else { + s = ReadBlock(table->rep_->file, options, handle, &contents); + if (s.ok()) { + block = new Block(contents); + } + } + } + + Iterator* iter; + if (block != NULL) { + iter = block->NewIterator(table->rep_->options.comparator); + if (cache_handle == NULL) { + iter->RegisterCleanup(&DeleteBlock, block, NULL); + } else { + iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle); + } + } else { + iter = NewErrorIterator(s); + } + return iter; +} + +Iterator* Table::NewIterator(const ReadOptions& options) const { + return NewTwoLevelIterator( + rep_->index_block->NewIterator(rep_->options.comparator), + &Table::BlockReader, const_cast(this), options); +} + +Status Table::InternalGet(const ReadOptions& options, const Slice& k, + void* arg, + void (*saver)(void*, const Slice&, const Slice&)) { + Status s; + Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator); + iiter->Seek(k); + if (iiter->Valid()) { + Slice handle_value = iiter->value(); + FilterBlockReader* filter = rep_->filter; + BlockHandle handle; + if (filter != NULL && + handle.DecodeFrom(&handle_value).ok() && + !filter->KeyMayMatch(handle.offset(), k)) { + // Not found + } else { + Iterator* block_iter = BlockReader(this, options, iiter->value()); + block_iter->Seek(k); + if (block_iter->Valid()) { + (*saver)(arg, block_iter->key(), block_iter->value()); + } + s = block_iter->status(); + delete block_iter; + } + } + if (s.ok()) { + s = iiter->status(); + } + delete iiter; + return s; +} + + +uint64_t Table::ApproximateOffsetOf(const Slice& key) const { + Iterator* index_iter = + rep_->index_block->NewIterator(rep_->options.comparator); + index_iter->Seek(key); + uint64_t result; + if (index_iter->Valid()) { + BlockHandle handle; + Slice input = index_iter->value(); + Status s = handle.DecodeFrom(&input); + if (s.ok()) { + result = handle.offset(); + } else { + // Strange: we can't decode the block handle in the index block. + // We'll just return the offset of the metaindex block, which is + // close to the whole file size for this case. + result = rep_->metaindex_handle.offset(); + } + } else { + // key is past the last key in the file. Approximate the offset + // by returning the offset of the metaindex block (which is + // right near the end of the file). + result = rep_->metaindex_handle.offset(); + } + delete index_iter; + return result; +} + +} // namespace leveldb diff --git a/leveldb-library/table/table_builder.cc b/leveldb-library/table/table_builder.cc new file mode 100644 index 0000000..62002c8 --- /dev/null +++ b/leveldb-library/table/table_builder.cc @@ -0,0 +1,270 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "leveldb/table_builder.h" + +#include +#include "leveldb/comparator.h" +#include "leveldb/env.h" +#include "leveldb/filter_policy.h" +#include "leveldb/options.h" +#include "table/block_builder.h" +#include "table/filter_block.h" +#include "table/format.h" +#include "util/coding.h" +#include "util/crc32c.h" + +namespace leveldb { + +struct TableBuilder::Rep { + Options options; + Options index_block_options; + WritableFile* file; + uint64_t offset; + Status status; + BlockBuilder data_block; + BlockBuilder index_block; + std::string last_key; + int64_t num_entries; + bool closed; // Either Finish() or Abandon() has been called. + FilterBlockBuilder* filter_block; + + // We do not emit the index entry for a block until we have seen the + // first key for the next data block. This allows us to use shorter + // keys in the index block. For example, consider a block boundary + // between the keys "the quick brown fox" and "the who". We can use + // "the r" as the key for the index block entry since it is >= all + // entries in the first block and < all entries in subsequent + // blocks. + // + // Invariant: r->pending_index_entry is true only if data_block is empty. + bool pending_index_entry; + BlockHandle pending_handle; // Handle to add to index block + + std::string compressed_output; + + Rep(const Options& opt, WritableFile* f) + : options(opt), + index_block_options(opt), + file(f), + offset(0), + data_block(&options), + index_block(&index_block_options), + num_entries(0), + closed(false), + filter_block(opt.filter_policy == NULL ? NULL + : new FilterBlockBuilder(opt.filter_policy)), + pending_index_entry(false) { + index_block_options.block_restart_interval = 1; + } +}; + +TableBuilder::TableBuilder(const Options& options, WritableFile* file) + : rep_(new Rep(options, file)) { + if (rep_->filter_block != NULL) { + rep_->filter_block->StartBlock(0); + } +} + +TableBuilder::~TableBuilder() { + assert(rep_->closed); // Catch errors where caller forgot to call Finish() + delete rep_->filter_block; + delete rep_; +} + +Status TableBuilder::ChangeOptions(const Options& options) { + // Note: if more fields are added to Options, update + // this function to catch changes that should not be allowed to + // change in the middle of building a Table. + if (options.comparator != rep_->options.comparator) { + return Status::InvalidArgument("changing comparator while building table"); + } + + // Note that any live BlockBuilders point to rep_->options and therefore + // will automatically pick up the updated options. + rep_->options = options; + rep_->index_block_options = options; + rep_->index_block_options.block_restart_interval = 1; + return Status::OK(); +} + +void TableBuilder::Add(const Slice& key, const Slice& value) { + Rep* r = rep_; + assert(!r->closed); + if (!ok()) return; + if (r->num_entries > 0) { + assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0); + } + + if (r->pending_index_entry) { + assert(r->data_block.empty()); + r->options.comparator->FindShortestSeparator(&r->last_key, key); + std::string handle_encoding; + r->pending_handle.EncodeTo(&handle_encoding); + r->index_block.Add(r->last_key, Slice(handle_encoding)); + r->pending_index_entry = false; + } + + if (r->filter_block != NULL) { + r->filter_block->AddKey(key); + } + + r->last_key.assign(key.data(), key.size()); + r->num_entries++; + r->data_block.Add(key, value); + + const size_t estimated_block_size = r->data_block.CurrentSizeEstimate(); + if (estimated_block_size >= r->options.block_size) { + Flush(); + } +} + +void TableBuilder::Flush() { + Rep* r = rep_; + assert(!r->closed); + if (!ok()) return; + if (r->data_block.empty()) return; + assert(!r->pending_index_entry); + WriteBlock(&r->data_block, &r->pending_handle); + if (ok()) { + r->pending_index_entry = true; + r->status = r->file->Flush(); + } + if (r->filter_block != NULL) { + r->filter_block->StartBlock(r->offset); + } +} + +void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) { + // File format contains a sequence of blocks where each block has: + // block_data: uint8[n] + // type: uint8 + // crc: uint32 + assert(ok()); + Rep* r = rep_; + Slice raw = block->Finish(); + + Slice block_contents; + CompressionType type = r->options.compression; + // TODO(postrelease): Support more compression options: zlib? + switch (type) { + case kNoCompression: + block_contents = raw; + break; + + case kSnappyCompression: { + std::string* compressed = &r->compressed_output; + if (port::Snappy_Compress(raw.data(), raw.size(), compressed) && + compressed->size() < raw.size() - (raw.size() / 8u)) { + block_contents = *compressed; + } else { + // Snappy not supported, or compressed less than 12.5%, so just + // store uncompressed form + block_contents = raw; + type = kNoCompression; + } + break; + } + } + WriteRawBlock(block_contents, type, handle); + r->compressed_output.clear(); + block->Reset(); +} + +void TableBuilder::WriteRawBlock(const Slice& block_contents, + CompressionType type, + BlockHandle* handle) { + Rep* r = rep_; + handle->set_offset(r->offset); + handle->set_size(block_contents.size()); + r->status = r->file->Append(block_contents); + if (r->status.ok()) { + char trailer[kBlockTrailerSize]; + trailer[0] = type; + uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size()); + crc = crc32c::Extend(crc, trailer, 1); // Extend crc to cover block type + EncodeFixed32(trailer+1, crc32c::Mask(crc)); + r->status = r->file->Append(Slice(trailer, kBlockTrailerSize)); + if (r->status.ok()) { + r->offset += block_contents.size() + kBlockTrailerSize; + } + } +} + +Status TableBuilder::status() const { + return rep_->status; +} + +Status TableBuilder::Finish() { + Rep* r = rep_; + Flush(); + assert(!r->closed); + r->closed = true; + + BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle; + + // Write filter block + if (ok() && r->filter_block != NULL) { + WriteRawBlock(r->filter_block->Finish(), kNoCompression, + &filter_block_handle); + } + + // Write metaindex block + if (ok()) { + BlockBuilder meta_index_block(&r->options); + if (r->filter_block != NULL) { + // Add mapping from "filter.Name" to location of filter data + std::string key = "filter."; + key.append(r->options.filter_policy->Name()); + std::string handle_encoding; + filter_block_handle.EncodeTo(&handle_encoding); + meta_index_block.Add(key, handle_encoding); + } + + // TODO(postrelease): Add stats and other meta blocks + WriteBlock(&meta_index_block, &metaindex_block_handle); + } + + // Write index block + if (ok()) { + if (r->pending_index_entry) { + r->options.comparator->FindShortSuccessor(&r->last_key); + std::string handle_encoding; + r->pending_handle.EncodeTo(&handle_encoding); + r->index_block.Add(r->last_key, Slice(handle_encoding)); + r->pending_index_entry = false; + } + WriteBlock(&r->index_block, &index_block_handle); + } + + // Write footer + if (ok()) { + Footer footer; + footer.set_metaindex_handle(metaindex_block_handle); + footer.set_index_handle(index_block_handle); + std::string footer_encoding; + footer.EncodeTo(&footer_encoding); + r->status = r->file->Append(footer_encoding); + if (r->status.ok()) { + r->offset += footer_encoding.size(); + } + } + return r->status; +} + +void TableBuilder::Abandon() { + Rep* r = rep_; + assert(!r->closed); + r->closed = true; +} + +uint64_t TableBuilder::NumEntries() const { + return rep_->num_entries; +} + +uint64_t TableBuilder::FileSize() const { + return rep_->offset; +} + +} // namespace leveldb diff --git a/leveldb-library/table/two_level_iterator.cc b/leveldb-library/table/two_level_iterator.cc new file mode 100644 index 0000000..7822eba --- /dev/null +++ b/leveldb-library/table/two_level_iterator.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "table/two_level_iterator.h" + +#include "leveldb/table.h" +#include "table/block.h" +#include "table/format.h" +#include "table/iterator_wrapper.h" + +namespace leveldb { + +namespace { + +typedef Iterator* (*BlockFunction)(void*, const ReadOptions&, const Slice&); + +class TwoLevelIterator: public Iterator { + public: + TwoLevelIterator( + Iterator* index_iter, + BlockFunction block_function, + void* arg, + const ReadOptions& options); + + virtual ~TwoLevelIterator(); + + virtual void Seek(const Slice& target); + virtual void SeekToFirst(); + virtual void SeekToLast(); + virtual void Next(); + virtual void Prev(); + + virtual bool Valid() const { + return data_iter_.Valid(); + } + virtual Slice key() const { + assert(Valid()); + return data_iter_.key(); + } + virtual Slice value() const { + assert(Valid()); + return data_iter_.value(); + } + virtual Status status() const { + // It'd be nice if status() returned a const Status& instead of a Status + if (!index_iter_.status().ok()) { + return index_iter_.status(); + } else if (data_iter_.iter() != NULL && !data_iter_.status().ok()) { + return data_iter_.status(); + } else { + return status_; + } + } + + private: + void SaveError(const Status& s) { + if (status_.ok() && !s.ok()) status_ = s; + } + void SkipEmptyDataBlocksForward(); + void SkipEmptyDataBlocksBackward(); + void SetDataIterator(Iterator* data_iter); + void InitDataBlock(); + + BlockFunction block_function_; + void* arg_; + const ReadOptions options_; + Status status_; + IteratorWrapper index_iter_; + IteratorWrapper data_iter_; // May be NULL + // If data_iter_ is non-NULL, then "data_block_handle_" holds the + // "index_value" passed to block_function_ to create the data_iter_. + std::string data_block_handle_; +}; + +TwoLevelIterator::TwoLevelIterator( + Iterator* index_iter, + BlockFunction block_function, + void* arg, + const ReadOptions& options) + : block_function_(block_function), + arg_(arg), + options_(options), + index_iter_(index_iter), + data_iter_(NULL) { +} + +TwoLevelIterator::~TwoLevelIterator() { +} + +void TwoLevelIterator::Seek(const Slice& target) { + index_iter_.Seek(target); + InitDataBlock(); + if (data_iter_.iter() != NULL) data_iter_.Seek(target); + SkipEmptyDataBlocksForward(); +} + +void TwoLevelIterator::SeekToFirst() { + index_iter_.SeekToFirst(); + InitDataBlock(); + if (data_iter_.iter() != NULL) data_iter_.SeekToFirst(); + SkipEmptyDataBlocksForward(); +} + +void TwoLevelIterator::SeekToLast() { + index_iter_.SeekToLast(); + InitDataBlock(); + if (data_iter_.iter() != NULL) data_iter_.SeekToLast(); + SkipEmptyDataBlocksBackward(); +} + +void TwoLevelIterator::Next() { + assert(Valid()); + data_iter_.Next(); + SkipEmptyDataBlocksForward(); +} + +void TwoLevelIterator::Prev() { + assert(Valid()); + data_iter_.Prev(); + SkipEmptyDataBlocksBackward(); +} + + +void TwoLevelIterator::SkipEmptyDataBlocksForward() { + while (data_iter_.iter() == NULL || !data_iter_.Valid()) { + // Move to next block + if (!index_iter_.Valid()) { + SetDataIterator(NULL); + return; + } + index_iter_.Next(); + InitDataBlock(); + if (data_iter_.iter() != NULL) data_iter_.SeekToFirst(); + } +} + +void TwoLevelIterator::SkipEmptyDataBlocksBackward() { + while (data_iter_.iter() == NULL || !data_iter_.Valid()) { + // Move to next block + if (!index_iter_.Valid()) { + SetDataIterator(NULL); + return; + } + index_iter_.Prev(); + InitDataBlock(); + if (data_iter_.iter() != NULL) data_iter_.SeekToLast(); + } +} + +void TwoLevelIterator::SetDataIterator(Iterator* data_iter) { + if (data_iter_.iter() != NULL) SaveError(data_iter_.status()); + data_iter_.Set(data_iter); +} + +void TwoLevelIterator::InitDataBlock() { + if (!index_iter_.Valid()) { + SetDataIterator(NULL); + } else { + Slice handle = index_iter_.value(); + if (data_iter_.iter() != NULL && handle.compare(data_block_handle_) == 0) { + // data_iter_ is already constructed with this iterator, so + // no need to change anything + } else { + Iterator* iter = (*block_function_)(arg_, options_, handle); + data_block_handle_.assign(handle.data(), handle.size()); + SetDataIterator(iter); + } + } +} + +} // namespace + +Iterator* NewTwoLevelIterator( + Iterator* index_iter, + BlockFunction block_function, + void* arg, + const ReadOptions& options) { + return new TwoLevelIterator(index_iter, block_function, arg, options); +} + +} // namespace leveldb diff --git a/leveldb-library/table/two_level_iterator.h b/leveldb-library/table/two_level_iterator.h new file mode 100644 index 0000000..629ca34 --- /dev/null +++ b/leveldb-library/table/two_level_iterator.h @@ -0,0 +1,34 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ +#define STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ + +#include "leveldb/iterator.h" + +namespace leveldb { + +struct ReadOptions; + +// Return a new two level iterator. A two-level iterator contains an +// index iterator whose values point to a sequence of blocks where +// each block is itself a sequence of key,value pairs. The returned +// two-level iterator yields the concatenation of all key/value pairs +// in the sequence of blocks. Takes ownership of "index_iter" and +// will delete it when no longer needed. +// +// Uses a supplied function to convert an index_iter value into +// an iterator over the contents of the corresponding block. +extern Iterator* NewTwoLevelIterator( + Iterator* index_iter, + Iterator* (*block_function)( + void* arg, + const ReadOptions& options, + const Slice& index_value), + void* arg, + const ReadOptions& options); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ diff --git a/leveldb-library/util/arena.cc b/leveldb-library/util/arena.cc new file mode 100644 index 0000000..7407821 --- /dev/null +++ b/leveldb-library/util/arena.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/arena.h" +#include + +namespace leveldb { + +static const int kBlockSize = 4096; + +Arena::Arena() : memory_usage_(0) { + alloc_ptr_ = NULL; // First allocation will allocate a block + alloc_bytes_remaining_ = 0; +} + +Arena::~Arena() { + for (size_t i = 0; i < blocks_.size(); i++) { + delete[] blocks_[i]; + } +} + +char* Arena::AllocateFallback(size_t bytes) { + if (bytes > kBlockSize / 4) { + // Object is more than a quarter of our block size. Allocate it separately + // to avoid wasting too much space in leftover bytes. + char* result = AllocateNewBlock(bytes); + return result; + } + + // We waste the remaining space in the current block. + alloc_ptr_ = AllocateNewBlock(kBlockSize); + alloc_bytes_remaining_ = kBlockSize; + + char* result = alloc_ptr_; + alloc_ptr_ += bytes; + alloc_bytes_remaining_ -= bytes; + return result; +} + +char* Arena::AllocateAligned(size_t bytes) { + const int align = (sizeof(void*) > 8) ? sizeof(void*) : 8; + assert((align & (align-1)) == 0); // Pointer size should be a power of 2 + size_t current_mod = reinterpret_cast(alloc_ptr_) & (align-1); + size_t slop = (current_mod == 0 ? 0 : align - current_mod); + size_t needed = bytes + slop; + char* result; + if (needed <= alloc_bytes_remaining_) { + result = alloc_ptr_ + slop; + alloc_ptr_ += needed; + alloc_bytes_remaining_ -= needed; + } else { + // AllocateFallback always returned aligned memory + result = AllocateFallback(bytes); + } + assert((reinterpret_cast(result) & (align-1)) == 0); + return result; +} + +char* Arena::AllocateNewBlock(size_t block_bytes) { + char* result = new char[block_bytes]; + blocks_.push_back(result); + memory_usage_.NoBarrier_Store( + reinterpret_cast(MemoryUsage() + block_bytes + sizeof(char*))); + return result; +} + +} // namespace leveldb diff --git a/leveldb-library/util/arena.h b/leveldb-library/util/arena.h new file mode 100644 index 0000000..48bab33 --- /dev/null +++ b/leveldb-library/util/arena.h @@ -0,0 +1,68 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_UTIL_ARENA_H_ +#define STORAGE_LEVELDB_UTIL_ARENA_H_ + +#include +#include +#include +#include +#include "port/port.h" + +namespace leveldb { + +class Arena { + public: + Arena(); + ~Arena(); + + // Return a pointer to a newly allocated memory block of "bytes" bytes. + char* Allocate(size_t bytes); + + // Allocate memory with the normal alignment guarantees provided by malloc + char* AllocateAligned(size_t bytes); + + // Returns an estimate of the total memory usage of data allocated + // by the arena. + size_t MemoryUsage() const { + return reinterpret_cast(memory_usage_.NoBarrier_Load()); + } + + private: + char* AllocateFallback(size_t bytes); + char* AllocateNewBlock(size_t block_bytes); + + // Allocation state + char* alloc_ptr_; + size_t alloc_bytes_remaining_; + + // Array of new[] allocated memory blocks + std::vector blocks_; + + // Total memory usage of the arena. + port::AtomicPointer memory_usage_; + + // No copying allowed + Arena(const Arena&); + void operator=(const Arena&); +}; + +inline char* Arena::Allocate(size_t bytes) { + // The semantics of what to return are a bit messy if we allow + // 0-byte allocations, so we disallow them here (we don't need + // them for our internal use). + assert(bytes > 0); + if (bytes <= alloc_bytes_remaining_) { + char* result = alloc_ptr_; + alloc_ptr_ += bytes; + alloc_bytes_remaining_ -= bytes; + return result; + } + return AllocateFallback(bytes); +} + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_ARENA_H_ diff --git a/leveldb-library/util/bloom.cc b/leveldb-library/util/bloom.cc new file mode 100644 index 0000000..bf3e4ca --- /dev/null +++ b/leveldb-library/util/bloom.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "leveldb/filter_policy.h" + +#include "leveldb/slice.h" +#include "util/hash.h" + +namespace leveldb { + +namespace { +static uint32_t BloomHash(const Slice& key) { + return Hash(key.data(), key.size(), 0xbc9f1d34); +} + +class BloomFilterPolicy : public FilterPolicy { + private: + size_t bits_per_key_; + size_t k_; + + public: + explicit BloomFilterPolicy(int bits_per_key) + : bits_per_key_(bits_per_key) { + // We intentionally round down to reduce probing cost a little bit + k_ = static_cast(bits_per_key * 0.69); // 0.69 =~ ln(2) + if (k_ < 1) k_ = 1; + if (k_ > 30) k_ = 30; + } + + virtual const char* Name() const { + return "leveldb.BuiltinBloomFilter2"; + } + + virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { + // Compute bloom filter size (in both bits and bytes) + size_t bits = n * bits_per_key_; + + // For small n, we can see a very high false positive rate. Fix it + // by enforcing a minimum bloom filter length. + if (bits < 64) bits = 64; + + size_t bytes = (bits + 7) / 8; + bits = bytes * 8; + + const size_t init_size = dst->size(); + dst->resize(init_size + bytes, 0); + dst->push_back(static_cast(k_)); // Remember # of probes in filter + char* array = &(*dst)[init_size]; + for (int i = 0; i < n; i++) { + // Use double-hashing to generate a sequence of hash values. + // See analysis in [Kirsch,Mitzenmacher 2006]. + uint32_t h = BloomHash(keys[i]); + const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits + for (size_t j = 0; j < k_; j++) { + const uint32_t bitpos = h % bits; + array[bitpos/8] |= (1 << (bitpos % 8)); + h += delta; + } + } + } + + virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const { + const size_t len = bloom_filter.size(); + if (len < 2) return false; + + const char* array = bloom_filter.data(); + const size_t bits = (len - 1) * 8; + + // Use the encoded k so that we can read filters generated by + // bloom filters created using different parameters. + const size_t k = array[len-1]; + if (k > 30) { + // Reserved for potentially new encodings for short bloom filters. + // Consider it a match. + return true; + } + + uint32_t h = BloomHash(key); + const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits + for (size_t j = 0; j < k; j++) { + const uint32_t bitpos = h % bits; + if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; + h += delta; + } + return true; + } +}; +} + +const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) { + return new BloomFilterPolicy(bits_per_key); +} + +} // namespace leveldb diff --git a/leveldb-library/util/cache.cc b/leveldb-library/util/cache.cc new file mode 100644 index 0000000..ce46886 --- /dev/null +++ b/leveldb-library/util/cache.cc @@ -0,0 +1,405 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include +#include + +#include "leveldb/cache.h" +#include "port/port.h" +#include "util/hash.h" +#include "util/mutexlock.h" + +namespace leveldb { + +Cache::~Cache() { +} + +namespace { + +// LRU cache implementation +// +// Cache entries have an "in_cache" boolean indicating whether the cache has a +// reference on the entry. The only ways that this can become false without the +// entry being passed to its "deleter" are via Erase(), via Insert() when +// an element with a duplicate key is inserted, or on destruction of the cache. +// +// The cache keeps two linked lists of items in the cache. All items in the +// cache are in one list or the other, and never both. Items still referenced +// by clients but erased from the cache are in neither list. The lists are: +// - in-use: contains the items currently referenced by clients, in no +// particular order. (This list is used for invariant checking. If we +// removed the check, elements that would otherwise be on this list could be +// left as disconnected singleton lists.) +// - LRU: contains the items not currently referenced by clients, in LRU order +// Elements are moved between these lists by the Ref() and Unref() methods, +// when they detect an element in the cache acquiring or losing its only +// external reference. + +// An entry is a variable length heap-allocated structure. Entries +// are kept in a circular doubly linked list ordered by access time. +struct LRUHandle { + void* value; + void (*deleter)(const Slice&, void* value); + LRUHandle* next_hash; + LRUHandle* next; + LRUHandle* prev; + size_t charge; // TODO(opt): Only allow uint32_t? + size_t key_length; + bool in_cache; // Whether entry is in the cache. + uint32_t refs; // References, including cache reference, if present. + uint32_t hash; // Hash of key(); used for fast sharding and comparisons + char key_data[1]; // Beginning of key + + Slice key() const { + // For cheaper lookups, we allow a temporary Handle object + // to store a pointer to a key in "value". + if (next == this) { + return *(reinterpret_cast(value)); + } else { + return Slice(key_data, key_length); + } + } +}; + +// We provide our own simple hash table since it removes a whole bunch +// of porting hacks and is also faster than some of the built-in hash +// table implementations in some of the compiler/runtime combinations +// we have tested. E.g., readrandom speeds up by ~5% over the g++ +// 4.4.3's builtin hashtable. +class HandleTable { + public: + HandleTable() : length_(0), elems_(0), list_(NULL) { Resize(); } + ~HandleTable() { delete[] list_; } + + LRUHandle* Lookup(const Slice& key, uint32_t hash) { + return *FindPointer(key, hash); + } + + LRUHandle* Insert(LRUHandle* h) { + LRUHandle** ptr = FindPointer(h->key(), h->hash); + LRUHandle* old = *ptr; + h->next_hash = (old == NULL ? NULL : old->next_hash); + *ptr = h; + if (old == NULL) { + ++elems_; + if (elems_ > length_) { + // Since each cache entry is fairly large, we aim for a small + // average linked list length (<= 1). + Resize(); + } + } + return old; + } + + LRUHandle* Remove(const Slice& key, uint32_t hash) { + LRUHandle** ptr = FindPointer(key, hash); + LRUHandle* result = *ptr; + if (result != NULL) { + *ptr = result->next_hash; + --elems_; + } + return result; + } + + private: + // The table consists of an array of buckets where each bucket is + // a linked list of cache entries that hash into the bucket. + uint32_t length_; + uint32_t elems_; + LRUHandle** list_; + + // Return a pointer to slot that points to a cache entry that + // matches key/hash. If there is no such cache entry, return a + // pointer to the trailing slot in the corresponding linked list. + LRUHandle** FindPointer(const Slice& key, uint32_t hash) { + LRUHandle** ptr = &list_[hash & (length_ - 1)]; + while (*ptr != NULL && + ((*ptr)->hash != hash || key != (*ptr)->key())) { + ptr = &(*ptr)->next_hash; + } + return ptr; + } + + void Resize() { + uint32_t new_length = 4; + while (new_length < elems_) { + new_length *= 2; + } + LRUHandle** new_list = new LRUHandle*[new_length]; + memset(new_list, 0, sizeof(new_list[0]) * new_length); + uint32_t count = 0; + for (uint32_t i = 0; i < length_; i++) { + LRUHandle* h = list_[i]; + while (h != NULL) { + LRUHandle* next = h->next_hash; + uint32_t hash = h->hash; + LRUHandle** ptr = &new_list[hash & (new_length - 1)]; + h->next_hash = *ptr; + *ptr = h; + h = next; + count++; + } + } + assert(elems_ == count); + delete[] list_; + list_ = new_list; + length_ = new_length; + } +}; + +// A single shard of sharded cache. +class LRUCache { + public: + LRUCache(); + ~LRUCache(); + + // Separate from constructor so caller can easily make an array of LRUCache + void SetCapacity(size_t capacity) { capacity_ = capacity; } + + // Like Cache methods, but with an extra "hash" parameter. + Cache::Handle* Insert(const Slice& key, uint32_t hash, + void* value, size_t charge, + void (*deleter)(const Slice& key, void* value)); + Cache::Handle* Lookup(const Slice& key, uint32_t hash); + void Release(Cache::Handle* handle); + void Erase(const Slice& key, uint32_t hash); + void Prune(); + size_t TotalCharge() const { + MutexLock l(&mutex_); + return usage_; + } + + private: + void LRU_Remove(LRUHandle* e); + void LRU_Append(LRUHandle*list, LRUHandle* e); + void Ref(LRUHandle* e); + void Unref(LRUHandle* e); + bool FinishErase(LRUHandle* e); + + // Initialized before use. + size_t capacity_; + + // mutex_ protects the following state. + mutable port::Mutex mutex_; + size_t usage_; + + // Dummy head of LRU list. + // lru.prev is newest entry, lru.next is oldest entry. + // Entries have refs==1 and in_cache==true. + LRUHandle lru_; + + // Dummy head of in-use list. + // Entries are in use by clients, and have refs >= 2 and in_cache==true. + LRUHandle in_use_; + + HandleTable table_; +}; + +LRUCache::LRUCache() + : usage_(0) { + // Make empty circular linked lists. + lru_.next = &lru_; + lru_.prev = &lru_; + in_use_.next = &in_use_; + in_use_.prev = &in_use_; +} + +LRUCache::~LRUCache() { + assert(in_use_.next == &in_use_); // Error if caller has an unreleased handle + for (LRUHandle* e = lru_.next; e != &lru_; ) { + LRUHandle* next = e->next; + assert(e->in_cache); + e->in_cache = false; + assert(e->refs == 1); // Invariant of lru_ list. + Unref(e); + e = next; + } +} + +void LRUCache::Ref(LRUHandle* e) { + if (e->refs == 1 && e->in_cache) { // If on lru_ list, move to in_use_ list. + LRU_Remove(e); + LRU_Append(&in_use_, e); + } + e->refs++; +} + +void LRUCache::Unref(LRUHandle* e) { + assert(e->refs > 0); + e->refs--; + if (e->refs == 0) { // Deallocate. + assert(!e->in_cache); + (*e->deleter)(e->key(), e->value); + free(e); + } else if (e->in_cache && e->refs == 1) { // No longer in use; move to lru_ list. + LRU_Remove(e); + LRU_Append(&lru_, e); + } +} + +void LRUCache::LRU_Remove(LRUHandle* e) { + e->next->prev = e->prev; + e->prev->next = e->next; +} + +void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) { + // Make "e" newest entry by inserting just before *list + e->next = list; + e->prev = list->prev; + e->prev->next = e; + e->next->prev = e; +} + +Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) { + MutexLock l(&mutex_); + LRUHandle* e = table_.Lookup(key, hash); + if (e != NULL) { + Ref(e); + } + return reinterpret_cast(e); +} + +void LRUCache::Release(Cache::Handle* handle) { + MutexLock l(&mutex_); + Unref(reinterpret_cast(handle)); +} + +Cache::Handle* LRUCache::Insert( + const Slice& key, uint32_t hash, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value)) { + MutexLock l(&mutex_); + + LRUHandle* e = reinterpret_cast( + malloc(sizeof(LRUHandle)-1 + key.size())); + e->value = value; + e->deleter = deleter; + e->charge = charge; + e->key_length = key.size(); + e->hash = hash; + e->in_cache = false; + e->refs = 1; // for the returned handle. + memcpy(e->key_data, key.data(), key.size()); + + if (capacity_ > 0) { + e->refs++; // for the cache's reference. + e->in_cache = true; + LRU_Append(&in_use_, e); + usage_ += charge; + FinishErase(table_.Insert(e)); + } // else don't cache. (Tests use capacity_==0 to turn off caching.) + + while (usage_ > capacity_ && lru_.next != &lru_) { + LRUHandle* old = lru_.next; + assert(old->refs == 1); + bool erased = FinishErase(table_.Remove(old->key(), old->hash)); + if (!erased) { // to avoid unused variable when compiled NDEBUG + assert(erased); + } + } + + return reinterpret_cast(e); +} + +// If e != NULL, finish removing *e from the cache; it has already been removed +// from the hash table. Return whether e != NULL. Requires mutex_ held. +bool LRUCache::FinishErase(LRUHandle* e) { + if (e != NULL) { + assert(e->in_cache); + LRU_Remove(e); + e->in_cache = false; + usage_ -= e->charge; + Unref(e); + } + return e != NULL; +} + +void LRUCache::Erase(const Slice& key, uint32_t hash) { + MutexLock l(&mutex_); + FinishErase(table_.Remove(key, hash)); +} + +void LRUCache::Prune() { + MutexLock l(&mutex_); + while (lru_.next != &lru_) { + LRUHandle* e = lru_.next; + assert(e->refs == 1); + bool erased = FinishErase(table_.Remove(e->key(), e->hash)); + if (!erased) { // to avoid unused variable when compiled NDEBUG + assert(erased); + } + } +} + +static const int kNumShardBits = 4; +static const int kNumShards = 1 << kNumShardBits; + +class ShardedLRUCache : public Cache { + private: + LRUCache shard_[kNumShards]; + port::Mutex id_mutex_; + uint64_t last_id_; + + static inline uint32_t HashSlice(const Slice& s) { + return Hash(s.data(), s.size(), 0); + } + + static uint32_t Shard(uint32_t hash) { + return hash >> (32 - kNumShardBits); + } + + public: + explicit ShardedLRUCache(size_t capacity) + : last_id_(0) { + const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards; + for (int s = 0; s < kNumShards; s++) { + shard_[s].SetCapacity(per_shard); + } + } + virtual ~ShardedLRUCache() { } + virtual Handle* Insert(const Slice& key, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value)) { + const uint32_t hash = HashSlice(key); + return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter); + } + virtual Handle* Lookup(const Slice& key) { + const uint32_t hash = HashSlice(key); + return shard_[Shard(hash)].Lookup(key, hash); + } + virtual void Release(Handle* handle) { + LRUHandle* h = reinterpret_cast(handle); + shard_[Shard(h->hash)].Release(handle); + } + virtual void Erase(const Slice& key) { + const uint32_t hash = HashSlice(key); + shard_[Shard(hash)].Erase(key, hash); + } + virtual void* Value(Handle* handle) { + return reinterpret_cast(handle)->value; + } + virtual uint64_t NewId() { + MutexLock l(&id_mutex_); + return ++(last_id_); + } + virtual void Prune() { + for (int s = 0; s < kNumShards; s++) { + shard_[s].Prune(); + } + } + virtual size_t TotalCharge() const { + size_t total = 0; + for (int s = 0; s < kNumShards; s++) { + total += shard_[s].TotalCharge(); + } + return total; + } +}; + +} // end anonymous namespace + +Cache* NewLRUCache(size_t capacity) { + return new ShardedLRUCache(capacity); +} + +} // namespace leveldb diff --git a/leveldb-library/util/coding.cc b/leveldb-library/util/coding.cc new file mode 100644 index 0000000..21e3186 --- /dev/null +++ b/leveldb-library/util/coding.cc @@ -0,0 +1,194 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/coding.h" + +namespace leveldb { + +void EncodeFixed32(char* buf, uint32_t value) { + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + } +} + +void EncodeFixed64(char* buf, uint64_t value) { + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + buf[4] = (value >> 32) & 0xff; + buf[5] = (value >> 40) & 0xff; + buf[6] = (value >> 48) & 0xff; + buf[7] = (value >> 56) & 0xff; + } +} + +void PutFixed32(std::string* dst, uint32_t value) { + char buf[sizeof(value)]; + EncodeFixed32(buf, value); + dst->append(buf, sizeof(buf)); +} + +void PutFixed64(std::string* dst, uint64_t value) { + char buf[sizeof(value)]; + EncodeFixed64(buf, value); + dst->append(buf, sizeof(buf)); +} + +char* EncodeVarint32(char* dst, uint32_t v) { + // Operate on characters as unsigneds + unsigned char* ptr = reinterpret_cast(dst); + static const int B = 128; + if (v < (1<<7)) { + *(ptr++) = v; + } else if (v < (1<<14)) { + *(ptr++) = v | B; + *(ptr++) = v>>7; + } else if (v < (1<<21)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = v>>14; + } else if (v < (1<<28)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = v>>21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = (v>>21) | B; + *(ptr++) = v>>28; + } + return reinterpret_cast(ptr); +} + +void PutVarint32(std::string* dst, uint32_t v) { + char buf[5]; + char* ptr = EncodeVarint32(buf, v); + dst->append(buf, ptr - buf); +} + +char* EncodeVarint64(char* dst, uint64_t v) { + static const int B = 128; + unsigned char* ptr = reinterpret_cast(dst); + while (v >= B) { + *(ptr++) = (v & (B-1)) | B; + v >>= 7; + } + *(ptr++) = static_cast(v); + return reinterpret_cast(ptr); +} + +void PutVarint64(std::string* dst, uint64_t v) { + char buf[10]; + char* ptr = EncodeVarint64(buf, v); + dst->append(buf, ptr - buf); +} + +void PutLengthPrefixedSlice(std::string* dst, const Slice& value) { + PutVarint32(dst, value.size()); + dst->append(value.data(), value.size()); +} + +int VarintLength(uint64_t v) { + int len = 1; + while (v >= 128) { + v >>= 7; + len++; + } + return len; +} + +const char* GetVarint32PtrFallback(const char* p, + const char* limit, + uint32_t* value) { + uint32_t result = 0; + for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) { + uint32_t byte = *(reinterpret_cast(p)); + p++; + if (byte & 128) { + // More bytes are present + result |= ((byte & 127) << shift); + } else { + result |= (byte << shift); + *value = result; + return reinterpret_cast(p); + } + } + return NULL; +} + +bool GetVarint32(Slice* input, uint32_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint32Ptr(p, limit, value); + if (q == NULL) { + return false; + } else { + *input = Slice(q, limit - q); + return true; + } +} + +const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) { + uint64_t result = 0; + for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) { + uint64_t byte = *(reinterpret_cast(p)); + p++; + if (byte & 128) { + // More bytes are present + result |= ((byte & 127) << shift); + } else { + result |= (byte << shift); + *value = result; + return reinterpret_cast(p); + } + } + return NULL; +} + +bool GetVarint64(Slice* input, uint64_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint64Ptr(p, limit, value); + if (q == NULL) { + return false; + } else { + *input = Slice(q, limit - q); + return true; + } +} + +const char* GetLengthPrefixedSlice(const char* p, const char* limit, + Slice* result) { + uint32_t len; + p = GetVarint32Ptr(p, limit, &len); + if (p == NULL) return NULL; + if (p + len > limit) return NULL; + *result = Slice(p, len); + return p + len; +} + +bool GetLengthPrefixedSlice(Slice* input, Slice* result) { + uint32_t len; + if (GetVarint32(input, &len) && + input->size() >= len) { + *result = Slice(input->data(), len); + input->remove_prefix(len); + return true; + } else { + return false; + } +} + +} // namespace leveldb diff --git a/leveldb-library/util/coding.h b/leveldb-library/util/coding.h new file mode 100644 index 0000000..3993c4a --- /dev/null +++ b/leveldb-library/util/coding.h @@ -0,0 +1,104 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Endian-neutral encoding: +// * Fixed-length numbers are encoded with least-significant byte first +// * In addition we support variable length "varint" encoding +// * Strings are encoded prefixed by their length in varint format + +#ifndef STORAGE_LEVELDB_UTIL_CODING_H_ +#define STORAGE_LEVELDB_UTIL_CODING_H_ + +#include +#include +#include +#include "leveldb/slice.h" +#include "port/port.h" + +namespace leveldb { + +// Standard Put... routines append to a string +extern void PutFixed32(std::string* dst, uint32_t value); +extern void PutFixed64(std::string* dst, uint64_t value); +extern void PutVarint32(std::string* dst, uint32_t value); +extern void PutVarint64(std::string* dst, uint64_t value); +extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value); + +// Standard Get... routines parse a value from the beginning of a Slice +// and advance the slice past the parsed value. +extern bool GetVarint32(Slice* input, uint32_t* value); +extern bool GetVarint64(Slice* input, uint64_t* value); +extern bool GetLengthPrefixedSlice(Slice* input, Slice* result); + +// Pointer-based variants of GetVarint... These either store a value +// in *v and return a pointer just past the parsed value, or return +// NULL on error. These routines only look at bytes in the range +// [p..limit-1] +extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v); +extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v); + +// Returns the length of the varint32 or varint64 encoding of "v" +extern int VarintLength(uint64_t v); + +// Lower-level versions of Put... that write directly into a character buffer +// REQUIRES: dst has enough space for the value being written +extern void EncodeFixed32(char* dst, uint32_t value); +extern void EncodeFixed64(char* dst, uint64_t value); + +// Lower-level versions of Put... that write directly into a character buffer +// and return a pointer just past the last byte written. +// REQUIRES: dst has enough space for the value being written +extern char* EncodeVarint32(char* dst, uint32_t value); +extern char* EncodeVarint64(char* dst, uint64_t value); + +// Lower-level versions of Get... that read directly from a character buffer +// without any bounds checking. + +inline uint32_t DecodeFixed32(const char* ptr) { + if (port::kLittleEndian) { + // Load the raw bytes + uint32_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; + } else { + return ((static_cast(static_cast(ptr[0]))) + | (static_cast(static_cast(ptr[1])) << 8) + | (static_cast(static_cast(ptr[2])) << 16) + | (static_cast(static_cast(ptr[3])) << 24)); + } +} + +inline uint64_t DecodeFixed64(const char* ptr) { + if (port::kLittleEndian) { + // Load the raw bytes + uint64_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; + } else { + uint64_t lo = DecodeFixed32(ptr); + uint64_t hi = DecodeFixed32(ptr + 4); + return (hi << 32) | lo; + } +} + +// Internal routine for use by fallback path of GetVarint32Ptr +extern const char* GetVarint32PtrFallback(const char* p, + const char* limit, + uint32_t* value); +inline const char* GetVarint32Ptr(const char* p, + const char* limit, + uint32_t* value) { + if (p < limit) { + uint32_t result = *(reinterpret_cast(p)); + if ((result & 128) == 0) { + *value = result; + return p + 1; + } + } + return GetVarint32PtrFallback(p, limit, value); +} + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_CODING_H_ diff --git a/leveldb-library/util/comparator.cc b/leveldb-library/util/comparator.cc new file mode 100644 index 0000000..4b7b572 --- /dev/null +++ b/leveldb-library/util/comparator.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include +#include "leveldb/comparator.h" +#include "leveldb/slice.h" +#include "port/port.h" +#include "util/logging.h" + +namespace leveldb { + +Comparator::~Comparator() { } + +namespace { +class BytewiseComparatorImpl : public Comparator { + public: + BytewiseComparatorImpl() { } + + virtual const char* Name() const { + return "leveldb.BytewiseComparator"; + } + + virtual int Compare(const Slice& a, const Slice& b) const { + return a.compare(b); + } + + virtual void FindShortestSeparator( + std::string* start, + const Slice& limit) const { + // Find length of common prefix + size_t min_length = std::min(start->size(), limit.size()); + size_t diff_index = 0; + while ((diff_index < min_length) && + ((*start)[diff_index] == limit[diff_index])) { + diff_index++; + } + + if (diff_index >= min_length) { + // Do not shorten if one string is a prefix of the other + } else { + uint8_t diff_byte = static_cast((*start)[diff_index]); + if (diff_byte < static_cast(0xff) && + diff_byte + 1 < static_cast(limit[diff_index])) { + (*start)[diff_index]++; + start->resize(diff_index + 1); + assert(Compare(*start, limit) < 0); + } + } + } + + virtual void FindShortSuccessor(std::string* key) const { + // Find first character that can be incremented + size_t n = key->size(); + for (size_t i = 0; i < n; i++) { + const uint8_t byte = (*key)[i]; + if (byte != static_cast(0xff)) { + (*key)[i] = byte + 1; + key->resize(i+1); + return; + } + } + // *key is a run of 0xffs. Leave it alone. + } +}; +} // namespace + +static port::OnceType once = LEVELDB_ONCE_INIT; +static const Comparator* bytewise; + +static void InitModule() { + bytewise = new BytewiseComparatorImpl; +} + +const Comparator* BytewiseComparator() { + port::InitOnce(&once, InitModule); + return bytewise; +} + +} // namespace leveldb diff --git a/leveldb-library/util/crc32c.cc b/leveldb-library/util/crc32c.cc new file mode 100644 index 0000000..edd61cf --- /dev/null +++ b/leveldb-library/util/crc32c.cc @@ -0,0 +1,350 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A portable implementation of crc32c, optimized to handle +// four bytes at a time. + +#include "util/crc32c.h" + +#include + +#include "port/port.h" +#include "util/coding.h" + +namespace leveldb { +namespace crc32c { + +static const uint32_t table0_[256] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351 +}; +static const uint32_t table1_[256] = { + 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, + 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945, + 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, + 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, + 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, + 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, + 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, + 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, + 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, + 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, + 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, + 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, + 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, + 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, + 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, + 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, + 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, + 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, + 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, + 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, + 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, + 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, + 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, + 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, + 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, + 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, + 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, + 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, + 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, + 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, + 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, + 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a, + 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, + 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, + 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, + 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, + 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, + 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, + 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, + 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, + 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, + 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, + 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, + 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306, + 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, + 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, + 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, + 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, + 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, + 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, + 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, + 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, + 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, + 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, + 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, + 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, + 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, + 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, + 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, + 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, + 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, + 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b, + 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, + 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483 +}; +static const uint32_t table2_[256] = { + 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, + 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469, + 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, + 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, + 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, + 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, + 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, + 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, + 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, + 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, + 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, + 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, + 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, + 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, + 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, + 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, + 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, + 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, + 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, + 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75, + 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, + 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, + 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, + 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, + 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, + 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, + 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, + 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, + 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, + 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, + 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, + 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb, + 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, + 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, + 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, + 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, + 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, + 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0, + 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, + 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, + 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, + 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, + 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, + 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb, + 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, + 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, + 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, + 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, + 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, + 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, + 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, + 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, + 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, + 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, + 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, + 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, + 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, + 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, + 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, + 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, + 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, + 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d, + 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, + 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8 +}; +static const uint32_t table3_[256] = { + 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, + 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca, + 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, + 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, + 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, + 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, + 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, + 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, + 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, + 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, + 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, + 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, + 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, + 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, + 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, + 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, + 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, + 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, + 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, + 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb, + 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, + 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, + 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, + 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, + 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, + 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, + 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, + 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, + 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, + 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, + 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, + 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d, + 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, + 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, + 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, + 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, + 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, + 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, + 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, + 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, + 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, + 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, + 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, + 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698, + 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, + 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, + 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, + 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, + 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, + 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, + 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, + 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, + 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, + 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, + 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, + 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, + 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, + 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, + 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, + 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, + 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, + 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4, + 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, + 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842 +}; + +// Used to fetch a naturally-aligned 32-bit word in little endian byte-order +static inline uint32_t LE_LOAD32(const uint8_t *p) { + return DecodeFixed32(reinterpret_cast(p)); +} + +// Determine if the CPU running this program can accelerate the CRC32C +// calculation. +static bool CanAccelerateCRC32C() { + // port::AcceleretedCRC32C returns zero when unable to accelerate. + static const char kTestCRCBuffer[] = "TestCRCBuffer"; + static const char kBufSize = sizeof(kTestCRCBuffer) - 1; + static const uint32_t kTestCRCValue = 0xdcbc59fa; + + return port::AcceleratedCRC32C(0, kTestCRCBuffer, kBufSize) == kTestCRCValue; +} + +uint32_t Extend(uint32_t crc, const char* buf, size_t size) { + static bool accelerate = CanAccelerateCRC32C(); + if (accelerate) { + return port::AcceleratedCRC32C(crc, buf, size); + } + + const uint8_t *p = reinterpret_cast(buf); + const uint8_t *e = p + size; + uint32_t l = crc ^ 0xffffffffu; + +#define STEP1 do { \ + int c = (l & 0xff) ^ *p++; \ + l = table0_[c] ^ (l >> 8); \ +} while (0) +#define STEP4 do { \ + uint32_t c = l ^ LE_LOAD32(p); \ + p += 4; \ + l = table3_[c & 0xff] ^ \ + table2_[(c >> 8) & 0xff] ^ \ + table1_[(c >> 16) & 0xff] ^ \ + table0_[c >> 24]; \ +} while (0) + + // Point x at first 4-byte aligned byte in string. This might be + // just past the end of the string. + const uintptr_t pval = reinterpret_cast(p); + const uint8_t* x = reinterpret_cast(((pval + 3) >> 2) << 2); + if (x <= e) { + // Process bytes until finished or p is 4-byte aligned + while (p != x) { + STEP1; + } + } + // Process bytes 16 at a time + while ((e-p) >= 16) { + STEP4; STEP4; STEP4; STEP4; + } + // Process bytes 4 at a time + while ((e-p) >= 4) { + STEP4; + } + // Process the last few bytes + while (p != e) { + STEP1; + } +#undef STEP4 +#undef STEP1 + return l ^ 0xffffffffu; +} + +} // namespace crc32c +} // namespace leveldb diff --git a/leveldb-library/util/crc32c.h b/leveldb-library/util/crc32c.h new file mode 100644 index 0000000..1d7e5c0 --- /dev/null +++ b/leveldb-library/util/crc32c.h @@ -0,0 +1,45 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_UTIL_CRC32C_H_ +#define STORAGE_LEVELDB_UTIL_CRC32C_H_ + +#include +#include + +namespace leveldb { +namespace crc32c { + +// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the +// crc32c of some string A. Extend() is often used to maintain the +// crc32c of a stream of data. +extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n); + +// Return the crc32c of data[0,n-1] +inline uint32_t Value(const char* data, size_t n) { + return Extend(0, data, n); +} + +static const uint32_t kMaskDelta = 0xa282ead8ul; + +// Return a masked representation of crc. +// +// Motivation: it is problematic to compute the CRC of a string that +// contains embedded CRCs. Therefore we recommend that CRCs stored +// somewhere (e.g., in files) should be masked before being stored. +inline uint32_t Mask(uint32_t crc) { + // Rotate right by 15 bits and add a constant. + return ((crc >> 15) | (crc << 17)) + kMaskDelta; +} + +// Return the crc whose masked representation is masked_crc. +inline uint32_t Unmask(uint32_t masked_crc) { + uint32_t rot = masked_crc - kMaskDelta; + return ((rot >> 17) | (rot << 15)); +} + +} // namespace crc32c +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_CRC32C_H_ diff --git a/leveldb-library/util/env.cc b/leveldb-library/util/env.cc new file mode 100644 index 0000000..c58a082 --- /dev/null +++ b/leveldb-library/util/env.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "leveldb/env.h" + +namespace leveldb { + +Env::~Env() { +} + +Status Env::NewAppendableFile(const std::string& fname, WritableFile** result) { + return Status::NotSupported("NewAppendableFile", fname); +} + +SequentialFile::~SequentialFile() { +} + +RandomAccessFile::~RandomAccessFile() { +} + +WritableFile::~WritableFile() { +} + +Logger::~Logger() { +} + +FileLock::~FileLock() { +} + +void Log(Logger* info_log, const char* format, ...) { + if (info_log != NULL) { + va_list ap; + va_start(ap, format); + info_log->Logv(format, ap); + va_end(ap); + } +} + +static Status DoWriteStringToFile(Env* env, const Slice& data, + const std::string& fname, + bool should_sync) { + WritableFile* file; + Status s = env->NewWritableFile(fname, &file); + if (!s.ok()) { + return s; + } + s = file->Append(data); + if (s.ok() && should_sync) { + s = file->Sync(); + } + if (s.ok()) { + s = file->Close(); + } + delete file; // Will auto-close if we did not close above + if (!s.ok()) { + env->DeleteFile(fname); + } + return s; +} + +Status WriteStringToFile(Env* env, const Slice& data, + const std::string& fname) { + return DoWriteStringToFile(env, data, fname, false); +} + +Status WriteStringToFileSync(Env* env, const Slice& data, + const std::string& fname) { + return DoWriteStringToFile(env, data, fname, true); +} + +Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { + data->clear(); + SequentialFile* file; + Status s = env->NewSequentialFile(fname, &file); + if (!s.ok()) { + return s; + } + static const int kBufferSize = 8192; + char* space = new char[kBufferSize]; + while (true) { + Slice fragment; + s = file->Read(kBufferSize, &fragment, space); + if (!s.ok()) { + break; + } + data->append(fragment.data(), fragment.size()); + if (fragment.empty()) { + break; + } + } + delete[] space; + delete file; + return s; +} + +EnvWrapper::~EnvWrapper() { +} + +} // namespace leveldb diff --git a/leveldb-library/util/env_posix.cc b/leveldb-library/util/env_posix.cc new file mode 100644 index 0000000..84aabb2 --- /dev/null +++ b/leveldb-library/util/env_posix.cc @@ -0,0 +1,695 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "leveldb/env.h" +#include "leveldb/slice.h" +#include "port/port.h" +#include "util/logging.h" +#include "util/mutexlock.h" +#include "util/posix_logger.h" +#include "util/env_posix_test_helper.h" + +namespace leveldb { + +namespace { + +static int open_read_only_file_limit = -1; +static int mmap_limit = -1; + +static Status IOError(const std::string& context, int err_number) { + return Status::IOError(context, strerror(err_number)); +} + +// Helper class to limit resource usage to avoid exhaustion. +// Currently used to limit read-only file descriptors and mmap file usage +// so that we do not end up running out of file descriptors, virtual memory, +// or running into kernel performance problems for very large databases. +class Limiter { + public: + // Limit maximum number of resources to |n|. + Limiter(intptr_t n) { + SetAllowed(n); + } + + // If another resource is available, acquire it and return true. + // Else return false. + bool Acquire() { + if (GetAllowed() <= 0) { + return false; + } + MutexLock l(&mu_); + intptr_t x = GetAllowed(); + if (x <= 0) { + return false; + } else { + SetAllowed(x - 1); + return true; + } + } + + // Release a resource acquired by a previous call to Acquire() that returned + // true. + void Release() { + MutexLock l(&mu_); + SetAllowed(GetAllowed() + 1); + } + + private: + port::Mutex mu_; + port::AtomicPointer allowed_; + + intptr_t GetAllowed() const { + return reinterpret_cast(allowed_.Acquire_Load()); + } + + // REQUIRES: mu_ must be held + void SetAllowed(intptr_t v) { + allowed_.Release_Store(reinterpret_cast(v)); + } + + Limiter(const Limiter&); + void operator=(const Limiter&); +}; + +class PosixSequentialFile: public SequentialFile { + private: + std::string filename_; + FILE* file_; + + public: + PosixSequentialFile(const std::string& fname, FILE* f) + : filename_(fname), file_(f) { } + virtual ~PosixSequentialFile() { fclose(file_); } + + virtual Status Read(size_t n, Slice* result, char* scratch) { + Status s; + size_t r = fread_unlocked(scratch, 1, n, file_); + *result = Slice(scratch, r); + if (r < n) { + if (feof(file_)) { + // We leave status as ok if we hit the end of the file + } else { + // A partial read with an error: return a non-ok status + s = IOError(filename_, errno); + } + } + return s; + } + + virtual Status Skip(uint64_t n) { + if (fseek(file_, n, SEEK_CUR)) { + return IOError(filename_, errno); + } + return Status::OK(); + } +}; + +// pread() based random-access +class PosixRandomAccessFile: public RandomAccessFile { + private: + std::string filename_; + bool temporary_fd_; // If true, fd_ is -1 and we open on every read. + int fd_; + Limiter* limiter_; + + public: + PosixRandomAccessFile(const std::string& fname, int fd, Limiter* limiter) + : filename_(fname), fd_(fd), limiter_(limiter) { + temporary_fd_ = !limiter->Acquire(); + if (temporary_fd_) { + // Open file on every access. + close(fd_); + fd_ = -1; + } + } + + virtual ~PosixRandomAccessFile() { + if (!temporary_fd_) { + close(fd_); + limiter_->Release(); + } + } + + virtual Status Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const { + int fd = fd_; + if (temporary_fd_) { + fd = open(filename_.c_str(), O_RDONLY); + if (fd < 0) { + return IOError(filename_, errno); + } + } + + Status s; + ssize_t r = pread(fd, scratch, n, static_cast(offset)); + *result = Slice(scratch, (r < 0) ? 0 : r); + if (r < 0) { + // An error: return a non-ok status + s = IOError(filename_, errno); + } + if (temporary_fd_) { + // Close the temporary file descriptor opened earlier. + close(fd); + } + return s; + } +}; + +// mmap() based random-access +class PosixMmapReadableFile: public RandomAccessFile { + private: + std::string filename_; + void* mmapped_region_; + size_t length_; + Limiter* limiter_; + + public: + // base[0,length-1] contains the mmapped contents of the file. + PosixMmapReadableFile(const std::string& fname, void* base, size_t length, + Limiter* limiter) + : filename_(fname), mmapped_region_(base), length_(length), + limiter_(limiter) { + } + + virtual ~PosixMmapReadableFile() { + munmap(mmapped_region_, length_); + limiter_->Release(); + } + + virtual Status Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const { + Status s; + if (offset + n > length_) { + *result = Slice(); + s = IOError(filename_, EINVAL); + } else { + *result = Slice(reinterpret_cast(mmapped_region_) + offset, n); + } + return s; + } +}; + +class PosixWritableFile : public WritableFile { + private: + std::string filename_; + FILE* file_; + + public: + PosixWritableFile(const std::string& fname, FILE* f) + : filename_(fname), file_(f) { } + + ~PosixWritableFile() { + if (file_ != NULL) { + // Ignoring any potential errors + fclose(file_); + } + } + + virtual Status Append(const Slice& data) { + size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_); + if (r != data.size()) { + return IOError(filename_, errno); + } + return Status::OK(); + } + + virtual Status Close() { + Status result; + if (fclose(file_) != 0) { + result = IOError(filename_, errno); + } + file_ = NULL; + return result; + } + + virtual Status Flush() { + if (fflush_unlocked(file_) != 0) { + return IOError(filename_, errno); + } + return Status::OK(); + } + + Status SyncDirIfManifest() { + const char* f = filename_.c_str(); + const char* sep = strrchr(f, '/'); + Slice basename; + std::string dir; + if (sep == NULL) { + dir = "."; + basename = f; + } else { + dir = std::string(f, sep - f); + basename = sep + 1; + } + Status s; + if (basename.starts_with("MANIFEST")) { + int fd = open(dir.c_str(), O_RDONLY); + if (fd < 0) { + s = IOError(dir, errno); + } else { + if (fsync(fd) < 0) { + s = IOError(dir, errno); + } + close(fd); + } + } + return s; + } + + virtual Status Sync() { + // Ensure new files referred to by the manifest are in the filesystem. + Status s = SyncDirIfManifest(); + if (!s.ok()) { + return s; + } + if (fflush_unlocked(file_) != 0 || + fdatasync(fileno(file_)) != 0) { + s = Status::IOError(filename_, strerror(errno)); + } + return s; + } +}; + +static int LockOrUnlock(int fd, bool lock) { + errno = 0; + struct flock f; + memset(&f, 0, sizeof(f)); + f.l_type = (lock ? F_WRLCK : F_UNLCK); + f.l_whence = SEEK_SET; + f.l_start = 0; + f.l_len = 0; // Lock/unlock entire file + return fcntl(fd, F_SETLK, &f); +} + +class PosixFileLock : public FileLock { + public: + int fd_; + std::string name_; +}; + +// Set of locked files. We keep a separate set instead of just +// relying on fcntrl(F_SETLK) since fcntl(F_SETLK) does not provide +// any protection against multiple uses from the same process. +class PosixLockTable { + private: + port::Mutex mu_; + std::set locked_files_; + public: + bool Insert(const std::string& fname) { + MutexLock l(&mu_); + return locked_files_.insert(fname).second; + } + void Remove(const std::string& fname) { + MutexLock l(&mu_); + locked_files_.erase(fname); + } +}; + +class PosixEnv : public Env { + public: + PosixEnv(); + virtual ~PosixEnv() { + char msg[] = "Destroying Env::Default()\n"; + fwrite(msg, 1, sizeof(msg), stderr); + abort(); + } + + virtual Status NewSequentialFile(const std::string& fname, + SequentialFile** result) { + FILE* f = fopen(fname.c_str(), "r"); + if (f == NULL) { + *result = NULL; + return IOError(fname, errno); + } else { + *result = new PosixSequentialFile(fname, f); + return Status::OK(); + } + } + + virtual Status NewRandomAccessFile(const std::string& fname, + RandomAccessFile** result) { + *result = NULL; + Status s; + int fd = open(fname.c_str(), O_RDONLY); + if (fd < 0) { + s = IOError(fname, errno); + } else if (mmap_limit_.Acquire()) { + uint64_t size; + s = GetFileSize(fname, &size); + if (s.ok()) { + void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + if (base != MAP_FAILED) { + *result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_); + } else { + s = IOError(fname, errno); + } + } + close(fd); + if (!s.ok()) { + mmap_limit_.Release(); + } + } else { + *result = new PosixRandomAccessFile(fname, fd, &fd_limit_); + } + return s; + } + + virtual Status NewWritableFile(const std::string& fname, + WritableFile** result) { + Status s; + FILE* f = fopen(fname.c_str(), "w"); + if (f == NULL) { + *result = NULL; + s = IOError(fname, errno); + } else { + *result = new PosixWritableFile(fname, f); + } + return s; + } + + virtual Status NewAppendableFile(const std::string& fname, + WritableFile** result) { + Status s; + FILE* f = fopen(fname.c_str(), "a"); + if (f == NULL) { + *result = NULL; + s = IOError(fname, errno); + } else { + *result = new PosixWritableFile(fname, f); + } + return s; + } + + virtual bool FileExists(const std::string& fname) { + return access(fname.c_str(), F_OK) == 0; + } + + virtual Status GetChildren(const std::string& dir, + std::vector* result) { + result->clear(); + DIR* d = opendir(dir.c_str()); + if (d == NULL) { + return IOError(dir, errno); + } + struct dirent* entry; + while ((entry = readdir(d)) != NULL) { + result->push_back(entry->d_name); + } + closedir(d); + return Status::OK(); + } + + virtual Status DeleteFile(const std::string& fname) { + Status result; + if (unlink(fname.c_str()) != 0) { + result = IOError(fname, errno); + } + return result; + } + + virtual Status CreateDir(const std::string& name) { + Status result; + if (mkdir(name.c_str(), 0755) != 0) { + result = IOError(name, errno); + } + return result; + } + + virtual Status DeleteDir(const std::string& name) { + Status result; + if (rmdir(name.c_str()) != 0) { + result = IOError(name, errno); + } + return result; + } + + virtual Status GetFileSize(const std::string& fname, uint64_t* size) { + Status s; + struct stat sbuf; + if (stat(fname.c_str(), &sbuf) != 0) { + *size = 0; + s = IOError(fname, errno); + } else { + *size = sbuf.st_size; + } + return s; + } + + virtual Status RenameFile(const std::string& src, const std::string& target) { + Status result; + if (rename(src.c_str(), target.c_str()) != 0) { + result = IOError(src, errno); + } + return result; + } + + virtual Status LockFile(const std::string& fname, FileLock** lock) { + *lock = NULL; + Status result; + int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644); + if (fd < 0) { + result = IOError(fname, errno); + } else if (!locks_.Insert(fname)) { + close(fd); + result = Status::IOError("lock " + fname, "already held by process"); + } else if (LockOrUnlock(fd, true) == -1) { + result = IOError("lock " + fname, errno); + close(fd); + locks_.Remove(fname); + } else { + PosixFileLock* my_lock = new PosixFileLock; + my_lock->fd_ = fd; + my_lock->name_ = fname; + *lock = my_lock; + } + return result; + } + + virtual Status UnlockFile(FileLock* lock) { + PosixFileLock* my_lock = reinterpret_cast(lock); + Status result; + if (LockOrUnlock(my_lock->fd_, false) == -1) { + result = IOError("unlock", errno); + } + locks_.Remove(my_lock->name_); + close(my_lock->fd_); + delete my_lock; + return result; + } + + virtual void Schedule(void (*function)(void*), void* arg); + + virtual void StartThread(void (*function)(void* arg), void* arg); + + virtual Status GetTestDirectory(std::string* result) { + const char* env = getenv("TEST_TMPDIR"); + if (env && env[0] != '\0') { + *result = env; + } else { + char buf[100]; + snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", int(geteuid())); + *result = buf; + } + // Directory may already exist + CreateDir(*result); + return Status::OK(); + } + + static uint64_t gettid() { + pthread_t tid = pthread_self(); + uint64_t thread_id = 0; + memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid))); + return thread_id; + } + + virtual Status NewLogger(const std::string& fname, Logger** result) { + FILE* f = fopen(fname.c_str(), "w"); + if (f == NULL) { + *result = NULL; + return IOError(fname, errno); + } else { + *result = new PosixLogger(f, &PosixEnv::gettid); + return Status::OK(); + } + } + + virtual uint64_t NowMicros() { + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; + } + + virtual void SleepForMicroseconds(int micros) { + usleep(micros); + } + + private: + void PthreadCall(const char* label, int result) { + if (result != 0) { + fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); + abort(); + } + } + + // BGThread() is the body of the background thread + void BGThread(); + static void* BGThreadWrapper(void* arg) { + reinterpret_cast(arg)->BGThread(); + return NULL; + } + + pthread_mutex_t mu_; + pthread_cond_t bgsignal_; + pthread_t bgthread_; + bool started_bgthread_; + + // Entry per Schedule() call + struct BGItem { void* arg; void (*function)(void*); }; + typedef std::deque BGQueue; + BGQueue queue_; + + PosixLockTable locks_; + Limiter mmap_limit_; + Limiter fd_limit_; +}; + +// Return the maximum number of concurrent mmaps. +static int MaxMmaps() { + if (mmap_limit >= 0) { + return mmap_limit; + } + // Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes. + mmap_limit = sizeof(void*) >= 8 ? 1000 : 0; + return mmap_limit; +} + +// Return the maximum number of read-only files to keep open. +static intptr_t MaxOpenFiles() { + if (open_read_only_file_limit >= 0) { + return open_read_only_file_limit; + } + struct rlimit rlim; + if (getrlimit(RLIMIT_NOFILE, &rlim)) { + // getrlimit failed, fallback to hard-coded default. + open_read_only_file_limit = 50; + } else if (rlim.rlim_cur == RLIM_INFINITY) { + open_read_only_file_limit = std::numeric_limits::max(); + } else { + // Allow use of 20% of available file descriptors for read-only files. + open_read_only_file_limit = rlim.rlim_cur / 5; + } + return open_read_only_file_limit; +} + +PosixEnv::PosixEnv() + : started_bgthread_(false), + mmap_limit_(MaxMmaps()), + fd_limit_(MaxOpenFiles()) { + PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL)); + PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL)); +} + +void PosixEnv::Schedule(void (*function)(void*), void* arg) { + PthreadCall("lock", pthread_mutex_lock(&mu_)); + + // Start background thread if necessary + if (!started_bgthread_) { + started_bgthread_ = true; + PthreadCall( + "create thread", + pthread_create(&bgthread_, NULL, &PosixEnv::BGThreadWrapper, this)); + } + + // If the queue is currently empty, the background thread may currently be + // waiting. + if (queue_.empty()) { + PthreadCall("signal", pthread_cond_signal(&bgsignal_)); + } + + // Add to priority queue + queue_.push_back(BGItem()); + queue_.back().function = function; + queue_.back().arg = arg; + + PthreadCall("unlock", pthread_mutex_unlock(&mu_)); +} + +void PosixEnv::BGThread() { + while (true) { + // Wait until there is an item that is ready to run + PthreadCall("lock", pthread_mutex_lock(&mu_)); + while (queue_.empty()) { + PthreadCall("wait", pthread_cond_wait(&bgsignal_, &mu_)); + } + + void (*function)(void*) = queue_.front().function; + void* arg = queue_.front().arg; + queue_.pop_front(); + + PthreadCall("unlock", pthread_mutex_unlock(&mu_)); + (*function)(arg); + } +} + +namespace { +struct StartThreadState { + void (*user_function)(void*); + void* arg; +}; +} +static void* StartThreadWrapper(void* arg) { + StartThreadState* state = reinterpret_cast(arg); + state->user_function(state->arg); + delete state; + return NULL; +} + +void PosixEnv::StartThread(void (*function)(void* arg), void* arg) { + pthread_t t; + StartThreadState* state = new StartThreadState; + state->user_function = function; + state->arg = arg; + PthreadCall("start thread", + pthread_create(&t, NULL, &StartThreadWrapper, state)); +} + +} // namespace + +static pthread_once_t once = PTHREAD_ONCE_INIT; +static Env* default_env; +static void InitDefaultEnv() { default_env = new PosixEnv; } + +void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) { + assert(default_env == NULL); + open_read_only_file_limit = limit; +} + +void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) { + assert(default_env == NULL); + mmap_limit = limit; +} + +Env* Env::Default() { + pthread_once(&once, InitDefaultEnv); + return default_env; +} + +} // namespace leveldb diff --git a/leveldb-library/util/env_posix_test_helper.h b/leveldb-library/util/env_posix_test_helper.h new file mode 100644 index 0000000..0386960 --- /dev/null +++ b/leveldb-library/util/env_posix_test_helper.h @@ -0,0 +1,28 @@ +// Copyright 2017 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_ +#define STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_ + +namespace leveldb { + +class EnvPosixTest; + +// A helper for the POSIX Env to facilitate testing. +class EnvPosixTestHelper { + private: + friend class EnvPosixTest; + + // Set the maximum number of read-only files that will be opened. + // Must be called before creating an Env. + static void SetReadOnlyFDLimit(int limit); + + // Set the maximum number of read-only files that will be mapped via mmap. + // Must be called before creating an Env. + static void SetReadOnlyMMapLimit(int limit); +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_ diff --git a/leveldb-library/util/filter_policy.cc b/leveldb-library/util/filter_policy.cc new file mode 100644 index 0000000..7b045c8 --- /dev/null +++ b/leveldb-library/util/filter_policy.cc @@ -0,0 +1,11 @@ +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "leveldb/filter_policy.h" + +namespace leveldb { + +FilterPolicy::~FilterPolicy() { } + +} // namespace leveldb diff --git a/leveldb-library/util/hash.cc b/leveldb-library/util/hash.cc new file mode 100644 index 0000000..ed439ce --- /dev/null +++ b/leveldb-library/util/hash.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include "util/coding.h" +#include "util/hash.h" + +// The FALLTHROUGH_INTENDED macro can be used to annotate implicit fall-through +// between switch labels. The real definition should be provided externally. +// This one is a fallback version for unsupported compilers. +#ifndef FALLTHROUGH_INTENDED +#define FALLTHROUGH_INTENDED do { } while (0) +#endif + +namespace leveldb { + +uint32_t Hash(const char* data, size_t n, uint32_t seed) { + // Similar to murmur hash + const uint32_t m = 0xc6a4a793; + const uint32_t r = 24; + const char* limit = data + n; + uint32_t h = seed ^ (n * m); + + // Pick up four bytes at a time + while (data + 4 <= limit) { + uint32_t w = DecodeFixed32(data); + data += 4; + h += w; + h *= m; + h ^= (h >> 16); + } + + // Pick up remaining bytes + switch (limit - data) { + case 3: + h += static_cast(data[2]) << 16; + FALLTHROUGH_INTENDED; + case 2: + h += static_cast(data[1]) << 8; + FALLTHROUGH_INTENDED; + case 1: + h += static_cast(data[0]); + h *= m; + h ^= (h >> r); + break; + } + return h; +} + + +} // namespace leveldb diff --git a/leveldb-library/util/hash.h b/leveldb-library/util/hash.h new file mode 100644 index 0000000..8889d56 --- /dev/null +++ b/leveldb-library/util/hash.h @@ -0,0 +1,19 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Simple hash function used for internal data structures + +#ifndef STORAGE_LEVELDB_UTIL_HASH_H_ +#define STORAGE_LEVELDB_UTIL_HASH_H_ + +#include +#include + +namespace leveldb { + +extern uint32_t Hash(const char* data, size_t n, uint32_t seed); + +} + +#endif // STORAGE_LEVELDB_UTIL_HASH_H_ diff --git a/leveldb-library/util/histogram.cc b/leveldb-library/util/histogram.cc new file mode 100644 index 0000000..bb95f58 --- /dev/null +++ b/leveldb-library/util/histogram.cc @@ -0,0 +1,139 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include +#include "port/port.h" +#include "util/histogram.h" + +namespace leveldb { + +const double Histogram::kBucketLimit[kNumBuckets] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, + 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450, + 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000, + 3500, 4000, 4500, 5000, 6000, 7000, 8000, 9000, 10000, 12000, 14000, + 16000, 18000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 60000, + 70000, 80000, 90000, 100000, 120000, 140000, 160000, 180000, 200000, + 250000, 300000, 350000, 400000, 450000, 500000, 600000, 700000, 800000, + 900000, 1000000, 1200000, 1400000, 1600000, 1800000, 2000000, 2500000, + 3000000, 3500000, 4000000, 4500000, 5000000, 6000000, 7000000, 8000000, + 9000000, 10000000, 12000000, 14000000, 16000000, 18000000, 20000000, + 25000000, 30000000, 35000000, 40000000, 45000000, 50000000, 60000000, + 70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000, + 180000000, 200000000, 250000000, 300000000, 350000000, 400000000, + 450000000, 500000000, 600000000, 700000000, 800000000, 900000000, + 1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000, + 2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0, + 5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0, + 1e200, +}; + +void Histogram::Clear() { + min_ = kBucketLimit[kNumBuckets-1]; + max_ = 0; + num_ = 0; + sum_ = 0; + sum_squares_ = 0; + for (int i = 0; i < kNumBuckets; i++) { + buckets_[i] = 0; + } +} + +void Histogram::Add(double value) { + // Linear search is fast enough for our usage in db_bench + int b = 0; + while (b < kNumBuckets - 1 && kBucketLimit[b] <= value) { + b++; + } + buckets_[b] += 1.0; + if (min_ > value) min_ = value; + if (max_ < value) max_ = value; + num_++; + sum_ += value; + sum_squares_ += (value * value); +} + +void Histogram::Merge(const Histogram& other) { + if (other.min_ < min_) min_ = other.min_; + if (other.max_ > max_) max_ = other.max_; + num_ += other.num_; + sum_ += other.sum_; + sum_squares_ += other.sum_squares_; + for (int b = 0; b < kNumBuckets; b++) { + buckets_[b] += other.buckets_[b]; + } +} + +double Histogram::Median() const { + return Percentile(50.0); +} + +double Histogram::Percentile(double p) const { + double threshold = num_ * (p / 100.0); + double sum = 0; + for (int b = 0; b < kNumBuckets; b++) { + sum += buckets_[b]; + if (sum >= threshold) { + // Scale linearly within this bucket + double left_point = (b == 0) ? 0 : kBucketLimit[b-1]; + double right_point = kBucketLimit[b]; + double left_sum = sum - buckets_[b]; + double right_sum = sum; + double pos = (threshold - left_sum) / (right_sum - left_sum); + double r = left_point + (right_point - left_point) * pos; + if (r < min_) r = min_; + if (r > max_) r = max_; + return r; + } + } + return max_; +} + +double Histogram::Average() const { + if (num_ == 0.0) return 0; + return sum_ / num_; +} + +double Histogram::StandardDeviation() const { + if (num_ == 0.0) return 0; + double variance = (sum_squares_ * num_ - sum_ * sum_) / (num_ * num_); + return sqrt(variance); +} + +std::string Histogram::ToString() const { + std::string r; + char buf[200]; + snprintf(buf, sizeof(buf), + "Count: %.0f Average: %.4f StdDev: %.2f\n", + num_, Average(), StandardDeviation()); + r.append(buf); + snprintf(buf, sizeof(buf), + "Min: %.4f Median: %.4f Max: %.4f\n", + (num_ == 0.0 ? 0.0 : min_), Median(), max_); + r.append(buf); + r.append("------------------------------------------------------\n"); + const double mult = 100.0 / num_; + double sum = 0; + for (int b = 0; b < kNumBuckets; b++) { + if (buckets_[b] <= 0.0) continue; + sum += buckets_[b]; + snprintf(buf, sizeof(buf), + "[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ", + ((b == 0) ? 0.0 : kBucketLimit[b-1]), // left + kBucketLimit[b], // right + buckets_[b], // count + mult * buckets_[b], // percentage + mult * sum); // cumulative percentage + r.append(buf); + + // Add hash marks based on percentage; 20 marks for 100%. + int marks = static_cast(20*(buckets_[b] / num_) + 0.5); + r.append(marks, '#'); + r.push_back('\n'); + } + return r; +} + +} // namespace leveldb diff --git a/leveldb-library/util/histogram.h b/leveldb-library/util/histogram.h new file mode 100644 index 0000000..1ef9f3c --- /dev/null +++ b/leveldb-library/util/histogram.h @@ -0,0 +1,42 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ +#define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ + +#include + +namespace leveldb { + +class Histogram { + public: + Histogram() { } + ~Histogram() { } + + void Clear(); + void Add(double value); + void Merge(const Histogram& other); + + std::string ToString() const; + + private: + double min_; + double max_; + double num_; + double sum_; + double sum_squares_; + + enum { kNumBuckets = 154 }; + static const double kBucketLimit[kNumBuckets]; + double buckets_[kNumBuckets]; + + double Median() const; + double Percentile(double p) const; + double Average() const; + double StandardDeviation() const; +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ diff --git a/leveldb-library/util/logging.cc b/leveldb-library/util/logging.cc new file mode 100644 index 0000000..ca6b324 --- /dev/null +++ b/leveldb-library/util/logging.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/logging.h" + +#include +#include +#include +#include +#include "leveldb/env.h" +#include "leveldb/slice.h" + +namespace leveldb { + +void AppendNumberTo(std::string* str, uint64_t num) { + char buf[30]; + snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num); + str->append(buf); +} + +void AppendEscapedStringTo(std::string* str, const Slice& value) { + for (size_t i = 0; i < value.size(); i++) { + char c = value[i]; + if (c >= ' ' && c <= '~') { + str->push_back(c); + } else { + char buf[10]; + snprintf(buf, sizeof(buf), "\\x%02x", + static_cast(c) & 0xff); + str->append(buf); + } + } +} + +std::string NumberToString(uint64_t num) { + std::string r; + AppendNumberTo(&r, num); + return r; +} + +std::string EscapeString(const Slice& value) { + std::string r; + AppendEscapedStringTo(&r, value); + return r; +} + +bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { + uint64_t v = 0; + int digits = 0; + while (!in->empty()) { + char c = (*in)[0]; + if (c >= '0' && c <= '9') { + ++digits; + const int delta = (c - '0'); + static const uint64_t kMaxUint64 = ~static_cast(0); + if (v > kMaxUint64/10 || + (v == kMaxUint64/10 && delta > kMaxUint64%10)) { + // Overflow + return false; + } + v = (v * 10) + delta; + in->remove_prefix(1); + } else { + break; + } + } + *val = v; + return (digits > 0); +} + +} // namespace leveldb diff --git a/leveldb-library/util/logging.h b/leveldb-library/util/logging.h new file mode 100644 index 0000000..1b450d2 --- /dev/null +++ b/leveldb-library/util/logging.h @@ -0,0 +1,43 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Must not be included from any .h files to avoid polluting the namespace +// with macros. + +#ifndef STORAGE_LEVELDB_UTIL_LOGGING_H_ +#define STORAGE_LEVELDB_UTIL_LOGGING_H_ + +#include +#include +#include +#include "port/port.h" + +namespace leveldb { + +class Slice; +class WritableFile; + +// Append a human-readable printout of "num" to *str +extern void AppendNumberTo(std::string* str, uint64_t num); + +// Append a human-readable printout of "value" to *str. +// Escapes any non-printable characters found in "value". +extern void AppendEscapedStringTo(std::string* str, const Slice& value); + +// Return a human-readable printout of "num" +extern std::string NumberToString(uint64_t num); + +// Return a human-readable version of "value". +// Escapes any non-printable characters found in "value". +extern std::string EscapeString(const Slice& value); + +// Parse a human-readable number from "*in" into *value. On success, +// advances "*in" past the consumed number and sets "*val" to the +// numeric value. Otherwise, returns false and leaves *in in an +// unspecified state. +extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val); + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_LOGGING_H_ diff --git a/leveldb-library/util/mutexlock.h b/leveldb-library/util/mutexlock.h new file mode 100644 index 0000000..1ff5a9e --- /dev/null +++ b/leveldb-library/util/mutexlock.h @@ -0,0 +1,41 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ +#define STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ + +#include "port/port.h" +#include "port/thread_annotations.h" + +namespace leveldb { + +// Helper class that locks a mutex on construction and unlocks the mutex when +// the destructor of the MutexLock object is invoked. +// +// Typical usage: +// +// void MyClass::MyMethod() { +// MutexLock l(&mu_); // mu_ is an instance variable +// ... some complex code, possibly with multiple return paths ... +// } + +class SCOPED_LOCKABLE MutexLock { + public: + explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu) + : mu_(mu) { + this->mu_->Lock(); + } + ~MutexLock() UNLOCK_FUNCTION() { this->mu_->Unlock(); } + + private: + port::Mutex *const mu_; + // No copying allowed + MutexLock(const MutexLock&); + void operator=(const MutexLock&); +}; + +} // namespace leveldb + + +#endif // STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ diff --git a/leveldb-library/util/options.cc b/leveldb-library/util/options.cc new file mode 100644 index 0000000..b5e6227 --- /dev/null +++ b/leveldb-library/util/options.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "leveldb/options.h" + +#include "leveldb/comparator.h" +#include "leveldb/env.h" + +namespace leveldb { + +Options::Options() + : comparator(BytewiseComparator()), + create_if_missing(false), + error_if_exists(false), + paranoid_checks(false), + env(Env::Default()), + info_log(NULL), + write_buffer_size(4<<20), + max_open_files(1000), + block_cache(NULL), + block_size(4096), + block_restart_interval(16), + max_file_size(2<<20), + compression(kSnappyCompression), + reuse_logs(false), + filter_policy(NULL) { +} + +} // namespace leveldb diff --git a/leveldb-library/util/posix_logger.h b/leveldb-library/util/posix_logger.h new file mode 100644 index 0000000..9741b1a --- /dev/null +++ b/leveldb-library/util/posix_logger.h @@ -0,0 +1,98 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Logger implementation that can be shared by all environments +// where enough posix functionality is available. + +#ifndef STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ +#define STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ + +#include +#include +#include +#include +#include "leveldb/env.h" + +namespace leveldb { + +class PosixLogger : public Logger { + private: + FILE* file_; + uint64_t (*gettid_)(); // Return the thread id for the current thread + public: + PosixLogger(FILE* f, uint64_t (*gettid)()) : file_(f), gettid_(gettid) { } + virtual ~PosixLogger() { + fclose(file_); + } + virtual void Logv(const char* format, va_list ap) { + const uint64_t thread_id = (*gettid_)(); + + // We try twice: the first time with a fixed-size stack allocated buffer, + // and the second time with a much larger dynamically allocated buffer. + char buffer[500]; + for (int iter = 0; iter < 2; iter++) { + char* base; + int bufsize; + if (iter == 0) { + bufsize = sizeof(buffer); + base = buffer; + } else { + bufsize = 30000; + base = new char[bufsize]; + } + char* p = base; + char* limit = base + bufsize; + + struct timeval now_tv; + gettimeofday(&now_tv, NULL); + const time_t seconds = now_tv.tv_sec; + struct tm t; + localtime_r(&seconds, &t); + p += snprintf(p, limit - p, + "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", + t.tm_year + 1900, + t.tm_mon + 1, + t.tm_mday, + t.tm_hour, + t.tm_min, + t.tm_sec, + static_cast(now_tv.tv_usec), + static_cast(thread_id)); + + // Print the message + if (p < limit) { + va_list backup_ap; + va_copy(backup_ap, ap); + p += vsnprintf(p, limit - p, format, backup_ap); + va_end(backup_ap); + } + + // Truncate to available space if necessary + if (p >= limit) { + if (iter == 0) { + continue; // Try again with larger buffer + } else { + p = limit - 1; + } + } + + // Add newline if necessary + if (p == base || p[-1] != '\n') { + *p++ = '\n'; + } + + assert(p <= limit); + fwrite(base, 1, p - base, file_); + fflush(file_); + if (base != buffer) { + delete[] base; + } + break; + } + } +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ diff --git a/leveldb-library/util/random.h b/leveldb-library/util/random.h new file mode 100644 index 0000000..ddd51b1 --- /dev/null +++ b/leveldb-library/util/random.h @@ -0,0 +1,64 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_ +#define STORAGE_LEVELDB_UTIL_RANDOM_H_ + +#include + +namespace leveldb { + +// A very simple random number generator. Not especially good at +// generating truly random bits, but good enough for our needs in this +// package. +class Random { + private: + uint32_t seed_; + public: + explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { + // Avoid bad seeds. + if (seed_ == 0 || seed_ == 2147483647L) { + seed_ = 1; + } + } + uint32_t Next() { + static const uint32_t M = 2147483647L; // 2^31-1 + static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 + // We are computing + // seed_ = (seed_ * A) % M, where M = 2^31-1 + // + // seed_ must not be zero or M, or else all subsequent computed values + // will be zero or M respectively. For all other values, seed_ will end + // up cycling through every number in [1,M-1] + uint64_t product = seed_ * A; + + // Compute (product % M) using the fact that ((x << 31) % M) == x. + seed_ = static_cast((product >> 31) + (product & M)); + // The first reduction may overflow by 1 bit, so we may need to + // repeat. mod == M is not possible; using > allows the faster + // sign-bit-based test. + if (seed_ > M) { + seed_ -= M; + } + return seed_; + } + // Returns a uniformly distributed value in the range [0..n-1] + // REQUIRES: n > 0 + uint32_t Uniform(int n) { return Next() % n; } + + // Randomly returns true ~"1/n" of the time, and false otherwise. + // REQUIRES: n > 0 + bool OneIn(int n) { return (Next() % n) == 0; } + + // Skewed: pick "base" uniformly from range [0,max_log] and then + // return "base" random bits. The effect is to pick a number in the + // range [0,2^max_log-1] with exponential bias towards smaller numbers. + uint32_t Skewed(int max_log) { + return Uniform(1 << Uniform(max_log + 1)); + } +}; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_RANDOM_H_ diff --git a/leveldb-library/util/status.cc b/leveldb-library/util/status.cc new file mode 100644 index 0000000..a44f35b --- /dev/null +++ b/leveldb-library/util/status.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include +#include "port/port.h" +#include "leveldb/status.h" + +namespace leveldb { + +const char* Status::CopyState(const char* state) { + uint32_t size; + memcpy(&size, state, sizeof(size)); + char* result = new char[size + 5]; + memcpy(result, state, size + 5); + return result; +} + +Status::Status(Code code, const Slice& msg, const Slice& msg2) { + assert(code != kOk); + const uint32_t len1 = msg.size(); + const uint32_t len2 = msg2.size(); + const uint32_t size = len1 + (len2 ? (2 + len2) : 0); + char* result = new char[size + 5]; + memcpy(result, &size, sizeof(size)); + result[4] = static_cast(code); + memcpy(result + 5, msg.data(), len1); + if (len2) { + result[5 + len1] = ':'; + result[6 + len1] = ' '; + memcpy(result + 7 + len1, msg2.data(), len2); + } + state_ = result; +} + +std::string Status::ToString() const { + if (state_ == NULL) { + return "OK"; + } else { + char tmp[30]; + const char* type; + switch (code()) { + case kOk: + type = "OK"; + break; + case kNotFound: + type = "NotFound: "; + break; + case kCorruption: + type = "Corruption: "; + break; + case kNotSupported: + type = "Not implemented: "; + break; + case kInvalidArgument: + type = "Invalid argument: "; + break; + case kIOError: + type = "IO error: "; + break; + default: + snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", + static_cast(code())); + type = tmp; + break; + } + std::string result(type); + uint32_t length; + memcpy(&length, state_, sizeof(length)); + result.append(state_ + 5, length); + return result; + } +} + +} // namespace leveldb diff --git a/leveldb-library/util/testharness.cc b/leveldb-library/util/testharness.cc new file mode 100644 index 0000000..402fab3 --- /dev/null +++ b/leveldb-library/util/testharness.cc @@ -0,0 +1,77 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/testharness.h" + +#include +#include +#include +#include + +namespace leveldb { +namespace test { + +namespace { +struct Test { + const char* base; + const char* name; + void (*func)(); +}; +std::vector* tests; +} + +bool RegisterTest(const char* base, const char* name, void (*func)()) { + if (tests == NULL) { + tests = new std::vector; + } + Test t; + t.base = base; + t.name = name; + t.func = func; + tests->push_back(t); + return true; +} + +int RunAllTests() { + const char* matcher = getenv("LEVELDB_TESTS"); + + int num = 0; + if (tests != NULL) { + for (size_t i = 0; i < tests->size(); i++) { + const Test& t = (*tests)[i]; + if (matcher != NULL) { + std::string name = t.base; + name.push_back('.'); + name.append(t.name); + if (strstr(name.c_str(), matcher) == NULL) { + continue; + } + } + fprintf(stderr, "==== Test %s.%s\n", t.base, t.name); + (*t.func)(); + ++num; + } + } + fprintf(stderr, "==== PASSED %d tests\n", num); + return 0; +} + +std::string TmpDir() { + std::string dir; + Status s = Env::Default()->GetTestDirectory(&dir); + ASSERT_TRUE(s.ok()) << s.ToString(); + return dir; +} + +int RandomSeed() { + const char* env = getenv("TEST_RANDOM_SEED"); + int result = (env != NULL ? atoi(env) : 301); + if (result <= 0) { + result = 301; + } + return result; +} + +} // namespace test +} // namespace leveldb diff --git a/leveldb-library/util/testharness.h b/leveldb-library/util/testharness.h new file mode 100644 index 0000000..da4fe68 --- /dev/null +++ b/leveldb-library/util/testharness.h @@ -0,0 +1,138 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ +#define STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ + +#include +#include +#include +#include "leveldb/env.h" +#include "leveldb/slice.h" +#include "util/random.h" + +namespace leveldb { +namespace test { + +// Run some of the tests registered by the TEST() macro. If the +// environment variable "LEVELDB_TESTS" is not set, runs all tests. +// Otherwise, runs only the tests whose name contains the value of +// "LEVELDB_TESTS" as a substring. E.g., suppose the tests are: +// TEST(Foo, Hello) { ... } +// TEST(Foo, World) { ... } +// LEVELDB_TESTS=Hello will run the first test +// LEVELDB_TESTS=o will run both tests +// LEVELDB_TESTS=Junk will run no tests +// +// Returns 0 if all tests pass. +// Dies or returns a non-zero value if some test fails. +extern int RunAllTests(); + +// Return the directory to use for temporary storage. +extern std::string TmpDir(); + +// Return a randomization seed for this run. Typically returns the +// same number on repeated invocations of this binary, but automated +// runs may be able to vary the seed. +extern int RandomSeed(); + +// An instance of Tester is allocated to hold temporary state during +// the execution of an assertion. +class Tester { + private: + bool ok_; + const char* fname_; + int line_; + std::stringstream ss_; + + public: + Tester(const char* f, int l) + : ok_(true), fname_(f), line_(l) { + } + + ~Tester() { + if (!ok_) { + fprintf(stderr, "%s:%d:%s\n", fname_, line_, ss_.str().c_str()); + exit(1); + } + } + + Tester& Is(bool b, const char* msg) { + if (!b) { + ss_ << " Assertion failure " << msg; + ok_ = false; + } + return *this; + } + + Tester& IsOk(const Status& s) { + if (!s.ok()) { + ss_ << " " << s.ToString(); + ok_ = false; + } + return *this; + } + +#define BINARY_OP(name,op) \ + template \ + Tester& name(const X& x, const Y& y) { \ + if (! (x op y)) { \ + ss_ << " failed: " << x << (" " #op " ") << y; \ + ok_ = false; \ + } \ + return *this; \ + } + + BINARY_OP(IsEq, ==) + BINARY_OP(IsNe, !=) + BINARY_OP(IsGe, >=) + BINARY_OP(IsGt, >) + BINARY_OP(IsLe, <=) + BINARY_OP(IsLt, <) +#undef BINARY_OP + + // Attach the specified value to the error message if an error has occurred + template + Tester& operator<<(const V& value) { + if (!ok_) { + ss_ << " " << value; + } + return *this; + } +}; + +#define ASSERT_TRUE(c) ::leveldb::test::Tester(__FILE__, __LINE__).Is((c), #c) +#define ASSERT_OK(s) ::leveldb::test::Tester(__FILE__, __LINE__).IsOk((s)) +#define ASSERT_EQ(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsEq((a),(b)) +#define ASSERT_NE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsNe((a),(b)) +#define ASSERT_GE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGe((a),(b)) +#define ASSERT_GT(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGt((a),(b)) +#define ASSERT_LE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLe((a),(b)) +#define ASSERT_LT(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLt((a),(b)) + +#define TCONCAT(a,b) TCONCAT1(a,b) +#define TCONCAT1(a,b) a##b + +#define TEST(base,name) \ +class TCONCAT(_Test_,name) : public base { \ + public: \ + void _Run(); \ + static void _RunIt() { \ + TCONCAT(_Test_,name) t; \ + t._Run(); \ + } \ +}; \ +bool TCONCAT(_Test_ignored_,name) = \ + ::leveldb::test::RegisterTest(#base, #name, &TCONCAT(_Test_,name)::_RunIt); \ +void TCONCAT(_Test_,name)::_Run() + +// Register the specified test. Typically not used directly, but +// invoked via the macro expansion of TEST. +extern bool RegisterTest(const char* base, const char* name, void (*func)()); + + +} // namespace test +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ diff --git a/leveldb-library/util/testutil.cc b/leveldb-library/util/testutil.cc new file mode 100644 index 0000000..bee56bf --- /dev/null +++ b/leveldb-library/util/testutil.cc @@ -0,0 +1,51 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/testutil.h" + +#include "util/random.h" + +namespace leveldb { +namespace test { + +Slice RandomString(Random* rnd, int len, std::string* dst) { + dst->resize(len); + for (int i = 0; i < len; i++) { + (*dst)[i] = static_cast(' ' + rnd->Uniform(95)); // ' ' .. '~' + } + return Slice(*dst); +} + +std::string RandomKey(Random* rnd, int len) { + // Make sure to generate a wide variety of characters so we + // test the boundary conditions for short-key optimizations. + static const char kTestChars[] = { + '\0', '\1', 'a', 'b', 'c', 'd', 'e', '\xfd', '\xfe', '\xff' + }; + std::string result; + for (int i = 0; i < len; i++) { + result += kTestChars[rnd->Uniform(sizeof(kTestChars))]; + } + return result; +} + + +extern Slice CompressibleString(Random* rnd, double compressed_fraction, + size_t len, std::string* dst) { + int raw = static_cast(len * compressed_fraction); + if (raw < 1) raw = 1; + std::string raw_data; + RandomString(rnd, raw, &raw_data); + + // Duplicate the random data until we have filled "len" bytes + dst->clear(); + while (dst->size() < len) { + dst->append(raw_data); + } + dst->resize(len); + return Slice(*dst); +} + +} // namespace test +} // namespace leveldb diff --git a/leveldb-library/util/testutil.h b/leveldb-library/util/testutil.h new file mode 100644 index 0000000..d7e4583 --- /dev/null +++ b/leveldb-library/util/testutil.h @@ -0,0 +1,63 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_UTIL_TESTUTIL_H_ +#define STORAGE_LEVELDB_UTIL_TESTUTIL_H_ + +#include "leveldb/env.h" +#include "leveldb/slice.h" +#include "util/random.h" + +namespace leveldb { +namespace test { + +// Store in *dst a random string of length "len" and return a Slice that +// references the generated data. +extern Slice RandomString(Random* rnd, int len, std::string* dst); + +// Return a random key with the specified length that may contain interesting +// characters (e.g. \x00, \xff, etc.). +extern std::string RandomKey(Random* rnd, int len); + +// Store in *dst a string of length "len" that will compress to +// "N*compressed_fraction" bytes and return a Slice that references +// the generated data. +extern Slice CompressibleString(Random* rnd, double compressed_fraction, + size_t len, std::string* dst); + +// A wrapper that allows injection of errors. +class ErrorEnv : public EnvWrapper { + public: + bool writable_file_error_; + int num_writable_file_errors_; + + ErrorEnv() : EnvWrapper(Env::Default()), + writable_file_error_(false), + num_writable_file_errors_(0) { } + + virtual Status NewWritableFile(const std::string& fname, + WritableFile** result) { + if (writable_file_error_) { + ++num_writable_file_errors_; + *result = NULL; + return Status::IOError(fname, "fake error"); + } + return target()->NewWritableFile(fname, result); + } + + virtual Status NewAppendableFile(const std::string& fname, + WritableFile** result) { + if (writable_file_error_) { + ++num_writable_file_errors_; + *result = NULL; + return Status::IOError(fname, "fake error"); + } + return target()->NewAppendableFile(fname, result); + } +}; + +} // namespace test +} // namespace leveldb + +#endif // STORAGE_LEVELDB_UTIL_TESTUTIL_H_