From 3171c9c87c54d07f913c1e38e71ea2d7acce60d7 Mon Sep 17 00:00:00 2001 From: Uwe Klotz Date: Fri, 13 Sep 2019 13:55:16 +0200 Subject: [PATCH 1/6] Add utility functions and typedefs for cache keys --- CMakeLists.txt | 2 ++ build/depends.py | 1 + src/mixxxapplication.cpp | 2 ++ src/test/cache_test.cpp | 41 ++++++++++++++++++++++++++++++++++++++++ src/util/cache.cpp | 31 ++++++++++++++++++++++++++++++ src/util/cache.h | 28 +++++++++++++++++++++++++++ 6 files changed, 105 insertions(+) create mode 100644 src/test/cache_test.cpp create mode 100644 src/util/cache.cpp create mode 100644 src/util/cache.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f11ce8c997e..637f35611db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -526,6 +526,7 @@ add_library(mixxx-lib STATIC EXCLUDE_FROM_ALL src/util/audiosignal.cpp src/util/autohidpi.cpp src/util/battery/battery.cpp + src/util/cache.cpp src/util/cmdlineargs.cpp src/util/color/color.cpp src/util/color/predefinedcolor.cpp @@ -913,6 +914,7 @@ add_executable(mixxx-test src/test/bpmcontrol_test.cpp src/test/broadcastprofile_test.cpp src/test/broadcastsettings_test.cpp + src/test/cache_test.cpp src/test/channelhandle_test.cpp src/test/configobject_test.cpp src/test/controller_preset_validation_test.cpp diff --git a/build/depends.py b/build/depends.py index f558598ac96..9f0a9ebd25d 100644 --- a/build/depends.py +++ b/build/depends.py @@ -1269,6 +1269,7 @@ def sources(self, build): "src/util/xml.cpp", "src/util/tapfilter.cpp", "src/util/movinginterquartilemean.cpp", + "src/util/cache.cpp", "src/util/console.cpp", "src/util/color/color.cpp", "src/util/db/dbconnection.cpp", diff --git a/src/mixxxapplication.cpp b/src/mixxxapplication.cpp index 91cc0586c96..412c342cc15 100644 --- a/src/mixxxapplication.cpp +++ b/src/mixxxapplication.cpp @@ -9,6 +9,7 @@ #include "soundio/soundmanagerutil.h" #include "track/track.h" #include "track/trackref.h" +#include "util/cache.h" #include "util/color/rgbcolor.h" #include "util/math.h" @@ -66,6 +67,7 @@ void MixxxApplication::registerMetaTypes() { qRegisterMetaType>(); qRegisterMetaType(); qRegisterMetaType("mixxx::ReplayGain"); + qRegisterMetaType("mixxx::cache_key_t"); qRegisterMetaType("mixxx::Bpm"); qRegisterMetaType("mixxx::Duration"); qRegisterMetaType>("std::optional"); diff --git a/src/test/cache_test.cpp b/src/test/cache_test.cpp new file mode 100644 index 00000000000..4ba26d8acfe --- /dev/null +++ b/src/test/cache_test.cpp @@ -0,0 +1,41 @@ +#include "util/cache.h" + +#include + +#include + +namespace { + +class CacheTest : public testing::Test { +}; + +TEST_F(CacheTest, InvalidCacheKey) { + EXPECT_FALSE(mixxx::isValidCacheKey( + mixxx::invalidCacheKey())); +} + +TEST_F(CacheTest, InvalidCacheKeyEmptyByteArray) { + EXPECT_FALSE(mixxx::isValidCacheKey( + mixxx::calculateCacheKey(QByteArray()))); + EXPECT_FALSE(mixxx::isValidCacheKey( + mixxx::calculateCacheKey(""))); + EXPECT_FALSE(mixxx::isValidCacheKey( + mixxx::calculateCacheKey("\0"))); +} + +TEST_F(CacheTest, ValidCacheKey) { + EXPECT_TRUE(mixxx::isValidCacheKey( + mixxx::calculateCacheKey(QByteArrayLiteral("test")))); +} + +TEST_F(CacheTest, ValidCacheKeySingleZeroAscii) { + EXPECT_TRUE(mixxx::isValidCacheKey( + mixxx::calculateCacheKey(QByteArray("0")))); +} + +TEST_F(CacheTest, ValidCacheKeySingleZeroByte) { + EXPECT_TRUE(mixxx::isValidCacheKey( + mixxx::calculateCacheKey(QByteArray("\0", 1)))); +} + +} // anonymous namespace diff --git a/src/util/cache.cpp b/src/util/cache.cpp new file mode 100644 index 00000000000..bc298b2b589 --- /dev/null +++ b/src/util/cache.cpp @@ -0,0 +1,31 @@ +#include "util/cache.h" + +#include "util/assert.h" + +namespace mixxx { + +cache_key_t calculateCacheKey(const QByteArray& bytes) { + cache_key_t key = invalidCacheKey(); + DEBUG_ASSERT(!isValidCacheKey(key)); + if (bytes.isEmpty()) { + return key; + } + size_t leftShiftBytes = 0; + for (const auto nextByte : bytes) { + // Only 8 bits are relevant and we don't want the sign + // extension of a (signed) char during conversion. + const cache_key_t nextBits = + static_cast(nextByte); + DEBUG_ASSERT(nextBits == (nextBits & static_cast(0xff))); + key ^= nextBits << (leftShiftBytes * 8); + leftShiftBytes = (leftShiftBytes + 1) % sizeof(cache_key_t); + } + if (!isValidCacheKey(key)) { + // Unlikely but possible + key = ~key; + } + DEBUG_ASSERT(isValidCacheKey(key)); + return key; +} + +} // namespace mixxx diff --git a/src/util/cache.h b/src/util/cache.h new file mode 100644 index 00000000000..560d69f140b --- /dev/null +++ b/src/util/cache.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +namespace mixxx { + +typedef quint64 cache_key_t; + +// A signed integer is needed for storing cache keys as +// integer numbers with full precision in the database. +typedef qint64 cache_key_signed_t; + +inline constexpr cache_key_t invalidCacheKey() { + return 0; +} + +inline constexpr bool isValidCacheKey(cache_key_t key) { + return key != invalidCacheKey(); +} + +// Transforms a byte array containing a hash result into an +// unsigned integer number that should be almost unique. +cache_key_t calculateCacheKey(const QByteArray& bytes); + +} // namespace mixxx + +Q_DECLARE_METATYPE(mixxx::cache_key_t); From 686a229956e10be841f4c97b3023074cd8f9d94f Mon Sep 17 00:00:00 2001 From: Uwe Klotz Date: Tue, 11 Feb 2020 13:54:40 +0100 Subject: [PATCH 2/6] Use a cryptographic hash function for directory hashes --- src/library/dao/libraryhashdao.cpp | 33 ++++++++++++------- src/library/dao/libraryhashdao.h | 11 ++++--- src/library/scanner/importfilestask.cpp | 2 +- src/library/scanner/importfilestask.h | 5 ++- src/library/scanner/libraryscanner.cpp | 14 ++++---- src/library/scanner/libraryscanner.h | 2 +- .../scanner/recursivescandirectorytask.cpp | 12 ++++--- src/library/scanner/scannerglobal.h | 7 ++-- src/library/scanner/scannertask.h | 2 +- 9 files changed, 51 insertions(+), 37 deletions(-) diff --git a/src/library/dao/libraryhashdao.cpp b/src/library/dao/libraryhashdao.cpp index 637f1f49f4b..cc8da1d171c 100644 --- a/src/library/dao/libraryhashdao.cpp +++ b/src/library/dao/libraryhashdao.cpp @@ -8,10 +8,21 @@ #include "libraryhashdao.h" #include "library/queryutil.h" -QHash LibraryHashDAO::getDirectoryHashes() { +namespace { + +// Store hash values as a signed 64-bit integer. Otherwise values greater +// than 2^63-1 would be converted into a floating point numbers while +// losing precision!! +inline mixxx::cache_key_signed_t dbHash(mixxx::cache_key_t hash) { + return static_cast(hash); +} + +} // anonymous namespace + +QHash LibraryHashDAO::getDirectoryHashes() { QSqlQuery query(m_database); query.prepare("SELECT hash, directory_path FROM LibraryHashes"); - QHash hashes; + QHash hashes; if (!query.exec()) { LOG_FAILED_QUERY(query); } @@ -20,15 +31,15 @@ QHash LibraryHashDAO::getDirectoryHashes() { int directoryPathColumn = query.record().indexOf("directory_path"); while (query.next()) { hashes[query.value(directoryPathColumn).toString()] = - query.value(hashColumn).toInt(); + query.value(hashColumn).toULongLong(); } return hashes; } -int LibraryHashDAO::getDirectoryHash(const QString& dirPath) { +mixxx::cache_key_t LibraryHashDAO::getDirectoryHash(const QString& dirPath) { //qDebug() << "LibraryHashDAO::getDirectoryHash" << QThread::currentThread() << m_database.connectionName(); - int hash = -1; + mixxx::cache_key_t hash = mixxx::invalidCacheKey(); QSqlQuery query(m_database); query.prepare("SELECT hash FROM LibraryHashes " @@ -40,7 +51,7 @@ int LibraryHashDAO::getDirectoryHash(const QString& dirPath) { } //Grab a hash for this directory from the database, from the last time it was scanned. if (query.next()) { - hash = query.value(query.record().indexOf("hash")).toInt(); + hash = query.value(query.record().indexOf("hash")).toULongLong(); //qDebug() << "prev hash exists" << hash << dirPath; } else { //qDebug() << "prev hash does not exist" << dirPath; @@ -49,13 +60,13 @@ int LibraryHashDAO::getDirectoryHash(const QString& dirPath) { return hash; } -void LibraryHashDAO::saveDirectoryHash(const QString& dirPath, const int hash) { +void LibraryHashDAO::saveDirectoryHash(const QString& dirPath, mixxx::cache_key_t hash) { //qDebug() << "LibraryHashDAO::saveDirectoryHash" << QThread::currentThread() << m_database.connectionName(); QSqlQuery query(m_database); query.prepare("INSERT INTO LibraryHashes (directory_path, hash, directory_deleted) " "VALUES (:directory_path, :hash, :directory_deleted)"); query.bindValue(":directory_path", dirPath); - query.bindValue(":hash", hash); + query.bindValue(":hash", dbHash(hash)); query.bindValue(":directory_deleted", 0); if (!query.exec()) { @@ -65,8 +76,8 @@ void LibraryHashDAO::saveDirectoryHash(const QString& dirPath, const int hash) { } void LibraryHashDAO::updateDirectoryHash(const QString& dirPath, - const int newHash, - const int dir_deleted) { + mixxx::cache_key_t newHash, + int dir_deleted) { //qDebug() << "LibraryHashDAO::updateDirectoryHash" << QThread::currentThread() << m_database.connectionName(); QSqlQuery query(m_database); // By definition if we have calculated a new hash for a directory then it @@ -75,7 +86,7 @@ void LibraryHashDAO::updateDirectoryHash(const QString& dirPath, "SET hash=:hash, directory_deleted=:directory_deleted, " "needs_verification=0 " "WHERE directory_path=:directory_path"); - query.bindValue(":hash", newHash); + query.bindValue(":hash", dbHash(newHash)); query.bindValue(":directory_deleted", dir_deleted); query.bindValue(":directory_path", dirPath); diff --git a/src/library/dao/libraryhashdao.h b/src/library/dao/libraryhashdao.h index 123a95b2faa..363c01164e2 100644 --- a/src/library/dao/libraryhashdao.h +++ b/src/library/dao/libraryhashdao.h @@ -7,6 +7,7 @@ #include #include "library/dao/dao.h" +#include "util/cache.h" class LibraryHashDAO : public DAO { public: @@ -16,11 +17,11 @@ class LibraryHashDAO : public DAO { m_database = database; }; - QHash getDirectoryHashes(); - int getDirectoryHash(const QString& dirPath); - void saveDirectoryHash(const QString& dirPath, const int hash); - void updateDirectoryHash(const QString& dirPath, const int newHash, - const int dir_deleted); + QHash getDirectoryHashes(); + mixxx::cache_key_t getDirectoryHash(const QString& dirPath); + void saveDirectoryHash(const QString& dirPath, mixxx::cache_key_t hash); + void updateDirectoryHash(const QString& dirPath, mixxx::cache_key_t newHash, + int dir_deleted); void markAsExisting(const QString& dirPath); void invalidateAllDirectories(); void markUnverifiedDirectoriesAsDeleted(); diff --git a/src/library/scanner/importfilestask.cpp b/src/library/scanner/importfilestask.cpp index efdadd80e38..86964dffe3d 100644 --- a/src/library/scanner/importfilestask.cpp +++ b/src/library/scanner/importfilestask.cpp @@ -8,7 +8,7 @@ ImportFilesTask::ImportFilesTask(LibraryScanner* pScanner, const ScannerGlobalPointer scannerGlobal, const QString& dirPath, const bool prevHashExists, - const int newHash, + const mixxx::cache_key_t newHash, const QLinkedList& filesToImport, const QLinkedList& possibleCovers, SecurityTokenPointer pToken) diff --git a/src/library/scanner/importfilestask.h b/src/library/scanner/importfilestask.h index c929be8b22f..c4d5e58f1f0 100644 --- a/src/library/scanner/importfilestask.h +++ b/src/library/scanner/importfilestask.h @@ -6,7 +6,6 @@ #include "util/sandbox.h" #include "library/scanner/scannertask.h" -#include "library/scanner/scannerglobal.h" // Import the provided files. Successful if the scan completed without being // cancelled. False if the scan was cancelled part-way through. @@ -17,7 +16,7 @@ class ImportFilesTask : public ScannerTask { const ScannerGlobalPointer scannerGlobal, const QString& dirPath, const bool prevHashExists, - const int newHash, + const mixxx::cache_key_t newHash, const QLinkedList& filesToImport, const QLinkedList& possibleCovers, SecurityTokenPointer pToken); @@ -28,7 +27,7 @@ class ImportFilesTask : public ScannerTask { private: const QString m_dirPath; const bool m_prevHashExists; - const int m_newHash; + const mixxx::cache_key_t m_newHash; const QLinkedList m_filesToImport; const QLinkedList m_possibleCovers; SecurityTokenPointer m_pToken; diff --git a/src/library/scanner/libraryscanner.cpp b/src/library/scanner/libraryscanner.cpp index e9fca5fd44b..f1995e9644b 100644 --- a/src/library/scanner/libraryscanner.cpp +++ b/src/library/scanner/libraryscanner.cpp @@ -26,9 +26,6 @@ mixxx::Logger kLogger("LibraryScanner"); QAtomicInt s_instanceCounter(0); -const QString kDeleteOrphanedLibraryScannerDirectories = - "delete from LibraryHashes where hash <> 0 and directory_path not in (select directory from track_locations)"; - // Returns the number of affected rows or -1 on error int execCleanupQuery(QSqlDatabase database, const QString& statement) { FwdSqlQuery query(database, statement); @@ -145,8 +142,11 @@ void LibraryScanner::run() { kLogger.info() << "Cleaning up database..."; PerformanceTimer timer; timer.start(); - auto numRows = execCleanupQuery(dbConnection, - kDeleteOrphanedLibraryScannerDirectories); + const auto sqlCmd = + QString("delete from LibraryHashes where hash <> %1 " + "and directory_path not in (select directory from track_locations)") + .arg(mixxx::invalidCacheKey()); + auto numRows = execCleanupQuery(dbConnection, sqlCmd); if (numRows < 0) { kLogger.warning() << "Failed to delete orphaned directory hashes"; @@ -189,7 +189,7 @@ void LibraryScanner::slotStartScan() { changeScannerState(SCANNING); QSet trackLocations = m_trackDao.getTrackLocations(); - QHash directoryHashes = m_libraryHashDao.getDirectoryHashes(); + QHash directoryHashes = m_libraryHashDao.getDirectoryHashes(); QRegExp extensionFilter(SoundSourceProxy::getSupportedFileNamesRegex()); QRegExp coverExtensionFilter = QRegExp(CoverArtUtils::supportedCoverArtExtensionsRegex(), @@ -515,7 +515,7 @@ void LibraryScanner::queueTask(ScannerTask* pTask) { } void LibraryScanner::slotDirectoryHashedAndScanned(const QString& directoryPath, - bool newDirectory, int hash) { + bool newDirectory, mixxx::cache_key_t hash) { ScopedTimer timer("LibraryScanner::slotDirectoryHashedAndScanned"); //kLogger.debug() << "sloDirectoryHashedAndScanned" << directoryPath // << newDirectory << hash; diff --git a/src/library/scanner/libraryscanner.h b/src/library/scanner/libraryscanner.h index 80edfb0991e..4761e7f5777 100644 --- a/src/library/scanner/libraryscanner.h +++ b/src/library/scanner/libraryscanner.h @@ -69,7 +69,7 @@ class LibraryScanner : public QThread { // ScannerTask signal handlers. void slotDirectoryHashedAndScanned(const QString& directoryPath, - bool newDirectory, int hash); + bool newDirectory, mixxx::cache_key_t hash); void slotDirectoryUnchanged(const QString& directoryPath); void slotTrackExists(const QString& trackPath); void slotAddNewTrack(const QString& trackPath); diff --git a/src/library/scanner/recursivescandirectorytask.cpp b/src/library/scanner/recursivescandirectorytask.cpp index 3b133982334..d08c4c71714 100644 --- a/src/library/scanner/recursivescandirectorytask.cpp +++ b/src/library/scanner/recursivescandirectorytask.cpp @@ -1,3 +1,4 @@ +#include #include #include "library/scanner/recursivescandirectorytask.h" @@ -38,7 +39,8 @@ void RecursiveScanDirectoryTask::run() { QLinkedList filesToImport; QLinkedList possibleCovers; QLinkedList dirsToScan; - QStringList newHashStr; + + QCryptographicHash hasher(QCryptographicHash::Sha256); // TODO(rryan) benchmark QRegExp copy versus QMutex/QRegExp in ScannerGlobal // versus slicing the extension off and checking for set/list containment. @@ -54,7 +56,7 @@ void RecursiveScanDirectoryTask::run() { if (currentFileInfo.isFile()) { const QString& fileName = currentFileInfo.fileName(); if (supportedExtensionsRegex.indexIn(fileName) != -1) { - newHashStr.append(currentFile); + hasher.addData(currentFile.toUtf8()); filesToImport.append(currentFileInfo); } else if (supportedCoverExtensionsRegex.indexIn(fileName) != -1) { possibleCovers.append(currentFileInfo); @@ -73,13 +75,13 @@ void RecursiveScanDirectoryTask::run() { // Note: A hash of "0" is a real hash if the directory contains no files! // Calculate a hash of the directory's file list. - int newHash = qHash(newHashStr.join("")); + const mixxx::cache_key_t newHash = mixxx::calculateCacheKey(hasher.result()); QString dirPath = m_dir.path(); // Try to retrieve a hash from the last time that directory was scanned. - int prevHash = m_scannerGlobal->directoryHashInDatabase(dirPath); - bool prevHashExists = prevHash != -1; + const mixxx::cache_key_t prevHash = m_scannerGlobal->directoryHashInDatabase(dirPath); + const bool prevHashExists = mixxx::isValidCacheKey(prevHash); if (prevHashExists || m_scanUnhashed) { // Compare the hashes, and if they don't match, rescan the files in that diff --git a/src/library/scanner/scannerglobal.h b/src/library/scanner/scannerglobal.h index ffb18465eac..b774227b3b6 100644 --- a/src/library/scanner/scannerglobal.h +++ b/src/library/scanner/scannerglobal.h @@ -9,6 +9,7 @@ #include #include +#include "util/cache.h" #include "util/task.h" #include "util/performancetimer.h" @@ -37,7 +38,7 @@ class DirInfo { class ScannerGlobal { public: ScannerGlobal(const QSet& trackLocations, - const QHash& directoryHashes, + const QHash& directoryHashes, const QRegExp& supportedExtensionsMatcher, const QRegExp& supportedCoverExtensionsMatcher, const QStringList& directoriesBlacklist) @@ -62,7 +63,7 @@ class ScannerGlobal { } // Returns the directory hash if it exists or -1 if it doesn't. - inline int directoryHashInDatabase(const QString& directoryPath) const { + inline mixxx::cache_key_t directoryHashInDatabase(const QString& directoryPath) const { return m_directoryHashes.value(directoryPath, -1); } @@ -177,7 +178,7 @@ class ScannerGlobal { TaskWatcher m_watcher; QSet m_trackLocations; - QHash m_directoryHashes; + QHash m_directoryHashes; mutable QMutex m_supportedExtensionsMatcherMutex; QRegExp m_supportedExtensionsMatcher; diff --git a/src/library/scanner/scannertask.h b/src/library/scanner/scannertask.h index 22db07c462f..1a3bc869a2f 100644 --- a/src/library/scanner/scannertask.h +++ b/src/library/scanner/scannertask.h @@ -22,7 +22,7 @@ class ScannerTask : public QObject, public QRunnable { void taskDone(bool success); void queueTask(ScannerTask* pTask); void directoryHashedAndScanned(const QString& directoryPath, - bool newDirectory, int hash); + bool newDirectory, mixxx::cache_key_t hash); void directoryUnchanged(const QString& directoryPath); void trackExists(const QString& filePath); void addNewTrack(const QString& filePath); From 2ebaedeb5f28c91bd8efb81061c0222b2f5dfdca Mon Sep 17 00:00:00 2001 From: Uwe Klotz Date: Wed, 12 Feb 2020 09:04:33 +0100 Subject: [PATCH 3/6] Add reasoning for cache key calculation --- src/util/cache.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/util/cache.cpp b/src/util/cache.cpp index bc298b2b589..b8e2c4d9c55 100644 --- a/src/util/cache.cpp +++ b/src/util/cache.cpp @@ -10,6 +10,12 @@ cache_key_t calculateCacheKey(const QByteArray& bytes) { if (bytes.isEmpty()) { return key; } + // We should not make any assumptions about the significance of + // the given bytes, even if they are supposed to be the result of + // applying a hash function. Instead of truncating the information + // by using only the first sizeof(cache_key_t) bytes we slice the + // array into groups of sizeof(cache_key_t) bytes and combine them + // with XOR. size_t leftShiftBytes = 0; for (const auto nextByte : bytes) { // Only 8 bits are relevant and we don't want the sign From 44ee5dfaaa1e5ad6ebee4b26c67ba7941673401a Mon Sep 17 00:00:00 2001 From: Uwe Klotz Date: Wed, 12 Feb 2020 09:05:17 +0100 Subject: [PATCH 4/6] Use prepare/bind/exec for cleanup SQL query --- src/library/scanner/libraryscanner.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/library/scanner/libraryscanner.cpp b/src/library/scanner/libraryscanner.cpp index f1995e9644b..cd48824ce35 100644 --- a/src/library/scanner/libraryscanner.cpp +++ b/src/library/scanner/libraryscanner.cpp @@ -27,8 +27,7 @@ mixxx::Logger kLogger("LibraryScanner"); QAtomicInt s_instanceCounter(0); // Returns the number of affected rows or -1 on error -int execCleanupQuery(QSqlDatabase database, const QString& statement) { - FwdSqlQuery query(database, statement); +int execCleanupQuery(FwdSqlQuery& query) { VERIFY_OR_DEBUG_ASSERT(query.isPrepared()) { return -1; } @@ -139,14 +138,19 @@ void LibraryScanner::run() { // Clean up the database and fix inconsistencies from previous runs. // See also: https://bugs.launchpad.net/mixxx/+bug/1846945 { - kLogger.info() << "Cleaning up database..."; + kLogger.info() + << "Cleaning up database..."; PerformanceTimer timer; timer.start(); - const auto sqlCmd = - QString("delete from LibraryHashes where hash <> %1 " - "and directory_path not in (select directory from track_locations)") - .arg(mixxx::invalidCacheKey()); - auto numRows = execCleanupQuery(dbConnection, sqlCmd); + const auto sqlStmt = QStringLiteral( + "DELETE FROM LibraryHashes WHERE hash <> :unequalHash " + "AND directory_path NOT IN " + "(SELECT directory FROM track_locations)"); + FwdSqlQuery query(dbConnection, sqlStmt); + query.bindValue( + QStringLiteral(":unequalHash"), + static_cast(mixxx::invalidCacheKey())); + auto numRows = execCleanupQuery(query); if (numRows < 0) { kLogger.warning() << "Failed to delete orphaned directory hashes"; From 160618c3ad5ee7f9ccdfbe3fc0c721df1dd92742 Mon Sep 17 00:00:00 2001 From: Uwe Klotz Date: Wed, 12 Feb 2020 12:05:38 +0100 Subject: [PATCH 5/6] Truncate message digest into cache key --- src/library/dao/libraryhashdao.cpp | 4 +-- .../scanner/recursivescandirectorytask.cpp | 2 +- src/test/cache_test.cpp | 12 +++---- src/util/cache.cpp | 33 ++++++++++--------- src/util/cache.h | 14 ++++++-- 5 files changed, 37 insertions(+), 28 deletions(-) diff --git a/src/library/dao/libraryhashdao.cpp b/src/library/dao/libraryhashdao.cpp index cc8da1d171c..32cc824cb0b 100644 --- a/src/library/dao/libraryhashdao.cpp +++ b/src/library/dao/libraryhashdao.cpp @@ -13,8 +13,8 @@ namespace { // Store hash values as a signed 64-bit integer. Otherwise values greater // than 2^63-1 would be converted into a floating point numbers while // losing precision!! -inline mixxx::cache_key_signed_t dbHash(mixxx::cache_key_t hash) { - return static_cast(hash); +inline constexpr mixxx::cache_key_signed_t dbHash(mixxx::cache_key_t hash) { + return mixxx::signedCacheKey(hash); } } // anonymous namespace diff --git a/src/library/scanner/recursivescandirectorytask.cpp b/src/library/scanner/recursivescandirectorytask.cpp index d08c4c71714..dcef6e15107 100644 --- a/src/library/scanner/recursivescandirectorytask.cpp +++ b/src/library/scanner/recursivescandirectorytask.cpp @@ -75,7 +75,7 @@ void RecursiveScanDirectoryTask::run() { // Note: A hash of "0" is a real hash if the directory contains no files! // Calculate a hash of the directory's file list. - const mixxx::cache_key_t newHash = mixxx::calculateCacheKey(hasher.result()); + const mixxx::cache_key_t newHash = mixxx::cacheKeyFromMessageDigest(hasher.result()); QString dirPath = m_dir.path(); diff --git a/src/test/cache_test.cpp b/src/test/cache_test.cpp index 4ba26d8acfe..d6f59f7ce43 100644 --- a/src/test/cache_test.cpp +++ b/src/test/cache_test.cpp @@ -16,26 +16,26 @@ TEST_F(CacheTest, InvalidCacheKey) { TEST_F(CacheTest, InvalidCacheKeyEmptyByteArray) { EXPECT_FALSE(mixxx::isValidCacheKey( - mixxx::calculateCacheKey(QByteArray()))); + mixxx::cacheKeyFromMessageDigest(QByteArray()))); EXPECT_FALSE(mixxx::isValidCacheKey( - mixxx::calculateCacheKey(""))); + mixxx::cacheKeyFromMessageDigest(""))); EXPECT_FALSE(mixxx::isValidCacheKey( - mixxx::calculateCacheKey("\0"))); + mixxx::cacheKeyFromMessageDigest("\0"))); } TEST_F(CacheTest, ValidCacheKey) { EXPECT_TRUE(mixxx::isValidCacheKey( - mixxx::calculateCacheKey(QByteArrayLiteral("test")))); + mixxx::cacheKeyFromMessageDigest(QByteArrayLiteral("test")))); } TEST_F(CacheTest, ValidCacheKeySingleZeroAscii) { EXPECT_TRUE(mixxx::isValidCacheKey( - mixxx::calculateCacheKey(QByteArray("0")))); + mixxx::cacheKeyFromMessageDigest(QByteArray("0")))); } TEST_F(CacheTest, ValidCacheKeySingleZeroByte) { EXPECT_TRUE(mixxx::isValidCacheKey( - mixxx::calculateCacheKey(QByteArray("\0", 1)))); + mixxx::cacheKeyFromMessageDigest(QByteArray("\0", 1)))); } } // anonymous namespace diff --git a/src/util/cache.cpp b/src/util/cache.cpp index b8e2c4d9c55..021efc19b5e 100644 --- a/src/util/cache.cpp +++ b/src/util/cache.cpp @@ -1,30 +1,31 @@ #include "util/cache.h" #include "util/assert.h" +#include "util/math.h" namespace mixxx { -cache_key_t calculateCacheKey(const QByteArray& bytes) { +cache_key_t cacheKeyFromMessageDigest(const QByteArray& messageDigest) { cache_key_t key = invalidCacheKey(); DEBUG_ASSERT(!isValidCacheKey(key)); - if (bytes.isEmpty()) { + if (messageDigest.isEmpty()) { return key; } - // We should not make any assumptions about the significance of - // the given bytes, even if they are supposed to be the result of - // applying a hash function. Instead of truncating the information - // by using only the first sizeof(cache_key_t) bytes we slice the - // array into groups of sizeof(cache_key_t) bytes and combine them - // with XOR. - size_t leftShiftBytes = 0; - for (const auto nextByte : bytes) { + // Source: FIPS 180-4 Secure Hash Standard (SHS) + // SP 800-107 + // 5 Hash function Usage + // 5.1 Truncated Message Digest + const auto significantByteCount = math_min( + messageDigest.size(), + static_cast(sizeof(cache_key_t))); + for (auto i = 0; i < significantByteCount; ++i) { // Only 8 bits are relevant and we don't want the sign - // extension of a (signed) char during conversion. - const cache_key_t nextBits = - static_cast(nextByte); - DEBUG_ASSERT(nextBits == (nextBits & static_cast(0xff))); - key ^= nextBits << (leftShiftBytes * 8); - leftShiftBytes = (leftShiftBytes + 1) % sizeof(cache_key_t); + // extension of a (signed) char during the conversion. + const cache_key_t nextByte = + static_cast(messageDigest.at(i)); + DEBUG_ASSERT(nextByte == (nextByte & 0xFF)); + key <<= 8; + key |= nextByte; } if (!isValidCacheKey(key)) { // Unlikely but possible diff --git a/src/util/cache.h b/src/util/cache.h index 560d69f140b..8a26615a5b9 100644 --- a/src/util/cache.h +++ b/src/util/cache.h @@ -2,6 +2,7 @@ #include #include +#include // static_assert namespace mixxx { @@ -19,9 +20,16 @@ inline constexpr bool isValidCacheKey(cache_key_t key) { return key != invalidCacheKey(); } -// Transforms a byte array containing a hash result into an -// unsigned integer number that should be almost unique. -cache_key_t calculateCacheKey(const QByteArray& bytes); +inline constexpr cache_key_t signedCacheKey(cache_key_t cacheKey) { + static_assert(sizeof(cache_key_t) == sizeof(cache_key_signed_t)); + return static_cast(cacheKey); +} + +// Truncate a message digest to obtain a cache key. +// +// The message digest should either be empty or contain at least as many +// bytes as the size (in bytes) of the cache key. +cache_key_t cacheKeyFromMessageDigest(const QByteArray& messageDigest); } // namespace mixxx From 5c6996d31494ca5accb069ec1e4a2123f3c63870 Mon Sep 17 00:00:00 2001 From: Uwe Klotz Date: Wed, 12 Feb 2020 18:29:07 +0100 Subject: [PATCH 6/6] Use static_assert from C++11 --- src/util/cache.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/util/cache.h b/src/util/cache.h index 8a26615a5b9..c4b4844ff72 100644 --- a/src/util/cache.h +++ b/src/util/cache.h @@ -21,7 +21,9 @@ inline constexpr bool isValidCacheKey(cache_key_t key) { } inline constexpr cache_key_t signedCacheKey(cache_key_t cacheKey) { - static_assert(sizeof(cache_key_t) == sizeof(cache_key_signed_t)); + static_assert( + sizeof(cache_key_t) == sizeof(cache_key_signed_t), + "size mismatch of signed/unsigned cache key types"); return static_cast(cacheKey); }