Skip to content

Commit

Permalink
Merge pull request #2497 from uklotzde/libraryscannerhash
Browse files Browse the repository at this point in the history
LibraryScanner: Improve hashing of directory contents
  • Loading branch information
daschuer authored Feb 18, 2020
2 parents 2f64246 + 866704d commit 030e792
Show file tree
Hide file tree
Showing 15 changed files with 180 additions and 40 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ add_library(mixxx-lib STATIC EXCLUDE_FROM_ALL
src/util/audiosignal.cpp
src/util/autohidpi.cpp
src/util/battery/battery.cpp
src/util/cache.cpp
src/util/cmdlineargs.cpp
src/util/color/color.cpp
src/util/color/predefinedcolor.cpp
Expand Down Expand Up @@ -918,6 +919,7 @@ add_executable(mixxx-test
src/test/bpmcontrol_test.cpp
src/test/broadcastprofile_test.cpp
src/test/broadcastsettings_test.cpp
src/test/cache_test.cpp
src/test/channelhandle_test.cpp
src/test/configobject_test.cpp
src/test/controller_preset_validation_test.cpp
Expand Down
1 change: 1 addition & 0 deletions build/depends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1274,6 +1274,7 @@ def sources(self, build):
"src/util/xml.cpp",
"src/util/tapfilter.cpp",
"src/util/movinginterquartilemean.cpp",
"src/util/cache.cpp",
"src/util/console.cpp",
"src/util/color/color.cpp",
"src/util/db/dbconnection.cpp",
Expand Down
33 changes: 22 additions & 11 deletions src/library/dao/libraryhashdao.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,21 @@
#include "libraryhashdao.h"
#include "library/queryutil.h"

QHash<QString, int> LibraryHashDAO::getDirectoryHashes() {
namespace {

// Store hash values as a signed 64-bit integer. Otherwise values greater
// than 2^63-1 would be converted into a floating point numbers while
// losing precision!!
inline constexpr mixxx::cache_key_signed_t dbHash(mixxx::cache_key_t hash) {
return mixxx::signedCacheKey(hash);
}

} // anonymous namespace

QHash<QString, mixxx::cache_key_t> LibraryHashDAO::getDirectoryHashes() {
QSqlQuery query(m_database);
query.prepare("SELECT hash, directory_path FROM LibraryHashes");
QHash<QString, int> hashes;
QHash<QString, mixxx::cache_key_t> hashes;
if (!query.exec()) {
LOG_FAILED_QUERY(query);
}
Expand All @@ -20,15 +31,15 @@ QHash<QString, int> LibraryHashDAO::getDirectoryHashes() {
int directoryPathColumn = query.record().indexOf("directory_path");
while (query.next()) {
hashes[query.value(directoryPathColumn).toString()] =
query.value(hashColumn).toInt();
query.value(hashColumn).toULongLong();
}

return hashes;
}

int LibraryHashDAO::getDirectoryHash(const QString& dirPath) {
mixxx::cache_key_t LibraryHashDAO::getDirectoryHash(const QString& dirPath) {
//qDebug() << "LibraryHashDAO::getDirectoryHash" << QThread::currentThread() << m_database.connectionName();
int hash = -1;
mixxx::cache_key_t hash = mixxx::invalidCacheKey();

QSqlQuery query(m_database);
query.prepare("SELECT hash FROM LibraryHashes "
Expand All @@ -40,7 +51,7 @@ int LibraryHashDAO::getDirectoryHash(const QString& dirPath) {
}
//Grab a hash for this directory from the database, from the last time it was scanned.
if (query.next()) {
hash = query.value(query.record().indexOf("hash")).toInt();
hash = query.value(query.record().indexOf("hash")).toULongLong();
//qDebug() << "prev hash exists" << hash << dirPath;
} else {
//qDebug() << "prev hash does not exist" << dirPath;
Expand All @@ -49,13 +60,13 @@ int LibraryHashDAO::getDirectoryHash(const QString& dirPath) {
return hash;
}

void LibraryHashDAO::saveDirectoryHash(const QString& dirPath, const int hash) {
void LibraryHashDAO::saveDirectoryHash(const QString& dirPath, mixxx::cache_key_t hash) {
//qDebug() << "LibraryHashDAO::saveDirectoryHash" << QThread::currentThread() << m_database.connectionName();
QSqlQuery query(m_database);
query.prepare("INSERT INTO LibraryHashes (directory_path, hash, directory_deleted) "
"VALUES (:directory_path, :hash, :directory_deleted)");
query.bindValue(":directory_path", dirPath);
query.bindValue(":hash", hash);
query.bindValue(":hash", dbHash(hash));
query.bindValue(":directory_deleted", 0);

if (!query.exec()) {
Expand All @@ -65,8 +76,8 @@ void LibraryHashDAO::saveDirectoryHash(const QString& dirPath, const int hash) {
}

void LibraryHashDAO::updateDirectoryHash(const QString& dirPath,
const int newHash,
const int dir_deleted) {
mixxx::cache_key_t newHash,
int dir_deleted) {
//qDebug() << "LibraryHashDAO::updateDirectoryHash" << QThread::currentThread() << m_database.connectionName();
QSqlQuery query(m_database);
// By definition if we have calculated a new hash for a directory then it
Expand All @@ -75,7 +86,7 @@ void LibraryHashDAO::updateDirectoryHash(const QString& dirPath,
"SET hash=:hash, directory_deleted=:directory_deleted, "
"needs_verification=0 "
"WHERE directory_path=:directory_path");
query.bindValue(":hash", newHash);
query.bindValue(":hash", dbHash(newHash));
query.bindValue(":directory_deleted", dir_deleted);
query.bindValue(":directory_path", dirPath);

Expand Down
11 changes: 6 additions & 5 deletions src/library/dao/libraryhashdao.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <QSqlDatabase>

#include "library/dao/dao.h"
#include "util/cache.h"

class LibraryHashDAO : public DAO {
public:
Expand All @@ -16,11 +17,11 @@ class LibraryHashDAO : public DAO {
m_database = database;
};

QHash<QString, int> getDirectoryHashes();
int getDirectoryHash(const QString& dirPath);
void saveDirectoryHash(const QString& dirPath, const int hash);
void updateDirectoryHash(const QString& dirPath, const int newHash,
const int dir_deleted);
QHash<QString, mixxx::cache_key_t> getDirectoryHashes();
mixxx::cache_key_t getDirectoryHash(const QString& dirPath);
void saveDirectoryHash(const QString& dirPath, mixxx::cache_key_t hash);
void updateDirectoryHash(const QString& dirPath, mixxx::cache_key_t newHash,
int dir_deleted);
void markAsExisting(const QString& dirPath);
void invalidateAllDirectories();
void markUnverifiedDirectoriesAsDeleted();
Expand Down
2 changes: 1 addition & 1 deletion src/library/scanner/importfilestask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ImportFilesTask::ImportFilesTask(LibraryScanner* pScanner,
const ScannerGlobalPointer scannerGlobal,
const QString& dirPath,
const bool prevHashExists,
const int newHash,
const mixxx::cache_key_t newHash,
const QLinkedList<QFileInfo>& filesToImport,
const QLinkedList<QFileInfo>& possibleCovers,
SecurityTokenPointer pToken)
Expand Down
5 changes: 2 additions & 3 deletions src/library/scanner/importfilestask.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

#include "util/sandbox.h"
#include "library/scanner/scannertask.h"
#include "library/scanner/scannerglobal.h"

// Import the provided files. Successful if the scan completed without being
// cancelled. False if the scan was cancelled part-way through.
Expand All @@ -17,7 +16,7 @@ class ImportFilesTask : public ScannerTask {
const ScannerGlobalPointer scannerGlobal,
const QString& dirPath,
const bool prevHashExists,
const int newHash,
const mixxx::cache_key_t newHash,
const QLinkedList<QFileInfo>& filesToImport,
const QLinkedList<QFileInfo>& possibleCovers,
SecurityTokenPointer pToken);
Expand All @@ -28,7 +27,7 @@ class ImportFilesTask : public ScannerTask {
private:
const QString m_dirPath;
const bool m_prevHashExists;
const int m_newHash;
const mixxx::cache_key_t m_newHash;
const QLinkedList<QFileInfo> m_filesToImport;
const QLinkedList<QFileInfo> m_possibleCovers;
SecurityTokenPointer m_pToken;
Expand Down
24 changes: 14 additions & 10 deletions src/library/scanner/libraryscanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,8 @@ mixxx::Logger kLogger("LibraryScanner");

QAtomicInt s_instanceCounter(0);

const QString kDeleteOrphanedLibraryScannerDirectories =
"delete from LibraryHashes where hash <> 0 and directory_path not in (select directory from track_locations)";

// Returns the number of affected rows or -1 on error
int execCleanupQuery(QSqlDatabase database, const QString& statement) {
FwdSqlQuery query(database, statement);
int execCleanupQuery(FwdSqlQuery& query) {
VERIFY_OR_DEBUG_ASSERT(query.isPrepared()) {
return -1;
}
Expand Down Expand Up @@ -142,11 +138,19 @@ void LibraryScanner::run() {
// Clean up the database and fix inconsistencies from previous runs.
// See also: https://bugs.launchpad.net/mixxx/+bug/1846945
{
kLogger.info() << "Cleaning up database...";
kLogger.info()
<< "Cleaning up database...";
PerformanceTimer timer;
timer.start();
auto numRows = execCleanupQuery(dbConnection,
kDeleteOrphanedLibraryScannerDirectories);
const auto sqlStmt = QStringLiteral(
"DELETE FROM LibraryHashes WHERE hash <> :unequalHash "
"AND directory_path NOT IN "
"(SELECT directory FROM track_locations)");
FwdSqlQuery query(dbConnection, sqlStmt);
query.bindValue(
QStringLiteral(":unequalHash"),
static_cast<mixxx::cache_key_signed_t>(mixxx::invalidCacheKey()));
auto numRows = execCleanupQuery(query);
if (numRows < 0) {
kLogger.warning()
<< "Failed to delete orphaned directory hashes";
Expand Down Expand Up @@ -189,7 +193,7 @@ void LibraryScanner::slotStartScan() {
changeScannerState(SCANNING);

QSet<QString> trackLocations = m_trackDao.getTrackLocations();
QHash<QString, int> directoryHashes = m_libraryHashDao.getDirectoryHashes();
QHash<QString, mixxx::cache_key_t> directoryHashes = m_libraryHashDao.getDirectoryHashes();
QRegExp extensionFilter(SoundSourceProxy::getSupportedFileNamesRegex());
QRegExp coverExtensionFilter =
QRegExp(CoverArtUtils::supportedCoverArtExtensionsRegex(),
Expand Down Expand Up @@ -515,7 +519,7 @@ void LibraryScanner::queueTask(ScannerTask* pTask) {
}

void LibraryScanner::slotDirectoryHashedAndScanned(const QString& directoryPath,
bool newDirectory, int hash) {
bool newDirectory, mixxx::cache_key_t hash) {
ScopedTimer timer("LibraryScanner::slotDirectoryHashedAndScanned");
//kLogger.debug() << "sloDirectoryHashedAndScanned" << directoryPath
// << newDirectory << hash;
Expand Down
2 changes: 1 addition & 1 deletion src/library/scanner/libraryscanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class LibraryScanner : public QThread {

// ScannerTask signal handlers.
void slotDirectoryHashedAndScanned(const QString& directoryPath,
bool newDirectory, int hash);
bool newDirectory, mixxx::cache_key_t hash);
void slotDirectoryUnchanged(const QString& directoryPath);
void slotTrackExists(const QString& trackPath);
void slotAddNewTrack(const QString& trackPath);
Expand Down
12 changes: 7 additions & 5 deletions src/library/scanner/recursivescandirectorytask.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <QCryptographicHash>
#include <QDirIterator>

#include "library/scanner/recursivescandirectorytask.h"
Expand Down Expand Up @@ -38,7 +39,8 @@ void RecursiveScanDirectoryTask::run() {
QLinkedList<QFileInfo> filesToImport;
QLinkedList<QFileInfo> possibleCovers;
QLinkedList<QDir> dirsToScan;
QStringList newHashStr;

QCryptographicHash hasher(QCryptographicHash::Sha256);

// TODO(rryan) benchmark QRegExp copy versus QMutex/QRegExp in ScannerGlobal
// versus slicing the extension off and checking for set/list containment.
Expand All @@ -54,7 +56,7 @@ void RecursiveScanDirectoryTask::run() {
if (currentFileInfo.isFile()) {
const QString& fileName = currentFileInfo.fileName();
if (supportedExtensionsRegex.indexIn(fileName) != -1) {
newHashStr.append(currentFile);
hasher.addData(currentFile.toUtf8());
filesToImport.append(currentFileInfo);
} else if (supportedCoverExtensionsRegex.indexIn(fileName) != -1) {
possibleCovers.append(currentFileInfo);
Expand All @@ -73,13 +75,13 @@ void RecursiveScanDirectoryTask::run() {

// Note: A hash of "0" is a real hash if the directory contains no files!
// Calculate a hash of the directory's file list.
int newHash = qHash(newHashStr.join(""));
const mixxx::cache_key_t newHash = mixxx::cacheKeyFromMessageDigest(hasher.result());

QString dirPath = m_dir.path();

// Try to retrieve a hash from the last time that directory was scanned.
int prevHash = m_scannerGlobal->directoryHashInDatabase(dirPath);
bool prevHashExists = prevHash != -1;
const mixxx::cache_key_t prevHash = m_scannerGlobal->directoryHashInDatabase(dirPath);
const bool prevHashExists = mixxx::isValidCacheKey(prevHash);

if (prevHashExists || m_scanUnhashed) {
// Compare the hashes, and if they don't match, rescan the files in that
Expand Down
7 changes: 4 additions & 3 deletions src/library/scanner/scannerglobal.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <QMutexLocker>
#include <QSharedPointer>

#include "util/cache.h"
#include "util/task.h"
#include "util/performancetimer.h"

Expand Down Expand Up @@ -37,7 +38,7 @@ class DirInfo {
class ScannerGlobal {
public:
ScannerGlobal(const QSet<QString>& trackLocations,
const QHash<QString, int>& directoryHashes,
const QHash<QString, mixxx::cache_key_t>& directoryHashes,
const QRegExp& supportedExtensionsMatcher,
const QRegExp& supportedCoverExtensionsMatcher,
const QStringList& directoriesBlacklist)
Expand All @@ -62,7 +63,7 @@ class ScannerGlobal {
}

// Returns the directory hash if it exists or -1 if it doesn't.
inline int directoryHashInDatabase(const QString& directoryPath) const {
inline mixxx::cache_key_t directoryHashInDatabase(const QString& directoryPath) const {
return m_directoryHashes.value(directoryPath, -1);
}

Expand Down Expand Up @@ -177,7 +178,7 @@ class ScannerGlobal {
TaskWatcher m_watcher;

QSet<QString> m_trackLocations;
QHash<QString, int> m_directoryHashes;
QHash<QString, mixxx::cache_key_t> m_directoryHashes;

mutable QMutex m_supportedExtensionsMatcherMutex;
QRegExp m_supportedExtensionsMatcher;
Expand Down
2 changes: 1 addition & 1 deletion src/library/scanner/scannertask.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class ScannerTask : public QObject, public QRunnable {
void taskDone(bool success);
void queueTask(ScannerTask* pTask);
void directoryHashedAndScanned(const QString& directoryPath,
bool newDirectory, int hash);
bool newDirectory, mixxx::cache_key_t hash);
void directoryUnchanged(const QString& directoryPath);
void trackExists(const QString& filePath);
void addNewTrack(const QString& filePath);
Expand Down
2 changes: 2 additions & 0 deletions src/mixxxapplication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "soundio/soundmanagerutil.h"
#include "track/track.h"
#include "track/trackref.h"
#include "util/cache.h"
#include "util/color/rgbcolor.h"
#include "util/math.h"

Expand Down Expand Up @@ -66,6 +67,7 @@ void MixxxApplication::registerMetaTypes() {
qRegisterMetaType<QList<CrateId>>();
qRegisterMetaType<TrackPointer>();
qRegisterMetaType<mixxx::ReplayGain>("mixxx::ReplayGain");
qRegisterMetaType<mixxx::cache_key_t>("mixxx::cache_key_t");
qRegisterMetaType<mixxx::Bpm>("mixxx::Bpm");
qRegisterMetaType<mixxx::Duration>("mixxx::Duration");
qRegisterMetaType<std::optional<mixxx::RgbColor>>("std::optional<mixxx::RgbColor>");
Expand Down
41 changes: 41 additions & 0 deletions src/test/cache_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#include "util/cache.h"

#include <gtest/gtest.h>

#include <QtDebug>

namespace {

class CacheTest : public testing::Test {
};

TEST_F(CacheTest, InvalidCacheKey) {
EXPECT_FALSE(mixxx::isValidCacheKey(
mixxx::invalidCacheKey()));
}

TEST_F(CacheTest, InvalidCacheKeyEmptyByteArray) {
EXPECT_FALSE(mixxx::isValidCacheKey(
mixxx::cacheKeyFromMessageDigest(QByteArray())));
EXPECT_FALSE(mixxx::isValidCacheKey(
mixxx::cacheKeyFromMessageDigest("")));
EXPECT_FALSE(mixxx::isValidCacheKey(
mixxx::cacheKeyFromMessageDigest("\0")));
}

TEST_F(CacheTest, ValidCacheKey) {
EXPECT_TRUE(mixxx::isValidCacheKey(
mixxx::cacheKeyFromMessageDigest(QByteArrayLiteral("test"))));
}

TEST_F(CacheTest, ValidCacheKeySingleZeroAscii) {
EXPECT_TRUE(mixxx::isValidCacheKey(
mixxx::cacheKeyFromMessageDigest(QByteArray("0"))));
}

TEST_F(CacheTest, ValidCacheKeySingleZeroByte) {
EXPECT_TRUE(mixxx::isValidCacheKey(
mixxx::cacheKeyFromMessageDigest(QByteArray("\0", 1))));
}

} // anonymous namespace
Loading

0 comments on commit 030e792

Please sign in to comment.