Skip to content

Commit

Permalink
Local discovery: Use db instead of filesystem
Browse files Browse the repository at this point in the history
We mostly trust the file watchers meaning that we don't re-scan the
local tree if we have done that recently and no file watcher events
have arrived. If the file watchers invalidate a subtree, we rescan
only that subtree.

Since we're not entirely sure the file watchers are reliable, we still
do full local discoveries regularly (1h by default). There is a config
file setting as well as an environment variable to control the interval.
  • Loading branch information
ckamm committed Oct 24, 2017
1 parent 66f0ce6 commit e85a339
Show file tree
Hide file tree
Showing 13 changed files with 292 additions and 40 deletions.
4 changes: 3 additions & 1 deletion doc/conffile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Some interesting values that can be set on the configuration file are:
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``forceSyncInterval`` | ``7200000`` | The duration of no activity after which a synchronization run shall be triggered automatically. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``fullLocalDiscoveryInterval`` | ``3600000`` | The interval after which the next synchronization will perform a full local discovery. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``notificationRefreshInterval`` | ``300000`` | Specifies the default interval of checking for new server notifications in milliseconds. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+

Expand Down Expand Up @@ -62,4 +64,4 @@ Some interesting values that can be set on the configuration file are:
| | | ``2`` for No Proxy. |
+ + +--------------------------------------------------------------------------------------------------------+
| | | ``3`` for HTTP(S) Proxy. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
35 changes: 29 additions & 6 deletions src/common/syncjournaldb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,13 +547,28 @@ bool SyncJournalDb::checkConnect()
return sqlFail("prepare _getFileRecordQueryByFileId", *_getFileRecordQueryByFileId);
}

// This query is used to skip discovery and fill the tree from the
// database instead
_getFilesBelowPathQuery.reset(new SqlQuery(_db));
if (_getFilesBelowPathQuery->prepare(
GET_FILE_RECORD_QUERY
" WHERE path > (?1||'/') AND path < (?1||'0') ORDER BY path||'/' ASC")) {
" WHERE path > (?1||'/') AND path < (?1||'0')"
// We want to ensure that the contents of a directory are sorted
// directly behind the directory itself. Without this ORDER BY
// an ordering like foo, foo-2, foo/file would be returned.
// With the trailing /, we get foo-2, foo, foo/file. This property
// is used in fill_tree_from_db().
" ORDER BY path||'/' ASC")) {
return sqlFail("prepare _getFilesBelowPathQuery", *_getFilesBelowPathQuery);
}

_getAllFilesQuery.reset(new SqlQuery(_db));
if (_getAllFilesQuery->prepare(
GET_FILE_RECORD_QUERY
" ORDER BY path||'/' ASC")) {
return sqlFail("prepare _getAllFilesQuery", *_getAllFilesQuery);
}

_setFileRecordQuery.reset(new SqlQuery(_db));
if (_setFileRecordQuery->prepare("INSERT OR REPLACE INTO metadata "
"(phash, pathlen, path, inode, uid, gid, mode, modtime, type, md5, fileid, remotePerm, filesize, ignoredChildrenRemote, contentChecksum, contentChecksumTypeId) "
Expand Down Expand Up @@ -704,6 +719,7 @@ void SyncJournalDb::close()
_getFileRecordQueryByInode.reset(0);
_getFileRecordQueryByFileId.reset(0);
_getFilesBelowPathQuery.reset(0);
_getAllFilesQuery.reset(0);
_setFileRecordQuery.reset(0);
_setFileRecordChecksumQuery.reset(0);
_setFileRecordLocalMetadataQuery.reset(0);
Expand Down Expand Up @@ -1133,16 +1149,23 @@ bool SyncJournalDb::getFilesBelowPath(const QByteArray &path, const std::functio
if (!checkConnect())
return false;

_getFilesBelowPathQuery->reset_and_clear_bindings();
_getFilesBelowPathQuery->bindValue(1, path);
// Since the path column doesn't store the starting /, the getFilesBelowPathQuery
// can't be used for the root path "". It would scan for (path > '/' and path < '0')
// and find nothing. So, unfortunately, we have to use a different query for
// retrieving the whole tree.
auto &query = path.isEmpty() ? _getAllFilesQuery : _getFilesBelowPathQuery;

if (!_getFilesBelowPathQuery->exec()) {
query->reset_and_clear_bindings();
if (query == _getFilesBelowPathQuery)
query->bindValue(1, path);

if (!query->exec()) {
return false;
}

while (_getFilesBelowPathQuery->next()) {
while (query->next()) {
SyncJournalFileRecord rec;
fillFileRecordFromGetQuery(rec, *_getFilesBelowPathQuery);
fillFileRecordFromGetQuery(rec, *query);
rowCallback(rec);
}

Expand Down
1 change: 1 addition & 0 deletions src/common/syncjournaldb.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ class OCSYNC_EXPORT SyncJournalDb : public QObject
QScopedPointer<SqlQuery> _getFileRecordQueryByInode;
QScopedPointer<SqlQuery> _getFileRecordQueryByFileId;
QScopedPointer<SqlQuery> _getFilesBelowPathQuery;
QScopedPointer<SqlQuery> _getAllFilesQuery;
QScopedPointer<SqlQuery> _setFileRecordQuery;
QScopedPointer<SqlQuery> _setFileRecordChecksumQuery;
QScopedPointer<SqlQuery> _setFileRecordLocalMetadataQuery;
Expand Down
3 changes: 3 additions & 0 deletions src/csync/csync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,9 @@ int csync_s::reinitialize() {
local.files.clear();
remote.files.clear();

local_discovery_style = LocalDiscoveryStyle::FilesystemOnly;
locally_touched_dirs.clear();

status = CSYNC_STATUS_INIT;
SAFE_FREE(error_string);

Expand Down
16 changes: 16 additions & 0 deletions src/csync/csync_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <stdbool.h>
#include <sqlite3.h>
#include <map>
#include <set>

#include "common/syncjournaldb.h"
#include "config_csync.h"
Expand Down Expand Up @@ -70,6 +71,11 @@ enum csync_replica_e {
REMOTE_REPLICA
};

enum class LocalDiscoveryStyle {
FilesystemOnly, //< read all local data from the filesystem
DatabaseAndFilesystem, //< read from the db, except for listed paths
};


/*
* This is a structurere similar to QStringRef
Expand Down Expand Up @@ -190,6 +196,16 @@ struct OCSYNC_EXPORT csync_s {
*/
bool read_remote_from_db = false;

LocalDiscoveryStyle local_discovery_style = LocalDiscoveryStyle::FilesystemOnly;

/**
* List of folder-relative directory paths that should be scanned on the
* filesystem if the local_discovery_style suggests it.
*
* Their parents will be scanned too. The paths don't start with a /.
*/
std::set<QByteArray> locally_touched_dirs;

bool ignore_hidden_files = true;

csync_s(const char *localUri, OCC::SyncJournalDb *statedb);
Expand Down
69 changes: 46 additions & 23 deletions src/csync/csync_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,28 +435,31 @@ static bool fill_tree_from_db(CSYNC *ctx, const char *uri)
{
int64_t count = 0;
QByteArray skipbase;
auto rowCallback = [ctx, &count, &skipbase](const OCC::SyncJournalFileRecord &rec) {
/* When selective sync is used, the database may have subtrees with a parent
* whose etag (md5) is _invalid_. These are ignored and shall not appear in the
* remote tree.
* Sometimes folders that are not ignored by selective sync get marked as
* _invalid_, but that is not a problem as the next discovery will retrieve
* their correct etags again and we don't run into this case.
*/
if( rec._etag == "_invalid_") {
qCDebug(lcUpdate, "%s selective sync excluded", rec._path.constData());
skipbase = rec._path;
skipbase += '/';
return;
}
auto &files = ctx->current == LOCAL_REPLICA ? ctx->local.files : ctx->remote.files;
auto rowCallback = [ctx, &count, &skipbase, &files](const OCC::SyncJournalFileRecord &rec) {
if (ctx->current == REMOTE_REPLICA) {
/* When selective sync is used, the database may have subtrees with a parent
* whose etag is _invalid_. These are ignored and shall not appear in the
* remote tree.
* Sometimes folders that are not ignored by selective sync get marked as
* _invalid_, but that is not a problem as the next discovery will retrieve
* their correct etags again and we don't run into this case.
*/
if (rec._etag == "_invalid_") {
qCDebug(lcUpdate, "%s selective sync excluded", rec._path.constData());
skipbase = rec._path;
skipbase += '/';
return;
}

/* Skip over all entries with the same base path. Note that this depends
* strongly on the ordering of the retrieved items. */
if( !skipbase.isEmpty() && rec._path.startsWith(skipbase) ) {
qCDebug(lcUpdate, "%s selective sync excluded because the parent is", rec._path.constData());
return;
} else {
skipbase.clear();
/* Skip over all entries with the same base path. Note that this depends
* strongly on the ordering of the retrieved items. */
if (!skipbase.isEmpty() && rec._path.startsWith(skipbase)) {
qCDebug(lcUpdate, "%s selective sync excluded because the parent is", rec._path.constData());
return;
} else {
skipbase.clear();
}
}

std::unique_ptr<csync_file_stat_t> st = csync_file_stat_t::fromSyncJournalFileRecord(rec);
Expand All @@ -477,7 +480,7 @@ static bool fill_tree_from_db(CSYNC *ctx, const char *uri)
}

/* store into result list. */
ctx->remote.files[rec._path] = std::move(st);
files[rec._path] = std::move(st);
++count;
};

Expand Down Expand Up @@ -522,6 +525,26 @@ int csync_ftw(CSYNC *ctx, const char *uri, csync_walker_fn fn,
int rc = 0;

bool do_read_from_db = (ctx->current == REMOTE_REPLICA && ctx->remote.read_from_db);
const char *db_uri = uri;

if (ctx->current == LOCAL_REPLICA
&& ctx->local_discovery_style == LocalDiscoveryStyle::DatabaseAndFilesystem) {
const char *local_uri = uri + strlen(ctx->local.uri);
if (*local_uri == '/')
++local_uri;
db_uri = local_uri;
do_read_from_db = true;

// Minor bug: local_uri doesn't have a trailing /. Example: Assume it's "d/foo"
// and we want to check whether we should read from the db. Assume "d/foo a" is
// in locally_touched_dirs. Then this check will say no, don't read from the db!
// (because "d/foo" < "d/foo a" < "d/foo/bar")
// C++14: Could skip the conversion to QByteArray here.
auto it = ctx->locally_touched_dirs.lower_bound(QByteArray(local_uri));
if (it != ctx->locally_touched_dirs.end() && it->startsWith(local_uri)) {
do_read_from_db = false;
}
}

if (!depth) {
mark_current_item_ignored(ctx, previous_fs, CSYNC_STATUS_INDIVIDUAL_TOO_DEEP);
Expand All @@ -533,7 +556,7 @@ int csync_ftw(CSYNC *ctx, const char *uri, csync_walker_fn fn,
// if the etag of this dir is still the same, its content is restored from the
// database.
if( do_read_from_db ) {
if( ! fill_tree_from_db(ctx, uri) ) {
if( ! fill_tree_from_db(ctx, db_uri) ) {
errno = ENOENT;
ctx->status_code = CSYNC_STATUS_OPENDIR_ERROR;
goto error;
Expand Down
105 changes: 95 additions & 10 deletions src/gui/folder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,12 +448,26 @@ int Folder::slotWipeErrorBlacklist()

void Folder::slotWatchedPathChanged(const QString &path)
{
if (!path.startsWith(this->path())) {
qCDebug(lcFolder) << "Changed path is not contained in folder, ignoring:" << path;
return;
}

auto relativePath = path.midRef(this->path().size());

// Add to list of locally modified paths
//
// We do this before checking for our own sync-related changes to make
// extra sure to not miss relevant changes.
auto relativePathBytes = relativePath.toUtf8();
_localDiscoveryPaths.insert(relativePathBytes);
qCDebug(lcFolder) << "local discovery: inserted" << relativePath << "due to file watcher";

// The folder watcher fires a lot of bogus notifications during
// a sync operation, both for actual user files and the database
// and log. Therefore we check notifications against operations
// the sync is doing to filter out our own changes.
#ifdef Q_OS_MAC
Q_UNUSED(path)
// On OSX the folder watcher does not report changes done by our
// own process. Therefore nothing needs to be done here!
#else
Expand All @@ -465,15 +479,12 @@ void Folder::slotWatchedPathChanged(const QString &path)
#endif

// Check that the mtime actually changed.
if (path.startsWith(this->path())) {
auto relativePath = path.mid(this->path().size());
SyncJournalFileRecord record;
if (_journal.getFileRecord(relativePath, &record)
&& record.isValid()
&& !FileSystem::fileChanged(path, record._fileSize, record._modtime)) {
qCInfo(lcFolder) << "Ignoring spurious notification for file" << relativePath;
return; // probably a spurious notification
}
SyncJournalFileRecord record;
if (_journal.getFileRecord(relativePathBytes, &record)
&& record.isValid()
&& !FileSystem::fileChanged(path, record._fileSize, record._modtime)) {
qCInfo(lcFolder) << "Ignoring spurious notification for file" << relativePath;
return; // probably a spurious notification
}

emit watchedFileChangedExternally(path);
Expand Down Expand Up @@ -645,6 +656,36 @@ void Folder::startSync(const QStringList &pathList)
setDirtyNetworkLimits();
setSyncOptions();

static qint64 fullLocalDiscoveryInterval = []() {
auto interval = ConfigFile().fullLocalDiscoveryInterval();
QByteArray env = qgetenv("OWNCLOUD_FULL_LOCAL_DISCOVERY_INTERVAL");
if (!env.isEmpty()) {
interval = env.toLongLong();
}
return interval;
}();
if (_folderWatcher && _folderWatcher->isReliable()
&& _timeSinceLastFullLocalDiscovery.isValid()
&& (fullLocalDiscoveryInterval < 0
|| _timeSinceLastFullLocalDiscovery.elapsed() < fullLocalDiscoveryInterval)) {
qCInfo(lcFolder) << "Allowing local discovery to read from the database";
_engine->setLocalDiscoveryOptions(LocalDiscoveryStyle::DatabaseAndFilesystem, _localDiscoveryPaths);

if (lcFolder().isDebugEnabled()) {
QByteArrayList paths;
for (auto &path : _localDiscoveryPaths)
paths.append(path);
qCDebug(lcFolder) << "local discovery paths: " << paths;
}

_previousLocalDiscoveryPaths = std::move(_localDiscoveryPaths);
} else {
qCInfo(lcFolder) << "Forbidding local discovery to read from the database";
_engine->setLocalDiscoveryOptions(LocalDiscoveryStyle::FilesystemOnly);
_previousLocalDiscoveryPaths.clear();
}
_localDiscoveryPaths.clear();

_engine->setIgnoreHiddenFiles(_definition.ignoreHiddenFiles);

QMetaObject::invokeMethod(_engine.data(), "startSync", Qt::QueuedConnection);
Expand Down Expand Up @@ -783,6 +824,24 @@ void Folder::slotSyncFinished(bool success)
journalDb()->setSelectiveSyncList(SyncJournalDb::SelectiveSyncWhiteList, QStringList());
}

// bug: This function uses many different criteria for "sync was successful" - investigate!
if ((_syncResult.status() == SyncResult::Success
|| _syncResult.status() == SyncResult::Problem)
&& success) {
if (_engine->lastLocalDiscoveryStyle() == LocalDiscoveryStyle::FilesystemOnly) {
_timeSinceLastFullLocalDiscovery.start();
}
qCDebug(lcFolder) << "Sync success, forgetting last sync's local discovery path list";
} else {
// On overall-failure we can't forget about last sync's local discovery
// paths yet, reuse them for the next sync again.
// C++17: Could use std::set::merge().
_localDiscoveryPaths.insert(
_previousLocalDiscoveryPaths.begin(), _previousLocalDiscoveryPaths.end());
qCDebug(lcFolder) << "Sync failed, keeping last sync's local discovery path list";
}
_previousLocalDiscoveryPaths.clear();

emit syncStateChange();

// The syncFinished result that is to be triggered here makes the folderman
Expand Down Expand Up @@ -851,6 +910,25 @@ void Folder::slotItemCompleted(const SyncFileItemPtr &item)
_folderWatcher->removePath(path() + item->_file);
}

// Success and failure of sync items adjust what the next sync is
// supposed to do.
//
// For successes, we want to wipe the file from the list to ensure we don't
// rediscover it even if this overall sync fails.
//
// For failures, we want to add the file to the list so the next sync
// will be able to retry it.
if (item->_status == SyncFileItem::Success
|| item->_status == SyncFileItem::FileIgnored
|| item->_status == SyncFileItem::Restoration
|| item->_status == SyncFileItem::Conflict) {
if (_previousLocalDiscoveryPaths.erase(item->_file.toUtf8()))
qCDebug(lcFolder) << "local discovery: wiped" << item->_file;
} else {
_localDiscoveryPaths.insert(item->_file.toUtf8());
qCDebug(lcFolder) << "local discovery: inserted" << item->_file << "due to sync failure";
}

_syncResult.processCompletedItem(item);

_fileLog->logItem(*item);
Expand Down Expand Up @@ -903,6 +981,11 @@ void Folder::slotScheduleThisFolder()
FolderMan::instance()->scheduleFolder(this);
}

void Folder::slotNextSyncFullLocalDiscovery()
{
_timeSinceLastFullLocalDiscovery.invalidate();
}

void Folder::scheduleThisFolderSoon()
{
if (!_scheduleSelfTimer.isActive()) {
Expand All @@ -925,6 +1008,8 @@ void Folder::registerFolderWatcher()
_folderWatcher.reset(new FolderWatcher(path(), this));
connect(_folderWatcher.data(), &FolderWatcher::pathChanged,
this, &Folder::slotWatchedPathChanged);
connect(_folderWatcher.data(), &FolderWatcher::lostChanges,
this, &Folder::slotNextSyncFullLocalDiscovery);
}

void Folder::slotAboutToRemoveAllFiles(SyncFileItem::Direction dir, bool *cancel)
Expand Down
Loading

0 comments on commit e85a339

Please sign in to comment.