Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up local discovery by not doing it #6087

Merged
merged 3 commits into from
Oct 24, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/conffile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Some interesting values that can be set on the configuration file are:
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``forceSyncInterval`` | ``7200000`` | The duration of no activity after which a synchronization run shall be triggered automatically. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``fullLocalDiscoveryInterval`` | ``3600000`` | The interval after which the next synchronization will perform a full local discovery. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
| ``notificationRefreshInterval`` | ``300000`` | Specifies the default interval of checking for new server notifications in milliseconds. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+

Expand Down Expand Up @@ -62,4 +64,4 @@ Some interesting values that can be set on the configuration file are:
| | | ``2`` for No Proxy. |
+ + +--------------------------------------------------------------------------------------------------------+
| | | ``3`` for HTTP(S) Proxy. |
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
+---------------------------------+---------------+--------------------------------------------------------------------------------------------------------+
35 changes: 29 additions & 6 deletions src/common/syncjournaldb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,13 +547,28 @@ bool SyncJournalDb::checkConnect()
return sqlFail("prepare _getFileRecordQueryByFileId", *_getFileRecordQueryByFileId);
}

// This query is used to skip discovery and fill the tree from the
// database instead
_getFilesBelowPathQuery.reset(new SqlQuery(_db));
if (_getFilesBelowPathQuery->prepare(
GET_FILE_RECORD_QUERY
" WHERE path > (?1||'/') AND path < (?1||'0') ORDER BY path||'/' ASC")) {
" WHERE path > (?1||'/') AND path < (?1||'0')"
// We want to ensure that the contents of a directory are sorted
// directly behind the directory itself. Without this ORDER BY
// an ordering like foo, foo-2, foo/file would be returned.
// With the trailing /, we get foo-2, foo, foo/file. This property
// is used in fill_tree_from_db().
" ORDER BY path||'/' ASC")) {
return sqlFail("prepare _getFilesBelowPathQuery", *_getFilesBelowPathQuery);
}

_getAllFilesQuery.reset(new SqlQuery(_db));
if (_getAllFilesQuery->prepare(
GET_FILE_RECORD_QUERY
" ORDER BY path||'/' ASC")) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we concatenate '/' to the path while ordering?

return sqlFail("prepare _getAllFilesQuery", *_getAllFilesQuery);
}

_setFileRecordQuery.reset(new SqlQuery(_db));
if (_setFileRecordQuery->prepare("INSERT OR REPLACE INTO metadata "
"(phash, pathlen, path, inode, uid, gid, mode, modtime, type, md5, fileid, remotePerm, filesize, ignoredChildrenRemote, contentChecksum, contentChecksumTypeId) "
Expand Down Expand Up @@ -704,6 +719,7 @@ void SyncJournalDb::close()
_getFileRecordQueryByInode.reset(0);
_getFileRecordQueryByFileId.reset(0);
_getFilesBelowPathQuery.reset(0);
_getAllFilesQuery.reset(0);
_setFileRecordQuery.reset(0);
_setFileRecordChecksumQuery.reset(0);
_setFileRecordLocalMetadataQuery.reset(0);
Expand Down Expand Up @@ -1133,16 +1149,23 @@ bool SyncJournalDb::getFilesBelowPath(const QByteArray &path, const std::functio
if (!checkConnect())
return false;

_getFilesBelowPathQuery->reset_and_clear_bindings();
_getFilesBelowPathQuery->bindValue(1, path);
// Since the path column doesn't store the starting /, the getFilesBelowPathQuery
// can't be used for the root path "". It would scan for (path > '/' and path < '0')
// and find nothing. So, unfortunately, we have to use a different query for
// retrieving the whole tree.
auto &query = path.isEmpty() ? _getAllFilesQuery : _getFilesBelowPathQuery;

if (!_getFilesBelowPathQuery->exec()) {
query->reset_and_clear_bindings();
if (query == _getFilesBelowPathQuery)
query->bindValue(1, path);

if (!query->exec()) {
return false;
}

while (_getFilesBelowPathQuery->next()) {
while (query->next()) {
SyncJournalFileRecord rec;
fillFileRecordFromGetQuery(rec, *_getFilesBelowPathQuery);
fillFileRecordFromGetQuery(rec, *query);
rowCallback(rec);
}

Expand Down
1 change: 1 addition & 0 deletions src/common/syncjournaldb.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ class OCSYNC_EXPORT SyncJournalDb : public QObject
QScopedPointer<SqlQuery> _getFileRecordQueryByInode;
QScopedPointer<SqlQuery> _getFileRecordQueryByFileId;
QScopedPointer<SqlQuery> _getFilesBelowPathQuery;
QScopedPointer<SqlQuery> _getAllFilesQuery;
QScopedPointer<SqlQuery> _setFileRecordQuery;
QScopedPointer<SqlQuery> _setFileRecordChecksumQuery;
QScopedPointer<SqlQuery> _setFileRecordLocalMetadataQuery;
Expand Down
3 changes: 3 additions & 0 deletions src/csync/csync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,9 @@ int csync_s::reinitialize() {
local.files.clear();
remote.files.clear();

local_discovery_style = LocalDiscoveryStyle::FilesystemOnly;
locally_touched_dirs.clear();

status = CSYNC_STATUS_INIT;
SAFE_FREE(error_string);

Expand Down
16 changes: 16 additions & 0 deletions src/csync/csync_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <stdbool.h>
#include <sqlite3.h>
#include <map>
#include <set>

#include "common/syncjournaldb.h"
#include "config_csync.h"
Expand Down Expand Up @@ -70,6 +71,11 @@ enum csync_replica_e {
REMOTE_REPLICA
};

enum class LocalDiscoveryStyle {
FilesystemOnly, //< read all local data from the filesystem
DatabaseAndFilesystem, //< read from the db, except for listed paths
};


/*
* This is a structurere similar to QStringRef
Expand Down Expand Up @@ -190,6 +196,16 @@ struct OCSYNC_EXPORT csync_s {
*/
bool read_remote_from_db = false;

LocalDiscoveryStyle local_discovery_style = LocalDiscoveryStyle::FilesystemOnly;

/**
* List of folder-relative directory paths that should be scanned on the
* filesystem if the local_discovery_style suggests it.
*
* Their parents will be scanned too. The paths don't start with a /.
*/
std::set<QByteArray> locally_touched_dirs;

bool ignore_hidden_files = true;

csync_s(const char *localUri, OCC::SyncJournalDb *statedb);
Expand Down
69 changes: 46 additions & 23 deletions src/csync/csync_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,28 +435,31 @@ static bool fill_tree_from_db(CSYNC *ctx, const char *uri)
{
int64_t count = 0;
QByteArray skipbase;
auto rowCallback = [ctx, &count, &skipbase](const OCC::SyncJournalFileRecord &rec) {
/* When selective sync is used, the database may have subtrees with a parent
* whose etag (md5) is _invalid_. These are ignored and shall not appear in the
* remote tree.
* Sometimes folders that are not ignored by selective sync get marked as
* _invalid_, but that is not a problem as the next discovery will retrieve
* their correct etags again and we don't run into this case.
*/
if( rec._etag == "_invalid_") {
qCDebug(lcUpdate, "%s selective sync excluded", rec._path.constData());
skipbase = rec._path;
skipbase += '/';
return;
}
auto &files = ctx->current == LOCAL_REPLICA ? ctx->local.files : ctx->remote.files;
auto rowCallback = [ctx, &count, &skipbase, &files](const OCC::SyncJournalFileRecord &rec) {
if (ctx->current == REMOTE_REPLICA) {
/* When selective sync is used, the database may have subtrees with a parent
* whose etag is _invalid_. These are ignored and shall not appear in the
* remote tree.
* Sometimes folders that are not ignored by selective sync get marked as
* _invalid_, but that is not a problem as the next discovery will retrieve
* their correct etags again and we don't run into this case.
*/
if (rec._etag == "_invalid_") {
qCDebug(lcUpdate, "%s selective sync excluded", rec._path.constData());
skipbase = rec._path;
skipbase += '/';
return;
}

/* Skip over all entries with the same base path. Note that this depends
* strongly on the ordering of the retrieved items. */
if( !skipbase.isEmpty() && rec._path.startsWith(skipbase) ) {
qCDebug(lcUpdate, "%s selective sync excluded because the parent is", rec._path.constData());
return;
} else {
skipbase.clear();
/* Skip over all entries with the same base path. Note that this depends
* strongly on the ordering of the retrieved items. */
if (!skipbase.isEmpty() && rec._path.startsWith(skipbase)) {
qCDebug(lcUpdate, "%s selective sync excluded because the parent is", rec._path.constData());
return;
} else {
skipbase.clear();
}
}

std::unique_ptr<csync_file_stat_t> st = csync_file_stat_t::fromSyncJournalFileRecord(rec);
Expand All @@ -477,7 +480,7 @@ static bool fill_tree_from_db(CSYNC *ctx, const char *uri)
}

/* store into result list. */
ctx->remote.files[rec._path] = std::move(st);
files[rec._path] = std::move(st);
++count;
};

Expand Down Expand Up @@ -522,6 +525,26 @@ int csync_ftw(CSYNC *ctx, const char *uri, csync_walker_fn fn,
int rc = 0;

bool do_read_from_db = (ctx->current == REMOTE_REPLICA && ctx->remote.read_from_db);
const char *db_uri = uri;

if (ctx->current == LOCAL_REPLICA
&& ctx->local_discovery_style == LocalDiscoveryStyle::DatabaseAndFilesystem) {
const char *local_uri = uri + strlen(ctx->local.uri);
if (*local_uri == '/')
++local_uri;
db_uri = local_uri;
do_read_from_db = true;

// Minor bug: local_uri doesn't have a trailing /. Example: Assume it's "d/foo"
// and we want to check whether we should read from the db. Assume "d/foo a" is
// in locally_touched_dirs. Then this check will say no, don't read from the db!
// (because "d/foo" < "d/foo a" < "d/foo/bar")
// C++14: Could skip the conversion to QByteArray here.
auto it = ctx->locally_touched_dirs.lower_bound(QByteArray(local_uri));
if (it != ctx->locally_touched_dirs.end() && it->startsWith(local_uri)) {
do_read_from_db = false;
}
}

if (!depth) {
mark_current_item_ignored(ctx, previous_fs, CSYNC_STATUS_INDIVIDUAL_TOO_DEEP);
Expand All @@ -533,7 +556,7 @@ int csync_ftw(CSYNC *ctx, const char *uri, csync_walker_fn fn,
// if the etag of this dir is still the same, its content is restored from the
// database.
if( do_read_from_db ) {
if( ! fill_tree_from_db(ctx, uri) ) {
if( ! fill_tree_from_db(ctx, db_uri) ) {
errno = ENOENT;
ctx->status_code = CSYNC_STATUS_OPENDIR_ERROR;
goto error;
Expand Down
Loading