Skip to content

Commit

Permalink
Excludes: During directory traversal, use QRegularExpression
Browse files Browse the repository at this point in the history
On Mac, this halves the time spent in csync_excluded_traversal
when using check_csync_excluded_performance. A similar performance
increase is seen on linux.
  • Loading branch information
guruz authored and ckamm committed Sep 29, 2017
1 parent 7af81f7 commit 1da7024
Show file tree
Hide file tree
Showing 7 changed files with 293 additions and 42 deletions.
133 changes: 111 additions & 22 deletions src/csync/csync_exclude.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@

#include "common/utility.h"

#include <QString>

#ifdef _WIN32
#include <io.h>
#else
Expand Down Expand Up @@ -234,20 +236,28 @@ static CSYNC_EXCLUDE_TYPE _csync_excluded_common(c_strlist_t *excludes, const ch
}
blen = strlen(bname);

rc = csync_fnmatch("._sync_*.db*", bname, 0);
if (rc == 0) {
match = CSYNC_FILE_SILENTLY_EXCLUDED;
goto out;
}
rc = csync_fnmatch(".sync_*.db*", bname, 0);
if (rc == 0) {
match = CSYNC_FILE_SILENTLY_EXCLUDED;
goto out;
}
rc = csync_fnmatch(".csync_journal.db*", bname, 0);
if (rc == 0) {
match = CSYNC_FILE_SILENTLY_EXCLUDED;
goto out;
// 9 = strlen(".sync_.db")
if (blen >= 9 && bname[0] == '.') {
rc = csync_fnmatch("._sync_*.db*", bname, 0);
if (rc == 0) {
match = CSYNC_FILE_SILENTLY_EXCLUDED;
goto out;
}
rc = csync_fnmatch(".sync_*.db*", bname, 0);
if (rc == 0) {
match = CSYNC_FILE_SILENTLY_EXCLUDED;
goto out;
}
rc = csync_fnmatch(".csync_journal.db*", bname, 0);
if (rc == 0) {
match = CSYNC_FILE_SILENTLY_EXCLUDED;
goto out;
}
rc = csync_fnmatch(".owncloudsync.log*", bname, 0);
if (rc == 0) {
match = CSYNC_FILE_SILENTLY_EXCLUDED;
goto out;
}
}

// check the strlen and ignore the file if its name is longer than 254 chars.
Expand Down Expand Up @@ -303,12 +313,6 @@ static CSYNC_EXCLUDE_TYPE _csync_excluded_common(c_strlist_t *excludes, const ch
goto out;
}

rc = csync_fnmatch(".owncloudsync.log*", bname, 0);
if (rc == 0) {
match = CSYNC_FILE_SILENTLY_EXCLUDED;
goto out;
}

if (!OCC::Utility::shouldUploadConflictFiles()) {
if (OCC::Utility::isConflictFile(bname)) {
match = CSYNC_FILE_EXCLUDE_CONFLICT;
Expand Down Expand Up @@ -415,8 +419,93 @@ static CSYNC_EXCLUDE_TYPE _csync_excluded_common(c_strlist_t *excludes, const ch
return match;
}

CSYNC_EXCLUDE_TYPE csync_excluded_traversal(c_strlist_t *excludes, const char *path, int filetype) {
return _csync_excluded_common(excludes, path, filetype, false);
/* Only for bnames (not paths).
 *
 * Translates a glob-style exclude pattern into regular expression syntax:
 * '*' becomes ".*", '?' becomes "." and every other character is matched
 * literally.
 */
static QString convertToBnameRegexpSyntax(QString exclude)
{
    // QRegularExpression::escape() backslash-escapes the wildcards as well, so
    // they show up as "\*" and "\?" in the escaped string. Both have to be
    // replaced in that escaped form: replacing a bare "?" would leave the
    // backslash in place and produce "\." (a literal dot) instead of ".".
    QString s = QRegularExpression::escape(exclude);
    s.replace("\\*", ".*");
    s.replace("\\?", ".");
    //qDebug() << "Converted pattern" << exclude << "to regex" << s;
    return s;
}

/* Rebuilds the traversal exclude data stored in ctx from ctx->excludes. */
void csync_exclude_traversal_prepare(CSYNC *ctx)
{
    auto &traversalExcludes = ctx->parsed_traversal_excludes;
    traversalExcludes.prepare(ctx->excludes);
}

/* Splits the flat exclude list into two parts:
 *  - patterns containing a '/' are collected in list_patterns_with_slashes,
 *  - all other patterns are merged into the single regexp_exclude, whose
 *    first capture group holds the plain excludes and whose second capture
 *    group holds the "exclude and remove" entries (those prefixed with ']').
 */
void csync_s::TraversalExcludes::prepare(c_strlist_t *excludes)
{
    // Throw away whatever an earlier call collected
    c_strlist_destroy(list_patterns_with_slashes);
    list_patterns_with_slashes = nullptr;

    // "a^" matches nothing, so both alternations start out matching nothing
    QString plainAlternatives = "a^";
    QString removeAlternatives = "a^";

    const size_t total = excludes ? excludes->count : 0;
    for (unsigned int idx = 0; idx < total; idx++) {
        char *entry = excludes->vector[idx];

        // empty line
        if (entry[0] == '\n' || entry[0] == '\r') {
            continue;
        }

        /* Entries with a slash stay on the C-style codepath without QRegularExpression */
        if (strchr(entry, '/')) {
            _csync_exclude_add(&list_patterns_with_slashes, entry);
            continue;
        }

        /* Everything else goes into the QRegularExpression; a leading ']' selects
         * the "exclude and remove" alternation and is not part of the pattern */
        const bool removeToo = (entry[0] == ']');
        if (removeToo) {
            entry++;
        }
        QString &target = removeToo ? removeAlternatives : plainAlternatives;
        if (target.size() > 0) {
            target.append("|");
        }
        target.append(convertToBnameRegexpSyntax(entry));
    }

    QString pattern = "^(";
    pattern += plainAlternatives;
    pattern += ")$|^(";
    pattern += removeAlternatives;
    pattern += ")$";
    regexp_exclude.setPattern(pattern);

    QRegularExpression::PatternOptions options = QRegularExpression::OptimizeOnFirstUsageOption;
    if (OCC::Utility::fsCasePreserving()) {
        options |= QRegularExpression::CaseInsensitiveOption;
    }
    regexp_exclude.setPatternOptions(options);
    regexp_exclude.optimize();
}

/* Traversal-time exclude check: first the patterns containing slashes via the
 * C-style matcher, then the basename of path against the combined regexp. */
CSYNC_EXCLUDE_TYPE csync_excluded_traversal(CSYNC *ctx, const char *path, int filetype) {
    auto &traversal = ctx->parsed_traversal_excludes;

    /* Static patterns plus the reduced list of slash patterns (usually empty) */
    const CSYNC_EXCLUDE_TYPE staticResult =
        _csync_excluded_common(traversal.list_patterns_with_slashes, path, filetype, false);
    if (staticResult != CSYNC_NOT_EXCLUDED) {
        return staticResult;
    }

    /* Without an exclude list there is nothing for the regexp to check */
    if (!ctx->excludes) {
        return CSYNC_NOT_EXCLUDED;
    }

    /* The regexp only ever sees the last path component, without the '/' */
    const char *lastSlash = strrchr(path, '/');
    const char *basename = lastSlash ? lastSlash + 1 : path;

    const auto result = traversal.regexp_exclude.match(QString::fromUtf8(basename));
    if (!result.hasMatch()) {
        return CSYNC_NOT_EXCLUDED;
    }
    if (!result.captured(1).isEmpty()) {
        /* first group: plain excludes */
        return CSYNC_FILE_EXCLUDE_LIST;
    }
    if (!result.captured(2).isEmpty()) {
        /* second group: exclude and remove */
        return CSYNC_FILE_EXCLUDE_AND_REMOVE;
    }
    return CSYNC_NOT_EXCLUDED;
}

CSYNC_EXCLUDE_TYPE csync_excluded_no_ctx(c_strlist_t *excludes, const char *path, int filetype) {
Expand Down
14 changes: 12 additions & 2 deletions src/csync/csync_exclude.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ int OCSYNC_EXPORT _csync_exclude_add(c_strlist_t **inList, const char *string);
*/
int OCSYNC_EXPORT csync_exclude_load(const char *fname, c_strlist_t **list);

/**
* @brief Call this once all exclude lists have been loaded
*
* Used to initialize internal data structures that build upon the loaded excludes.
*
* @param ctx
*/
void OCSYNC_EXPORT csync_exclude_traversal_prepare(CSYNC *ctx);

/**
* @brief Check if the given path should be excluded in a traversal situation.
*
Expand All @@ -66,10 +75,11 @@ int OCSYNC_EXPORT csync_exclude_load(const char *fname, c_strlist_t **list);
*
* @return 2 if excluded and needs cleanup, 1 if excluded, 0 if not.
*/
CSYNC_EXCLUDE_TYPE csync_excluded_traversal(c_strlist_t *excludes, const char *path, int filetype);
CSYNC_EXCLUDE_TYPE OCSYNC_EXPORT csync_excluded_traversal(CSYNC *ctx, const char *path, int filetype);

/**
* @brief csync_excluded_no_ctx
* @brief Checks all path components if the whole path should be excluded
*
* @param excludes
* @param path
* @param filetype
Expand Down
14 changes: 12 additions & 2 deletions src/csync/csync_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@

#include "csync_macros.h"

#include <QRegularExpression>

/**
* How deep to scan directories.
*/
Expand Down Expand Up @@ -102,10 +104,18 @@ struct OCSYNC_EXPORT csync_s {
void *checksum_userdata = nullptr;

} callbacks;
c_strlist_t *excludes = nullptr;


OCC::SyncJournalDb *statedb;

c_strlist_t *excludes = nullptr; /* list of individual patterns collected from all exclude files */
/* Exclude data in the form used by csync_excluded_traversal(). */
struct TraversalExcludes {
    /* Rebuilds both members below from the given flat exclude list. */
    void prepare(c_strlist_t *excludes);

    /* Combined regexp for all patterns without a '/'; it is matched against
     * the basename only. Capture group 1 holds the plain excludes, capture
     * group 2 the "exclude and remove" entries. */
    QRegularExpression regexp_exclude;
    /* Patterns containing a '/'; these are still checked with the C-style matcher. */
    c_strlist_t *list_patterns_with_slashes = nullptr;
    /* FIXME ^^ at a later point use QRegularExpression too if those become popular */
} parsed_traversal_excludes;

struct {
std::map<QByteArray, QByteArray> folder_renamed_to; // map from->to
std::map<QByteArray, QByteArray> folder_renamed_from; // map to->from
Expand Down
4 changes: 2 additions & 2 deletions src/csync/csync_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ static int _csync_detect_update(CSYNC *ctx, std::unique_ptr<csync_file_stat_t> f
excluded =CSYNC_FILE_EXCLUDE_STAT_FAILED;
} else {
/* Check if file is excluded */
excluded = csync_excluded_traversal(ctx->excludes, fs->path, fs->type);
excluded = csync_excluded_traversal(ctx, fs->path, fs->type);
}

if( excluded == CSYNC_NOT_EXCLUDED ) {
Expand Down Expand Up @@ -464,7 +464,7 @@ static bool fill_tree_from_db(CSYNC *ctx, const char *uri)
/* Check for exclusion from the tree.
* Note that this is only a safety net in case the ignore list changes
* without a full remote discovery being triggered. */
CSYNC_EXCLUDE_TYPE excluded = csync_excluded_traversal(ctx->excludes, st->path, st->type);
CSYNC_EXCLUDE_TYPE excluded = csync_excluded_traversal(ctx, st->path, st->type);
if (excluded != CSYNC_NOT_EXCLUDED) {
qDebug(lcUpdate, "%s excluded (%d)", st->path.constData(), excluded);

Expand Down
3 changes: 3 additions & 0 deletions src/libsync/discoveryphase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <csync_private.h>
#include <csync_rename.h>
#include <csync_exclude.h>

#include <QLoggingCategory>
#include <QUrl>
Expand Down Expand Up @@ -718,6 +719,8 @@ void DiscoveryJob::start()
_csync_ctx->callbacks.remote_closedir_hook = remote_vio_closedir_hook;
_csync_ctx->callbacks.vio_userdata = this;

csync_exclude_traversal_prepare(_csync_ctx); // Converts the flat exclude list to optimized regexps

csync_set_log_callback(_log_callback);
csync_set_log_level(_log_level);
_lastUpdateProgressCallbackCall.invalidate();
Expand Down
1 change: 1 addition & 0 deletions src/libsync/excludedfiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ bool ExcludedFiles::reloadExcludes()
if (csync_exclude_load(file.toUtf8(), _excludesPtr) < 0)
success = false;
}
// csync_exclude_traversal_prepare() is called implicitly at sync start.
return success;
}

Expand Down
Loading

0 comments on commit 1da7024

Please sign in to comment.