Skip to content

Commit

Permalink
Add fuzzy find as you type
Browse files Browse the repository at this point in the history
  • Loading branch information
Chronial committed Aug 29, 2019
1 parent 78bc17d commit ef1e2a1
Show file tree
Hide file tree
Showing 8 changed files with 337 additions and 47 deletions.
22 changes: 15 additions & 7 deletions DbAlbumCollection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "DbReloadWorker.h"
#include "Engine.h"
#include "EngineThread.h"
#include "FindAsYouType.h"
#include "config.h"

namespace db_structure {
Expand Down Expand Up @@ -187,15 +188,22 @@ std::optional<DBPos> DbAlbumCollection::performFayt(const std::string& input) {
if (!db)
return std::nullopt;

// input should be already whitespace-cleaned
size_t inputLen = pfc::strlen_utf8(input.c_str());
for (auto it = db->sortIndex.begin(); it != db->sortIndex.end(); ++it) {
if (0 == stricmp_utf8_partial(
input.c_str(), remove_whitespace(it->title).c_str(), inputLen)) {
return posFromIter(it);
FuzzyMatcher matcher(input);

int maxScore = -1;
const db_structure::Album* maxAlbum = nullptr;
for (const auto& album : db->sortIndex) {
int score = matcher.match(album.title);
if (score > maxScore) {
maxScore = score;
maxAlbum = &album;
}
}
return std::nullopt;
if (maxScore > -1) {
return posFromIter(db->sortIndex.iterator_to(*maxAlbum));
} else {
return std::nullopt;
}
}

DBIter DbAlbumCollection::begin() const {
Expand Down
297 changes: 286 additions & 11 deletions FindAsYouType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,263 @@
#include "EngineThread.h"
#include "PlaybackTracer.h"

// The fuzzy matching algorithm is adapted from fzf
namespace {

// Type used for indices into the pattern and into the converted input text.
// NOTE(review): presumably an unsigned 8-bit integer; the matcher clamps the
// lengths of both strings using std::numeric_limits<index_t>::max().
using index_t = t_uint8;
// Type used for match scores, bonuses and gap penalties.
using score_t = t_int16;

namespace {

// Base score awarded for every pattern character that is matched.
constexpr score_t scoreMatch = 16;
// Penalty applied to the first skipped input character of a gap ...
constexpr score_t penaltyGapStart = -3;
// ... and to every further skipped character of the same gap.
constexpr score_t penaltyGapExtention = -1;

// Matches at the start of a word are preferred. The bonus is deliberately
// moderate so that long acronym-style matches do not always beat shorter
// fuzzy matches: it is cancelled out once the gap between two acronym
// letters exceeds 8 characters, which is roughly the average word length in
// the web2 dictionary and on the original author's file system.
constexpr score_t bonusBoundary = scoreMatch / 2;

// The bonus for non-word characters does not depend on context, but it is
// needed to compute the bonus of consecutive chunks that start with a
// non-word character.
constexpr score_t bonusNonWord = scoreMatch / 2;

// Minimum bonus given to characters inside a consecutive chunk.
// A bonus for consecutive matches would not have been needed if a fixed
// match score were used, as in the original algorithm.
constexpr score_t bonusConsecutive = -(penaltyGapStart + penaltyGapExtention);

// The first character of the typed pattern usually carries more weight than
// the rest, so it matters that it lands on a position that earns a bonus,
// e.g. "to-go" vs. "ongoing" for the patterns "og" or "ogo". The extra bonus
// is kept small enough that the gap penalty is still respected.
constexpr score_t bonusFirstCharMultiplier = 2;

}  // namespace

// Character categories used when computing position bonuses:
// charAlnum is any character IsCharAlphaNumericW accepts (see getCharClass),
// charNonWord is everything else.
enum charClass { charNonWord, charAlnum };

// Classifies a single wide character as alphanumeric or non-word.
charClass getCharClass(wchar_t c) {
  return IsCharAlphaNumericW(c) ? charAlnum : charNonWord;
}

// Returns the bonus for matching a character of class `cClass` that directly
// follows a character of class `prevClass`.
score_t bonusFor(charClass prevClass, charClass cClass) {
  // Non-word characters always earn the same, context-free bonus.
  if (cClass == charNonWord) {
    return bonusNonWord;
  }
  // A word character earns the boundary bonus only when it starts a word,
  // i.e. when the preceding character was a non-word character.
  return prevClass == charNonWord ? bonusBoundary : score_t(0);
}

// Scores how well `pattern` fuzzy-matches `input` (algorithm adapted from
// fzf's FuzzyMatchV2).
//
// pattern:   the search pattern; assumed to be lowercase already. Its length
//            is narrowed to index_t.
// input:     UTF-8 text to search in. It is converted to wide characters,
//            lowercased, and '\r' / '\n' are treated as spaces. Only as many
//            characters as fit the index_t-sized buffer are considered.
// positions: optional out-parameter. On a successful match with a non-empty
//            pattern it is overwritten with the indices (into the converted
//            wide-character text, ascending) of the matched characters.
//            It is left untouched when the pattern is empty or nothing
//            matches.
//
// Returns 0 for an empty pattern, -1 if the pattern characters do not occur
// in order in the input, and the best score found otherwise.
int fuzzy_match(const std::wstring& pattern, const std::string& input,
                std::vector<size_t>* positions) {
  index_t M = index_t(pattern.length());
  if (M == 0) {
    return 0;
  }

  // Rune array: the input as lowercase wide characters.
  std::vector<wchar_t> T(
      std::min(input.length() + 1, size_t(std::numeric_limits<index_t>::max())));
  index_t N = index_t(pfc::stringcvt::convert_utf8_to_wide(
      T.data(), T.size(), input.c_str(), input.size()));
  // Lowercase while the buffer still has its full size, so that the
  // terminator CharLowerW stops at lies inside the vector instead of just
  // past its end.
  // NOTE(review): assumes convert_utf8_to_wide writes fewer than T.size()
  // characters (the buffer is zero-initialised) - confirm against pfc.
  CharLowerW(T.data());
  T.resize(N);
  for (auto& c : T) {
    if (c == '\r' || c == '\n')
      c = ' ';
  }

  // First row of the score matrix (scores for pattern[0] only) and the
  // matching "consecutive chunk length" row.
  std::vector<score_t> H0(N);
  std::vector<index_t> C0(N);

  // The first occurrence of each character in the pattern
  std::vector<index_t> F(M);

  // Bonus point for each position
  std::vector<score_t> B(N);

  // Phase 2. Check that the pattern occurs at all, calculate the bonus for
  // each position and fill in the first matrix row.
  score_t maxScore = 0;
  index_t maxScorePos = 0;
  index_t pidx = 0;

  // Will hold the last index of pattern[-1] in input
  index_t lastIdx = 0;
  wchar_t pchar0 = pattern[0];
  wchar_t pchar = pattern[0];
  score_t prevH0 = 0;

  auto prevClass = charNonWord;
  bool inGap = false;
  for (index_t off = 0; off < T.size(); off++) {
    wchar_t c = T[off];
    // The bonus of a position depends on the *text* character found there
    // and the one before it, not on the pattern character being searched.
    charClass cClass = getCharClass(c);
    score_t bonus = bonusFor(prevClass, cClass);
    B[off] = bonus;
    prevClass = cClass;

    if (c == pchar) {
      if (pidx < M) {
        F[pidx] = off;
        pidx++;
        // Once the whole pattern has been seen, keep tracking its last
        // character so lastIdx ends up at that character's final occurrence.
        pchar = pattern[std::min(pidx, index_t(M - 1))];
      }
      lastIdx = off;
    }

    if (c == pchar0) {
      score_t score = scoreMatch + bonus * bonusFirstCharMultiplier;
      H0[off] = score;
      C0[off] = 1;
      if (M == 1 && score > maxScore) {
        maxScore = score;
        maxScorePos = off;
        // A single-character pattern cannot score higher than this.
        if (bonus == bonusBoundary) {
          break;
        }
      }
      inGap = false;
    } else {
      if (inGap) {
        H0[off] = score_t(std::max(0, prevH0 + penaltyGapExtention));
      } else {
        H0[off] = score_t(std::max(0, prevH0 + penaltyGapStart));
      }
      C0[off] = 0;
      inGap = true;
    }
    prevH0 = H0[off];
  }
  if (pidx != M) {
    // Not every pattern character was found in order: no match.
    return -1;
  }
  if (M == 1) {
    if (positions) {
      *positions = std::vector<size_t>{maxScorePos};
    }
    return maxScore;
  }

  // Phase 3. Fill in score matrix. Only the columns between the first
  // occurrence of pattern[0] and the last occurrence of the final pattern
  // character can take part in a match.
  index_t f0 = F[0];
  int width = lastIdx - f0 + 1;
  // score matrix
  std::vector<score_t> H(width * M);
  std::copy(H0.begin() + f0, H0.begin() + lastIdx + 1, H.begin());

  // Possible length of consecutive chunk at each position.
  std::vector<index_t> C(width * M);
  std::copy(C0.begin() + f0, C0.begin() + lastIdx + 1, C.begin());

  for (index_t i = 1; i < M; i++) {
    int row = i * width;
    index_t f = F[i];
    inGap = false;
    for (index_t j = f; j <= lastIdx; j++) {
      index_t j0 = j - f0;
      // score if we "go diagonal"
      score_t s1 = 0;
      // s2 is score if we don't consume a pattern character
      score_t s2 = 0;
      index_t consecutive = 0;

      if (j > f) {
        if (inGap) {
          s2 = H[row + j0 - 1] + penaltyGapExtention;
        } else {
          s2 = H[row + j0 - 1] + penaltyGapStart;
        }
      }

      if (pattern[i] == T[j]) {
        score_t b = B[j];
        consecutive = C[row - width + j0 - 1] + 1;
        // Break consecutive chunk
        if (b == bonusBoundary) {
          consecutive = 1;
        } else if (consecutive > 1) {
          // Inside a chunk every character gets at least the bonus of the
          // character that started the chunk.
          b = std::max({b, bonusConsecutive, B[j - int(consecutive) + 1]});
        }
        s1 = H[row - width + j0 - 1] + scoreMatch + b;
        if (s1 < s2) {
          consecutive = 0;
        }
      }
      C[row + j0] = consecutive;

      score_t score = std::max({score_t(0), s1, s2});
      H[row + j0] = score;
      if (i == M - 1 && score > maxScore) {
        maxScore = score;
        maxScorePos = j;
      }
      inGap = s1 < s2;
    }
  }

  // Phase 4. (Optional) Backtrace to find character positions
  if (positions) {
    positions->clear();
    index_t j = maxScorePos;
    int i = M - 1;
    bool preferMatch = true;
    while (true) {
      int row = i * width;
      index_t j0 = j - f0;
      score_t s = H[row + j0];
      score_t s1 = 0;
      score_t s2 = 0;
      if (i > 0 && j >= F[i]) {
        s1 = H[row - width + j0 - 1];
      }
      if (j > F[i]) {
        s2 = H[row + j0 - 1];
      }

      if (s > s1 && (s > s2 || (s == s2 && preferMatch))) {
        positions->push_back(j);
        if (i == 0) {
          break;
        }
        i--;
      }
      // `row` deliberately still refers to the row used at the top of this
      // iteration, even if `i` was just decremented.
      preferMatch = C[row + j0] > 1 ||
                    (row + width + j0 + 1 < int(C.size()) &&
                     C[row + width + j0 + 1] > 0);
      j--;
    }
    // Positions were collected from the last pattern character backwards.
    std::reverse(positions->begin(), positions->end());
  }
  return maxScore;
}

} // namespace

// Builds a matcher for the UTF-8 `pattern`: the pattern is converted to wide
// characters, cut down to at most (index_t max - 1) characters and lowercased.
FuzzyMatcher::FuzzyMatcher(const std::string& pattern) {
  this->pattern = wstring_from_utf8(pattern);

  // Longer patterns cannot be indexed with index_t, so drop the excess.
  constexpr size_t maxPatternLen = std::numeric_limits<index_t>::max() - 1;
  if (this->pattern.size() > maxPatternLen) {
    this->pattern.resize(maxPatternLen);
  }

  // fuzzy_match expects the pattern to be lowercase already.
  CharLowerW(this->pattern.data());
}

int FuzzyMatcher::match(const std::string& input, std::vector<size_t>* positions) {
return fuzzy_match(this->pattern, input, positions);
}

void FindAsYouType::onChar(WPARAM wParam) {
switch (wParam) {
case 1: // any other nonchar character
Expand All @@ -27,10 +284,31 @@ void FindAsYouType::onChar(WPARAM wParam) {
}
}

namespace {
// Collapses, in place, every run of ' ', '\r' and '\n' into a single space.
// A run at the very start of the string is removed completely; a run at the
// end is kept as one trailing space. Other characters are left untouched.
void fold_whitespace(std::string& s) {
  std::size_t writePos = 0;
  // Starts out true so that leading whitespace is dropped.
  bool lastWasSpace = true;
  for (std::size_t readPos = 0; readPos < s.size(); ++readPos) {
    const char ch = s[readPos];
    const bool isSpace = (ch == ' ' || ch == '\r' || ch == '\n');
    if (isSpace && lastWasSpace) {
      continue;  // already emitted a space for this run (or at the start)
    }
    // writePos never overtakes readPos, so compacting in place is safe.
    s[writePos++] = isSpace ? ' ' : ch;
    lastWasSpace = isSpace;
  }
  s.resize(writePos);
}
}  // namespace

void FindAsYouType::enterChar(wchar_t c) {
std::string newString(enteredString.c_str());
newString.append(pfc::stringcvt::string_utf8_from_wide(&c, 1));
newString = remove_whitespace(newString);
fold_whitespace(newString);
if (updateSearch(newString.c_str())) {
enteredString = newString.c_str();
} else {
Expand All @@ -52,17 +330,14 @@ void FindAsYouType::reset() {
engine.thread.invalidateWindow();
}

int FindAsYouType::highlightLength(const std::string& albumTitle) {
size_t input_length = pfc::strlen_utf8(enteredString);
std::string substring{};
substring.reserve(albumTitle.length());
for (auto c : albumTitle) {
substring.push_back(c);
if (pfc::strlen_utf8(remove_whitespace(substring).c_str()) == input_length) {
return pfc::strlen_utf8(substring.c_str());
}
std::vector<size_t> FindAsYouType::highlightPositions(const std::string& albumTitle) {
std::vector<size_t> positions;
if (enteredString.is_empty()) {
return positions;
}
return 0;
FuzzyMatcher matcher{std::string(enteredString)};
matcher.match(albumTitle, &positions);
return positions;
}

bool FindAsYouType::updateSearch(const char* searchFor) {
Expand Down
11 changes: 10 additions & 1 deletion FindAsYouType.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,19 @@ class FindAsYouType {
explicit FindAsYouType(Engine& engine) : engine(engine){};
void onChar(WPARAM wParam);
void reset();
int highlightLength(const std::string& albumTitle);
std::vector<size_t> highlightPositions(const std::string& albumTitle);

private:
void enterChar(wchar_t c);
void removeChar();
bool updateSearch(const char* searchFor);
};

// Fuzzy string matcher for a fixed search pattern.
class FuzzyMatcher {
public:
// Prepares a matcher for the given UTF-8 search pattern.
explicit FuzzyMatcher(const std::string& pattern);
// Scores `input` (UTF-8) against the pattern and returns the score.
// If `positions` is given, it may be filled with the indices of the matched
// characters; by default no positions are collected.
int match(const std::string& input, std::vector<size_t>* positions = nullptr);

private:
// The search pattern as a wide-character string.
std::wstring pattern;
};
4 changes: 2 additions & 2 deletions Renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,15 +248,15 @@ void Renderer::drawScene(bool selectionPass) {
void Renderer::drawGui() {
if (cfgShowAlbumTitle || engine.db.initializing()) {
std::string albumTitle;
int highlight = 0;
std::vector<size_t> highlight;
if (engine.db.initializing()) {
albumTitle = "Generating Cover Display ...";
} else if (engine.db.empty()) {
albumTitle = "No Covers to Display";
} else {
DBIter iter = engine.db.iterFromPos(engine.worldState.getTarget()).value();
albumTitle = engine.db.getAlbumInfo(iter).title;
highlight = engine.findAsYouType.highlightLength(albumTitle);
highlight = engine.findAsYouType.highlightPositions(albumTitle);
}
textDisplay.displayText(albumTitle, highlight, int(winWidth * cfgTitlePosH),
int(winHeight * (1 - cfgTitlePosV)));
Expand Down
Loading

0 comments on commit ef1e2a1

Please sign in to comment.