Skip to content

Commit

Permalink
refactor - optimize snippets search
Browse files Browse the repository at this point in the history
  • Loading branch information
adixchen committed Feb 13, 2023
1 parent 558e191 commit 77d48f7
Show file tree
Hide file tree
Showing 11 changed files with 5,189 additions and 2,021 deletions.
4 changes: 4 additions & 0 deletions backend/jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Jest configuration for the backend unit tests (used by the "test" npm script
// via `jest --config ./jest.config.js`).
module.exports = {
// Run tests in a Node environment.
testEnvironment: 'node',
// Pick up every file ending in .spec.js, at any depth.
// NOTE(review): the mocha "integration-tests" script uses the same '**/*.spec.js' glob —
// confirm jest is not meant to exclude those specs.
testMatch: ['**/*.spec.js'],
};
6,612 changes: 4,787 additions & 1,825 deletions backend/package-lock.json

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,23 @@
"scripts": {
"start": "node ./bin/www",
"debug": "nodemon --inspect ./bin/www --watch src --watch docs/openapi/openapi.yaml",
"integration-tests": "NODE_ENV=test nyc mocha '**/*.spec.js' --exit"
"integration-tests": "NODE_ENV=test nyc mocha '**/*.spec.js' --exit",
"test": "jest --config ./jest.config.js"
},
"dependencies": {
"aws-sdk": "^2.1048.0",
"body-parser": "^1.19.1",
"body-parser": "^1.20.1",
"cheerio": "^1.0.0-rc.10",
"cookie-parser": "^1.4.6",
"debug": "^4.3.3",
"escape-string-regexp": "^1.0.5",
"express": "^4.17.2",
"express": "^4.18.2",
"express-async-errors": "^3.1.1",
"fs-extra": "^7.0.1",
"helmet": "^3.23.3",
"http-status-codes": "^1.4.0",
"keycloak-connect": "12.0.4",
"mongoose": "^5.13.14",
"mongoose": "^5.13.15",
"morgan": "^1.10.0",
"multer": "^1.4.4",
"multer-s3": "^2.10.0",
Expand All @@ -38,6 +39,7 @@
"chai": "^4.3.4",
"chai-as-promised": "^7.1.1",
"eslint": "^8.5.0",
"jest": "^29.4.2",
"jsonwebtoken": "^8.5.1",
"mocha": "^9.1.3",
"nodemon": "^2.0.20",
Expand Down
134 changes: 134 additions & 0 deletions backend/src/common/search.service.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
const Snippet = require('../model/snippet');

const searchUtils = require('./search.utils');

/**
 * Entry point of the snippet search: parses the raw query and delegates to the
 * matching query strategy.
 *
 * The query is split into plain terms and tags; the terms are further split into
 * special filters and fulltext terms. Dispatch is based on the presence of
 * terms and tags:
 *  - terms and tags -> getItemsForTagsAndTerms
 *  - terms only     -> getItemsForSearchedTerms
 *  - otherwise      -> getItemsForSearchedTags
 *
 * @param type - forwarded to the strategy functions
 * @param isPublic - forwarded to the strategy functions
 * @param userId - forwarded to the strategy functions
 * @param query - raw search query as typed by the user
 * @param page - 1-based page number
 * @param limit - page size
 * @param searchInclude - forwarded to the term-based strategies
 * @returns {Promise<Array>} the snippets returned by the selected strategy
 */
let findItems = async function (type, isPublic, userId, query, page, limit, searchInclude) {
  // split in text and tags
  const parsedQuery = searchUtils.splitSearchQuery(query);
  const terms = parsedQuery.terms;
  const tags = parsedQuery.tags;

  const extracted = searchUtils.extractFulltextAndSpecialSearchTerms(terms);
  const specialSearchFilters = extracted.specialSearchFilters;
  const fulltextSearchTerms = extracted.fulltextSearchTerms;

  if ( terms.length > 0 && tags.length > 0 ) {
    return getItemsForTagsAndTerms(type, isPublic, userId, tags, fulltextSearchTerms, page, limit, specialSearchFilters, searchInclude);
  }

  if ( terms.length > 0 ) {
    return getItemsForSearchedTerms(type, isPublic, userId, fulltextSearchTerms, page, limit, specialSearchFilters, searchInclude);
  }

  return getItemsForSearchedTags(type, isPublic, userId, tags, page, limit, specialSearchFilters);
}

/**
 * Finds snippets that carry ALL the searched tags and match the fulltext terms.
 *
 * Fixes compared to the previous version:
 *  - `searchInclude` is now forwarded to the fulltext helper, so the "any" mode
 *    is honoured instead of being silently dropped.
 *  - the text score projection/sort is only requested when a `$text` clause is
 *    actually part of the filter. When all terms were special filters
 *    (fulltextSearchTerms is empty) the results are sorted by creation date
 *    instead, because MongoDB rejects `{$meta: "textScore"}` without `$text`.
 *
 * @param type - not used by this function
 * @param isPublic - when truthy the filter is restricted to public snippets
 * @param userId - when truthy the filter is restricted to this user's snippets
 * @param searchedTags - tags that must all be present on a snippet
 * @param fulltextSearchTerms - terms for the Mongo fulltext search (may be empty)
 * @param page - 1-based page number
 * @param limit - page size
 * @param specialSearchFilters - filters added via addSpecialSearchFiltersToMongoFilter
 * @param searchInclude - passed to includeFulltextSearchTermsInFilter ('any' selects the OR form)
 * @returns {Promise<Array>} lean snippet documents
 */
let getItemsForTagsAndTerms = async function (type, isPublic, userId, searchedTags, fulltextSearchTerms, page, limit, specialSearchFilters, searchInclude) {
  let filter = {
    tags: {
      $all: searchedTags
    }
  };
  // NOTE(review): unlike the other strategies there is no "else public = true" fallback here,
  // so with neither userId nor isPublic set the query is not restricted by visibility — confirm callers.
  if ( userId ) {
    filter['userId'] = userId;
  }
  if ( isPublic ) {
    filter['public'] = true;
  }

  filter = searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, filter, searchInclude);

  addSpecialSearchFiltersToMongoFilter(specialSearchFilters, filter);

  // textScore metadata is only available when the query contains a $text clause
  const hasFulltextSearch = filter.$text !== undefined;
  const textScore = {score: {$meta: "textScore"}};
  const baseQuery = hasFulltextSearch
    ? Snippet.find(filter, textScore).sort(textScore)
    : Snippet.find(filter).sort({createdAt: -1});

  const snippets = await baseQuery
    .skip((page - 1) * limit)
    .limit(limit)
    .lean()
    .exec();

  return snippets;
}


/**
 * Finds snippets matching the searched terms (no tags involved).
 *
 * Fixes compared to the previous version:
 *  - the text score projection/sort is only requested when a `$text` clause is
 *    part of the filter. A query consisting solely of special filters
 *    (e.g. `site:github.com`) yields no fulltext terms; MongoDB rejects
 *    `{$meta: "textScore"}` without `$text`, so such queries are now sorted by
 *    creation date instead of failing.
 *  - the `$text` clause is built by the shared
 *    searchUtils.includeFulltextSearchTermsInFilter helper instead of a local copy
 *    of the same logic.
 *
 * @param type - not used by this function
 * @param isPublic - not used by this function; visibility is derived from userId
 * @param userId - when truthy only this user's snippets are searched, otherwise only public ones
 * @param fulltextSearchTerms - terms for the Mongo fulltext search (may be empty)
 * @param page - 1-based page number
 * @param limit - page size
 * @param specialSearchFilters - filters added via addSpecialSearchFiltersToMongoFilter
 * @param searchInclude - 'any' selects the OR form of the fulltext search
 * @returns {Promise<Array>} lean snippet documents
 */
let getItemsForSearchedTerms = async function (type, isPublic, userId, fulltextSearchTerms, page, limit, specialSearchFilters, searchInclude) {
  let filter = {};
  if ( userId ) {
    filter['userId'] = userId;
  } else {
    filter['public'] = true;
  }

  filter = searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, filter, searchInclude);

  addSpecialSearchFiltersToMongoFilter(specialSearchFilters, filter);

  // textScore metadata is only available when the query contains a $text clause
  const hasFulltextSearch = filter.$text !== undefined;
  const textScore = {score: {$meta: "textScore"}};
  const baseQuery = hasFulltextSearch
    ? Snippet.find(filter, textScore).sort(textScore)
    : Snippet.find(filter).sort({createdAt: -1});

  const snippets = await baseQuery
    .skip((page - 1) * limit)
    .limit(limit)
    .lean()
    .exec();

  return snippets;
}


/**
 * Finds snippets that carry ALL the searched tags, newest first.
 *
 * @param type - not used by this function
 * @param isPublic - not used by this function; visibility is derived from userId
 * @param userId - when truthy only this user's snippets are searched, otherwise only public ones
 * @param searchedTags - tags that must all be present on a snippet
 * @param page - 1-based page number
 * @param limit - page size
 * @param specialSearchFilters - filters added via addSpecialSearchFiltersToMongoFilter
 * @returns {Promise<Array>} lean snippet documents sorted by createdAt descending
 */
let getItemsForSearchedTags = async function (type, isPublic, userId, searchedTags, page, limit, specialSearchFilters) {
  const mongoFilter = {tags: {$all: searchedTags}};

  // personal search when a user is given, public search otherwise
  if ( !userId ) {
    mongoFilter.public = true;
  } else {
    mongoFilter.userId = userId;
  }

  addSpecialSearchFiltersToMongoFilter(specialSearchFilters, mongoFilter);

  const newestFirst = {createdAt: -1};
  const foundSnippets = await Snippet.find(mongoFilter)
    .sort(newestFirst)
    .skip((page - 1) * limit)
    .limit(limit)
    .lean()
    .exec();

  return foundSnippets;
}

/**
 * Applies the special search filters (user, private-only, site) to a Mongo filter.
 * The given `filter` object is mutated in place; nothing is returned.
 *
 *  - `userId` overrides any `userId` already present on the filter; only when it is
 *    absent is `privateOnly` considered, which sets `public` to false.
 *  - `site` becomes a case-insensitive "contains" match on `sourceUrl`. The value
 *    comes from the user's search query, so it is escaped before being compiled:
 *    unescaped input such as `site:(` used to throw a SyntaxError, `.` matched any
 *    character, and arbitrary patterns could be used for expensive regex matching.
 *
 * NOTE(review): `userId` from the query replaces the caller-provided one without touching
 * `public` — confirm callers prevent this from exposing another user's private snippets.
 *
 * @param specialSearchFilters - object with optional `userId`, `privateOnly` and `site`
 * @param filter - Mongo filter object to extend (mutated)
 */
let addSpecialSearchFiltersToMongoFilter = function (specialSearchFilters, filter) {
  if ( specialSearchFilters.userId ) {
    filter.userId = specialSearchFilters.userId;
  } else if ( specialSearchFilters.privateOnly ) {
    filter.public = false;
  }

  if ( specialSearchFilters.site ) {
    // treat the site as literal text, not as a regular expression
    const escapedSite = String(specialSearchFilters.site).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    filter.sourceUrl = new RegExp(escapedSite, 'i'); //TODO when performance becomes an issue extract domains from URLs and make a direct comparison with the domain
  }
};


// Public API of the search service: findItems is exposed under the name findSnippets.
module.exports = {
findSnippets: findItems
}
94 changes: 26 additions & 68 deletions backend/src/common/search.utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,63 +58,10 @@ let splitSearchQuery = function (query) {
return result;
}

/**
 * Tells whether the searched term occurs as a separate "word" in one of the
 * bookmark's text fields (name, location, description, sourceCodeURL) or in one
 * of its tags. The comparison is case-insensitive.
 *
 * @param bookmark - object with optional text fields and a `tags` array
 * @param searchedTerm - the term to look for
 * @returns {boolean} true when the term was found
 */
let bookmarkContainsSearchedTerm = function (bookmark, searchedTerm) {
  // A plain \b word boundary was not enough, especially for special characters which can happen in coding
  // https://stackoverflow.com/questions/23458872/javascript-regex-word-boundary-b-issue
  const separatingChars = '\\s\\.,;#\\-\\/_\\[\\]\\(\\)\\*\\+';
  const escapedTerm = escapeRegExp(searchedTerm.toLowerCase());
  const pattern = new RegExp(`(^|[${separatingChars}])(${escapedTerm})(?=$|[${separatingChars}])`);
  const containsTerm = text => pattern.test(text.toLowerCase());

  const foundInTextFields = (bookmark.name && containsTerm(bookmark.name))
    || (bookmark.location && containsTerm(bookmark.location))
    || (bookmark.description && containsTerm(bookmark.description))
    || (bookmark.sourceCodeURL && containsTerm(bookmark.sourceCodeURL));
  if ( foundInTextFields ) {
    return true;
  }

  // if not found already look through the tags also
  const matchingTags = bookmark.tags.filter(tag => containsTerm(tag));
  return matchingTags.length > 0;
}

/**
 * Escapes the characters that have a special meaning in a regular expression
 * ( - [ ] / { } ( ) * + ? . \ ^ $ | ) by prefixing each with a backslash, so the
 * result can be embedded in a pattern and matched literally.
 *
 * @param {string} str - text to escape
 * @returns {string} the escaped text
 */
function escapeRegExp(str) {
  // "-" comes first in the character class so it is taken literally
  const specialChars = /[-\[\]\/{}()*+?.\\^$|]/g;
  return str.replace(specialChars, '\\$&'); // $& means the whole matched string
}

let extractSpecialSearchTerms = function (searchedTerms) {
let extractFulltextAndSpecialSearchTerms = function (searchedTerms) {
let specialSearchFilters = {}
let nonSpecialSearchTerms = [];
let fulltextSearchTerms = [];
for ( let i = 0; i < searchedTerms.length; i++ ) {
const searchTerm = searchedTerms[i];

Expand All @@ -141,39 +88,50 @@ let extractSpecialSearchTerms = function (searchedTerms) {
continue;
}

nonSpecialSearchTerms.push(searchTerm);
fulltextSearchTerms.push(searchTerm);
}

return {
specialSearchFilters: specialSearchFilters,
nonSpecialSearchTerms: nonSpecialSearchTerms
fulltextSearchTerms: fulltextSearchTerms
}
}

/*
The default search in Mongo uses the OR operatar, here
The default search in Mongo uses the OR operator, here
we make to AND by placing the search terms between ""
*/
let generateFullSearchText = function (nonSpecialSearchTerms) {
let generateFullSearchText = function (fulltextSearchTerms) {
let termsQuery = '';
nonSpecialSearchTerms.forEach(searchTerm => {
if ( searchTerm.startsWith('-') ) {
fulltextSearchTerms.forEach(searchTerm => {
if ( searchTerm.startsWith('-') ) { // "-" means it must not contain this searchTerm
termsQuery += ' ' + searchTerm;
} else { //wrap it in quotes to make it a default AND in search
termsQuery += ' "' + searchTerm.substring(0, searchTerm.length) + '"';
}
});
//const termsJoined = nonSpecialSearchTerms.join(' ');
//const termsQuery = escapeStringRegexp(termsJoined);
//const termsQuery = termsJoined;

return termsQuery.trim();
};

/**
 * Returns a shallow copy of `filter`, extended with a Mongo `$text` clause when
 * there are fulltext search terms. The given filter is not modified.
 *
 * @param fulltextSearchTerms - terms to search for; when empty no `$text` is added
 * @param filter - base Mongo filter
 * @param searchInclude - 'any' joins the terms with spaces; any other value
 *        uses generateFullSearchText to build the search string
 * @returns {object} the new filter
 */
let includeFulltextSearchTermsInFilter = function (fulltextSearchTerms, filter, searchInclude) {
  const extendedFilter = {...filter};

  if ( !(fulltextSearchTerms.length > 0) ) {
    return extendedFilter;
  }

  const searchExpression = searchInclude === 'any'
    ? fulltextSearchTerms.join(' ')
    : generateFullSearchText(fulltextSearchTerms);
  extendedFilter.$text = {$search: searchExpression};

  return extendedFilter;
}

module.exports = {
splitSearchQuery: splitSearchQuery,
bookmarkContainsSearchedTerm: bookmarkContainsSearchedTerm,
extractSpecialSearchTerms: extractSpecialSearchTerms,
escapeRegExp: escapeRegExp,
generateFullSearchText: generateFullSearchText
extractFulltextAndSpecialSearchTerms: extractFulltextAndSpecialSearchTerms,
generateFullSearchText: generateFullSearchText,
includeFulltextSearchTermsInFilter: includeFulltextSearchTermsInFilter
}
60 changes: 60 additions & 0 deletions backend/src/common/search.utils.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
const searchUtils = require('./search.utils');

// Verifies that bracketed words are recognised as tags and the rest as terms.
describe('splitSearchQuery', () => {
  it('should split search query into terms and tags', () => {
    const actual = searchUtils.splitSearchQuery('term1 [tag1] term2 [tag2]');

    expect(actual).toEqual({
      terms: ['term1', 'term2'],
      tags: ['tag1', 'tag2']
    });
  });
});

// Verifies that "key:value" terms end up as special filters and the remaining
// terms are returned for the fulltext search.
describe('extractFulltextAndSpecialSearchTerms', () => {
  it('should extract special search terms and filters from searched terms', () => {
    const userId = '12345678-abcd-1234-abcd-123456789abc';
    const input = ['lang:en', 'site:github.com', 'private:only', 'term1', 'user:' + userId];

    const actual = searchUtils.extractFulltextAndSpecialSearchTerms(input);

    expect(actual).toEqual({
      fulltextSearchTerms: ['term1'],
      specialSearchFilters: {
        lang: 'en',
        privateOnly: true,
        site: 'github.com',
        userId: userId
      }
    });
  });
});

// Tests for includeFulltextSearchTermsInFilter, which returns a copy of the
// given filter and only adds `$text` when there are fulltext terms.
describe('includeFulltextSearchTermsInFilter', () => {
  test('returns filter with $text when fulltextSearchTerms is not empty', () => {
    const fulltextSearchTerms = ['test'];
    const filter = {};
    const searchInclude = 'any';
    const expected = {
      ...filter,
      $text: {$search: fulltextSearchTerms.join(' ')}
    };
    expect(searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, filter, searchInclude)).toEqual(expected);
  });

  test('returns filter without $text when fulltextSearchTerms is empty', () => {
    const fulltextSearchTerms = [];
    const filter = {};
    const searchInclude = 'any';
    const result = searchUtils.includeFulltextSearchTermsInFilter(fulltextSearchTerms, filter, searchInclude);
    // The function always returns a (copied) filter object, never undefined,
    // so the previous `toBe(undefined)` expectation could not pass.
    expect(result).toEqual(filter);
    expect(result).not.toHaveProperty('$text');
  });
});

// Verifies that plain terms are wrapped in quotes while terms starting with "-"
// are passed through unchanged.
describe('generateFullSearchText', () => {
  it('should generate the correct full search text for given fulltext search terms', () => {
    const actual = searchUtils.generateFullSearchText(['apple', '-banana', 'cherry']);

    expect(actual).toBe('"apple" -banana "cherry"');
  });
});

Loading

0 comments on commit 77d48f7

Please sign in to comment.