Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the block and patterns search algorithm #25105

Merged
merged 9 commits into from
Sep 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 93 additions & 34 deletions packages/block-editor/src/components/inserter/search-items.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,53 @@ import {
words,
} from 'lodash';

// Fallback accessors used when the search config does not supply its own.

/**
 * Reads the item name, falling back to an empty string when it is falsy.
 *
 * @param {Object} item Item to read from.
 * @return {string} Item name or ''.
 */
function defaultGetName( item ) {
	return item.name || '';
}

/**
 * Reads the item title as-is (may be undefined).
 *
 * @param {Object} item Item to read from.
 * @return {*} Item title.
 */
function defaultGetTitle( item ) {
	return item.title;
}

/**
 * Reads the item keywords, falling back to a fresh empty array.
 *
 * @param {Object} item Item to read from.
 * @return {Array} Item keywords or [].
 */
function defaultGetKeywords( item ) {
	return item.keywords || [];
}

/**
 * Reads the item category as-is (may be undefined).
 *
 * @param {Object} item Item to read from.
 * @return {*} Item category.
 */
function defaultGetCategory( item ) {
	return item.category;
}

/**
 * Items have no collection by default.
 *
 * @return {null} Always null.
 */
function defaultGetCollection() {
	return null;
}

/**
 * Items have no variations by default.
 *
 * @return {Array} Always a fresh empty array.
 */
function defaultGetVariations() {
	return [];
}

/**
* Converts the search term into a list of normalized terms.
* Sanitizes the search input string.
*
* Applies three steps in order: strips diacritics, removes one leading
* slash and lowercases. The result is still a single string: it is not
* split into words and surrounding whitespace is not trimmed here.
*
* @param {string} term The search term to normalize.
* @param {string} input The search input to normalize.
*
* @return {string[]} The normalized list of search terms.
* @return {string} The normalized search input.
*/
export const normalizeSearchTerm = ( term = '' ) => {
function normalizeSearchInput( input = '' ) {
// Disregard diacritics.
// Input: "média"
term = deburr( term );
input = deburr( input );

// Accommodate leading slash, matching autocomplete expectations.
// Input: "/media"
// The pattern is anchored and non-global, so only a single slash at the
// very start of the string is removed.
term = term.replace( /^\//, '' );
input = input.replace( /^\//, '' );

// Lowercase.
// Input: "MEDIA"
term = term.toLowerCase();
input = input.toLowerCase();

return input;
}

/**
* Converts the search term into a list of normalized terms.
*
* The input is first sanitized with normalizeSearchInput and the result is
* then split into words with lodash `words`.
*
* @param {string} input The search term to normalize.
*
* @return {string[]} The normalized list of search terms.
*/
export const getNormalizedSearchTerms = ( input = '' ) => {
// Extract words.
// Callers in this file treat an empty result as "nothing to search for".
return words( term );
return words( normalizeSearchInput( input ) );
};

const removeMatchingTerms = ( unmatchedTerms, unprocessedTerms ) => {
return differenceWith(
unmatchedTerms,
normalizeSearchTerm( unprocessedTerms ),
getNormalizedSearchTerms( unprocessedTerms ),
( unmatchedTerm, unprocessedTerm ) =>
unprocessedTerm.includes( unmatchedTerm )
);
Expand All @@ -47,9 +66,9 @@ export const searchBlockItems = (
items,
categories,
collections,
searchTerm
searchInput
) => {
const normalizedSearchTerms = normalizeSearchTerm( searchTerm );
const normalizedSearchTerms = getNormalizedSearchTerms( searchInput );
if ( normalizedSearchTerms.length === 0 ) {
return items;
}
Expand All @@ -73,7 +92,7 @@ export const searchBlockItems = (
)
),
};
return searchItems( items, searchTerm, config ).map( ( item ) => {
return searchItems( items, searchInput, config ).map( ( item ) => {
if ( isEmpty( item.variations ) ) {
return item;
}
Expand All @@ -83,7 +102,7 @@ export const searchBlockItems = (
return (
intersectionWith(
normalizedSearchTerms,
normalizeSearchTerm( title ).concat( keywords ),
getNormalizedSearchTerms( title ).concat( keywords ),
( termToMatch, labelTerm ) =>
labelTerm.includes( termToMatch )
).length > 0
Expand All @@ -105,50 +124,90 @@ export const searchBlockItems = (
/**
* Filters an item list given a search term.
*
* Each item is scored with getItemSearchRank; items scoring 0 are dropped
* and the remaining items are returned ordered from highest to lowest rank.
* When the input contains no searchable words the list is returned as-is.
*
* @param {Array} items Item list
* @param {string} searchTerm Search term.
* @param {Object} config Search Config.
* @return {Array} Filtered item list.
* @param {Array} items Item list
* @param {string} searchInput Search input.
* @param {Object} config Search Config.
* @return {Array} Filtered item list.
*/
export const searchItems = ( items = [], searchTerm = '', config = {} ) => {
const normalizedSearchTerms = normalizeSearchTerm( searchTerm );
export const searchItems = ( items = [], searchInput = '', config = {} ) => {
const normalizedSearchTerms = getNormalizedSearchTerms( searchInput );
// Nothing to search for: skip ranking and return the same array.
if ( normalizedSearchTerms.length === 0 ) {
return items;
}

const defaultGetTitle = ( item ) => item.title;
const defaultGetKeywords = ( item ) => item.keywords || [];
const defaultGetCategory = ( item ) => item.category;
const defaultGetCollection = () => null;
const defaultGetVariations = () => [];
// Pair every item with its rank as [ item, rank ] and keep only the
// pairs with a positive rank.
const rankedItems = items
.map( ( item ) => {
youknowriad marked this conversation as resolved.
Show resolved Hide resolved
return [ item, getItemSearchRank( item, searchInput, config ) ];
} )
.filter( ( [ , rank ] ) => rank > 0 );

// Highest rank first; rankedItems is a fresh array, so sorting in place
// does not touch the caller's list.
rankedItems.sort( ( [ , rank1 ], [ , rank2 ] ) => rank2 - rank1 );
// Drop the rank again and return only the items.
return rankedItems.map( ( [ item ] ) => item );
};

/**
* Get the search rank for a given item and a specific search term.
* The better the match, the higher the rank.
* If the rank equals 0, it should be excluded from the results.
*
* Rank tiers, as implemented below:
* - 30 when the normalized search input equals the normalized title.
* - 20 when the normalized title starts with the normalized search input.
* - 10 when every search word is found in the combined name, title,
*   keywords, category, collection and variations.
* - +1 on top of a non-zero rank when the item name starts with "core/".
*
* @param {Object} item Item to filter.
* @param {string} searchTerm Search term.
* @param {Object} config Search Config. May provide getName, getTitle,
*                        getKeywords, getCategory, getCollection and
*                        getVariations to override how fields are read.
* @return {number} Search Rank.
*/
export function getItemSearchRank( item, searchTerm, config = {} ) {
// Any getter missing from the config falls back to the module default.
const {
getName = defaultGetName,
getTitle = defaultGetTitle,
getKeywords = defaultGetKeywords,
getCategory = defaultGetCategory,
getCollection = defaultGetCollection,
getVariations = defaultGetVariations,
} = config;

return items.filter( ( item ) => {
const title = getTitle( item );
const keywords = getKeywords( item );
const category = getCategory( item );
const collection = getCollection( item );
const variations = getVariations( item );
const name = getName( item );
const title = getTitle( item );
const keywords = getKeywords( item );
const category = getCategory( item );
const collection = getCollection( item );
const variations = getVariations( item );

// Both sides go through the same sanitizer so the comparisons below
// ignore case, diacritics and a leading slash.
// NOTE(review): normalizeSearchInput does not trim, so an input with
// leading or trailing spaces cannot hit the exact/prefix tiers and falls
// through to the word-based tier. Confirm this is intended.
const normalizedSearchInput = normalizeSearchInput( searchTerm );
const normalizedTitle = normalizeSearchInput( title );

let rank = 0;

// Prefers exact matches
// Then prefers if the beginning of the title matches the search term
// name, keywords, categories, collection, variations match come later.
if ( normalizedSearchInput === normalizedTitle ) {
rank += 30;
} else if ( normalizedTitle.startsWith( normalizedSearchInput ) ) {
rank += 20;
} else {
// Combine every searchable field into one string; removeMatchingTerms
// normalizes and splits it into words again before comparing.
const terms = [
name,
title,
...keywords,
category,
collection,
...variations,
].join( ' ' );

const normalizedSearchTerms = words( normalizedSearchInput );
const unmatchedTerms = removeMatchingTerms(
normalizedSearchTerms,
terms
);

return unmatchedTerms.length === 0;
} );
};
// The item only qualifies when no search word is left unmatched.
if ( unmatchedTerms.length === 0 ) {
rank += 10;
}
}

// Give a better rank to "core" namespaced items.
// The rank !== 0 guard keeps the bonus from turning a non-match into a match.
if ( rank !== 0 && name.startsWith( 'core/' ) ) {
rank++;
}

return rank;
}
64 changes: 57 additions & 7 deletions packages/block-editor/src/components/inserter/test/search-items.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,72 @@ import items, {
youtubeItem,
paragraphEmbedItem,
} from './fixtures';
import { normalizeSearchTerm, searchBlockItems } from '../search-items';
import {
getNormalizedSearchTerms,
searchBlockItems,
getItemSearchRank,
} from '../search-items';

// Each case pins one normalization step of getNormalizedSearchTerms:
// empty result for word-less input, diacritics removal, whitespace
// handling, lowercasing and word extraction.
describe( 'normalizeSearchTerm', () => {
describe( 'getNormalizedSearchTerms', () => {
it( 'should return an empty array when no words detected', () => {
expect( normalizeSearchTerm( ' - !? *** ' ) ).toEqual( [] );
expect( getNormalizedSearchTerms( ' - !? *** ' ) ).toEqual( [] );
} );

it( 'should remove diacritics', () => {
expect( normalizeSearchTerm( 'média' ) ).toEqual( [ 'media' ] );
expect( getNormalizedSearchTerms( 'média' ) ).toEqual( [ 'media' ] );
} );

it( 'should trim whitespace', () => {
expect( normalizeSearchTerm( ' média ' ) ).toEqual( [ 'media' ] );
expect( getNormalizedSearchTerms( ' média ' ) ).toEqual( [
'media',
] );
} );

it( 'should convert to lowercase', () => {
expect( normalizeSearchTerm( ' Média ' ) ).toEqual( [ 'media' ] );
expect( getNormalizedSearchTerms( ' Média ' ) ).toEqual( [
'media',
] );
} );

// Punctuation and symbols act as separators; digits are kept as a word.
it( 'should extract only words', () => {
expect(
normalizeSearchTerm( ' Média & Text Tag-Cloud > 123' )
getNormalizedSearchTerms( ' Média & Text Tag-Cloud > 123' )
).toEqual( [ 'media', 'text', 'tag', 'cloud', '123' ] );
} );
} );

// Pins the rank tiers returned by getItemSearchRank for a fixed search term.
describe( 'getItemSearchRank', () => {
	const searchTerm = 'button';

	it( 'should return the highest rank for exact matches', () => {
		const exactTitleItem = { title: 'Button' };
		const rank = getItemSearchRank( exactTitleItem, searchTerm );

		expect( rank ).toEqual( 30 );
	} );

	it( 'should return a high rank if the start of title matches the search term', () => {
		const prefixTitleItem = { title: 'Button Advanced' };
		const rank = getItemSearchRank( prefixTitleItem, searchTerm );

		expect( rank ).toEqual( 20 );
	} );

	it( 'should add a bonus point to items with core namespaces', () => {
		// Exact title match plus the bonus for the "core/" name prefix.
		const coreItem = { name: 'core/button', title: 'Button' };
		const rank = getItemSearchRank( coreItem, searchTerm );

		expect( rank ).toEqual( 31 );
	} );

	it( 'should have a small rank if it matches keywords, category...', () => {
		// The title does not match; only the keyword does.
		const keywordOnlyItem = { title: 'link', keywords: [ 'button' ] };
		const rank = getItemSearchRank( keywordOnlyItem, searchTerm );

		expect( rank ).toEqual( 10 );
	} );
} );

describe( 'searchBlockItems', () => {
it( 'should return back all items when no terms detected', () => {
expect(
Expand All @@ -53,6 +93,16 @@ describe( 'searchBlockItems', () => {
] );
} );

it( 'should use the ranking algorithm to order the blocks', () => {
expect(
searchBlockItems( items, categories, collections, 'a para' )
).toEqual( [
paragraphEmbedItem,
paragraphItem,
advancedParagraphItem,
] );
} );

it( 'should search items using the keywords and partial terms', () => {
expect(
searchBlockItems( items, categories, collections, 'GOOGL' )
Expand Down