From f6d161d194c0a6197ab8f3144c0cb907a2bc8349 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Mon, 2 Dec 2024 13:17:45 +0000 Subject: [PATCH] Prefer exact matches in Link Search results sorting (#67367) * Weight towards exact matches * Add additional test coverage Co-authored-by: getdave Co-authored-by: draganescu Co-authored-by: talldan Co-authored-by: jasmussen Co-authored-by: kevin940726 Co-authored-by: ironprogrammer Co-authored-by: annezazu --- .../__experimental-fetch-link-suggestions.ts | 25 +++++++++++-- .../__experimental-fetch-link-suggestions.js | 37 +++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/packages/core-data/src/fetch/__experimental-fetch-link-suggestions.ts b/packages/core-data/src/fetch/__experimental-fetch-link-suggestions.ts index e1a166ee272dbe..29012197589035 100644 --- a/packages/core-data/src/fetch/__experimental-fetch-link-suggestions.ts +++ b/packages/core-data/src/fetch/__experimental-fetch-link-suggestions.ts @@ -270,12 +270,29 @@ export function sortResults( results: SearchResult[], search: string ) { for ( const result of results ) { if ( result.title ) { const titleTokens = tokenize( result.title ); - const matchingTokens = titleTokens.filter( ( titleToken ) => - searchTokens.some( ( searchToken ) => - titleToken.includes( searchToken ) + const exactMatchingTokens = titleTokens.filter( ( titleToken ) => + searchTokens.some( + ( searchToken ) => titleToken === searchToken ) ); - scores[ result.id ] = matchingTokens.length / titleTokens.length; + const subMatchingTokens = titleTokens.filter( ( titleToken ) => + searchTokens.some( + ( searchToken ) => + titleToken !== searchToken && + titleToken.includes( searchToken ) + ) + ); + + // The score is a combination of exact matches and sub-matches. + // More weight is given to exact matches, as they are more relevant (e.g. "cat" vs "caterpillar"). 
+ // Dividing by the total number of tokens in the title normalizes the score and skews + // the results towards shorter titles. + const exactMatchScore = + ( exactMatchingTokens.length / titleTokens.length ) * 10; + + const subMatchScore = subMatchingTokens.length / titleTokens.length; + + scores[ result.id ] = exactMatchScore + subMatchScore; } else { scores[ result.id ] = 0; } diff --git a/packages/core-data/src/fetch/test/__experimental-fetch-link-suggestions.js b/packages/core-data/src/fetch/test/__experimental-fetch-link-suggestions.js index 6878c74332c3d7..ad0014ff86ecb8 100644 --- a/packages/core-data/src/fetch/test/__experimental-fetch-link-suggestions.js +++ b/packages/core-data/src/fetch/test/__experimental-fetch-link-suggestions.js @@ -393,6 +393,43 @@ describe( 'sortResults', () => { 6, ] ); } ); + + it( 'orders results to prefer direct matches over sub matches', () => { + const results = [ + { + id: 1, + title: 'News', + url: 'http://wordpress.local/news/', + type: 'page', + kind: 'post-type', + }, + { + id: 2, + title: 'Newspaper', + url: 'http://wordpress.local/newspaper/', + type: 'page', + kind: 'post-type', + }, + { + id: 3, + title: 'News Flash News', + url: 'http://wordpress.local/news-flash-news/', + type: 'page', + kind: 'post-type', + }, + { + id: 4, + title: 'News', + url: 'http://wordpress.local/news-2/', + type: 'page', + kind: 'post-type', + }, + ]; + const order = sortResults( results, 'News' ).map( + ( result ) => result.id + ); + expect( order ).toEqual( [ 1, 4, 3, 2 ] ); + } ); } ); describe( 'tokenize', () => {