Skip to content

Commit

Permalink
Add similarity property to the Azure Search Index definition for API …
Browse files Browse the repository at this point in the history
…2019-05-06-preview (Azure#9249)

* BM25

* Add descriptions

* Update Azure.Search

* Revert Microsoft.Azure.Search changes

* Fix spacing

* PR comments

* Prettier + custom word

* PR comments

Co-authored-by: Raouf Merouche <ramero@microsoft.com>
  • Loading branch information
2 people authored and 00Kai0 committed Oct 12, 2020
1 parent 95c47c6 commit 7e56013
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 0 deletions.
1 change: 1 addition & 0 deletions custom-words.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,7 @@ testallroutes
testnew
testnewroute
textanalytics
TFIDF
Tful
Tfvc
timeframe
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@
"applicationId": "00000000-0000-0000-0000-000000000000",
"applicationSecret": "myapplicationsecret"
}
},
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"b": 0.5,
"k1": 1.3
}
}
},
Expand Down Expand Up @@ -356,6 +361,11 @@
"applicationId": "00000000-0000-0000-0000-000000000000",
"applicationSecret": null
}
},
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"b": 0.5,
"k1": 1.3
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@
"keyVaultKeyVersion": "myKeyVersion-32charAlphaNumericString",
"keyVaultUri": "https://myKeyVault.vault.azure.net",
"accessCredentials": null
},
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.ClassicSimilarity"
}
}
},
Expand Down Expand Up @@ -353,6 +356,9 @@
"keyVaultKeyVersion": "myKeyVersion-32charAlphaNumericString",
"keyVaultUri": "https://myKeyVault.vault.azure.net",
"accessCredentials": null
},
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.ClassicSimilarity"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4296,6 +4296,51 @@
"url": "https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.html"
}
},
"Similarity": {
"discriminator": "@odata.type",
"properties": {
"@odata.type": {
"type": "string"
}
},
"required": [
"@odata.type"
],
"description": "Base type for similarity algorithms. Similarity algorithms are used to calculate scores that tie queries to documents. The higher the score, the more relevant the document is to that specific query. Those scores are used to rank the search results.",
"externalDocs": {
"url": "https://docs.microsoft.com/azure/search/index-ranking-similarity"
}
},
"ClassicSimilarity": {
"x-ms-discriminator-value": "#Microsoft.Azure.Search.ClassicSimilarity",
"allOf": [
{
"$ref": "#/definitions/Similarity"
}
],
"description": "Legacy similarity algorithm which uses the Lucene TFIDFSimilarity implementation of TF-IDF. This variation of TF-IDF introduces static document length normalization as well as coordinating factors that penalize documents that only partially match the searched queries."
},
"BM25Similarity": {
"x-ms-discriminator-value": "#Microsoft.Azure.Search.BM25Similarity",
"allOf": [
{
"$ref": "#/definitions/Similarity"
}
],
"properties": {
"k1": {
"type": "number",
"format": "double",
"description": "This property controls the scaling function between the term frequency of each matching terms and the final relevance score of a document-query pair. By default, a value of 1.2 is used. A value of 0.0 means the score does not scale with an increase in term frequency."
},
"b": {
"type": "number",
"format": "double",
"description": "This property controls how the length of a document affects the relevance score. By default, a value of 0.75 is used. A value of 0.0 means no length normalization is applied, while a value of 1.0 means the score is fully normalized by the length of the document."
}
},
"description": "Ranking function based on the Okapi BM25 similarity algorithm. BM25 is a TF-IDF-like algorithm that includes length normalization (controlled by the 'b' parameter) as well as term frequency saturation (controlled by the 'k1' parameter)."
},
"DataSourceCredentials": {
"properties": {
"connectionString": {
Expand Down Expand Up @@ -5409,6 +5454,13 @@
"url": "https://aka.ms/azure-search-encryption-with-cmk"
}
},
"similarity": {
"$ref": "#/definitions/Similarity",
"description": "The type of similarity algorithm to be used when scoring and ranking the documents matching a search query. The similarity algorithm can only be defined at index creation time and cannot be modified on existing indexes. If null, the ClassicSimilarity algorithm is used.",
"externalDocs": {
"url": "https://docs.microsoft.com/azure/search/index-ranking-similarity"
}
},
"@odata.etag": {
"x-ms-client-name": "ETag",
"type": "string",
Expand Down

0 comments on commit 7e56013

Please sign in to comment.