-
Notifications
You must be signed in to change notification settings - Fork 645
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Dedupe auxiliary data strings in Azure Search Service (#634)
Progress on NuGet/Engineering#2635
- Loading branch information
1 parent
035f754
commit b1f928e
Showing
18 changed files
with
396 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
src/NuGet.Services.AzureSearch/AuxiliaryFiles/StringCache.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System.Collections.Concurrent; | ||
using System.Threading; | ||
|
||
namespace NuGet.Services.AzureSearch.AuxiliaryFiles | ||
{ | ||
/// <summary>
/// A thread-safe, case-sensitive string de-duplication cache. Callers pass a string to
/// <see cref="Dedupe(string)"/> and get back a previously cached instance with the same content when one
/// exists, so that equal strings can share a single allocation. Simple usage counters are tracked alongside.
/// </summary>
public class StringCache
{
    /// <summary>
    /// Maintain a lookup of strings for de-duping. We maintain the original case for de-duping purposes by using
    /// the default string comparer. As of July of 2019 in PROD, maintaining original case of version
    /// string adds less than 0.3% extra strings. De-duping version strings in a case-sensitive manner removes
    /// 87.0% of the string allocations. Intuitively this means most people use the same case of a given version
    /// string and a lot of people use the same version strings (common ones are 1.0.0, 1.0.1, 1.0.2, 1.1.0, etc).
    /// </summary>
    private readonly ConcurrentDictionary<string, string> _values = new ConcurrentDictionary<string, string>();

    /// <summary>
    /// Keep track of the number of requests for a string. This is the number of times <see cref="Dedupe(string)"/>
    /// has been called, including calls that passed <c>null</c>.
    /// </summary>
    private int _requestCount = 0;

    /// <summary>
    /// Keep track of the number of strings de-duped, i.e. "cache hits".
    /// </summary>
    private int _hitCount = 0;

    /// <summary>
    /// Keep track of the number of characters in the cache.
    /// </summary>
    private long _charCount = 0;

    /// <summary>The number of distinct strings currently held by the cache.</summary>
    public int StringCount => _values.Count;

    /// <summary>
    /// The number of <see cref="Dedupe(string)"/> calls since construction or the last
    /// <see cref="ResetCounts"/>.
    /// </summary>
    public int RequestCount => Volatile.Read(ref _requestCount);

    /// <summary>
    /// The number of <see cref="Dedupe(string)"/> calls that returned an already cached string since
    /// construction or the last <see cref="ResetCounts"/>.
    /// </summary>
    public int HitCount => Volatile.Read(ref _hitCount);

    /// <summary>
    /// The total number of characters (sum of <see cref="string.Length"/>) of all strings added to the cache.
    /// </summary>
    // A plain read of a 64-bit field is not guaranteed to be atomic in a 32-bit process, and writers update
    // this field with Interlocked.Add, so read it with Interlocked.Read to avoid observing a torn value.
    public long CharCount => Interlocked.Read(ref _charCount);

    /// <summary>
    /// Returns a cached string instance equal to <paramref name="value"/>. If no equal string is cached yet,
    /// <paramref name="value"/> itself is added to the cache and returned.
    /// </summary>
    /// <param name="value">The string to de-dupe. May be <c>null</c>.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="value"/> is <c>null</c>; otherwise the cached instance with the same
    /// content as <paramref name="value"/>.
    /// </returns>
    public string Dedupe(string value)
    {
        // Every call counts as a request, even a null one which can never be a hit.
        Interlocked.Increment(ref _requestCount);

        if (value == null)
        {
            return null;
        }

        // Inspired by:
        // https://devblogs.microsoft.com/pfxteam/building-a-custom-getoradd-method-for-concurrentdictionarytkeytvalue/
        //
        // A plain GetOrAdd would not tell us whether the value was found or added, which we need to know to
        // maintain the hit and character counters. Loop so that losing an add race to another thread is
        // resolved by the next TryGetValue.
        while (true)
        {
            if (_values.TryGetValue(value, out var existingValue))
            {
                Interlocked.Increment(ref _hitCount);
                return existingValue;
            }

            if (_values.TryAdd(value, value))
            {
                // Only the thread that actually added the string accounts for its characters.
                Interlocked.Add(ref _charCount, value.Length);
                return value;
            }
        }
    }

    /// <summary>
    /// Resets <see cref="RequestCount"/> and <see cref="HitCount"/> back to zero. The cached strings,
    /// <see cref="StringCount"/>, and <see cref="CharCount"/> are left untouched.
    /// </summary>
    public void ResetCounts()
    {
        // Use interlocked writes so the reset is ordered consistently with the Interlocked.Increment calls
        // performed by concurrent Dedupe callers.
        Interlocked.Exchange(ref _requestCount, 0);
        Interlocked.Exchange(ref _hitCount, 0);
    }
}
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.