Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 400+ typos #1736

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public class DistancedItemQueue<U, T> implements Iterable<DistancedItem<T>> {
private final PriorityQueue<DistancedItem<T>> queue;
private final boolean minQueue;
/**
* Creates ontainer for items with their distances.
* Creates container for items with their distances.
*
* @param origin Origin (reference) point
* @param initial Initial list of elements to add in the structure
Expand Down Expand Up @@ -94,7 +94,7 @@ public DistancedItem<T> dequeue() {
}

/**
* Dequeue all the elements from queueu with ordering mantained
* Dequeue all the elements from queue with ordering maintained
*
* @return remove all the elements in the order of the queue i.e min/max queue.
*/
Expand Down
2 changes: 1 addition & 1 deletion ann/src/main/scala/com/twitter/ann/annoy/AnnoyCommon.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ object AnnoyCommon {

case class AnnoyRuntimeParams(
/* Number of vectors to evaluate while searching. A larger value will give more accurate results, but will take longer time to return.
* Default value would be numberOfTrees*numberOfNeigboursRequested
* Default value would be numberOfTrees*numberOfNeighboursRequested
*/
nodesToExplore: Option[Int])
extends RuntimeParams {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ private[this] class RawAnnoyQueryIndex[D <: Distance[D]](
): Future[List[NeighborWithDistance[Long, D]]] = {
futurePool {
val queryVector = embedding.toArray
val neigboursToRequest = neighboursToRequest(numOfNeighbours, runtimeParams)
val neigbours = index
.getNearestWithDistance(queryVector, neigboursToRequest)
val neighboursToRequest = neighboursToRequest(numOfNeighbours, runtimeParams)
val neighbours = index
.getNearestWithDistance(queryVector, neighboursToRequest)
.asScala
.take(numOfNeighbours)
.map { nn =>
Expand All @@ -114,24 +114,24 @@ private[this] class RawAnnoyQueryIndex[D <: Distance[D]](
}
.toList

neigbours
neighbours
}
}

// The Annoy Java lib does not expose a param for numOfNodesToExplore.
// Default number is numOfTrees*numOfNeigbours.
// Default number is numOfTrees*numOfNeighbours.
// Simple hack is to artificially increase the numOfNeighbours to be requested and then just cap it before returning.
private[this] def neighboursToRequest(
numOfNeighbours: Int,
annoyParams: AnnoyRuntimeParams
): Int = {
annoyParams.nodesToExplore match {
case Some(nodesToExplore) => {
val neigboursToRequest = nodesToExplore / numOfTrees
if (neigboursToRequest < numOfNeighbours)
val neighboursToRequest = nodesToExplore / numOfTrees
if (neighboursToRequest < numOfNeighbours)
numOfNeighbours
else
neigboursToRequest
neighboursToRequest
}
case _ => numOfNeighbours
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ object SerializableBruteForceIndex {
}

/**
* This is a class that wrapps a BruteForceIndex and provides a method for serialization.
* This is a class that wraps a BruteForceIndex and provides a method for serialization.
*
* @param bruteForceIndex all queries and updates are sent to this index.
* @param embeddingInjection injection that can convert embeddings to thrift embeddings.
Expand Down
4 changes: 2 additions & 2 deletions ann/src/main/scala/com/twitter/ann/hnsw/TypedHnswIndex.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ object TypedHnswIndex {
* construction, but better index quality. At some point, increasing
* ef_construction does not improve the quality of the index. One way to
* check if the selection of ef_construction was ok is to measure a recall
* for M nearest neighbor search when ef = ef_constuction: if the recall is
* for M nearest neighbor search when ef = ef_construction: if the recall is
* lower than 0.9, then there is room for improvement.
* @param maxM The number of bi-directional links created for every new element during construction.
* Reasonable range for M is 2-100. Higher M work better on datasets with high
Expand Down Expand Up @@ -64,7 +64,7 @@ object TypedHnswIndex {
* construction, but better index quality. At some point, increasing
* ef_construction does not improve the quality of the index. One way to
* check if the selection of ef_construction was ok is to measure a recall
* for M nearest neighbor search when ef = ef_constuction: if the recall is
* for M nearest neighbor search when ef = ef_construction: if the recall is
* lower than 0.9, then there is room for improvement.
* @param maxM The number of bi-directional links created for every new element during construction.
* Reasonable range for M is 2-100. Higher M work better on datasets with high
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ trait KnnJobBase {

object KnnJob extends TwitterExecutionApp with KnnJobBase {

val KnnPathSuffix: String = "/user/cortex-mlx/qualatative_analysis/knn_ground_truth/"
val KnnPathSuffix: String = "/user/cortex-mlx/qualitative_analysis/knn_ground_truth/"
val partitionKey: String = "version"

override def job: Execution[Unit] = Execution.withId { implicit uniqueId =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ object KnnHelper {
* @param reducers number of reducers for grouping
* @param isSearchSpaceLarger Used for optimization: Is the search space larger than the query space? Ignored if numOfSearchGroups > 1.
* @param numOfSearchGroups we divide the search space into these groups (randomly). Useful when the search space is too large. Overrides isSearchSpaceLarger.
* @param numReplicas Each search group will be responsible for 1/numReplicas queryEmebeddings.
* @param numReplicas Each search group will be responsible for 1/numReplicas queryEmbeddings.
* This might speed up the search when the size of the index embeddings is
* large.
* @tparam A type of query entity
Expand Down Expand Up @@ -226,7 +226,7 @@ object KnnHelper {
* the search space is too large. Search groups are shards. Choose this
* number by ensuring searchSpaceEmbeddings.size / numOfSearchGroups
* embeddings will fit into memory.
* @param numReplicas Each search group will be responsible for 1/numReplicas queryEmebeddings.
* @param numReplicas Each search group will be responsible for 1/numReplicas queryEmbeddings.
* By increasing this number, we can parallelize the work and reduce end to end
* running times.
* @param indexingStrategy How we will search for nearest neighbors within a search group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import com.twitter.scalding.UniqueID
import com.twitter.scalding_internal.job.TwitterExecutionApp

/**
* This job reads index embedding data, query embeddings data, and split into index set, query set and true nearest neigbor set
* This job reads index embedding data, query embeddings data, and split into index set, query set and true nearest neighbor set
* from query to index.
*/
object KnnTruthSetGenerator extends TwitterExecutionApp {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ This job builds an ANN index based on hnsw algorithm using producer embeddings (
Job arguments
*************

Enviroment variables (resources):
Environment variables (resources):
==============
- **CPU** Number of cpu cores (default: 32)
- **RAM_GB** RAM in gigabytes (default: 150)
Expand Down Expand Up @@ -95,7 +95,7 @@ General arguments (specified as **--profile.{options}**):
- **num_dimensions** Dimension of embedding in the input data. An exception will be thrown if any entry does not have a number of dimensions equal to this number.
- **metric** Distance metric (InnerProduct/Cosine/L2)
- **concurrency_level** Specifies how many parallel inserts happen to the index. This should probably be set to the number of cores on the machine.
- **algo** The kind of index you want to ouput. The supported options right now are:
- **algo** The kind of index you want to output. The supported options right now are:

1. **hnsw** (Metric supported: Cosine, L2, InnerProduct)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ class InMemoryLoadTestQueryRecorder[T](
latencyHistogram.add(queryLatency.inMicroseconds)
counter.incrementAndGet()
// Requests are assumed to have started around the time of the first time record was called
// plus the time it took for that query to hhave completed.
// plus the time it took for that query to have completed.
val (elapsedSinceFirstCall, firstQueryLatency) = elapsedTimeFun.get()
val durationSoFar = elapsedSinceFirstCall() + firstQueryLatency
elapsedTime.set(durationSoFar)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ $ oscar hdfs \
--sample_percent $SAMPLE_PERCENT
```

It will sample 0.1% of embeddings and store them in `tab` format to hdfs that can be direcly used as `query_set` for loadtest.
It will sample 0.1% of embeddings and store them in `tab` format to hdfs that can be directly used as `query_set` for loadtest.

# Knn Truth Set Generator

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ object IndexBuilderUtils {
concurrencyLevel: Int
): Future[Int] = {
val count = new AtomicInteger()
// Async stream allows us to procss at most concurrentLevel futures at a time.
// Async stream allows us to process at most concurrentLevel futures at a time.
Future.Unit.before {
val stream = AsyncStream.fromSeq(embeddings)
val appendStream = stream.mapConcurrent(concurrencyLevel) { annEmbedding =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct AnnoyIndexMetadata {

struct AnnoyRuntimeParam {
/* Number of vectors to evaluate while searching. A larger value will give more accurate results, but will take longer time to return.
* Default value would be numberOfTrees*numberOfNeigboursRequested
* Default value would be numberOfTrees*numberOfNeighboursRequested
*/
1: optional i32 numOfNodesToExplore
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ class CrMixerServer extends ThriftServer with Mtls with HttpServer with HttpMtls
TripCandidateStoreModule,
TwiceClustersMembersStoreModule,
TweetBasedQigSimilarityEngineModule,
TweetBasedTwHINSimlarityEngineModule,
TweetBasedTwHINSimilarityEngineModule,
TweetBasedUnifiedSimilarityEngineModule,
TweetBasedUserAdGraphSimilarityEngineModule,
TweetBasedUserTweetGraphSimilarityEngineModule,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.twitter.cr_mixer.logging

import com.twitter.cr_mixer.featureswitch.CrMixerImpressedBuckets
import com.twitter.cr_mixer.thriftscala.ImpressesedBucketInfo
import com.twitter.cr_mixer.thriftscala.ImpressedBucketInfo
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.logging.Logger
Expand All @@ -24,13 +24,13 @@ object ScribeLoggerUtils {

private[logging] def getImpressedBuckets(
scopedStats: StatsReceiver
): Option[List[ImpressesedBucketInfo]] = {
): Option[List[ImpressedBucketInfo]] = {
StatsUtil.trackNonFutureBlockStats(scopedStats.scope("getImpressedBuckets")) {
CrMixerImpressedBuckets.getAllImpressedBuckets.map { listBuckets =>
val listBucketsSet = listBuckets.toSet
scopedStats.stat("impressed_buckets").add(listBucketsSet.size)
listBucketsSet.map { bucket =>
ImpressesedBucketInfo(
ImpressedBucketInfo(
experimentId = bucket.experiment.settings.experimentId.getOrElse(-1L),
bucketName = bucket.name,
version = bucket.experiment.settings.version,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ object ModelConfig {
val DebuggerDemo: String = "DebuggerDemo"

// ColdStartLookalike - this is not really a model name, it is a placeholder to
// indicate ColdStartLookalike candidate source, which is currently being pluged into
// indicate ColdStartLookalike candidate source, which is currently being plugged into
// CustomizedRetrievalCandidateGeneration temporarily.
val ColdStartLookalikeModelName: String = "ConsumersBasedUtgColdStartLookalike20220707"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineC
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.memcached.{Client => MemcachedClient}

object TweetBasedTwHINSimlarityEngineModule extends TwitterModule {
object TweetBasedTwHINSimilarityEngineModule extends TwitterModule {
@Provides
@Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine)
def providesTweetBasedTwHINANNSimilarityEngine(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ object CertoTopicTweetSimilarityEngine {
case class Query(
topicId: TopicId,
maxCandidates: Int,
certoScoreTheshold: Double)
certoScoreThreshold: Double)

def fromParams(
topicId: TopicId,
Expand All @@ -86,7 +86,7 @@ object CertoTopicTweetSimilarityEngine {
Query(
topicId = topicId,
maxCandidates = maxCandidates,
certoScoreTheshold = params(TopicTweetParams.CertoScoreThresholdParam)
certoScoreThreshold = params(TopicTweetParams.CertoScoreThresholdParam)
),
params
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ case class WalsStats(scope: String, scopedStats: StatsReceiver) {
}

// StatsMap maintains a mapping from Model's input signature to a stats receiver
// The Wals model suports multiple input signature which can run different graphs internally and
// The Wals model supports multiple input signature which can run different graphs internally and
// can have a different performance profile.
// Invoking StatsReceiver.stat() on each request can create a new stat object and can be expensive
// in performance critical paths.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ object SimilarityEngine extends Logging {
* We need some further refactor work to make it more flexible.
*
* @param deciderConfig Gate the Engine by a decider. If specified,
* @param enableFeatureSwitch. DO NOT USE IT FOR NOW. It needs some refactorting. Please set it to None (SD-20268)
* @param enableFeatureSwitch. DO NOT USE IT FOR NOW. It needs some refactoring. Please set it to None (SD-20268)
*/
case class GatingConfig(
deciderConfig: Option[DeciderConfig],
Expand Down Expand Up @@ -107,7 +107,7 @@ object SimilarityEngine extends Logging {
*
* @param underlyingStore un-cached store implementation
* @param keyPrefix a prefix differentiates 2 stores if they share the same key space.
* e.x. 2 implementations of ReadableStore[UserId, Seq[Candidiate] ]
* e.x. 2 implementations of ReadableStore[UserId, Seq[Candidate] ]
* can use prefix "store_v1", "store_v2"
* @return A ReadableStore with a MemCache wrapper
*/
Expand Down
2 changes: 1 addition & 1 deletion cr-mixer/thrift/src/main/thrift/product_context.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ namespace java com.twitter.cr_mixer.thriftjava
#@namespace strato com.twitter.cr_mixer

struct HomeContext {
2: optional i32 maxResults // enabled for QuaityFactor related DDGs only
2: optional i32 maxResults // enabled for QualityFactor related DDGs only
} (persisted='true', hasPersonalData='false')

struct NotificationsContext {
Expand Down
14 changes: 7 additions & 7 deletions cr-mixer/thrift/src/main/thrift/scribe.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ struct VITTweetCandidatesScribe {
2: required i64 userId (personalDataType = 'UserId')
3: required list<VITTweetCandidateScribe> candidates
7: required product.Product product
8: required list<ImpressesedBucketInfo> impressedBuckets
8: required list<ImpressedBucketInfo> impressedBuckets
} (persisted='true', hasPersonalData = 'true')

struct VITTweetCandidateScribe {
Expand All @@ -34,7 +34,7 @@ struct GetTweetsRecommendationsScribe {
3: required Result result
4: optional i64 traceId
5: optional PerformanceMetrics performanceMetrics
6: optional list<ImpressesedBucketInfo> impressedBuckets
6: optional list<ImpressedBucketInfo> impressedBuckets
} (persisted='true', hasPersonalData = 'true')

struct SourceSignal {
Expand Down Expand Up @@ -90,7 +90,7 @@ union Result {
6: TopLevelApiResult topLevelApiResult
} (persisted='true', hasPersonalData = 'true')

struct ImpressesedBucketInfo {
struct ImpressedBucketInfo {
1: required i64 experimentId (personalDataType = 'ExperimentId')
2: required string bucketName
3: required i32 version
Expand All @@ -106,7 +106,7 @@ struct GetRelatedTweetsScribe {
5: optional i64 guestId (personalDataType = 'GuestId')
6: optional i64 traceId
7: optional PerformanceMetrics performanceMetrics
8: optional list<ImpressesedBucketInfo> impressedBuckets
8: optional list<ImpressedBucketInfo> impressedBuckets
} (persisted='true', hasPersonalData = 'true')

struct RelatedTweetTopLevelApiResult {
Expand All @@ -118,7 +118,7 @@ struct RelatedTweetTopLevelApiResult {
union RelatedTweetResult {
1: RelatedTweetTopLevelApiResult relatedTweetTopLevelApiResult
2: FetchCandidatesResult fetchCandidatesResult
3: PreRankFilterResult preRankFilterResult # results after seqential filters
3: PreRankFilterResult preRankFilterResult # results after sequential filters
# if later we need rankResult, we can add it here
} (persisted='true', hasPersonalData = 'true')

Expand All @@ -130,7 +130,7 @@ struct GetUtegTweetsScribe {
3: required UtegTweetResult utegTweetResult
4: optional i64 traceId
5: optional PerformanceMetrics performanceMetrics
6: optional list<ImpressesedBucketInfo> impressedBuckets
6: optional list<ImpressedBucketInfo> impressedBuckets
} (persisted='true', hasPersonalData = 'true')

struct UtegTweetTopLevelApiResult {
Expand All @@ -153,7 +153,7 @@ struct GetAdsRecommendationsScribe {
3: required AdsRecommendationsResult result
4: optional i64 traceId
5: optional PerformanceMetrics performanceMetrics
6: optional list<ImpressesedBucketInfo> impressedBuckets
6: optional list<ImpressedBucketInfo> impressedBuckets
} (persisted='true', hasPersonalData = 'true')

struct AdsRecommendationTopLevelApiResult {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class RepeatedProfileVisitsSource @Inject() (
val recommendationThreshold = params.getInt(RepeatedProfileVisitsParams.RecommendationThreshold)
val bucketingThreshold = params.getInt(RepeatedProfileVisitsParams.BucketingThreshold)

// Get the list of repeatedly visited profilts. Only keep accounts with >= bucketingThreshold visits.
// Get the list of repeatedly visited profiles. Only keep accounts with >= bucketingThreshold visits.
val repeatedVisitedAccountsStitch: Stitch[Map[Long, Int]] =
getRepeatedVisitedAccounts(params, userId).map(_.filter(kv => kv._2 >= bucketingThreshold))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ object StratoClientModule extends TwitterModule {
val WTFPostNuxFeaturesPath = "ml/featureStore/onboarding/wtfPostNuxFeatures.User"
val ElectionCandidatesPath = "onboarding/electionAccounts"
val UserUserGraphPath = "recommendations/userUserGraph"
val WtfDissmissEventsPath = "onboarding/wtfDismissEvents"
val WtfDismissEventsPath = "onboarding/wtfDismissEvents"
val RelatableAccountsPath = "onboarding/userrecs/relatableAccounts"
val ExtendedNetworkCandidatesPath = "search/account_search/extendedNetworkCandidatesMH"
val LabeledNotificationPath = "frigate/magicrecs/labeledPushRecsAggregated.User"
Expand Down Expand Up @@ -234,7 +234,7 @@ object StratoClientModule extends TwitterModule {
Unit,
(Long, (Long, Long)),
WhoToFollowDismissEventDetails
](WtfDissmissEventsPath)
](WtfDismissEventsPath)

@Provides
@Singleton
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ object GuiceNamedConstants {
final val TWO_HOP_RANDOM_WALK_FETCHER = "two_hop_random_walk_fetcher"
final val USER_RECOMMENDABILITY_FETCHER = "user_recommendability_fetcher"
final val USER_STATE_FETCHER = "user_state_fetcher"
final val UTT_ACCOUNT_RECOMMENDATIONS_FETCHER = "utt_account_recomendations_fetcher"
final val UTT_ACCOUNT_RECOMMENDATIONS_FETCHER = "utt_account_recommendations_fetcher"
final val UTT_SEED_ACCOUNTS_FETCHER = "utt_seed_accounts_fetcher"

final val ELECTION_CANDIDATES_FETCHER = "election_candidates_fetcher"
Expand Down
Loading