Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Support for retrying messages within replicator processor #827

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gen/go/admin/idl.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 46 additions & 2 deletions .gen/go/history/historyservice_replicateevents.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions .gen/go/history/idl.go

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions .gen/go/shared/idl.go

Large diffs are not rendered by default.

110 changes: 110 additions & 0 deletions .gen/go/shared/types.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions common/messaging/kafkaClient.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ func (c *kafkaClient) NewConsumer(currentCluster, sourceCluster, consumerName st
sourceTopics := c.config.getTopicsForCadenceCluster(sourceCluster)

topicKafkaCluster := c.config.getKafkaClusterForTopic(sourceTopics.Topic)
retryTopicKafkaCluster := c.config.getKafkaClusterForTopic(currentTopics.RetryTopic)
dqlTopicKafkaCluster := c.config.getKafkaClusterForTopic(currentTopics.DLQTopic)
topicList := kafka.ConsumerTopicList{
kafka.ConsumerTopic{
Expand All @@ -50,11 +49,6 @@ func (c *kafkaClient) NewConsumer(currentCluster, sourceCluster, consumerName st
Cluster: topicKafkaCluster,
BrokerList: c.config.getBrokersForKafkaCluster(topicKafkaCluster),
},
RetryQ: kafka.Topic{
Name: currentTopics.RetryTopic,
Cluster: retryTopicKafkaCluster,
BrokerList: c.config.getBrokersForKafkaCluster(retryTopicKafkaCluster),
},
DLQ: kafka.Topic{
Name: currentTopics.DLQTopic,
Cluster: dqlTopicKafkaCluster,
Expand Down
19 changes: 18 additions & 1 deletion common/metrics/defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,8 @@ const (
ReplicatorQueueProcessorScope
// ReplicatorTaskHistoryScope is the scope used for history task processing by replicator queue processor
ReplicatorTaskHistoryScope
// ReplicateHistoryEventsScope is the scope used by historyReplicator API for applying events
ReplicateHistoryEventsScope

NumHistoryScopes
)
Expand Down Expand Up @@ -426,6 +428,10 @@ const (
const (
// ReplicationScope is the scope used by all metric emitted by replicator
ReplicatorScope = iota + NumCommonScopes
// DomainReplicationTaskScope is the scope used by domain task replication processing
DomainReplicationTaskScope
// HistoryReplicationTaskScope is the scope used by history task replication processing
HistoryReplicationTaskScope

NumWorkerScopes
)
Expand Down Expand Up @@ -575,6 +581,7 @@ var ScopeDefs = map[ServiceIdx]map[int]scopeDefinition{
HistoryEventNotificationScope: {operation: "HistoryEventNotification"},
ReplicatorQueueProcessorScope: {operation: "ReplicatorQueueProcessor"},
ReplicatorTaskHistoryScope: {operation: "ReplicatorTaskHistory"},
ReplicateHistoryEventsScope: {operation: "ReplicateHistoryEvents"},
},
// Matching Scope Names
Matching: {
Expand All @@ -590,7 +597,9 @@ var ScopeDefs = map[ServiceIdx]map[int]scopeDefinition{
},
// Worker Scope Names
Worker: {
ReplicatorScope: {operation: "Replicator"},
ReplicatorScope: {operation: "Replicator"},
DomainReplicationTaskScope: {operation: "DomainReplicationTask"},
HistoryReplicationTaskScope: {operation: "HistoryReplicationTask"},
},
}

Expand All @@ -609,6 +618,7 @@ const (
CadenceErrQueryFailedCounter
CadenceErrLimitExceededCounter
CadenceErrContextTimeoutCounter
CadenceErrRetryTaskCounter
PersistenceRequests
PersistenceFailures
PersistenceLatency
Expand Down Expand Up @@ -672,6 +682,9 @@ const (
HistoryEventNotificationFanoutLatency
HistoryEventNotificationInFlightMessageGauge
HistoryEventNotificationFailDeliveryCount
StaleReplicationEventsCounter
BufferedReplicationTaskCounter
HistoryConflictsCounter
)

// Matching metrics enum
Expand Down Expand Up @@ -710,6 +723,7 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
CadenceErrQueryFailedCounter: {metricName: "cadence.errors.query-failed", metricType: Counter},
CadenceErrLimitExceededCounter: {metricName: "cadence.errors.limit-exceeded", metricType: Counter},
CadenceErrContextTimeoutCounter: {metricName: "cadence.errors.context-timeout", metricType: Counter},
CadenceErrRetryTaskCounter: {metricName: "cadence.errors.retry-task", metricType: Counter},
PersistenceRequests: {metricName: "persistence.requests", metricType: Counter},
PersistenceFailures: {metricName: "persistence.errors", metricType: Counter},
PersistenceLatency: {metricName: "persistence.latency", metricType: Timer},
Expand Down Expand Up @@ -768,6 +782,9 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
HistoryEventNotificationFanoutLatency: {metricName: "history-event-notification-fanout-latency", metricType: Timer},
HistoryEventNotificationInFlightMessageGauge: {metricName: "history-event-notification-inflight-message-gauge", metricType: Gauge},
HistoryEventNotificationFailDeliveryCount: {metricName: "history-event-notification-fail-delivery-count", metricType: Counter},
StaleReplicationEventsCounter: {metricName: "stale-replication-events", metricType: Counter},
BufferedReplicationTaskCounter: {metricName: "buffered-replication-tasks", metricType: Counter},
HistoryConflictsCounter: {metricName: "history-conflicts", metricType: Counter},
},
Matching: {
PollSuccessCounter: {metricName: "poll.success"},
Expand Down
1 change: 1 addition & 0 deletions idl/github.com/uber/cadence/history.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ service HistoryService {
3: shared.EntityNotExistsError entityNotExistError,
4: ShardOwnershipLostError shardOwnershipLostError,
5: shared.LimitExceededError limitExceededError,
6: shared.RetryTaskError retryTaskError,
)

/**
Expand Down
4 changes: 4 additions & 0 deletions idl/github.com/uber/cadence/shared.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ exception AccessDeniedError {
1: required string message
}

exception RetryTaskError {
1: required string message
}

enum WorkflowIdReusePolicy {
/*
* allow start a workflow execution using the same workflow ID,
Expand Down
2 changes: 2 additions & 0 deletions service/history/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,8 @@ func (h *Handler) updateErrorMetric(scope int, err error) {
h.metricsClient.IncCounter(scope, metrics.CadenceErrCancellationAlreadyRequestedCounter)
case *gen.LimitExceededError:
h.metricsClient.IncCounter(scope, metrics.CadenceErrLimitExceededCounter)
case *gen.RetryTaskError:
h.metricsClient.IncCounter(scope, metrics.CadenceErrRetryTaskCounter)
case *yarpcerrors.Status:
if err.Code() == yarpcerrors.CodeDeadlineExceeded {
h.metricsClient.IncCounter(scope, metrics.CadenceErrContextTimeoutCounter)
Expand Down
Loading