diff --git a/api/BUILD b/api/BUILD
index 7099a7abee5d..7ae40630173c 100644
--- a/api/BUILD
+++ b/api/BUILD
@@ -181,6 +181,7 @@ proto_library(
         "//envoy/extensions/filters/http/oauth2/v3:pkg",
         "//envoy/extensions/filters/http/on_demand/v3:pkg",
         "//envoy/extensions/filters/http/original_src/v3:pkg",
+        "//envoy/extensions/filters/http/rate_limit_quota/v3:pkg",
         "//envoy/extensions/filters/http/ratelimit/v3:pkg",
         "//envoy/extensions/filters/http/rbac/v3:pkg",
         "//envoy/extensions/filters/http/router/v3:pkg",
@@ -283,6 +284,7 @@ proto_library(
         "//envoy/service/listener/v3:pkg",
         "//envoy/service/load_stats/v3:pkg",
         "//envoy/service/metrics/v3:pkg",
+        "//envoy/service/rate_limit_quota/v3:pkg",
         "//envoy/service/ratelimit/v3:pkg",
         "//envoy/service/route/v3:pkg",
         "//envoy/service/runtime/v3:pkg",
diff --git a/api/envoy/extensions/filters/http/rate_limit_quota/v3/BUILD b/api/envoy/extensions/filters/http/rate_limit_quota/v3/BUILD
new file mode 100644
index 000000000000..39b7d6bb45d1
--- /dev/null
+++ b/api/envoy/extensions/filters/http/rate_limit_quota/v3/BUILD
@@ -0,0 +1,15 @@
+# DO NOT EDIT. This file is generated by tools/proto_format/proto_sync.py.
+
+load("@envoy_api//bazel:api_build_system.bzl", "api_proto_package")
+
+licenses(["notice"])  # Apache 2
+
+api_proto_package(
+    deps = [
+        "//envoy/config/core/v3:pkg",
+        "//envoy/type/v3:pkg",
+        "@com_github_cncf_udpa//udpa/annotations:pkg",
+        "@com_github_cncf_udpa//xds/annotations/v3:pkg",
+        "@com_github_cncf_udpa//xds/type/matcher/v3:pkg",
+    ],
+)
diff --git a/api/envoy/extensions/filters/http/rate_limit_quota/v3/rate_limit_quota.proto b/api/envoy/extensions/filters/http/rate_limit_quota/v3/rate_limit_quota.proto
new file mode 100644
index 000000000000..11ed9e120115
--- /dev/null
+++ b/api/envoy/extensions/filters/http/rate_limit_quota/v3/rate_limit_quota.proto
@@ -0,0 +1,418 @@
+syntax = "proto3";
+
+package envoy.extensions.filters.http.rate_limit_quota.v3;
+
+import "envoy/config/core/v3/base.proto";
+import "envoy/config/core/v3/extension.proto";
+import "envoy/config/core/v3/grpc_service.proto";
+import "envoy/type/v3/http_status.proto";
+import "envoy/type/v3/ratelimit_strategy.proto";
+
+import "google/protobuf/duration.proto";
+import "google/protobuf/wrappers.proto";
+import "google/rpc/status.proto";
+
+import "xds/annotations/v3/status.proto";
+import "xds/type/matcher/v3/matcher.proto";
+
+import "udpa/annotations/status.proto";
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.extensions.filters.http.rate_limit_quota.v3";
+option java_outer_classname = "RateLimitQuotaProto";
+option java_multiple_files = true;
+option go_package = "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/http/rate_limit_quota/v3;rate_limit_quotav3";
+option (udpa.annotations.file_status).package_version_status = ACTIVE;
+option (xds.annotations.v3.file_status).work_in_progress = true;
+
+// [#protodoc-title: Rate Limit Quota]
+// Rate Limit Quota :ref:`configuration overview <config_http_filters_rate_limit_quota>`.
+// [#comment:TODO(sergiitk): extension envoy.filters.http.rate_limit_quota]
+
+// Configures the Rate Limit Quota filter.
+//
+// Can be overridden in the per-route and per-host configurations.
+// The more specific definition completely overrides the less specific definition.
+// [#next-free-field: 7]
+message RateLimitQuotaFilterConfig {
+  // Configures the gRPC Rate Limit Quota Service (RLQS) RateLimitQuotaService.
+  config.core.v3.GrpcService rlqs_server = 1 [(validate.rules).message = {required: true}];
+
+  // The application domain to use when calling the service. This enables sharing the quota
+  // server between different applications without fear of overlap.
+  // E.g., "envoy".
+  string domain = 2 [(validate.rules).string = {min_len: 1}];
+
+  // The match tree to use for grouping incoming requests into buckets.
+  //
+  // Example:
+  //
+  // .. validated-code-block:: yaml
+  //   :type-name: xds.type.matcher.v3.Matcher
+  //
+  //   matcher_list:
+  //     matchers:
+  //     # Assign requests with header['env'] set to 'staging' to the bucket { name: 'staging' }
+  //     - predicate:
+  //         single_predicate:
+  //           input:
+  //             typed_config:
+  //               '@type': type.googleapis.com/envoy.type.matcher.v3.HttpRequestHeaderMatchInput
+  //               header_name: env
+  //           value_match:
+  //             exact: staging
+  //       on_match:
+  //         action:
+  //           typed_config:
+  //             '@type': type.googleapis.com/envoy.extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings
+  //             bucket_id_builder:
+  //               bucket_id_builder:
+  //                 name:
+  //                   string_value: staging
+  //
+  //     # Assign requests with header['user_group'] set to 'admin' to the bucket { acl: 'admin_users' }
+  //     - predicate:
+  //         single_predicate:
+  //           input:
+  //             typed_config:
+  //               '@type': type.googleapis.com/xds.type.matcher.v3.HttpAttributesCelMatchInput
+  //           custom_match:
+  //             typed_config:
+  //               '@type': type.googleapis.com/xds.type.matcher.v3.CelMatcher
+  //               expr_match:
+  //                 # Shortened for illustration purposes. Use the parsed CEL expression for:
+  //                 # request.headers['user_group'] == 'admin'
+  //                 parsed_expr: {}
+  //       on_match:
+  //         action:
+  //           typed_config:
+  //             '@type': type.googleapis.com/envoy.extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings
+  //             bucket_id_builder:
+  //               bucket_id_builder:
+  //                 acl:
+  //                   string_value: admin_users
+  //
+  //   # Catch-all clause for the requests not matched by any of the matchers.
+  //   # In this example, deny all requests.
+  //   on_no_match:
+  //     action:
+  //       typed_config:
+  //         '@type': type.googleapis.com/envoy.extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings
+  //         no_assignment_behavior:
+  //           fallback_rate_limit:
+  //             blanket_rule: DENY_ALL
+  //
+  // .. attention::
+  //  The first matched group wins. Once the request is matched into a bucket, matcher
+  //  evaluation ends.
+  //
+  // Use the ``on_no_match`` field to assign the catch-all bucket. If a request is not matched
+  // into any bucket, and there's no ``on_no_match`` field configured, the request will be
+  // ALLOWED by default. It will NOT be reported to the RLQS server.
+  //
+  // Refer to :ref:`Unified Matcher API <envoy_v3_api_msg_.xds.type.matcher.v3.Matcher>`
+  // documentation for more information on the matcher trees.
+  xds.type.matcher.v3.Matcher bucket_matchers = 3 [(validate.rules).message = {required: true}];
+
+  // If set, this will enable -- but not necessarily enforce -- the rate limit for the given
+  // fraction of requests.
+  //
+  // Defaults to 100% of requests.
+  config.core.v3.RuntimeFractionalPercent filter_enabled = 4;
+
+  // If set, this will enforce the rate limit decisions for the given fraction of requests.
+  // For requests that are not enforced the filter will still obtain the quota and include it
+  // in the load computation, however the request will always be allowed regardless of the outcome
+  // of quota application. This allows validation or testing of the rate limiting service
+  // infrastructure without disrupting existing traffic.
+  //
+  // Note: this only applies to the fraction of enabled requests.
+  //
+  // Defaults to 100% of requests.
+  config.core.v3.RuntimeFractionalPercent filter_enforced = 5;
+
+  // Specifies a list of HTTP headers that should be added to each request that
+  // has been rate limited and is also forwarded upstream. This can only occur when the
+  // filter is enabled but not enforced.
+  repeated config.core.v3.HeaderValueOption request_headers_to_add_when_not_enforced = 6
+      [(validate.rules).repeated = {max_items: 10}];
+}
+
+// Per-route and per-host configuration overrides. A field set here overrides the value from the
+// less specific definition; a field left unset inherits it, as described on each field.
+message RateLimitQuotaOverride {
+  // The application domain to use when calling the service. This enables sharing the quota
+  // server between different applications without fear of overlap.
+  // E.g., "envoy".
+  //
+  // If empty, inherits the value from the less specific definition.
+  string domain = 1;
+
+  // The match tree to use for grouping incoming requests into buckets.
+  //
+  // If set, fully overrides the bucket matchers provided on the less specific definition.
+  // If not set, inherits the value from the less specific definition.
+  //
+  // See usage example: :ref:`RateLimitQuotaFilterConfig.bucket_matchers
+  // <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaFilterConfig.bucket_matchers>`.
+  xds.type.matcher.v3.Matcher bucket_matchers = 2;
+}
+
+// Rate Limit Quota Bucket Settings to apply on the successful ``bucket_matchers`` match.
+//
+// Specify this message in the :ref:`Matcher.OnMatch.action
+// <envoy_v3_api_field_.xds.type.matcher.v3.Matcher.OnMatch.action>` field of the
+// ``bucket_matchers`` matcher tree to assign the matched requests to the Quota Bucket.
+// Usage example: :ref:`RateLimitQuotaFilterConfig.bucket_matchers
+// <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaFilterConfig.bucket_matchers>`.
+// [#next-free-field: 6]
+message RateLimitQuotaBucketSettings {
+  // Configures the behavior after the first request has been matched to the bucket, and before
+  // the RLQS server returns the first quota assignment.
+  message NoAssignmentBehavior {
+    oneof no_assignment_behavior {
+      option (validate.required) = true;
+
+      // Apply pre-configured rate limiting strategy until the server sends the first assignment.
+      type.v3.RateLimitStrategy fallback_rate_limit = 1;
+    }
+  }
+
+  // Specifies the behavior when the bucket's assignment has expired, and cannot be refreshed for
+  // any reason.
+  message ExpiredAssignmentBehavior {
+    // Reuse the last known quota assignment, effectively extending it for the duration
+    // specified in the :ref:`expired_assignment_behavior_timeout
+    // <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.ExpiredAssignmentBehavior.expired_assignment_behavior_timeout>`
+    // field.
+    message ReuseLastAssignment {
+    }
+
+    // Limit the time :ref:`ExpiredAssignmentBehavior
+    // <envoy_v3_api_msg_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.ExpiredAssignmentBehavior>`
+    // is applied. If the server doesn't respond within this duration:
+    //
+    // 1. Selected ``ExpiredAssignmentBehavior`` is no longer applied.
+    // 2. The bucket is abandoned. The process of abandoning the bucket is described in the
+    //    :ref:`AbandonAction <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.AbandonAction>`
+    //    message.
+    // 3. If a new request is matched into the bucket that has become abandoned,
+    //    the data plane restarts the subscription to the bucket. The process of restarting the
+    //    subscription is described in the :ref:`AbandonAction
+    //    <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.AbandonAction>`
+    //    message.
+    //
+    // If the field is not set, the ``ExpiredAssignmentBehavior`` time is **not limited**:
+    // it applies to the bucket until replaced by an *active* assignment.
+    google.protobuf.Duration expired_assignment_behavior_timeout = 1
+        [(validate.rules).duration = {gt {}}];
+
+    oneof expired_assignment_behavior {
+      option (validate.required) = true;
+
+      // Apply the rate limiting strategy to all requests matched into the bucket until the RLQS
+      // server sends a new assignment, or the :ref:`expired_assignment_behavior_timeout
+      // <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.ExpiredAssignmentBehavior.expired_assignment_behavior_timeout>`
+      // runs out.
+      type.v3.RateLimitStrategy fallback_rate_limit = 2;
+
+      // Reuse the last *active* assignment until the RLQS server sends a new assignment, or the
+      // :ref:`expired_assignment_behavior_timeout
+      // <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.ExpiredAssignmentBehavior.expired_assignment_behavior_timeout>`
+      // runs out.
+      ReuseLastAssignment reuse_last_assignment = 3;
+    }
+  }
+
+  // Customize the deny response to the requests over the rate limit.
+  message DenyResponseSettings {
+    // HTTP response code to deny for HTTP requests (gRPC excluded).
+    // Defaults to 429 (:ref:`StatusCode.TooManyRequests<envoy_v3_api_enum_value_type.v3.StatusCode.TooManyRequests>`).
+    type.v3.HttpStatus http_status = 1;
+
+    // HTTP response body used to deny for HTTP requests (gRPC excluded).
+    // If not set, an empty body is returned.
+    google.protobuf.BytesValue http_body = 2;
+
+    // Configure the deny response for gRPC requests over the rate limit.
+    // Allows specifying the `RPC status code
+    // <https://cloud.google.com/natural-language/docs/reference/rpc/google.rpc#google.rpc.Code>`_,
+    // and the error message.
+    // Defaults to the Status with the RPC Code ``UNAVAILABLE`` and empty message.
+    //
+    // To identify gRPC requests, Envoy checks that the ``Content-Type`` header is
+    // ``application/grpc``, or one of the various ``application/grpc+`` values.
+    //
+    // .. note::
+    //   The HTTP code for a gRPC response is always 200.
+    google.rpc.Status grpc_status = 3;
+
+    // Specifies a list of HTTP headers that should be added to each response for requests that
+    // have been rate limited. Applies both to plain HTTP, and gRPC requests.
+    // The headers are added even when the rate limit quota was not enforced.
+    repeated config.core.v3.HeaderValueOption response_headers_to_add = 4
+        [(validate.rules).repeated = {max_items: 10}];
+  }
+
+  // ``BucketIdBuilder`` makes it possible to build :ref:`BucketId
+  // <envoy_v3_api_msg_service.rate_limit_quota.v3.BucketId>` with values substituted
+  // from the dynamic properties associated with each individual request. See usage examples in
+  // the docs to :ref:`bucket_id_builder
+  // <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.bucket_id_builder>`
+  // field.
+  message BucketIdBuilder {
+    // Produces the value of the :ref:`BucketId
+    // <envoy_v3_api_msg_service.rate_limit_quota.v3.BucketId>` map.
+    message ValueBuilder {
+      oneof value_specifier {
+        option (validate.required) = true;
+
+        // Static string value — becomes the value in the :ref:`BucketId
+        // <envoy_v3_api_msg_service.rate_limit_quota.v3.BucketId>` map as is.
+        string string_value = 1;
+
+        // Dynamic value — evaluated for each request. Must produce a string output, which becomes
+        // the value in the :ref:`BucketId <envoy_v3_api_msg_service.rate_limit_quota.v3.BucketId>`
+        // map. For example, extensions with the ``envoy.matching.http.input`` category can be used.
+        config.core.v3.TypedExtensionConfig custom_value = 2;
+      }
+    }
+
+    // The map translated into the ``BucketId`` map.
+    //
+    // The ``string key`` of this map becomes the key of the ``BucketId`` map as is.
+    //
+    // The ``ValueBuilder value`` for the key can be:
+    //
+    // * static ``string string_value`` — becomes the value in the ``BucketId`` map as is.
+    // * dynamic ``TypedExtensionConfig custom_value`` — evaluated for each request. Must produce
+    //   a string output, which becomes the value in the ``BucketId`` map.
+    //
+    // See usage examples in the docs to :ref:`bucket_id_builder
+    // <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.bucket_id_builder>`
+    // field.
+    map<string, ValueBuilder> bucket_id_builder = 1 [(validate.rules).map = {min_pairs: 1}];
+  }
+
+  // ``BucketId`` builder.
+  //
+  // :ref:`BucketId <envoy_v3_api_msg_service.rate_limit_quota.v3.BucketId>` is a map from
+  // the string key to the string value which serves as the bucket identifier common to
+  // the control plane and the data plane.
+  //
+  // While ``BucketId`` is always static, ``BucketIdBuilder`` allows populating map values
+  // with the dynamic properties associated with each individual request.
+  //
+  // Example 1: static fields only
+  //
+  // ``BucketIdBuilder``:
+  //
+  // .. validated-code-block:: yaml
+  //   :type-name: envoy.extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.BucketIdBuilder
+  //
+  //   bucket_id_builder:
+  //     name:
+  //       string_value: my_bucket
+  //     hello:
+  //       string_value: world
+  //
+  // Produces the following ``BucketId`` for all requests:
+  //
+  // .. validated-code-block:: yaml
+  //   :type-name: envoy.service.rate_limit_quota.v3.BucketId
+  //
+  //   bucket:
+  //     name: my_bucket
+  //     hello: world
+  //
+  // Example 2: static and dynamic fields
+  //
+  // .. validated-code-block:: yaml
+  //   :type-name: envoy.extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.BucketIdBuilder
+  //
+  //   bucket_id_builder:
+  //     name:
+  //       string_value: my_bucket
+  //     env:
+  //       custom_value:
+  //         typed_config:
+  //           '@type': type.googleapis.com/envoy.type.matcher.v3.HttpRequestHeaderMatchInput
+  //           header_name: environment
+  //
+  // In this example, the value of ``BucketId`` key ``env`` is substituted from the ``environment``
+  // request header.
+  //
+  // This is equivalent to the following *pseudo-code*:
+  //
+  // .. code-block:: yaml
+  //
+  //    name: 'my_bucket'
+  //    env: $header['environment']
+  //
+  // For example, the request with the HTTP header ``environment`` set to ``staging`` will produce
+  // the following ``BucketId``:
+  //
+  // .. validated-code-block:: yaml
+  //   :type-name: envoy.service.rate_limit_quota.v3.BucketId
+  //
+  //   bucket:
+  //     name: my_bucket
+  //     env: staging
+  //
+  // The request with the HTTP header ``environment`` set to ``prod`` will produce:
+  //
+  // .. validated-code-block:: yaml
+  //   :type-name: envoy.service.rate_limit_quota.v3.BucketId
+  //
+  //   bucket:
+  //     name: my_bucket
+  //     env: prod
+  //
+  // .. note::
+  //   The order of ``BucketId`` keys does not matter. Buckets ``{ a: 'A', b: 'B' }`` and
+  //   ``{ b: 'B', a: 'A' }`` are identical.
+  //
+  // If not set, requests will NOT be reported to the server, and will always be limited
+  // according to :ref:`no_assignment_behavior
+  // <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.no_assignment_behavior>`
+  // configuration.
+  BucketIdBuilder bucket_id_builder = 1;
+
+  // The interval at which the data plane (RLQS client) is to report quota usage for this bucket.
+  //
+  // When the first request is matched to a bucket with no assignment, the data plane is to report
+  // the request immediately in the :ref:`RateLimitQuotaUsageReports
+  // <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaUsageReports>` message.
+  // For the RLQS server, this signals that the data plane is now subscribed to the quota
+  // assignments in this bucket, and the server will start sending the assignments as described
+  // in the :ref:`RLQS documentation <envoy_v3_api_file_envoy/service/rate_limit_quota/v3/rlqs.proto>`.
+  //
+  // After sending the initial report, the data plane is to continue reporting the bucket usage with
+  // the interval specified in this field.
+  // [#comment: 100000000 nanoseconds = 0.1 seconds]
+  google.protobuf.Duration reporting_interval = 2 [(validate.rules).duration = {
+    required: true
+    gt {nanos: 100000000}
+  }];
+
+  // Customize the deny response to the requests over the rate limit.
+  // If not set, the filter will be configured as if an empty message is set,
+  // and will behave according to the defaults specified in :ref:`DenyResponseSettings
+  // <envoy_v3_api_msg_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.DenyResponseSettings>`.
+  DenyResponseSettings deny_response_settings = 3;
+
+  // Configures the behavior in the "no assignment" state: after the first request has been
+  // matched to the bucket, and before the RLQS server returns the first quota assignment.
+  //
+  // If not set, the default behavior is to allow all requests.
+  NoAssignmentBehavior no_assignment_behavior = 4;
+
+  // Configures the behavior in the "expired assignment" state: the bucket's assignment has expired,
+  // and cannot be refreshed.
+  //
+  // If not set, the bucket is abandoned when its *active* assignment expires.
+  // The process of abandoning the bucket, and restarting the subscription is described in the
+  // :ref:`AbandonAction <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.AbandonAction>`
+  // message.
+  ExpiredAssignmentBehavior expired_assignment_behavior = 5;
+}
diff --git a/api/envoy/service/rate_limit_quota/v3/BUILD b/api/envoy/service/rate_limit_quota/v3/BUILD
new file mode 100644
index 000000000000..1ed447c6f271
--- /dev/null
+++ b/api/envoy/service/rate_limit_quota/v3/BUILD
@@ -0,0 +1,14 @@
+# DO NOT EDIT. This file is generated by tools/proto_format/proto_sync.py.
+
+load("@envoy_api//bazel:api_build_system.bzl", "api_proto_package")
+
+licenses(["notice"])  # Apache 2
+
+api_proto_package(
+    has_services = True,
+    deps = [
+        "//envoy/type/v3:pkg",
+        "@com_github_cncf_udpa//udpa/annotations:pkg",
+        "@com_github_cncf_udpa//xds/annotations/v3:pkg",
+    ],
+)
diff --git a/api/envoy/service/rate_limit_quota/v3/rlqs.proto b/api/envoy/service/rate_limit_quota/v3/rlqs.proto
new file mode 100644
index 000000000000..7f10870b6289
--- /dev/null
+++ b/api/envoy/service/rate_limit_quota/v3/rlqs.proto
@@ -0,0 +1,250 @@
+syntax = "proto3";
+
+package envoy.service.rate_limit_quota.v3;
+
+import "envoy/type/v3/ratelimit_strategy.proto";
+
+import "google/protobuf/duration.proto";
+
+import "xds/annotations/v3/status.proto";
+
+import "udpa/annotations/status.proto";
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.service.rate_limit_quota.v3";
+option java_outer_classname = "RlqsProto";
+option java_multiple_files = true;
+option go_package = "github.com/envoyproxy/go-control-plane/envoy/service/rate_limit_quota/v3;rate_limit_quotav3";
+option java_generic_services = true;
+option (udpa.annotations.file_status).package_version_status = ACTIVE;
+option (xds.annotations.v3.file_status).work_in_progress = true;
+
+// [#protodoc-title: Rate Limit Quota Service (RLQS)]
+
+// The Rate Limit Quota Service (RLQS) is an Envoy global rate limiting service that allows to
+// delegate rate limit decisions to a remote service. The service will aggregate the usage reports
+// from multiple data plane instances, and distribute Rate Limit Assignments to each instance
+// based on its business logic. The logic is outside of the scope of the protocol API.
+//
+// The protocol is designed as a streaming-first API. It utilizes a watch-like subscription model.
+// The data plane groups requests into Quota Buckets as directed by the filter config,
+// and periodically reports them to the RLQS server along with the Bucket identifier, :ref:`BucketId
+// <envoy_v3_api_msg_service.rate_limit_quota.v3.BucketId>`. Once RLQS server has collected enough
+// reports to make a decision, it'll send back the assignment with the rate limiting instructions.
+//
+// The first report sent by the data plane is interpreted by the RLQS server as a "watch" request,
+// indicating that the data plane instance is interested in receiving further updates for the
+// ``BucketId``. From then on, RLQS server may push assignments to this instance at will, even if
+// the instance is not sending usage reports. It's the responsibility of the RLQS server
+// to determine when the data plane instance didn't send ``BucketId`` reports for too long,
+// and to respond with the :ref:`AbandonAction
+// <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.AbandonAction>`,
+// indicating that the server has now stopped sending quota assignments for the ``BucketId`` bucket,
+// and the data plane instance should :ref:`abandon
+// <envoy_v3_api_field_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.abandon_action>`
+// it.
+//
+// Refer to Rate Limit Quota :ref:`configuration overview <config_http_filters_rate_limit_quota>`
+// for further details.
+
+// Defines the Rate Limit Quota Service (RLQS).
+service RateLimitQuotaService {
+  // Main communication channel: the data plane sends usage reports to the RLQS server,
+  // and the server asynchronously responds with the assignments.
+  rpc StreamRateLimitQuotas(stream RateLimitQuotaUsageReports)
+      returns (stream RateLimitQuotaResponse) {
+  }
+}
+
+message RateLimitQuotaUsageReports {
+  // The usage report for a bucket.
+  //
+  // .. note::
+  //   The first report sent for a ``BucketId`` indicates to the RLQS server that
+  //   the RLQS client is subscribing for the future assignments for this ``BucketId``.
+  message BucketQuotaUsage {
+    // ``BucketId`` for which request quota usage is reported.
+    BucketId bucket_id = 1 [(validate.rules).message = {required: true}];
+
+    // Time elapsed since the last report.
+    google.protobuf.Duration time_elapsed = 2 [(validate.rules).duration = {
+      required: true
+      gt {}
+    }];
+
+    // Requests the data plane has allowed through.
+    uint64 num_requests_allowed = 3;
+
+    // Requests throttled.
+    uint64 num_requests_denied = 4;
+  }
+
+  // All quota requests must specify the domain. This enables sharing the quota
+  // server between different applications without fear of overlap.
+  // E.g., "envoy".
+  //
+  // Should only be provided in the first report, all subsequent messages on the same
+  // stream are considered to be in the same domain. In case the domain needs to be
+  // changed, close the stream, and reopen a new one with the different domain.
+  string domain = 1 [(validate.rules).string = {min_len: 1}];
+
+  // A list of quota usage reports. The list is processed by the RLQS server in the same order
+  // it's provided by the client.
+  repeated BucketQuotaUsage bucket_quota_usages = 2 [(validate.rules).repeated = {min_items: 1}];
+}
+
+message RateLimitQuotaResponse {
+  // Commands the data plane to apply one of the actions to the bucket with the
+  // :ref:`bucket_id <envoy_v3_api_field_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.bucket_id>`.
+  message BucketAction {
+    // Quota assignment for the bucket. Configures the rate limiting strategy and the duration
+    // for the given :ref:`bucket_id
+    // <envoy_v3_api_field_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.bucket_id>`.
+    //
+    // **Applying the first assignment to the bucket**
+    //
+    // Once the data plane receives the ``QuotaAssignmentAction``, it must send the current usage
+    // report for the bucket, and start rate limiting requests matched into the bucket
+    // using the strategy configured in the :ref:`rate_limit_strategy
+    // <envoy_v3_api_field_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.QuotaAssignmentAction.rate_limit_strategy>`
+    // field. The assignment becomes bucket's *active* assignment.
+    //
+    // **Expiring the assignment**
+    //
+    // The duration of the assignment is defined in the :ref:`assignment_time_to_live
+    // <envoy_v3_api_field_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.QuotaAssignmentAction.assignment_time_to_live>`
+    // field. When the duration runs out, the assignment is *expired*, and no longer *active*.
+    // The data plane should stop applying the rate limiting strategy to the bucket, and transition
+    // the bucket to the "expired assignment" state. This activates the behavior configured in the
+    // :ref:`expired_assignment_behavior <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.expired_assignment_behavior>`
+    // field.
+    //
+    // **Replacing the assignment**
+    //
+    // * If the rate limiting strategy is different from bucket's *active* assignment, or
+    //   the current bucket assignment is *expired*, the data plane must immediately
+    //   end the current assignment, report the bucket usage, and apply the new assignment.
+    //   The new assignment becomes bucket's *active* assignment.
+    // * If the rate limiting strategy is the same as the bucket's *active* (not *expired*)
+    //   assignment, the data plane should extend the duration of the *active* assignment
+    //   for the duration of the new assignment provided in the :ref:`assignment_time_to_live
+    //   <envoy_v3_api_field_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.QuotaAssignmentAction.assignment_time_to_live>`
+    //   field. The *active* assignment is considered unchanged.
+    message QuotaAssignmentAction {
+      // A duration after which the assignment is considered *expired*. The process of the
+      // expiration is described :ref:`above
+      // <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.QuotaAssignmentAction>`.
+      //
+      // * If unset, the assignment has no expiration date.
+      // * If set to ``0``, the assignment expires immediately, forcing the client into the
+      //   :ref:`"expired assignment"
+      //   <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.ExpiredAssignmentBehavior.expired_assignment_behavior_timeout>`
+      //   state. This may be used by the RLQS server in cases when it needs clients to proactively
+      //   fall back to the pre-configured :ref:`ExpiredAssignmentBehavior
+      //   <envoy_v3_api_msg_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.ExpiredAssignmentBehavior>`,
+      //   e.g., before the server restarts.
+      //
+      // .. attention::
+      //   Note that :ref:`expiring
+      //   <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.QuotaAssignmentAction>`
+      //   the assignment is not the same as :ref:`abandoning
+      //   <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.AbandonAction>`
+      //   the assignment. While expiring the assignment just transitions the bucket to
+      //   the "expired assignment" state; abandoning the assignment completely erases
+      //   the bucket from the data plane memory, and stops the usage reports.
+      google.protobuf.Duration assignment_time_to_live = 2 [(validate.rules).duration = {gte {}}];
+
+      // Configures the local rate limiter for the request matched to the bucket.
+      //
+      // If not set, allow all requests.
+      type.v3.RateLimitStrategy rate_limit_strategy = 3;
+    }
+
+    // Abandon action for the bucket. Indicates that the RLQS server will no longer be
+    // sending updates for the given :ref:`bucket_id
+    // <envoy_v3_api_field_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.bucket_id>`.
+    //
+    // If no requests are reported for a bucket, after some time the server considers the bucket
+    // inactive. The server stops tracking the bucket, and instructs the data plane to abandon
+    // the bucket via this message.
+    //
+    // **Abandoning the assignment**
+    //
+    // The data plane is to erase the bucket (including its usage data) from the memory.
+    // It should stop tracking the bucket, and stop reporting its usage. This effectively resets
+    // the data plane to the state prior to matching the first request into the bucket.
+    //
+    // **Restarting the subscription**
+    //
+    // If a new request is matched into a bucket previously abandoned, the data plane must behave
+    // as if it has never tracked the bucket, and it's the first request matched into it:
+    //
+    // 1. The process of :ref:`subscription and reporting
+    //    <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.reporting_interval>`
+    //    starts from the beginning.
+    // 2. The bucket transitions to the :ref:`"no assignment"
+    //    <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.no_assignment_behavior>`
+    //    state.
+    // 3. Once the new assignment is received, it's applied per
+    //    "Applying the first assignment to the bucket" section of the :ref:`QuotaAssignmentAction
+    //    <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.QuotaAssignmentAction>`.
+    message AbandonAction {
+    }
+
+    // ``BucketId`` of the bucket to which the action is applied.
+    BucketId bucket_id = 1 [(validate.rules).message = {required: true}];
+
+    oneof bucket_action {
+      option (validate.required) = true;
+
+      // Apply the quota assignment to the bucket.
+      //
+      // Commands the data plane to apply a rate limiting strategy to the bucket.
+      // The process of applying and expiring the rate limiting strategy is detailed in the
+      // :ref:`QuotaAssignmentAction
+      // <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.QuotaAssignmentAction>`
+      // message.
+      QuotaAssignmentAction quota_assignment_action = 2;
+
+      // Abandon the bucket.
+      //
+      // Commands the data plane to abandon the bucket.
+      // The process of abandoning the bucket is described in the :ref:`AbandonAction
+      // <envoy_v3_api_msg_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.AbandonAction>`
+      // message.
+      AbandonAction abandon_action = 3;
+    }
+  }
+
+  // An ordered list of actions to be applied to the buckets. The actions are applied in the
+  // given order, from top to bottom.
+  repeated BucketAction bucket_action = 1 [(validate.rules).repeated = {min_items: 1}];
+}
+
+// The identifier for the bucket. Used to match the bucket between the control plane (RLQS server),
+// and the data plane (RLQS client), e.g.:
+//
+// * the data plane sends a usage report for requests matched into the bucket with ``BucketId``
+//   to the control plane
+// * the control plane sends an assignment for the bucket with ``BucketId``
+//   to the data plane
+//
+// Example:
+//
+// .. validated-code-block:: yaml
+//   :type-name: envoy.service.rate_limit_quota.v3.BucketId
+//
+//   bucket:
+//     name: my_bucket
+//     env: staging
+//
+// .. note::
+//   The order of ``BucketId`` keys does not matter. Buckets ``{ a: 'A', b: 'B' }`` and
+//   ``{ b: 'B', a: 'A' }`` are identical.
+message BucketId {
+  map<string, string> bucket = 1 [(validate.rules).map = {
+    min_pairs: 1
+    keys {string {min_len: 1}}
+    values {string {min_len: 1}}
+  }];
+}
diff --git a/api/envoy/type/v3/BUILD b/api/envoy/type/v3/BUILD
index ee92fb652582..ec1e778e06e5 100644
--- a/api/envoy/type/v3/BUILD
+++ b/api/envoy/type/v3/BUILD
@@ -5,5 +5,8 @@ load("@envoy_api//bazel:api_build_system.bzl", "api_proto_package")
 licenses(["notice"])  # Apache 2
 
 api_proto_package(
-    deps = ["@com_github_cncf_udpa//udpa/annotations:pkg"],
+    deps = [
+        "@com_github_cncf_udpa//udpa/annotations:pkg",
+        "@com_github_cncf_udpa//xds/annotations/v3:pkg",
+    ],
 )
diff --git a/api/envoy/type/v3/ratelimit_strategy.proto b/api/envoy/type/v3/ratelimit_strategy.proto
new file mode 100644
index 000000000000..a86da55b8543
--- /dev/null
+++ b/api/envoy/type/v3/ratelimit_strategy.proto
@@ -0,0 +1,79 @@
+syntax = "proto3";
+
+package envoy.type.v3;
+
+import "envoy/type/v3/ratelimit_unit.proto";
+import "envoy/type/v3/token_bucket.proto";
+
+import "xds/annotations/v3/status.proto";
+
+import "udpa/annotations/status.proto";
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.type.v3";
+option java_outer_classname = "RatelimitStrategyProto";
+option java_multiple_files = true;
+option go_package = "github.com/envoyproxy/go-control-plane/envoy/type/v3;typev3";
+option (udpa.annotations.file_status).package_version_status = ACTIVE;
+option (xds.annotations.v3.file_status).work_in_progress = true;
+
+// [#protodoc-title: Rate Limit Strategies]
+
+message RateLimitStrategy {
+  // A blanket decision applied to every request: allow all or deny all.
+  enum BlanketRule {
+    ALLOW_ALL = 0;
+    DENY_ALL = 1;
+  }
+
+  // Best-effort limit of the number of requests per time unit.
+  //
+  // Allows specifying the desired requests per second (RPS, QPS), requests per minute (QPM, RPM),
+  // etc., without specifying a rate limiting algorithm implementation.
+  //
+  // ``RequestsPerTimeUnit`` strategy does not demand any specific rate limiting algorithm to be
+  // used (in contrast to the :ref:`TokenBucket <envoy_v3_api_msg_type.v3.TokenBucket>`,
+  // for example). It implies that the implementation details of the rate limiting algorithm are
+  // irrelevant as long as the configured number of "requests per time unit" is achieved.
+  //
+  // Note that the ``TokenBucket`` is still a valid implementation of the ``RequestsPerTimeUnit``
+  // strategy, and may be chosen to enforce the rate limit. However, there's no guarantee it will be
+  // the ``TokenBucket`` in particular, and not the Leaky Bucket, the Sliding Window, or any other
+  // rate limiting algorithm that fulfills the requirements.
+  message RequestsPerTimeUnit {
+    // The desired number of requests per :ref:`time_unit
+    // <envoy_v3_api_field_type.v3.RateLimitStrategy.RequestsPerTimeUnit.time_unit>` to allow.
+    // If set to ``0``, deny all (equivalent to ``BlanketRule.DENY_ALL``).
+    //
+    // .. note::
+    //   The algorithm implementation determines the course of action for the requests
+    //   over the limit. As long as the ``requests_per_time_unit`` converges on the desired value,
+    //   it's allowed to treat this field as a soft-limit: allow bursts, redistribute the allowance
+    //   over time, etc.
+    //
+    uint64 requests_per_time_unit = 1;
+
+    // The unit of time. Ignored when :ref:`requests_per_time_unit
+    // <envoy_v3_api_field_type.v3.RateLimitStrategy.RequestsPerTimeUnit.requests_per_time_unit>`
+    // is ``0`` (deny all).
+    RateLimitUnit time_unit = 2 [(validate.rules).enum = {defined_only: true}];
+  }
+
+  oneof strategy {
+    option (validate.required) = true;
+
+    // Allow or deny all requests.
+    // If unset, allow all.
+    BlanketRule blanket_rule = 1 [(validate.rules).enum = {defined_only: true}];
+
+    // Best-effort limit of the number of requests per time unit, e.g. requests per second.
+    // Does not prescribe any specific rate limiting algorithm, see :ref:`RequestsPerTimeUnit
+    // <envoy_v3_api_msg_type.v3.RateLimitStrategy.RequestsPerTimeUnit>` for details.
+    RequestsPerTimeUnit requests_per_time_unit = 2;
+
+    // Limit the requests by consuming tokens from the Token Bucket.
+    // Allow the same number of requests as the number of tokens available in
+    // the token bucket.
+    TokenBucket token_bucket = 3;
+  }
+}
diff --git a/api/versioning/BUILD b/api/versioning/BUILD
index fd85c4e0c18f..da23a1e78e64 100644
--- a/api/versioning/BUILD
+++ b/api/versioning/BUILD
@@ -118,6 +118,7 @@ proto_library(
         "//envoy/extensions/filters/http/oauth2/v3:pkg",
         "//envoy/extensions/filters/http/on_demand/v3:pkg",
         "//envoy/extensions/filters/http/original_src/v3:pkg",
+        "//envoy/extensions/filters/http/rate_limit_quota/v3:pkg",
         "//envoy/extensions/filters/http/ratelimit/v3:pkg",
         "//envoy/extensions/filters/http/rbac/v3:pkg",
         "//envoy/extensions/filters/http/router/v3:pkg",
@@ -224,6 +225,7 @@ proto_library(
         "//envoy/service/listener/v3:pkg",
         "//envoy/service/load_stats/v3:pkg",
         "//envoy/service/metrics/v3:pkg",
+        "//envoy/service/rate_limit_quota/v3:pkg",
         "//envoy/service/ratelimit/v3:pkg",
         "//envoy/service/route/v3:pkg",
         "//envoy/service/runtime/v3:pkg",
diff --git a/docs/root/api-v3/service/service.rst b/docs/root/api-v3/service/service.rst
index a65686099df1..80e0693dc8e8 100644
--- a/docs/root/api-v3/service/service.rst
+++ b/docs/root/api-v3/service/service.rst
@@ -11,6 +11,7 @@ Services
   health/v3/*
   metrics/v3/*
   ratelimit/v3/*
+  rate_limit_quota/v3/*
   runtime/v3/*
   status/v3/*
   tap/v3/*
diff --git a/docs/root/api-v3/types/types.rst b/docs/root/api-v3/types/types.rst
index a86620519ff8..b6cc7b417259 100644
--- a/docs/root/api-v3/types/types.rst
+++ b/docs/root/api-v3/types/types.rst
@@ -11,6 +11,7 @@ Types
   ../type/v3/percent.proto
   ../type/v3/range.proto
   ../type/v3/ratelimit_unit.proto
+  ../type/v3/ratelimit_strategy.proto
   ../type/v3/semantic_version.proto
   ../type/v3/token_bucket.proto
   ../type/http/v3/cookie.proto
diff --git a/docs/root/configuration/http/http_filters/http_filters.rst b/docs/root/configuration/http/http_filters/http_filters.rst
index a1c034cd5188..54b6f7ad5d18 100644
--- a/docs/root/configuration/http/http_filters/http_filters.rst
+++ b/docs/root/configuration/http/http_filters/http_filters.rst
@@ -43,6 +43,7 @@ HTTP filters
   on_demand_updates_filter
   original_src_filter
   rate_limit_filter
+  rate_limit_quota_filter
   rbac_filter
   router_filter
   set_metadata_filter
diff --git a/docs/root/configuration/http/http_filters/rate_limit_quota_filter.rst b/docs/root/configuration/http/http_filters/rate_limit_quota_filter.rst
new file mode 100644
index 000000000000..ffaacc367d72
--- /dev/null
+++ b/docs/root/configuration/http/http_filters/rate_limit_quota_filter.rst
@@ -0,0 +1,168 @@
+.. _config_http_filters_rate_limit_quota:
+
+Rate Limit Quota
+================
+
+* Global rate limiting :ref:`architecture overview <arch_overview_global_rate_limit>`
+* :ref:`v3 API reference <envoy_v3_api_msg_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaFilterConfig>`
+
+This filter provides implementation of the global rate limit quota :ref:`protocol <envoy_v3_api_file_envoy/service/rate_limit_quota/v3/rlqs.proto>`.
+The rate limit quota service (RLQS) provides quota assignments to each Envoy instance connected to the service. In addition to enforcing rate limit quota assignments,
+this filter periodically reports request rates for each assignment to the RLQS, allowing it to rebalance quota assignments between Envoy instances depending on the
+individual load of each Envoy instance. When quota assignments change the RLQS proactively pushes them to Envoy.
+
+The HTTP rate limit quota filter will call the rate limit quota service when it is configured in the HTTP connection manager filter chain. Filter configuration
+defines the RLQS service and definitions of request buckets that will receive quota assignments. Request buckets are defined by a set of matchers that determine
+if a request is subject to the rate limit quota assigned to that bucket. Each matcher can contain multiple buckets by the means of the
+:ref:`bucket_id_builder <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.bucket_id_builder>`. The bucket ID builder allows
+request buckets to be generated dynamically based on request attributes, such as request header value.
+
+If a request does not match any set of matchers then quota assignment for the "catch all" bucket configured by the ``on_no_match`` field of the
+:ref:`bucket_matchers <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaFilterConfig.bucket_matchers>` is applied. If the ``on_no_match``
+configuration is not provided, all unmatched requests are not rate limited.
+
+Bucket definitions can be overridden in the virtual host or route configurations. The more specific definition completely overrides the less specific definition.
+
+Initially all Envoy's quota assignments are empty. The rate limit quota filter requests quota assignment from RLQS when the request matches a bucket for the first time.
+The behavior of the filter while it waits for the initial assignment is determined by the ``no_assignment_behavior`` value. In this state requests can either all be
+immediately allowed, denied or enqueued until quota assignment is received.
+
+A quota assignment may have associated :ref:`time to live <envoy_v3_api_field_service.rate_limit_quota.v3.RateLimitQuotaResponse.BucketAction.QuotaAssignmentAction.assignment_time_to_live>`.
+The RLQS is expected to update the assignment before TTL runs out. If RLQS failed to update the assignment and its TTL
+has expired, the filter can be configured to continue using the last quota assignment or fall back to a value predefined in the
+:ref:`expired assignment configuration <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.expired_assignment_behavior>`.
+
+The rate limit quota filter reports the request load for each bucket to the RLQS with the configured ``reporting_interval``. The RLQS may rebalance quota assignments based on the request
+load that each Envoy receives and push new quota assignments to Envoys.
+
+When the connection to the RLQS server fails, the filter will fall back to either the
+:ref:`no assignment behavior <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.no_assignment_behavior>`
+if it has not yet received rate limit quota or to the
+:ref:`expired assignment behavior <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.expired_assignment_behavior>` if
+connection could not be re-established by the time the existing quota expired.
+
+Example 1
+^^^^^^^^^
+
+In this example HTTP connection manager has the following bucket definitions in the rate limit quota filter
+:ref:`configuration <envoy_v3_api_msg_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaFilterConfig>`. This
+configuration enables rate limit quota filter with 3 buckets. Note that bucket ID is a map of key-value pairs.
+
+1.  Bucket id ``name: prod-rate-limit-quota`` for all requests with the ``deployment: prod`` header present. Until RLQS assigns a quota
+    all requests are allowed.
+
+2.  Bucket id ``name: staging-rate-limit-quota`` for all requests with the ``deployment: staging`` header present. Until RLQS assigns a quota
+    all requests are denied.
+
+3.  Bucket id ``name: default-rate-limit-quota`` for all other requests. Until RLQS assigns a quota all requests are allowed; if an assignment expires without being refreshed, a fallback quota of 1K RPS is applied.
+
+.. code-block:: yaml
+
+  rlqs_server:
+    envoy_grpc:
+      cluster_name: rate_limit_quota_service
+  domain: "acme-services"
+  bucket_matchers:
+    matcher_list:
+      matchers:
+      - predicate:
+        - single_predicate:
+            input:
+              name: request-headers
+              typed_config:
+                "@type": type.googleapis.com/envoy.type.matcher.v3.HttpRequestHeaderMatchInput
+                header_name: deployment
+            value_match:
+              exact: prod
+        on_match:
+          action:
+            name: prod-bucket
+            typed_config:
+              "@type": type.googleapis.com/envoy.extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings
+              bucket_id_builder:
+                bucket_id_builder:
+                  "name":
+                    string_value: "prod-rate-limit-quota"
+              reporting_interval: 60s
+              no_assignment_behavior:
+                blanket_rule: ALLOW_ALL
+      - predicate:
+        - single_predicate:
+            input:
+              name: request-headers
+              typed_config:
+                "@type": type.googleapis.com/envoy.type.matcher.v3.HttpRequestHeaderMatchInput
+                header_name: deployment
+            value_match:
+              exact: staging
+        on_match:
+          action:
+            name: staging-bucket
+            typed_config:
+              "@type": type.googleapis.com/envoy.extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings
+              bucket_id_builder:
+                bucket_id_builder:
+                  "name":
+                    string_value: "staging-rate-limit-quota"
+              reporting_interval: 60s
+              no_assignment_behavior:
+                blanket_rule: DENY_ALL
+    # The "catch all" bucket settings
+    on_no_match:
+      action:
+        name: default-bucket
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings
+          bucket_id_builder:
+            bucket_id_builder:
+              "name":
+                string_value: "default-rate-limit-quota"
+          reporting_interval: 60s
+          deny_response_settings:
+            http_status_code: 429
+          no_assignment_behavior:
+            blanket_rule: ALLOW_ALL
+          expired_assignment_behavior:
+            fallback_rate_limit:
+              requests_per_time_unit:
+                requests_per_time_unit: 1000
+                time_unit: SECOND
+
+
+Rate Limit Quota Override
+-------------------------
+
+Rate limit quota filter :ref:`configuration <envoy_v3_api_msg_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaFilterConfig>` can be overridden
+at the virtual host or route levels using the :ref:`RateLimitQuotaOverride <envoy_v3_api_msg_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaOverride>`
+configuration. The more specific configuration fully overrides less specific configuration.
+
+Matcher extensions
+------------------
+
+TODO
+
+Statistics
+----------
+
+The rate limit quota filter outputs statistics in the *cluster.<route target cluster>.rate_limit_quota.* namespace.
+429 responses or the configured
+:ref:`rate limited status <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.DenyResponseSettings.http_status>`
+are emitted to the normal cluster :ref:`dynamic HTTP statistics <config_cluster_manager_cluster_stats_dynamic_http>`.
+
+.. csv-table::
+  :header: Name, Type, Description
+  :widths: 1, 1, 2
+
+  buckets, Counter, Total number of request buckets created
+  assignments, Counter, Total rate limit assignments received from the rate limit quota service
+  error, Counter, Total errors contacting the rate limit quota service
+  over_limit, Counter, Total requests that exceeded assigned rate limit
+  no_assignment, Counter, "Total requests handled according to the
+  :ref:`no_assignment_behavior <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.no_assignment_behavior>`"
+  expired_assignment, Counter, "Total requests handled according to the
+  :ref:`expired_assignment_behavior <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.expired_assignment_behavior>`"
+
+Dynamic Metadata
+----------------
+
+TODO
diff --git a/docs/root/configuration/other_features/rate_limit.rst b/docs/root/configuration/other_features/rate_limit.rst
index 4fa374cb4a69..e831f5292a4e 100644
--- a/docs/root/configuration/other_features/rate_limit.rst
+++ b/docs/root/configuration/other_features/rate_limit.rst
@@ -16,3 +16,28 @@ Envoy expects the rate limit service to support the gRPC IDL specified in
 :ref:`rls.proto <envoy_v3_api_file_envoy/service/ratelimit/v3/rls.proto>`. See the IDL documentation
 for more information on how the API works. See Envoy's reference implementation
 `here <https://github.com/envoyproxy/ratelimit>`_.
+
+.. _config_rate_limit_quota_service:
+
+Rate limit quota service
+========================
+
+Envoy uses the global rate limit quota service when it needs to obtain rate limit quota assignments for incoming
+requests. If the rate limit quota service is not available Envoy uses the
+:ref:`no assignment behavior <envoy_v3_api_field_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaBucketSettings.no_assignment_behavior>`
+configuration.
+
+
+* :ref:`v3 API reference <envoy_v3_api_msg_extensions.filters.http.rate_limit_quota.v3.RateLimitQuotaFilterConfig>`
+
+
+gRPC rate limit quota service IDL
+---------------------------------
+
+Envoy expects the rate limit quota service to support the gRPC IDL specified in
+:ref:`rlqs.proto <envoy_v3_api_file_envoy/service/rate_limit_quota/v3/rlqs.proto>`. See the IDL documentation
+for more information on how the API works.
+
+An open source reference implementation of the rate limit quota service is currently unavailable. The rate limit
+quota extension can be presently used with the Google Cloud Rate Limit Service.
+
diff --git a/docs/root/intro/arch_overview/other_features/global_rate_limiting.rst b/docs/root/intro/arch_overview/other_features/global_rate_limiting.rst
index 538c5a7a1fe7..ed846c803142 100644
--- a/docs/root/intro/arch_overview/other_features/global_rate_limiting.rst
+++ b/docs/root/intro/arch_overview/other_features/global_rate_limiting.rst
@@ -13,6 +13,16 @@ tight enough circuit breaking limit on each downstream host such that the system
 normally during typical request patterns but still prevent cascading failure when the system starts
 to fail. Global rate limiting is a good solution for this case.
 
+Envoy provides two global rate limiting implementations:
+
+#. Per connection or per HTTP request rate limit check.
+#. Quota based, with periodic load reports that allow fair sharing of a global rate limit
+   among multiple instances of Envoy. This implementation is suitable for large Envoy deployments with
+   high request per second load that may not be evenly balanced across all Envoy instances.
+
+Per connection or per HTTP request rate limiting
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 Envoy integrates directly with a global gRPC rate limiting service. Although any service that
 implements the defined RPC/IDL protocol can be used, Envoy provides a `reference implementation <https://github.com/envoyproxy/ratelimit>`_
 written in Go which uses a Redis backend. Envoy’s rate limit integration has the following features:
@@ -36,3 +46,17 @@ global rate limit service. For example, a local token bucket rate limit can abso
 in load that might otherwise overwhelm a global rate limit service. Thus, the rate limit is applied
 in two stages. The initial coarse grained limiting is performed by the token bucket limit before
 a fine grained global limit finishes the job.
+
+Quota based rate limiting
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An open source reference implementation of the rate limit quota service is currently unavailable. The rate limit
+quota extension can be presently used with the Google Cloud Rate Limit Service.
+
+.. TODO(yavlasov): Add links to GCP docs and reference implementation when available.
+
+Quota based global rate limit can only be applied to HTTP requests. Envoy will bucketize requests and
+request quota assignments from the rate limit quota service using the HTTP filter
+:ref:`configuration <config_http_filters_rate_limit_quota>`.
+
+Rate limit quota service :ref:`configuration <config_rate_limit_quota_service>`.
diff --git a/tools/spelling/spelling_dictionary.txt b/tools/spelling/spelling_dictionary.txt
index aa4aeae03eaf..d9c0423f6cbc 100644
--- a/tools/spelling/spelling_dictionary.txt
+++ b/tools/spelling/spelling_dictionary.txt
@@ -314,6 +314,7 @@ RFC
 RHS
 RLE
 RLS
+RLQS
 RNG
 RPC
 RSA