From 6f730dc268c3712d65efb5b0ab251a714cb85ec4 Mon Sep 17 00:00:00 2001 From: Steve Loeppky Date: Wed, 16 Nov 2022 10:26:59 +0000 Subject: [PATCH] Doc improvements and changelog for resource manager (#9413) Co-authored-by: Antonio Navarro Perez --- core/node/libp2p/rcmgr.go | 12 ++-- core/node/libp2p/rcmgr_defaults.go | 48 +-------------- docs/changelogs/v0.17.md | 23 +++++++ docs/config.md | 96 +++++++++++++++++++++++++----- 4 files changed, 115 insertions(+), 64 deletions(-) diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index 49c9d382399..35894dc7234 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -52,18 +52,22 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { return nil, opts, fmt.Errorf("opening IPFS_PATH: %w", err) } - limits, err := createDefaultLimitConfig(cfg) + limitConfig, err := createDefaultLimitConfig(cfg) if err != nil { return nil, opts, err } + // The logic for defaults and overriding with specified SwarmConfig.ResourceMgr.Limits + // is documented in docs/config.md. + // Any changes here should be reflected there. if cfg.ResourceMgr.Limits != nil { l := *cfg.ResourceMgr.Limits - l.Apply(limits) - limits = l + // This effectively overrides the computed default LimitConfig with any vlues from cfg.ResourceMgr.Limits + l.Apply(limitConfig) + limitConfig = l } - limiter := rcmgr.NewFixedLimiter(limits) + limiter := rcmgr.NewFixedLimiter(limitConfig) str, err := rcmgrObs.NewStatsTraceReporter() if err != nil { diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index 849d7e82aee..fe50ea22a60 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -44,51 +44,8 @@ var noLimitIncrease = rcmgr.BaseLimitIncrease{ // This file defines implicit limit defaults used when Swarm.ResourceMgr.Enabled // createDefaultLimitConfig creates LimitConfig to pass to libp2p's resource manager. -// libp2p's resource manager provides tremendous flexibility but also adds a lot of complexity. -// The intent of the default config here is to provide good defaults, -// and where the defaults aren't good enough, -// to expose a good set of higher-level "knobs" to users to satisfy most use cases -// without requiring users to wade into all the intricacies of libp2p's resource manager. -// -// The inputs one can specify in SwarmConfig are: -// - cfg.ResourceMgr.MaxMemory: This is the max amount of memory in bytes to allow libp2p to use. -// libp2p's resource manager will prevent additional resource creation while this limit is hit. -// If this value isn't specified, 1/8th of the total system memory is used. -// - cfg.ResourceMgr.MaxFileDescriptors: This is the maximum number of file descriptors to allow libp2p to use. -// libp2p's resource manager will prevent additional file descriptor consumption while this limit is hit. -// If this value isn't specified, the maximum between 1/2 of system FD limit and 4096 is used. -// - Swarm.ConnMgr.HighWater: If a connection manager is specified, libp2p's resource manager -// will allow 2x more connections than the HighWater mark -// so the connection manager has "space and time" to close "least useful" connections. -// -// With these inputs defined, limits are created at the system, transient, and peer scopes. -// Other scopes are ignored (by being set to infinity). -// The reason these scopes are chosen is because: -// - system - This gives us the coarse-grained control we want so we can reason about the system as a whole. -// It is the backstop, and allows us to reason about resource consumption more easily -// since don't have think about the interaction of many other scopes. -// - transient - Limiting connections that are in process of being established provides backpressure so not too much work queues up. -// - peer - The peer scope doesn't protect us against intentional DoS attacks. -// It's just as easy for an attacker to send 100 requests/second with 1 peerId vs. 10 requests/second with 10 peers. -// We are reliant on the system scope for protection here in the malicious case. -// The reason for having a peer scope is to protect against unintentional DoS attacks -// (e.g., bug in a peer which is causing it to "misbehave"). -// In the unintional case, we want to make sure a "misbehaving" node doesn't consume more resources than necessary. -// -// Within these scopes, limits are just set on memory, FD, and inbound connections/streams. -// Limits are set based on the inputs above. -// We trust this node to behave properly and thus ignore outbound connection/stream limits. -// We apply any limits that libp2p has for its protocols/services -// since we assume libp2p knows best here. -// -// This leaves 3 levels of resource management protection: -// 1. The user who does nothing and uses defaults - In this case they get some sane defaults -// based on the amount of memory and file descriptors their system has. -// This should protect the node from many attacks. -// 2. Slightly more advanced user - They can tweak the above by passing in config on -// maxMemory, maxFD, or maxConns with Swarm.HighWater.ConnMgr. -// 3. Power user - They specify all the limits they want set via Swarm.ResourceMgr.Limits -// and we don't do any defaults/overrides. We pass that config blindly into libp2p resource manager. +// The defaults follow the documentation in docs/config.md. +// Any changes in the logic here should be reflected there. func createDefaultLimitConfig(cfg config.SwarmConfig) (rcmgr.LimitConfig, error) { maxMemoryDefaultString := humanize.Bytes(uint64(memory.TotalMemory()) / 8) maxMemoryString := cfg.ResourceMgr.MaxMemory.WithDefault(maxMemoryDefaultString) @@ -105,7 +62,6 @@ func createDefaultLimitConfig(cfg config.SwarmConfig) (rcmgr.LimitConfig, error) FD: int(numFD), // By default, we just limit connections on the inbound side. - // Note that the limit gets adjusted below if "cfg.ConnMgr.HighWater" is set. Conns: bigEnough, ConnsInbound: rcmgr.DefaultLimits.SystemBaseLimit.ConnsInbound, // same as libp2p default ConnsOutbound: bigEnough, diff --git a/docs/changelogs/v0.17.md b/docs/changelogs/v0.17.md index 8cab7499126..b6a562c0cac 100644 --- a/docs/changelogs/v0.17.md +++ b/docs/changelogs/v0.17.md @@ -10,6 +10,7 @@ Below is an outline of all that is in this release, so you get a sense of all th - [v0.17.0](#v0170) - [Overview](#overview) - [🔦 Highlights](#-highlights) + - [libp2p resource management enabled by default](#libp2p-resource-management-enabled-by-default) - [Implicit connection manager limits](#implicit-connection-manager-limits) - [TAR Response Format on Gateways](#tar-response-format-on-gateways) - [Dialling `/wss` peer behind a reverse proxy](#dialling-wss-peer-behind-a-reverse-proxy) @@ -20,6 +21,28 @@ Below is an outline of all that is in this release, so you get a sense of all th +#### libp2p resource management enabled by default + +To help protect nodes from DoS (resource exhaustion) and eclipse attacks, +go-libp2p released a [Network Resource Manager](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager) with a host of improvements throughout 2022. + +Kubo first [exposed this functionality in Kubo 0.13](https://github.com/ipfs/kubo/blob/master/docs/changelogs/v0.13.md#-libp2p-network-resource-manager-swarmresourcemgr), +but it was disabled by default. + +The resource manager is now enabled by default to protect nodes. +The defaults balance providing protection from various attacks while still enabling normal usecases to work as expected. + +If you want to adjust the defaults, then you can: +1. bound the amount of memory and file descriptors that libp2p will use with [Swarm.ResourceMgr.MaxMemory](https://github.com/ipfs/go-ipfs/blob/master/docs/config.md#swarmresourcemgrmaxmemory) +and [Swarm.ResourceMgr.MaxFileDescriptors](https://github.com/ipfs/go-ipfs/blob/master/docs/config.md#swarmresourcemgrmaxfiledescriptors) and/or +2. override any specific resource scopes/limits with [Swarm.ResourceMgr.Limits](https://github.com/ipfs/go-ipfs/blob/master/docs/config.md#swarmresourcemgrlimits) + +See [Swarm.ResourceMgr](https://github.com/ipfs/go-ipfs/blob/master/docs/config.md#swarmresourcemgr) for +1. what limits are set by default, +2. example override configuration, +3. how to access prometheus metrics and view grafana dashboards of resource usage, and +4. how to set explicit "allow lists" to protect against eclipse attacks. + #### Implicit connection manager limits Starting with this release, `ipfs init` will no longer store the default diff --git a/docs/config.md b/docs/config.md index da4276cd969..d3287da17e7 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1808,30 +1808,78 @@ Type: `optionalDuration` ### `Swarm.ResourceMgr` -The [libp2p Network Resource Manager](https://github.com/libp2p/go-libp2p-resource-manager#readme) allows setting limits per a scope, +The [libp2p Network Resource Manager](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#readme) allows setting limits per [Resource Scope](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#resource-scopes), and tracking recource usage over time. -#### `Swarm.ResourceMgr.Enabled` +##### Levels of Configuration + +libp2p's resource manager provides tremendous flexibility but also adds a lot of complexity. +There are these levels of limit configuration for resource management protection: +1. "The user who does nothing" - In this case they get some sane defaults discussed below + based on the amount of memory and file descriptors their system has. + This should protect the node from many attacks. +2. "Slightly more advanced user" - They can tweak the default limits discussed below. + Where the defaults aren't good enough, a good set of higher-level "knobs" are exposed to satisfy most use cases + without requiring users to wade into all the intricacies of libp2p's resource manager. + The "knobs"/inputs are `Swarm.ResourceMgr.MaxMemory` and `Swarm.ResourceMgr.MaxFileDescriptors` as described below. +3. "Power user" - They specify all the default limits from below they want override via `Swarm.ResourceMgr.Limits`; + +##### Default Limits + +With these inputs defined, [resource manager limits](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#limits) are created at the +[system](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#the-system-scope), +[transient](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#the-transient-scope), +and [peer](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#peer-scopes) scopes. +Other scopes are ignored (by being set to "~infinity". + +The reason these scopes are chosen is because: +- system - This gives us the coarse-grained control we want so we can reason about the system as a whole. + It is the backstop, and allows us to reason about resource consumption more easily + since don't have think about the interaction of many other scopes. +- transient - Limiting connections that are in process of being established provides backpressure so not too much work queues up. +- peer - The peer scope doesn't protect us against intentional DoS attacks. + It's just as easy for an attacker to send 100 requests/second with 1 peerId vs. 10 requests/second with 10 peers. + We are reliant on the system scope for protection here in the malicious case. + The reason for having a peer scope is to protect against unintentional DoS attacks + (e.g., bug in a peer which is causing it to "misbehave"). + In the unintional case, we want to make sure a "misbehaving" node doesn't consume more resources than necessary. + +Within these scopes, limits are just set on +[memory](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#memory), +[file descriptors (FD)](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#file-descriptors), [*inbound* connections](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#connections), +and [*inbound* streams](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#streams). +Limits are set based on the inputs above. +We trust this node to behave properly and thus don't limit *outbound* connection/stream limits. +We apply any limits that libp2p has for its protocols/services +since we assume libp2p knows best here. + +** libp2p resource monitoring ** +For [monitoring libp2p resource usage](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#monitoring), +various `*rcmgr_*` metrics can be accessed as the prometheus endpoint at `{Addresses.API}/debug/metrics/prometheus` (default: `http://127.0.0.1:5001/debug/metrics/prometheus`). +There are also [pre-built Grafana dashboards](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager/obs/grafana-dashboards) that can be added to a Grafana instance. -Enables the libp2p Network Resource Manager and auguments the default limits -using user-defined ones in `Swarm.ResourceMgr.Limits` (if present). +#### `Swarm.ResourceMgr.Enabled` -Various `*rcmgr_*` metrics can be accessed as the prometheus endpoint at `{Addresses.API}/debug/metrics/prometheus` (default: `http://127.0.0.1:5001/debug/metrics/prometheus`) +Enables the libp2p Resource Manager using limits based on the defaults and/or other configuration as discussed above. Default: `true` - Type: `flag` #### `Swarm.ResourceMgr.MaxMemory` -The maximum amount of memory that the libp2p resource manager will allow. +This is the max amount of memory to allow libp2p to use. +libp2p's resource manager will prevent additional resource creation while this limit is reached. +This value is also used to scale the limit on various resources at various scopes +when the default limits (discuseed above) are used. +For example, increasing this value will increase the default limit for incoming connections. Default: `[TOTAL_SYSTEM_MEMORY]/8` Type: `optionalBytes` #### `Swarm.ResourceMgr.MaxFileDescriptors` -Define the maximum number of file descriptors that libp2p can use. +This is the maximum number of file descriptors to allow libp2p to use. +libp2p's resource manager will prevent additional file descriptor consumption while this limit is reached. This param is ignored on Windows. @@ -1842,21 +1890,26 @@ Type: `optionalInteger` Map of resource limits [per scope](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#resource-scopes). -The map supports fields from [`ScalingLimitConfig`](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit_defaults.go#L21-L59) -struct from [go-libp2p-resource-manager](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#readme). +The map supports fields from the [`LimitConfig` struct](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit_defaults.go#L111). +[`BaseLimit`s](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit.go#L89) can be set for any scope, and within the `BaseLimit`, all limit s are optional. + +The `Swarm.ResourceMgr.Limits` override the default limits described above. +Any override `BaseLimits` or limit s from `Swarm.ResourceMgr.Limits` +that aren't specified will use the default limits. + +Example #1: setting limits for a specific scope ```json { "Swarm": { "ResourceMgr": { - "Enabled": true, "Limits": { "System": { + "Memory": 1073741824, + "FD": 512, "Conns": 1024, "ConnsInbound": 256, "ConnsOutbound": 1024, - "FD": 512, - "Memory": 1073741824, "Streams": 16384, "StreamsInbound": 4096, "StreamsOutbound": 16384 @@ -1867,13 +1920,28 @@ struct from [go-libp2p-resource-manager](https://github.com/libp2p/go-libp2p/tre } ``` +Example #2: setting a specific limit +```json +{ + "Swarm": { + "ResourceMgr": { + "Limits": { + "Transient": { + "ConnsOutbound": 256, + } + } + } + } +} +``` + Current resource usage and a list of services, protocols, and peers can be obtained via `ipfs swarm stats --help` It is also possible to adjust some runtime limits via `ipfs swarm limit --help`. Changes made via `ipfs swarm limit` are persisted in `Swarm.ResourceMgr.Limits`. -Default: `{}` (use the safe implicit defaults) +Default: `{}` (use the safe implicit defaults described above) Type: `object[string->object]`