From 17d209bbd83381498ee5e4e703f91a3da4f68fee Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Wed, 6 Nov 2024 19:58:57 -0500 Subject: [PATCH] fix(client): wait for public reachability before registering (#4) * try waiting for reachability changing * fix: reachability test only when it matters doing this in `Present` was too late, it was called in the middle of the ACME dance, and we want to avoid the entire ACME management and flow if we are not publicly reachable. this change delays management only if there is no certificate cached locally, and impacts only the very first cold start when p2p-forge registration needs to occur. entire ACME/p2p-forge flow can be delayed/disabled on nodes that are not (yet) publicly diallable by only calling `ManageAsync` in `func (m *P2PForgeCertMgr) Start() error` once we have connectivity checks passed. for now, we just listen for network.ReachabilityPublic, but this can be refined further in the future. * refactor: withHostConnectivity moved logic to reusable funcs and adjusted logger for better ux * log: clarify certmgr is paused in Private network --------- Co-authored-by: Marcin Rataj --- client/acme.go | 95 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 8 deletions(-) diff --git a/client/acme.go b/client/acme.go index 43e0bcc..d942b8c 100644 --- a/client/acme.go +++ b/client/acme.go @@ -4,6 +4,7 @@ import ( "context" "crypto/tls" "crypto/x509" + "errors" "fmt" "io" "net/http" @@ -11,12 +12,15 @@ import ( "sync" "time" + "github.com/libp2p/go-libp2p/core/network" + httppeeridauth "github.com/libp2p/go-libp2p/p2p/http/auth" "go.uber.org/zap" "github.com/caddyserver/certmagic" logging "github.com/ipfs/go-log/v2" "github.com/libp2p/go-libp2p/config" + "github.com/libp2p/go-libp2p/core/event" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/peer" "github.com/mholt/acmez/v2" @@ -172,7 +176,9 @@ func WithTrustedRoots(trustedRoots *x509.CertPool) P2PForgeCertMgrOptions { } } -// WithAllowPrivateForgeAddrs is meant for testing +// WithAllowPrivateForgeAddrs is meant for testing or skipping all the +// connectivity checks libp2p node needs to pass before it can request domain +// and start ACME DNS-01 challenge. func WithAllowPrivateForgeAddrs() P2PForgeCertMgrOptions { return func(config *P2PForgeCertMgrConfig) error { config.allowPrivateForgeAddresses = true @@ -308,11 +314,10 @@ func NewP2PForgeCertMgr(opts ...P2PForgeCertMgrOptions) (*P2PForgeCertMgr, error if !ok { return nil } - peerID := hostFn().ID() - pidStr := peer.ToCid(peerID).Encode(multibase.MustNewEncoder(multibase.Base36)) - certName := fmt.Sprintf("*.%s.%s", pidStr, mgrCfg.forgeDomain) + + name := certName(hostFn().ID(), mgrCfg.forgeDomain) for _, san := range sanList { - if san == certName { + if san == name { // When the certificate is loaded mark that it has been so we know we are good to use the domain name // TODO: This won't handle if the cert expires and cannot get renewed mgr.certCheckMx.Lock() @@ -333,17 +338,74 @@ func NewP2PForgeCertMgr(opts ...P2PForgeCertMgrOptions) (*P2PForgeCertMgr, error } func (m *P2PForgeCertMgr) Start() error { + if m.cfg == nil || m.hostFn == nil { + return errors.New("unable to start without a certmagic and libp2p host") + } m.ctx, m.cancel = context.WithCancel(context.Background()) go func() { - pb36 := peer.ToCid(m.hostFn().ID()).Encode(multibase.MustNewEncoder(multibase.Base36)) + log := m.log.Named("start") + h := m.hostFn() + name := certName(h.ID(), m.forgeDomain) + certExists := localCertExists(m.ctx, m.cfg, name) + startCertManagement := func() { + if err := m.cfg.ManageAsync(m.ctx, []string{name}); err != nil { + log.Error(err) + } + } - if err := m.cfg.ManageAsync(m.ctx, []string{fmt.Sprintf("*.%s.%s", pb36, m.forgeDomain)}); err != nil { - m.log.Error(err) + if certExists { + log.Infof("found preexisting cert for %q in local storage", name) + } else { + log.Infof("no cert found for %q", name) + } + + // Start immediatelly if either: + // (A) preexisting certificate is found in certmagic storage + // (B) allowPrivateForgeAddresses flag is set + if certExists || m.allowPrivateForgeAddresses { + startCertManagement() + } else { + // No preexisting cert found. + // We will get a new one, but don't want to ask for one + // if our node is not publicly diallable. + // To avoid ERROR(s) in log and unnecessary retries we wait for libp2p + // confirmation that node is publicly reachable before sending + // multiaddrs to p2p-forge's registration endpoint. + withHostConnectivity(m.ctx, log, h, startCertManagement) } }() return nil } +// withHostConnectivity executes callback func only after certain libp2p connectivity checks / criteria against passed host are fullfilled. +// The main purpose is to not bother CA ACME endpoint or p2p-forge registration endpoint if we know the peer is not +// ready to use TLS cert. +func withHostConnectivity(ctx context.Context, log *zap.SugaredLogger, h host.Host, callback func()) { + log.Infof("waiting until libp2p reports event network.ReachabilityPublic") + sub, err := h.EventBus().Subscribe(new(event.EvtLocalReachabilityChanged)) + if err != nil { + log.Error(err) + return + } + defer sub.Close() + select { + case e := <-sub.Out(): + evt := e.(event.EvtLocalReachabilityChanged) // guaranteed safe + log.Infof("libp2p reachability status changed to %s", evt.Reachability) + if evt.Reachability == network.ReachabilityPublic { + callback() + return + } else if evt.Reachability == network.ReachabilityPrivate { + log.Infof("certificate will not be requested while libp2p reachability status is %s", evt.Reachability) + } + case <-ctx.Done(): + if ctx.Err() != context.Canceled { + log.Error(fmt.Errorf("aborted while waiting for libp2p reachability status discovery: %w", ctx.Err())) + } + return + } +} + func (m *P2PForgeCertMgr) Stop() { m.cancel() } @@ -371,6 +433,22 @@ func (m *P2PForgeCertMgr) AddressFactory() config.AddrsFactory { return m.createAddrsFactory(m.allowPrivateForgeAddresses) } +// localCertExists returns true if a certificate matching passed name is already present in certmagic.Storage +func localCertExists(ctx context.Context, cfg *certmagic.Config, name string) bool { + if cfg == nil || cfg.Storage == nil || len(cfg.Issuers) == 0 { + return false + } + acmeIssuer := cfg.Issuers[0].(*certmagic.ACMEIssuer) + certKey := certmagic.StorageKeys.SiteCert(acmeIssuer.IssuerKey(), name) + return cfg.Storage.Exists(ctx, certKey) +} + +// certName returns a string with DNS wildcard for use in TLS cert ("*.peerid.forgeDomain") +func certName(id peer.ID, suffixDomain string) string { + pb36 := peer.ToCid(id).Encode(multibase.MustNewEncoder(multibase.Base36)) + return fmt.Sprintf("*.%s.%s", pb36, suffixDomain) +} + func (m *P2PForgeCertMgr) createAddrsFactory(allowPrivateForgeAddrs bool) config.AddrsFactory { var p2pForgeWssComponent = multiaddr.StringCast(fmt.Sprintf("/tls/sni/*.%s/ws", m.forgeDomain)) @@ -407,6 +485,7 @@ func (d *dns01P2PForgeSolver) Wait(ctx context.Context, challenge acme.Challenge func (d *dns01P2PForgeSolver) Present(ctx context.Context, challenge acme.Challenge) error { h := d.hostFn() addrs := h.Addrs() + var advertisedAddrs []multiaddr.Multiaddr if !d.allowPrivateForgeAddresses {