Skip to content

Commit

Permalink
Verify TLS certificate on endpoints that are used between agents only (
Browse files Browse the repository at this point in the history
  • Loading branch information
lgfa29 committed Apr 19, 2022
1 parent 18c060d commit e28ed2f
Show file tree
Hide file tree
Showing 12 changed files with 616 additions and 190 deletions.
3 changes: 3 additions & 0 deletions .changelog/11956.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:security
server: validate mTLS certificate names on agent to agent endpoints
```
80 changes: 80 additions & 0 deletions .semgrep/rpc_endpoint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
rules:
# Check potentially unauthenticated RPC endpoints
- id: "rpc-potentially-unauthenticated"
patterns:
- pattern: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := $X.$Y.ResolveToken(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := $U.requestACLToken(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := $T.NamespaceValidator(...)
...
# Pattern used by endpoints called exclusively between agents
# (server -> server or client -> server)
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateLocalClientTLSCertificate(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateLocalServerTLSCertificate(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateTLSCertificate(...)
...
# Pattern used by some Node endpoints.
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
return $A.deregister(...)
...
- metavariable-pattern:
metavariable: $METHOD
patterns:
# Endpoints that are expected not to have authentication.
- pattern-not: '"ACL.Bootstrap"'
- pattern-not: '"ACL.ResolveToken"'
- pattern-not: '"ACL.UpsertOneTimeToken"'
- pattern-not: '"ACL.ExchangeOneTimeToken"'
- pattern-not: '"CSIPlugin.Get"'
- pattern-not: '"CSIPlugin.List"'
- pattern-not: '"Status.Leader"'
- pattern-not: '"Status.Peers"'
- pattern-not: '"Status.Version"'
message: "RPC method $METHOD appears to be unauthenticated"
languages:
- "go"
severity: "WARNING"
paths:
include:
- "*_endpoint.go"
8 changes: 8 additions & 0 deletions nomad/alloc_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ import (
type Alloc struct {
srv *Server
logger log.Logger

// ctx provides context regarding the underlying connection
ctx *RPCContext
}

// List is used to list the allocations in the system
Expand Down Expand Up @@ -224,6 +227,11 @@ func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest,
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "get_allocs"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(a.srv, a.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", a.srv.Region(), err)
}

allocs := make([]*structs.Allocation, len(args.AllocIDs))

// Setup the blocking query. We wait for at least one of the requested
Expand Down
8 changes: 8 additions & 0 deletions nomad/deployment_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ import (
type Deployment struct {
srv *Server
logger log.Logger

// ctx provides context regarding the underlying connection
ctx *RPCContext
}

// GetDeployment is used to request information about a specific deployment
Expand Down Expand Up @@ -504,6 +507,11 @@ func (d *Deployment) Reap(args *structs.DeploymentDeleteRequest,
}
defer metrics.MeasureSince([]string{"nomad", "deployment", "reap"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(d.srv, d.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", d.srv.Region(), err)
}

// Update via Raft
_, index, err := d.srv.raftApply(structs.DeploymentDeleteRequestType, args)
if err != nil {
Expand Down
38 changes: 38 additions & 0 deletions nomad/eval_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ const (
type Eval struct {
srv *Server
logger log.Logger

// ctx provides context regarding the underlying connection
ctx *RPCContext
}

// GetEval is used to request information about a specific evaluation
Expand Down Expand Up @@ -87,6 +90,11 @@ func (e *Eval) Dequeue(args *structs.EvalDequeueRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "dequeue"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ensure there is at least one scheduler
if len(args.Schedulers) == 0 {
return fmt.Errorf("dequeue requires at least one scheduler type")
Expand Down Expand Up @@ -172,6 +180,11 @@ func (e *Eval) Ack(args *structs.EvalAckRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "ack"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ack the EvalID
if err := e.srv.evalBroker.Ack(args.EvalID, args.Token); err != nil {
return err
Expand All @@ -187,6 +200,11 @@ func (e *Eval) Nack(args *structs.EvalAckRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "nack"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Nack the EvalID
if err := e.srv.evalBroker.Nack(args.EvalID, args.Token); err != nil {
return err
Expand All @@ -202,6 +220,11 @@ func (e *Eval) Update(args *structs.EvalUpdateRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "update"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
return fmt.Errorf("only a single eval can be updated")
Expand Down Expand Up @@ -232,6 +255,11 @@ func (e *Eval) Create(args *structs.EvalUpdateRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "create"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
return fmt.Errorf("only a single eval can be created")
Expand Down Expand Up @@ -277,6 +305,11 @@ func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericRe
}
defer metrics.MeasureSince([]string{"nomad", "eval", "reblock"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
return fmt.Errorf("only a single eval can be reblocked")
Expand Down Expand Up @@ -319,6 +352,11 @@ func (e *Eval) Reap(args *structs.EvalDeleteRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "reap"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Update via Raft
_, index, err := e.srv.raftApply(structs.EvalDeleteRequestType, args)
if err != nil {
Expand Down
6 changes: 4 additions & 2 deletions nomad/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis
reply.Warnings = structs.MergeMultierrorWarnings(warnings...)

// Check job submission permissions
if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
aclObj, err := j.srv.ResolveToken(args.AuthToken)
if err != nil {
return err
} else if aclObj != nil {
if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
Expand Down Expand Up @@ -1867,7 +1868,8 @@ func (j *Job) Dispatch(args *structs.JobDispatchRequest, reply *structs.JobDispa
defer metrics.MeasureSince([]string{"nomad", "job", "dispatch"}, time.Now())

// Check for submit-job permissions
if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
aclObj, err := j.srv.ResolveToken(args.AuthToken)
if err != nil {
return err
} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityDispatchJob) {
return structs.ErrPermissionDenied
Expand Down
10 changes: 10 additions & 0 deletions nomad/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,11 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene
}
defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(n.srv, n.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", n.srv.Region(), err)
}

// Ensure at least a single alloc
if len(args.Alloc) == 0 {
return fmt.Errorf("must update at least one allocation")
Expand Down Expand Up @@ -1920,6 +1925,11 @@ func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.Em
}
defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(n.srv, n.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", n.srv.Region(), err)
}

if len(args.NodeEvents) == 0 {
return fmt.Errorf("no node events given")
}
Expand Down
8 changes: 8 additions & 0 deletions nomad/plan_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ import (
type Plan struct {
srv *Server
logger log.Logger

// ctx provides context regarding the underlying connection
ctx *RPCContext
}

// Submit is used to submit a plan to the leader
Expand All @@ -23,6 +26,11 @@ func (p *Plan) Submit(args *structs.PlanRequest, reply *structs.PlanResponse) er
}
defer metrics.MeasureSince([]string{"nomad", "plan", "submit"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(p.srv, p.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", p.srv.Region(), err)
}

if args.Plan == nil {
return fmt.Errorf("cannot submit nil plan")
}
Expand Down
62 changes: 41 additions & 21 deletions nomad/rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,38 @@ type RPCContext struct {
NodeID string
}

// Certificate returns the first certificate available in the chain.
func (ctx *RPCContext) Certificate() *x509.Certificate {
if ctx == nil || len(ctx.VerifiedChains) == 0 || len(ctx.VerifiedChains[0]) == 0 {
return nil
}

return ctx.VerifiedChains[0][0]
}

// ValidateCertificateForName returns true if the RPC context certificate is valid
// for the given domain name.
func (ctx *RPCContext) ValidateCertificateForName(name string) error {
if ctx == nil || !ctx.TLS {
return nil
}

cert := ctx.Certificate()
if cert == nil {
return errors.New("missing certificate information")
}
for _, dnsName := range cert.DNSNames {
if dnsName == name {
return nil
}
}
if cert.Subject.CommonName == name {
return nil
}

return fmt.Errorf("certificate not valid for %q", name)
}

// listen is used to listen for incoming RPC connections
func (r *rpcHandler) listen(ctx context.Context) {
defer close(r.listenerCh)
Expand Down Expand Up @@ -838,30 +870,18 @@ func (r *rpcHandler) validateRaftTLS(rpcCtx *RPCContext) error {
return nil
}

// defensive conditions: these should have already been enforced by handleConn
if rpcCtx == nil || !rpcCtx.TLS {
return errors.New("non-TLS connection attempted")
}
if len(rpcCtx.VerifiedChains) == 0 || len(rpcCtx.VerifiedChains[0]) == 0 {
// this should never happen, as rpcNameAndRegionValidate should have enforced it
return errors.New("missing cert info")
}

// check that `server.<region>.nomad` is present in cert
expected := "server." + r.Region() + ".nomad"

cert := rpcCtx.VerifiedChains[0][0]
for _, dnsName := range cert.DNSNames {
if dnsName == expected {
// Certificate is valid for the expected name
return nil
err := rpcCtx.ValidateCertificateForName(expected)
if err != nil {
cert := rpcCtx.Certificate()
if cert != nil {
err = fmt.Errorf("request certificate is only valid for %s: %v", cert.DNSNames, err)
}
}
if cert.Subject.CommonName == expected {
// Certificate is valid for the expected name
return nil

return fmt.Errorf("unauthorized raft connection from %s: %v", rpcCtx.Conn.RemoteAddr(), err)
}

r.logger.Warn("unauthorized connection", "required_hostname", expected, "found", cert.DNSNames)
return fmt.Errorf("certificate is invalid for expected role or region: %q", expected)
// Certificate is valid for the expected name
return nil
}
Loading

0 comments on commit e28ed2f

Please sign in to comment.