diff --git a/README.md b/README.md index 8a36625a6..812241c07 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,6 @@ To query the KubeHound graph data requires using the [Gremlin](https://tinkerpop + Download and install the application from https://gdotv.com/ + Create a connection to the local janusgraph instance by following the steps here https://docs.gdotv.com/connection-management/ and using `hostname=localhost` + Navigate to the query editor and enter a sample query e.g `g.V().count()`. See detailed instructions here: https://docs.gdotv.com/query-editor/#run-your-query -+ See the provided [cheatsheet](./pkg/kubehound/graph/CHEATSHEET.md) for examples of useful queries for various use cases. ## Development diff --git a/pkg/kubehound/core/core.go b/pkg/kubehound/core/core.go index 56b0f1ed4..aa37c1a91 100644 --- a/pkg/kubehound/core/core.go +++ b/pkg/kubehound/core/core.go @@ -62,6 +62,9 @@ func buildGraph(ctx context.Context, cfg *config.KubehoundConfig, storedb stored log.I.Info("Loading graph edge definitions") edges := edge.Registered() + if err := edges.Verify(); err != nil { + return fmt.Errorf("edge registry verification: %w", err) + } log.I.Info("Loading graph builder") builder, err := graph.NewBuilder(cfg, storedb, graphdb, cache, edges) diff --git a/pkg/kubehound/graph/CHEATSHEET.md b/pkg/kubehound/graph/CHEATSHEET.md deleted file mode 100644 index b9c0c73a2..000000000 --- a/pkg/kubehound/graph/CHEATSHEET.md +++ /dev/null @@ -1,230 +0,0 @@ -# KubeHound Cheat Sheet - -A one-stop-shop of KubeHound queries for all use cases. 
- -- [Basic Gremlin](#basic-gremlin) -- [Simple KubeHound queries](#simple-kubehound-queries) -- [Critical assets](#critical-assets) -- [Basic path queries](#basic-path-queries) -- [Attack paths from compromised assets](#attack-paths-from-compromised-assets) - - [Containers](#containers) - - [Credentials](#credentials) - - [Endpoints](#endpoints) -- [Critical asset exposure](#critical-asset-exposure) -- [CVE impact assessment](#cve-impact-assessment) -- [Threat modelling](#threat-modelling) -- [Risk metrics](#risk-metrics) -- [Tips](#tips) - -## Basic Gremlin - -For gremlin fundamentals consult the following: - -+ [Basic](https://dkuppitz.github.io/gremlin-cheat-sheet/101.html) -+ [Advanced](https://dkuppitz.github.io/gremlin-cheat-sheet/102.html) - -For large clusters it is recommended to add a `limit()` step to ALL queries where the graph output will be examined in the UI to prevent overloading the UI. An example looking for attack paths possible from a sample of 5 containers would look like: - -```groovy -g.V().hasLabel("Container").limit(5).outE() -``` - -## Simple KubeHound queries - -Count the number of pods in the cluster: - -```groovy -g.V().hasLabel("Pod").count() -``` - -View all the possible container escapes in the cluster: - -```groovy -g.V().hasLabel("Container").outE().inV().hasLabel("Node").path() -``` - -List the names of all possible attacks in a cluster with total count: - -```groovy -g.E().groupCount().by(label) -``` - -View all the mounted host path volumes in the cluster: - -```groovy -g.V().hasLabel("Volume").has("type", "HostPath").groupCount().by("sourcePath") -``` - -View host path mounts that can be exploited to access a node: - -```groovy -g.E().hasLabel("EXPLOIT_HOST_READ", "EXPLOIT_HOST_WRITE").outV().groupCount().by("sourcePath") -``` - -View all service endpoints by service name in the cluster, here we are using the [EndpointExposureType](../../pkg/kubehound/models/shared/constants.go) enum value to filter only on services: - 
-```groovy -g.V().hasLabel("Endpoint").has("exposure", 3).groupCount().by("serviceEndpoint") -``` - -## Critical assets - -Certain assets in a cluster will have full control over the cluster leading to a huge number of possible attack paths being generated by a single asset. The canonical example being the `cluster-admin` role. An attacker compromising such an asset would have full control of the cluster. As such we consider it to be a termination condition for attack paths. This enables a generic mechanism for querying attack paths by traversing the graph until a critical asset is encountered. Example below: - -```groovy -g.V().has("name", "starting-point").repeat(out().simplePath()).until(has("critical", true)) -``` - -## Basic path queries - -All paths between a volume and identity: - -```groovy -g.V().hasLabel("Volume").repeat(out().simplePath()).until(hasLabel("Identity")).path() -``` - -All paths (up to 5 hops) between a container and a node: - -```groovy -g.V().hasLabel("Container").repeat(out().simplePath()).until(hasLabel("Node").or().loops().is(5)).hasLabel("Node").path() -``` - -## Attack paths from compromised assets - -### Containers - -Attack paths (up to 10 hops) from a known breached container (in this case the `nsenter-pod` container) to any critical asset: - -```groovy -g.V().hasLabel("Container").has("name", "nsenter-pod").repeat(out().simplePath()).until(has("critical", true).or().loops().is(10)).has("critical", true).path() -``` - -Attack paths (up to 10 hops) from a known backdoored container image (in this case the `config-file-writer-go:` container) to any critical asset: - -```groovy -g.V().hasLabel("Container").has("image", TextP.containing("eu.gcr.io/datadog-staging/config-file-writer-go")).repeat(out().simplePath()).until(has("critical", true).or().loops().is(10)).has("critical", true).path() -``` - -### Credentials - -All attack paths (up to 6 hops) from any compomised credential to a critical asset: - -```groovy 
-g.V().hasLabel("Identity").repeat(out().simplePath()).until(has("critical", true).or().loops().is(6)).has("critical", true).path().limit(5) -``` - -Attack paths (up to 10 hops) from a known breached credential (in this case the `pod-patch-sa` service account) to a critical asset: - -```groovy -g.V().hasLabel("Identity").has("name", "pod-patch-sa").repeat(out().simplePath()).until(has("critical", true).or().loops().is(10)).has("critical", true).path() -``` - -### Endpoints - -All attack paths (up to 6 hops) from any endpoint to a critical asset: - -```groovy -g.V().hasLabel("Endpoint").repeat(out().simplePath()).until(has("critical", true).or().loops().is(6)).has("critical", true).path().limit(5) -``` - -Attack paths (up to 10 hops) from a known dangerous endpoint (e.g JMX) to a critical asset: - -```groovy -g.V().hasLabel("Endpoint").has("portName", "jmx").repeat(out().simplePath()).until(has("critical", true).or().loops().is(6)).has("critical", true).path().limit(5) -``` - -## Critical asset exposure - -All attack paths (up to 5 hops) to a specific critical asset (in this case the `system:auth-delegator`) permission set from containers/identities/nodes: - -```groovy -g.V().hasLabel("Container", "Identity", "Node").repeat(out().simplePath()).until(has("name", "system:auth-delegator").or().loops().is(5)).has("name", "system:auth-delegator").hasLabel("Role").path() -``` - -## CVE impact assessment - -If we take for example the [Log4J vulnerability affecting elastic search](https://www.elastic.co/blog/log4j2-vulnerability-what-to-know-security-vulnerability-learn-more-elastic-support). 
First evaluate whether any vulnerable images are running within the cluster: - -```groovy -g.V().hasLabel("Container").has("image", TextP.containing("elasticsearch")).groupCount().by("image") -``` - -Now check for any exposed services that could be affected and have a path to a critical asset that can be prioritised for patching/remediation: - -```groovy -g.V().hasLabel("Container").has("image", "dockerhub.com/elasticsearch:7.1.4").where(inE("ENDPOINT_EXPLOIT").outV().has("exposure", gte(3))).where(repeat(out().simplePath()).until(has("critical", true).or().loops().is(10)).has("critical", true).limit(1)) -``` - -## Threat modelling - -All unique attack paths by labels to a specific asset (in this case the `cluster-admin` role): - -```groovy -g.V().hasLabel("Container", "Identity").repeat(out().simplePath()).until(has("name", "cluster-admin").or().loops().is(5)).has("name", "cluster-admin").hasLabel("Role").path().as("p").by(label).dedup().select("p").path() -``` - -All unique attack paths by labels to a ANY critical asset: - -```groovy -g.V().hasLabel("Container", "Identity").repeat(out().simplePath()).until(has("critical", true).or().loops().is(5)).has("critical", true).path().as("p").by(label).dedup().select("p").path() -``` - -## Risk metrics - -**What is the shortest exploitable path between an exposed service and a critical asset?** - -In this case we can look for service endpoints and query the minimum path size to reach a critical assets. 
Again we are using the [EndpointExposureType](../../pkg/kubehound/models/shared/constants.go) enum value to filter only on services: - -```groovy -g.V().hasLabel("Endpoint").has("exposure", gte(3)).repeat(out().simplePath()).until(has("critical", true).or().loops().is(7)).has("critical", true).path().count(local).min() -``` - -**What percentage of internet facing services have an exploitable path to a critical asset?** - -Again we are using the [EndpointExposureType](../../pkg/kubehound/models/shared/constants.go) enum value to filter only on services - -```groovy -// Base case -g.V().hasLabel("Endpoint").has("exposure", gte(3)).count() - -// Has a critical path -g.V().hasLabel("Endpoint").has("exposure", gte(3)).where(repeat(out().simplePath()).until(has("critical", true).or().loops().is(10)).has("critical", true).limit(1)).count() -``` - -**What percentage level of attack path reduction was achieved by the introduction of a control?** - -To verify concrete impact, this can be achieved by comparing the difference in the key risk metrics above, before and after the control change. To simulate the impact of introducing a control (e.g to evaluate ROI), we can add conditions to our path queries. 
For example if we wanted to evaluate the impact of adding a gatekeeper rule that would deny the use of `hostPid=true` we could do the following: - -```groovy -// Calculate the base case -g.V().hasLabel("Endpoint").has("exposure", gte(3)).repeat(out().simplePath()).until(has("critical", true).or().loops().is(6)).has("critical", true).path().count() - -// Calculate the impact of preventing CE_NSENTER attack -g.V().hasLabel("Endpoint").has("exposure", gte(3)).repeat(outE().not(hasLabel("CE_NSENTER")).inV().simplePath()).emit().until(has("critical", true).or().loops().is(6)).has("critical", true).path().count() -``` - -**What type of control would cut off the largest number of attack paths to a specific asset in our clusters?** - -We can count the number of instances of unique attack paths using: - -```groovy -g.V().hasLabel("Container").repeat(outE().inV().simplePath()).emit().until(has("critical", true).or().loops().is(6)).has("critical", true).path().by(label).groupCount() -``` - -This gives an output of the form: - -```groovy -{ - "path[Container, CE_MODULE_LOAD, Node, POD_ATTACH, Pod, CONTAINER_ATTACH, Container, IDENTITY_ASSUME, Identity, PERMISSION_DISCOVER, PermissionSet]" : 18, - "path[Container, IDENTITY_ASSUME, Identity, PERMISSION_DISCOVER, PermissionSet, TOKEN_BRUTEFORCE, Identity, PERMISSION_DISCOVER, PermissionSet, TOKEN_BRUTEFORCE, Identity, PERMISSION_DISCOVER, PermissionSet]" : 1824, -} -``` - -We can further reduce this to group by attacks, rather than full paths in post-processing or modifying the query. 
- -## Tips - -+ Always put a max hop count on path queries or runtime can get very long -+ For queries to be displayed in the UI, try to limit the output to 1000 elements or less -+ Enable large cluster optimizations if queries are returning too slowly -+ Try to filter the initial element of queries by namespace/service/app to avoid generating too many results \ No newline at end of file diff --git a/pkg/kubehound/graph/builder.go b/pkg/kubehound/graph/builder.go index fe9dc84ee..78e0f83d9 100644 --- a/pkg/kubehound/graph/builder.go +++ b/pkg/kubehound/graph/builder.go @@ -139,6 +139,15 @@ func (b *Builder) Run(ctx context.Context) error { return err } + // Dependent edges must be built last, sequentially + l.Info("Starting dependent edge construction") + for label, e := range b.edges.Dependent() { + err := b.buildEdge(ctx, label, e, oic, l) + if err != nil { + return fmt.Errorf("building dependent edge %s: %w", label, err) + } + } + l.Info("Completed edge construction") return nil } diff --git a/pkg/kubehound/graph/edge/builder.go b/pkg/kubehound/graph/edge/builder.go index e12a19d14..bc014fe7e 100644 --- a/pkg/kubehound/graph/edge/builder.go +++ b/pkg/kubehound/graph/edge/builder.go @@ -15,7 +15,7 @@ import ( var __ = gremlin.T__ var P = gremlin.P -// Edge interface defines objects used to construct edges within our graph database through processing data from the intermediate store. +// Builder interface defines objects used to construct edges within our graph database through processing data from the intermediate store. 
//go:generate mockery --name Builder --output mocks --case underscore --filename edge.go --with-expecter type Builder interface { @@ -43,3 +43,12 @@ type Builder interface { Stream(ctx context.Context, store storedb.Provider, cache cache.CacheReader, process types.ProcessEntryCallback, complete types.CompleteQueryCallback) error } + +// DependentBuilder interface defines objects used to construct edges with dependencies on other edges in the graph. +// Dependent edges are built last and their dependencies cannot be dependent edges themselves. +type DependentBuilder interface { + Builder + + // Dependencies returns the edge labels of all dependencies. + Dependencies() []string +} diff --git a/pkg/kubehound/graph/edge/registry.go b/pkg/kubehound/graph/edge/registry.go index 0fdb8fbf2..40f86cdba 100644 --- a/pkg/kubehound/graph/edge/registry.go +++ b/pkg/kubehound/graph/edge/registry.go @@ -1,6 +1,7 @@ package edge import ( + "fmt" "sync" "github.com/DataDog/KubeHound/pkg/telemetry/log" @@ -9,21 +10,24 @@ import ( type RegistrationFlag uint8 const ( - RegisterDefault RegistrationFlag = 1 << iota // Default edge - RegisterGraphMutation // Edge can mutate the graph + RegisterDefault RegistrationFlag = 1 << iota // Default edge + RegisterGraphMutation // Edge can mutate the graph + RegisterGraphDependency // Edge has a dependency on default/mutating edges ) // Registry holds details of edges (i.e attacks) registered in KubeHound. type Registry struct { - mutating map[string]Builder - simple map[string]Builder + mutating map[string]Builder + simple map[string]Builder + dependent map[string]DependentBuilder } // newRegistry creates a new registry instance. This should not be called directly. 
func newRegistry() *Registry { r := &Registry{ - mutating: make(map[string]Builder), - simple: make(map[string]Builder), + mutating: make(map[string]Builder), + simple: make(map[string]Builder), + dependent: make(map[string]DependentBuilder), } return r @@ -52,18 +56,53 @@ func (r *Registry) Simple() map[string]Builder { return r.simple } +// Dependent returns the map of registered edge builders that depend on default or mutating edges. +func (r *Registry) Dependent() map[string]DependentBuilder { + return r.dependent +} + +// Verify verifies the integrity and consistency of the registry. +// It must only be called after all edges have been registered via init() calls. +func (r *Registry) Verify() error { + // Ensure all dependent edges have dependencies registered in mutating or default collections + for name, builder := range r.dependent { + for _, d := range builder.Dependencies() { + _, depSimple := r.simple[d] + _, depMutating := r.mutating[d] + + if !depSimple && !depMutating { + return fmt.Errorf("unregistered dependency (%s) for dependent edge %s", d, name) + } + } + } + + return nil +} + // Register loads the provided edge into the registry. 
func Register(edge Builder, flags RegistrationFlag) { registry := Registered() - if flags&RegisterGraphMutation != 0 { + switch { + case flags&RegisterGraphMutation != 0: log.I.Debugf("Registering mutating edge builder %s -> %s", edge.Name(), edge.Label()) - if _, ok := registry.mutating[edge.Name()]; ok { log.I.Fatalf("edge name collision: %s", edge.Name()) } registry.mutating[edge.Name()] = edge - } else { + case flags&RegisterGraphDependency != 0: + log.I.Debugf("Registering dependent edge builder %s -> %s", edge.Name(), edge.Label()) + if _, ok := registry.dependent[edge.Name()]; ok { + log.I.Fatalf("edge name collision: %s", edge.Name()) + } + + dependent, ok := edge.(DependentBuilder) + if !ok { + log.I.Fatalf("dependent edge must implement DependentBuilder: %s", edge.Name()) + } + + registry.dependent[edge.Name()] = dependent + default: log.I.Debugf("Registering default edge builder %s -> %s", edge.Name(), edge.Label()) if _, ok := registry.simple[edge.Name()]; ok { log.I.Fatalf("edge name collision: %s", edge.Name())