Skip to content

Commit

Permalink
improvement: allow for more concurrency when using cloud builder (#5955)
Browse files Browse the repository at this point in the history
* improvement: allow for more concurrency when using cloud builder

- Allow more fine-grained status and execute task node concurrency control in the
plugins using setters on the action.
- Assign concurrency group by node execution type (status, execute), action type
(exec, container) and the concurrency setting (assuming that actions with a separate
max concurrency setting are a different workload class).
- Fine-tune the build concurrency settings in the container and kubernetes plugins in the
`validate` and `getStatus` handlers for every action.
- This speeds up `garden build -f` in an internal project from ~40 seconds to ~14 seconds.

* chore: apply feedback from code review
  • Loading branch information
stefreak authored Apr 18, 2024
1 parent d15f38e commit 4717da8
Show file tree
Hide file tree
Showing 19 changed files with 198 additions and 20 deletions.
25 changes: 25 additions & 0 deletions core/src/actions/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,31 @@ export abstract class BaseAction<
})
}

get statusConcurrencyLimit(): number | undefined {
return this._config.internal.statusConcurrencyLimit
}

/**
* Allows plugins to control the concurrency limit of action status task nodes.
*
* Falls back to default concurrency limit defined in the Task class.
*/
set statusConcurrencyLimit(limit: number | undefined) {
this._config.internal.statusConcurrencyLimit = limit
}

/**
* Allows plugins to control the concurrency limit of action execution task nodes.
*
* Falls back to default concurrency limit defined in the Task class.
*/
set executeConcurrencyLimit(limit: number | undefined) {
this._config.internal.executeConcurrencyLimit = limit
}
get executeConcurrencyLimit(): number | undefined {
return this._config.internal.executeConcurrencyLimit
}

abstract getExecuteTask(baseParams: Omit<BaseActionTaskParams, "action">): ExecuteTask
}

Expand Down
3 changes: 3 additions & 0 deletions core/src/actions/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ export interface BaseActionConfig<K extends ActionKind = ActionKind, T = string,
remoteClonePath?: string
moduleName?: string
moduleVersion?: ModuleVersion

statusConcurrencyLimit?: number
executeConcurrencyLimit?: number
}

// Flow/execution control
Expand Down
46 changes: 41 additions & 5 deletions core/src/graph/nodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import type { GraphSolver } from "./solver.js"
import { metadataForLog } from "./common.js"
import { Profile } from "../util/profiling.js"
import { styles } from "../logger/styles.js"
import { gardenEnv } from "../constants.js"

export interface InternalNodeTypes {
status: StatusTaskNode
Expand All @@ -36,12 +37,14 @@ export interface TaskNodeParams<T extends Task> {
@Profile()
export abstract class TaskNode<T extends Task = Task> {
abstract readonly executionType: NodeType

public readonly type: string
public startedAt?: Date
public readonly task: T
public readonly statusOnly: boolean

public abstract readonly concurrencyLimit: number
public abstract readonly concurrencyGroupKey: string

protected solver: GraphSolver
protected dependants: { [key: string]: TaskNode }
protected result?: GraphResult<any>
Expand Down Expand Up @@ -212,8 +215,15 @@ export interface TaskRequestParams<T extends Task = Task> extends TaskNodeParams

@Profile()
export class RequestTaskNode<TaskType extends Task = Task> extends TaskNode<TaskType> {
// FIXME: this is a bit of a TS oddity, but it does work...
executionType = <NodeType>"request"
executionType: NodeType = "request"

override get concurrencyLimit() {
return gardenEnv.GARDEN_HARD_CONCURRENCY_LIMIT
}

override get concurrencyGroupKey() {
return this.executionType
}

public readonly requestedAt: Date
public readonly batchId: string
Expand Down Expand Up @@ -257,7 +267,20 @@ export class RequestTaskNode<TaskType extends Task = Task> extends TaskNode<Task

@Profile()
export class ProcessTaskNode<T extends Task = Task> extends TaskNode<T> {
executionType = <NodeType>"process"
executionType: NodeType = "process"

override get concurrencyLimit() {
return this.task.executeConcurrencyLimit
}

/**
* Tasks with different limits will be grouped in separate concurrency groups.
*
* E.g. if 50 build tasks have limit of 5, and 30 build tasks have limit of 10, then 15 build tasks will execute concurrently.
*/
override get concurrencyGroupKey() {
return `${this.executionType}-${this.task.type}-${this.task.executeConcurrencyLimit}`
}

describe() {
return `processing ${this.task.getDescription()}`
Expand Down Expand Up @@ -319,7 +342,20 @@ export class ProcessTaskNode<T extends Task = Task> extends TaskNode<T> {

@Profile()
export class StatusTaskNode<T extends Task = Task> extends TaskNode<T> {
executionType = <NodeType>"status"
executionType: NodeType = "status"

override get concurrencyLimit() {
return this.task.statusConcurrencyLimit
}

/**
* Tasks with different limits will be grouped in separate concurrency groups.
*
* E.g. if 50 build tasks have limit of 5, and 30 build tasks have limit of 10, then 15 build tasks will execute concurrently.
*/
override get concurrencyGroupKey() {
return `${this.executionType}-${this.task.type}-${this.task.executeConcurrencyLimit}`
}

describe() {
return `resolving status for ${this.task.getDescription()}`
Expand Down
4 changes: 2 additions & 2 deletions core/src/graph/solver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,10 +312,10 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
const inProgressByGroup = groupBy(inProgressNodes, "type")

// Enforce concurrency limits per task type
const grouped = groupBy(pending, (n) => n.task.type)
const grouped = groupBy(pending, (n) => n.concurrencyGroupKey)
const limitedByGroup = Object.values(grouped).flatMap((nodes) => {
// Note: We can be sure there is at least one node in the array
const groupLimit = nodes[0].task.concurrencyLimit
const groupLimit = nodes[0].concurrencyLimit
const inProgress = inProgressByGroup[nodes[0].type] || []
return nodes.slice(0, groupLimit - inProgress.length)
})
Expand Down
21 changes: 20 additions & 1 deletion core/src/plugins/container/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,38 @@ import { joinWithPosix } from "../../util/fs.js"
import type { Resolved } from "../../actions/types.js"
import dedent from "dedent"
import { splitFirst } from "../../util/string.js"
import type { ContainerProviderConfig } from "./container.js"
import {
CONTAINER_BUILD_CONCURRENCY_LIMIT_CLOUD_BUILDER,
CONTAINER_BUILD_CONCURRENCY_LIMIT_LOCAL,
CONTAINER_STATUS_CONCURRENCY_LIMIT,
type ContainerProviderConfig,
} from "./container.js"
import type { Writable } from "stream"
import type { ActionLog } from "../../logger/log-entry.js"
import type { PluginContext } from "../../plugin-context.js"
import type { SpawnOutput } from "../../util/util.js"
import { cloudbuilder } from "./cloudbuilder.js"
import { styles } from "../../logger/styles.js"

export const validateContainerBuild: BuildActionHandler<"validate", ContainerBuildAction> = async ({ action }) => {
// configure concurrency limit for build status task nodes.
action.statusConcurrencyLimit = CONTAINER_STATUS_CONCURRENCY_LIMIT

return {}
}

export const getContainerBuildStatus: BuildActionHandler<"getStatus", ContainerBuildAction> = async ({
ctx,
action,
log,
}) => {
// configure concurrency limit for build execute task nodes.
if (await cloudbuilder.isConfiguredAndAvailable(ctx, action)) {
action.executeConcurrencyLimit = CONTAINER_BUILD_CONCURRENCY_LIMIT_CLOUD_BUILDER
} else {
action.executeConcurrencyLimit = CONTAINER_BUILD_CONCURRENCY_LIMIT_LOCAL
}

const outputs = action.getOutputs()
const { identifier } = (await containerHelpers.getLocalImageInfo(outputs.localImageId, log, ctx)) || {}

Expand Down
11 changes: 9 additions & 2 deletions core/src/plugins/container/cloudbuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const cloudBuilderAvailability = new LRUCache<string, CloudBuilderAvailability>(

// public API
export const cloudbuilder = {
async isConfiguredAndAvailable(ctx: PluginContext, action: Resolved<ContainerBuildAction>) {
isConfigured(ctx: PluginContext): boolean {
const { isCloudBuilderEnabled } = getConfiguration(ctx)
if (!isCloudBuilderEnabled) {
return false
Expand All @@ -55,8 +55,15 @@ export const cloudbuilder = {
return false
}

const availability = await getAvailability(ctx, action)
return true
},

async isConfiguredAndAvailable(ctx: PluginContext, action: Resolved<ContainerBuildAction>) {
if (!cloudbuilder.isConfigured(ctx)) {
return false
}

const availability = await getAvailability(ctx, action)
return availability.available
},

Expand Down
14 changes: 12 additions & 2 deletions core/src/plugins/container/container.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ import type {
ContainerRuntimeActionConfig,
} from "./moduleConfig.js"
import { containerModuleOutputsSchema, containerModuleSpecSchema, defaultDockerfileName } from "./moduleConfig.js"
import { buildContainer, getContainerBuildActionOutputs, getContainerBuildStatus } from "./build.js"
import {
buildContainer,
getContainerBuildActionOutputs,
getContainerBuildStatus,
validateContainerBuild,
} from "./build.js"
import type { ConfigureModuleParams } from "../../plugin/handlers/Module/configure.js"
import { dedent, naturalList } from "../../util/string.js"
import type { Provider, BaseProviderConfig } from "../../config/provider.js"
Expand All @@ -43,10 +48,14 @@ import type { Resolved } from "../../actions/types.js"
import { getDeployedImageId } from "../kubernetes/container/util.js"
import type { DeepPrimitiveMap } from "../../config/common.js"
import { joi } from "../../config/common.js"
import { DEFAULT_DEPLOY_TIMEOUT_SEC } from "../../constants.js"
import { DEFAULT_DEPLOY_TIMEOUT_SEC, gardenEnv } from "../../constants.js"
import type { ExecBuildConfig } from "../exec/build.js"
import type { PluginToolSpec } from "../../plugin/tools.js"

export const CONTAINER_STATUS_CONCURRENCY_LIMIT = gardenEnv.GARDEN_HARD_CONCURRENCY_LIMIT
export const CONTAINER_BUILD_CONCURRENCY_LIMIT_LOCAL = 5
export const CONTAINER_BUILD_CONCURRENCY_LIMIT_CLOUD_BUILDER = 20

export interface ContainerProviderConfig extends BaseProviderConfig {
dockerBuildExtraFlags?: string[]
gardenCloudBuilder?: {
Expand Down Expand Up @@ -532,6 +541,7 @@ export const gardenPlugin = () =>
build: buildContainer,
getStatus: getContainerBuildStatus,
publish: publishContainerBuild,
validate: validateContainerBuild,
},
},
],
Expand Down
34 changes: 34 additions & 0 deletions core/src/plugins/kubernetes/container/extensions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,16 @@ import type {
ContainerRunAction,
ContainerTestAction,
} from "../../container/config.js"
import {
CONTAINER_BUILD_CONCURRENCY_LIMIT_CLOUD_BUILDER,
CONTAINER_BUILD_CONCURRENCY_LIMIT_LOCAL,
CONTAINER_STATUS_CONCURRENCY_LIMIT,
} from "../../container/container.js"
import type { ContainerBuildMode, KubernetesPluginContext, KubernetesProvider } from "../config.js"
import {
CONTAINER_BUILD_CONCURRENCY_LIMIT_REMOTE_KUBERNETES,
CONTAINER_STATUS_CONCURRENCY_LIMIT_REMOTE_KUBERNETES,
} from "../kubernetes.js"
import { getPortForwardHandler } from "../port-forward.js"
import { k8sGetRunResult } from "../run-results.js"
import { k8sGetTestResult } from "../test-results.js"
Expand Down Expand Up @@ -66,6 +75,20 @@ export const k8sContainerBuildExtension = (): BuildActionExtension<ContainerBuil
}
},

validate: async ({ ctx, action }) => {
const provider = ctx.provider as KubernetesProvider

// override build task status concurrency
if (provider.config.deploymentRegistry) {
action.statusConcurrencyLimit = CONTAINER_STATUS_CONCURRENCY_LIMIT_REMOTE_KUBERNETES
} else {
// if there's no deployment registry, we are building locally.
action.statusConcurrencyLimit = CONTAINER_STATUS_CONCURRENCY_LIMIT
}

return {}
},

build: async (params) => {
const { ctx, action } = params

Expand All @@ -80,6 +103,17 @@ export const k8sContainerBuildExtension = (): BuildActionExtension<ContainerBuil

getStatus: async (params) => {
const { ctx, action } = params
const provider = ctx.provider as KubernetesProvider

// override build task execute concurrency
if (await cloudbuilder.isConfiguredAndAvailable(ctx, action)) {
action.executeConcurrencyLimit = CONTAINER_BUILD_CONCURRENCY_LIMIT_CLOUD_BUILDER
} else if (provider.config.buildMode === "local-docker") {
action.executeConcurrencyLimit = CONTAINER_BUILD_CONCURRENCY_LIMIT_LOCAL
} else {
// build mode is remote
action.executeConcurrencyLimit = CONTAINER_BUILD_CONCURRENCY_LIMIT_REMOTE_KUBERNETES
}

const buildMode = await getBuildMode({
ctx,
Expand Down
3 changes: 3 additions & 0 deletions core/src/plugins/kubernetes/kubernetes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ import { kubernetesPodRunDefinition, kubernetesPodTestDefinition } from "./kuber
import { kubernetesExecRunDefinition, kubernetesExecTestDefinition } from "./kubernetes-type/kubernetes-exec.js"
import { makeDocsLinkPlain, makeDocsLinkStyled } from "../../docs/common.js"

export const CONTAINER_BUILD_CONCURRENCY_LIMIT_REMOTE_KUBERNETES = 5
export const CONTAINER_STATUS_CONCURRENCY_LIMIT_REMOTE_KUBERNETES = 20

export async function configureProvider({
namespace,
projectName,
Expand Down
26 changes: 24 additions & 2 deletions core/src/tasks/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,19 @@ interface TaskEvents<O extends ValidResultType> {
export abstract class BaseTask<O extends ValidResultType = ValidResultType> extends TypedEventEmitter<TaskEvents<O>> {
abstract type: string

// How many tasks of this exact type are allowed to run concurrently
concurrencyLimit = 10
/**
* How many execute task nodes of this exact type are allowed to run concurrently
*
* Children can override this to set a custom concurrency limit.
*/
abstract executeConcurrencyLimit: number

/**
* How many get-status task nodes of this exact type are allowed to run concurrently
*
* Children can override this to set a custom concurrency limit.
*/
abstract statusConcurrencyLimit: number

public readonly garden: Garden
public readonly log: Log
Expand Down Expand Up @@ -624,6 +635,17 @@ export abstract class ExecuteActionTask<
},
> extends BaseActionTask<T, O & ExecuteActionOutputs<T>> {
override executeTask = true
protected defaultExecuteConcurrencyLimit = 10
protected defaultStatusConcurrencyLimit = 10

override get executeConcurrencyLimit(): number {
return this.action.executeConcurrencyLimit || this.defaultExecuteConcurrencyLimit
}

override get statusConcurrencyLimit(): number {
return this.action.statusConcurrencyLimit || this.defaultStatusConcurrencyLimit
}

abstract override type: Lowercase<T["kind"]>

abstract override getStatus(params: ActionTaskStatusParams<T>): Promise<O & ExecuteActionOutputs<T>>
Expand Down
3 changes: 2 additions & 1 deletion core/src/tasks/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import { wrapActiveSpan } from "../util/open-telemetry/spans.js"
@Profile()
export class BuildTask extends ExecuteActionTask<BuildAction, BuildStatus> {
type = "build" as const
override concurrencyLimit = 5
override defaultStatusConcurrencyLimit = 5
override defaultExecuteConcurrencyLimit = 5
eventName = "buildStatus" as const

getDescription() {
Expand Down
4 changes: 3 additions & 1 deletion core/src/tasks/delete-deploy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ export interface DeleteDeployTaskParams extends BaseActionTaskParams<DeployActio

export class DeleteDeployTask extends BaseActionTask<DeployAction, DeployStatus> {
type = "delete-deploy"
override concurrencyLimit = 10
override executeConcurrencyLimit = 10
override statusConcurrencyLimit = 10

dependantsFirst: boolean
deleteDeployNames: string[]

Expand Down
3 changes: 2 additions & 1 deletion core/src/tasks/deploy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ function printIngresses(status: DeployStatus, log: ActionLog) {
@Profile()
export class DeployTask extends ExecuteActionTask<DeployAction, DeployStatus> {
type = "deploy" as const
override concurrencyLimit = 10
override defaultStatusConcurrencyLimit = 10
override defaultExecuteConcurrencyLimit = 10

events?: PluginEventBroker
startSync: boolean
Expand Down
3 changes: 2 additions & 1 deletion core/src/tasks/publish.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ export interface PublishTaskParams extends BaseActionTaskParams<BuildAction> {

export class PublishTask extends BaseActionTask<BuildAction, PublishActionResult> {
type = "publish"
override concurrencyLimit = 5
override executeConcurrencyLimit = 5
override statusConcurrencyLimit = 5

/**
* Only defined if --tag option is used in the garden publish command.
Expand Down
Loading

0 comments on commit 4717da8

Please sign in to comment.