Expedited image manifest digest reporting #4177

Merged · 4 commits · May 22, 2024
1 change: 1 addition & 0 deletions README.md
@@ -199,6 +199,7 @@ additional details on each available environment variable.
| `ECS_APPARMOR_CAPABLE` | `true` | Whether AppArmor is available on the container instance. | `false` | `false` |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` | 10m | Default time to wait to delete containers for a stopped task (see also `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION_JITTER`). If set to less than 1 second, the value is ignored. | 3h | 3h |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION_JITTER` | 1h | Jitter value for the task engine cleanup wait duration. When specified, the actual cleanup wait duration time for each task will be the duration specified in `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` plus a random duration between 0 and the jitter duration. | blank | blank |
| `ECS_MANIFEST_PULL_TIMEOUT` | 10m | Timeout before giving up on fetching the image manifest for a container image. | 1m | 1m |
| `ECS_CONTAINER_STOP_TIMEOUT` | 10m | Instance-scoped configuration for the time to wait for a container to exit normally before it is forcibly killed. | 30s | 30s |
| `ECS_CONTAINER_START_TIMEOUT` | 10m | Timeout before giving up on starting a container. | 3m | 8m |
| `ECS_CONTAINER_CREATE_TIMEOUT` | 10m | Timeout before giving up on creating a container. The minimum value is 1m. If a value below the minimum is set, it is raised to the minimum. | 4m | 4m |
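For context, here is a minimal sketch (not the agent's actual configuration code) of how a duration-valued setting such as `ECS_MANIFEST_PULL_TIMEOUT` could be read from the environment, falling back to the documented 1m default when the variable is unset or unparsable:

```go
package main

import (
	"fmt"
	"os"
	"time"
)

// durationFromEnv reads a duration-valued environment variable and falls back
// to a default when the variable is unset or cannot be parsed.
func durationFromEnv(name string, fallback time.Duration) time.Duration {
	raw := os.Getenv(name)
	if raw == "" {
		return fallback
	}
	d, err := time.ParseDuration(raw)
	if err != nil {
		return fallback
	}
	return d
}

func main() {
	timeout := durationFromEnv("ECS_MANIFEST_PULL_TIMEOUT", time.Minute)
	fmt.Println("manifest pull timeout:", timeout)
}
```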
15 changes: 15 additions & 0 deletions agent/api/container/container.go
@@ -23,6 +23,7 @@ import (
"time"

resourcestatus "github.com/aws/amazon-ecs-agent/agent/taskresource/status"
referenceutil "github.com/aws/amazon-ecs-agent/agent/utils/reference"
apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status"
apierrors "github.com/aws/amazon-ecs-agent/ecs-agent/api/errors"
"github.com/aws/amazon-ecs-agent/ecs-agent/credentials"
@@ -1521,3 +1522,17 @@ func (c *Container) GetImageName() string {
containerImage := strings.Split(c.Image, ":")[0]
return containerImage
}

// DigestResolved returns true if the container has a resolved image manifest digest.
// Always returns false for internal containers, as those are out of scope for digest resolution.
func (c *Container) DigestResolved() bool {
return !c.IsInternal() && c.GetImageDigest() != ""
}

// DigestResolutionRequired returns true if the container's image requires manifest digest
// resolution, that is, if the container's image reference does not already carry a digest.
// Always returns false for internal containers, as those are out of scope for digest resolution.
func (c *Container) DigestResolutionRequired() bool {
return !c.IsInternal() && referenceutil.GetDigestFromImageRef(c.Image) == ""
}
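As a rough usage sketch (a hypothetical caller, assumed to live alongside the `container` package; not part of this PR), the two helpers above split the decision into "should a digest be resolved for this image?" and "is there a resolved digest worth reporting?":

```go
// shouldResolveAndReport is a hypothetical helper illustrating how the two
// methods added above could be used together.
func shouldResolveAndReport(c *Container) (resolve, report bool) {
	// Resolution is only attempted for non-internal containers whose image
	// reference does not already carry a digest.
	resolve = c.DigestResolutionRequired()
	// A MANIFEST_PULLED state change is only worth reporting once a digest
	// has actually been resolved.
	report = c.DigestResolved()
	return resolve, report
}
```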
25 changes: 25 additions & 0 deletions agent/api/container/container_test.go
@@ -1342,3 +1342,28 @@ func getContainer(hostConfig string, credentialSpecs []string) *Container {
c.CredentialSpecs = credentialSpecs
return c
}

func TestDigestResolved(t *testing.T) {
t.Run("never resolved for internal container", func(t *testing.T) {
assert.False(t, (&Container{Type: ContainerServiceConnectRelay}).DigestResolved())
})
t.Run("digest resolved if it is populated", func(t *testing.T) {
assert.True(t, (&Container{ImageDigest: "digest"}).DigestResolved())
})
t.Run("digest not resolved if it is not populated", func(t *testing.T) {
assert.False(t, (&Container{}).DigestResolved())
})
}

func TestDigestResolutionRequired(t *testing.T) {
t.Run("never required for internal containers", func(t *testing.T) {
assert.False(t, (&Container{Type: ContainerServiceConnectRelay}).DigestResolutionRequired())
})
t.Run("required if not found in image reference", func(t *testing.T) {
assert.True(t, (&Container{Image: "alpine"}).DigestResolutionRequired())
})
t.Run("not required if found in image reference", func(t *testing.T) {
image := "ubuntu@sha256:ed6d2c43c8fbcd3eaa44c9dab6d94cb346234476230dc1681227aa72d07181ee"
assert.False(t, (&Container{Image: image}).DigestResolutionRequired())
})
}
77 changes: 51 additions & 26 deletions agent/api/statechange.go
@@ -130,7 +130,7 @@ func NewTaskStateChangeEvent(task *apitask.Task, reason string) (TaskStateChange
return event, ErrShouldNotSendEvent{task.Arn}
}
taskKnownStatus := task.GetKnownStatus()
if !taskKnownStatus.BackendRecognized() {
if taskKnownStatus != apitaskstatus.TaskManifestPulled && !taskKnownStatus.BackendRecognized() {
return event, errors.Errorf(
"create task state change event api: status not recognized by ECS: %v",
taskKnownStatus)
@@ -140,6 +140,14 @@ func NewTaskStateChangeEvent(task *apitask.Task, reason string) (TaskStateChange
"create task state change event api: status [%s] already sent",
taskKnownStatus.String())
}
if taskKnownStatus == apitaskstatus.TaskManifestPulled && !task.HasAContainerWithResolvedDigest() {
return event, ErrShouldNotSendEvent{
fmt.Sprintf(
"create task state change event api: status %s not eligible for backend reporting as"+
" no digests were resolved",
apitaskstatus.TaskManifestPulled.String()),
}
}

event = TaskStateChange{
TaskARN: task.Arn,
@@ -161,11 +169,21 @@ func NewContainerStateChangeEvent(task *apitask.Task, cont *apicontainer.Contain
return event, err
}
contKnownStatus := cont.GetKnownStatus()
if !contKnownStatus.ShouldReportToBackend(cont.GetSteadyStateStatus()) {
if contKnownStatus != apicontainerstatus.ContainerManifestPulled &&
!contKnownStatus.ShouldReportToBackend(cont.GetSteadyStateStatus()) {
return event, ErrShouldNotSendEvent{fmt.Sprintf(
"create container state change event api: status not recognized by ECS: %v",
contKnownStatus)}
}
if contKnownStatus == apicontainerstatus.ContainerManifestPulled && !cont.DigestResolved() {
// Transition to MANIFEST_PULLED state is sent to the backend only to report a resolved
// image manifest digest. There is no need to generate an event if the digest was not
// resolved, which can happen for various reasons.
return event, ErrShouldNotSendEvent{fmt.Sprintf(
"create container state change event api:"+
" no need to send %s event as no resolved digests were found",
apicontainerstatus.ContainerManifestPulled.String())}
}
if cont.GetSentStatus() >= contKnownStatus {
return event, ErrShouldNotSendEvent{fmt.Sprintf(
"create container state change event api: status [%s] already sent for container %s, task %s",
@@ -196,7 +214,7 @@ func newUncheckedContainerStateChangeEvent(task *apitask.Task, cont *apicontaine
TaskArn: task.Arn,
ContainerName: cont.Name,
RuntimeID: cont.GetRuntimeID(),
Status: contKnownStatus.BackendStatus(cont.GetSteadyStateStatus()),
Status: containerStatusChangeStatus(contKnownStatus, cont.GetSteadyStateStatus()),
ExitCode: cont.GetKnownExitCode(),
PortBindings: portBindings,
ImageDigest: cont.GetImageDigest(),
@@ -206,6 +224,27 @@ func newUncheckedContainerStateChangeEvent(task *apitask.Task, cont *apicontaine
return event, nil
}

// containerStatusChangeStatus maps a container's known status to the status reported in a
// ContainerStateChange.
//
// Returns ContainerRunning if the known status matches the steady-state status,
// returns the known status if it is ContainerManifestPulled or ContainerStopped,
// and returns ContainerStatusNone in all other cases.
func containerStatusChangeStatus(
knownStatus apicontainerstatus.ContainerStatus,
steadyStateStatus apicontainerstatus.ContainerStatus,
) apicontainerstatus.ContainerStatus {
switch knownStatus {
case steadyStateStatus:
return apicontainerstatus.ContainerRunning
case apicontainerstatus.ContainerManifestPulled:
return apicontainerstatus.ContainerManifestPulled
case apicontainerstatus.ContainerStopped:
return apicontainerstatus.ContainerStopped
default:
return apicontainerstatus.ContainerStatusNone
}
}
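For clarity, here is a minimal test-style sketch (not part of this PR) of the mappings produced by `containerStatusChangeStatus` above, assuming `ContainerRunning` is the container's steady-state status:

```go
func TestContainerStatusChangeStatus(t *testing.T) {
	steady := apicontainerstatus.ContainerRunning
	// MANIFEST_PULLED and STOPPED pass through unchanged.
	assert.Equal(t, apicontainerstatus.ContainerManifestPulled,
		containerStatusChangeStatus(apicontainerstatus.ContainerManifestPulled, steady))
	assert.Equal(t, apicontainerstatus.ContainerStopped,
		containerStatusChangeStatus(apicontainerstatus.ContainerStopped, steady))
	// The steady-state status maps to RUNNING.
	assert.Equal(t, apicontainerstatus.ContainerRunning,
		containerStatusChangeStatus(apicontainerstatus.ContainerRunning, steady))
	// Anything else maps to NONE and is not reported.
	assert.Equal(t, apicontainerstatus.ContainerStatusNone,
		containerStatusChangeStatus(apicontainerstatus.ContainerPulled, steady))
}
```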

// NewManagedAgentChangeEvent creates a new managedAgent change event to convey managed agent state changes
// returns error if the state change doesn't need to be sent to the ECS backend.
func NewManagedAgentChangeEvent(task *apitask.Task, cont *apicontainer.Container, managedAgentName string, reason string) (ManagedAgentStateChange, error) {
@@ -322,24 +361,6 @@ func (change *TaskStateChange) SetTaskTimestamps() {
}
}

// ShouldBeReported checks if the statechange should be reported to backend
func (change *TaskStateChange) ShouldBeReported() bool {
// Events that should be reported:
// 1. Normal task state change: RUNNING/STOPPED
// 2. Container state change, with task status in CREATED/RUNNING/STOPPED
// The task timestamp will be sent in both of the event type
// TODO Move the Attachment statechange check into this method
if change.Status == apitaskstatus.TaskRunning || change.Status == apitaskstatus.TaskStopped {
return true
}

if len(change.Containers) != 0 {
return true
}

return false
}

func (change *TaskStateChange) ToFields() logger.Fields {
fields := logger.Fields{
"eventType": "TaskStateChange",
@@ -494,19 +515,23 @@ func buildContainerStateChangePayload(change ContainerStateChange) (*ecsmodel.Co
statechange.ImageDigest = aws.String(change.ImageDigest)
}

stat := change.Status.String()
if stat != apicontainerstatus.ContainerStopped.String() && stat != apicontainerstatus.ContainerRunning.String() {
// TODO: This check already exists in NewContainerStateChangeEvent and shouldn't be repeated here; remove after verifying
stat := change.Status
if stat != apicontainerstatus.ContainerManifestPulled &&
stat != apicontainerstatus.ContainerStopped &&
stat != apicontainerstatus.ContainerRunning {
logger.Warn("Not submitting unsupported upstream container state", logger.Fields{
field.Status: stat,
field.ContainerName: change.ContainerName,
field.TaskARN: change.TaskArn,
})
return nil, nil
}
if stat == "DEAD" {
stat = apicontainerstatus.ContainerStopped.String()
// TODO: This check is probably redundant as the String() method never returns "DEAD"; remove after verifying
if stat.String() == "DEAD" {
stat = apicontainerstatus.ContainerStopped
}
statechange.Status = aws.String(stat)
statechange.Status = aws.String(stat.BackendStatusString())

if change.ExitCode != nil {
exitCode := int64(aws.IntValue(change.ExitCode))