diff --git a/pkg/autoscaler/scaling/autoscaler.go b/pkg/autoscaler/scaling/autoscaler.go
index 01f815f5d942..73144843f3cc 100644
--- a/pkg/autoscaler/scaling/autoscaler.go
+++ b/pkg/autoscaler/scaling/autoscaler.go
@@ -169,7 +169,13 @@ func (a *autoscaler) Scale(logger *zap.SugaredLogger, now time.Time) ScaleResult
 		} else {
 			logger.Errorw("Failed to obtain metrics", zap.Error(err))
 		}
-		return invalidSR
+		if errors.Is(err, metrics.ErrNoData) && !a.panicTime.IsZero() && a.panicTime.Add(spec.StableWindow).Before(now) {
+			// We are not receiving data, but we panicked earlier and the panic stable window has passed,
+			// so we continue: un-panic and scale down to 0 if the rest of the conditions allow it.
+			logger.Debug("No data to scale on yet, and panicked before, but the stable window has gone, we want to continue")
+		} else {
+			return invalidSR
+		}
 	}
 
 	// Make sure we don't get stuck with the same number of pods, if the scale up rate
diff --git a/pkg/reconciler/autoscaling/kpa/scaler.go b/pkg/reconciler/autoscaling/kpa/scaler.go
index 5cb95f2fc719..245148f60bc8 100644
--- a/pkg/reconciler/autoscaling/kpa/scaler.go
+++ b/pkg/reconciler/autoscaling/kpa/scaler.go
@@ -19,6 +19,7 @@ package kpa
 import (
 	"context"
 	"fmt"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"net/http"
 	"time"
 
@@ -187,7 +188,9 @@ func (ks *scaler) handleScaleToZero(ctx context.Context, pa *autoscalingv1alpha1
 	}
 	cfgD := cfgs.Deployment
 	var activationTimeout time.Duration
-	if progressDeadline, ok := pa.ProgressDeadline(); ok {
+	if revTimeout, err := ks.revisionTimeout(ctx, pa); err == nil && revTimeout > 0 {
+		activationTimeout = time.Duration(revTimeout) * time.Second
+	} else if progressDeadline, ok := pa.ProgressDeadline(); ok {
 		activationTimeout = progressDeadline + activationTimeoutBuffer
 	} else {
 		activationTimeout = cfgD.ProgressDeadline + activationTimeoutBuffer
@@ -325,7 +328,6 @@ func (ks *scaler) applyScale(ctx context.Context, pa *autoscalingv1alpha1.PodAut
 func (ks *scaler) scale(ctx context.Context, pa *autoscalingv1alpha1.PodAutoscaler, sks *netv1alpha1.ServerlessService, desiredScale int32) (int32, error) {
 	asConfig := config.FromContext(ctx).Autoscaler
 	logger := logging.FromContext(ctx)
-
 	if desiredScale < 0 && !pa.Status.IsActivating() {
 		logger.Debug("Metrics are not yet being collected.")
 		return desiredScale, nil
@@ -370,3 +372,20 @@ func (ks *scaler) scale(ctx context.Context, pa *autoscalingv1alpha1.PodAutoscal
 	logger.Infof("Scaling from %d to %d", currentScale, desiredScale)
 	return desiredScale, ks.applyScale(ctx, pa, desiredScale, ps)
 }
+func (ks *scaler) revisionTimeout(ctx context.Context, pa *autoscalingv1alpha1.PodAutoscaler) (int64, error) {
+	gvr := schema.GroupVersionResource{
+		Group:    "serving.knative.dev",
+		Version:  "v1",
+		Resource: "revisions",
+	}
+	uRevision, err := ks.dynamicClient.Resource(gvr).Namespace(pa.Namespace).Get(ctx, pa.Name, metav1.GetOptions{})
+	if err == nil {
+		var timeoutSeconds interface{}
+		var found bool
+		timeoutSeconds, found, err = unstructured.NestedFieldNoCopy(uRevision.Object, "spec", "timeoutSeconds")
+		if err == nil && found {
+			return timeoutSeconds.(int64), nil
+		}
+	}
+	return -1, err
+}
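
The revisionTimeout helper in the patch reads spec.timeoutSeconds with NestedFieldNoCopy and an unchecked type assertion (timeoutSeconds.(int64)), which would panic if the field ever decoded to a different type. Below is a minimal sketch of a more defensive variant, written as a free function over a dynamic.Interface rather than the scaler receiver; the names revisionTimeoutSeconds and revisionGVR and the dc parameter are illustrative only and not part of the patch.

package kpa

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/dynamic"
)

// revisionGVR identifies Revision objects in the serving.knative.dev/v1 API.
var revisionGVR = schema.GroupVersionResource{
	Group:    "serving.knative.dev",
	Version:  "v1",
	Resource: "revisions",
}

// revisionTimeoutSeconds fetches the named Revision through the dynamic client
// and returns its spec.timeoutSeconds. NestedInt64 reports found=false when the
// field is absent and an error when it holds a non-int64 value, so no unchecked
// type assertion is needed.
func revisionTimeoutSeconds(ctx context.Context, dc dynamic.Interface, namespace, name string) (int64, error) {
	uRevision, err := dc.Resource(revisionGVR).Namespace(namespace).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return -1, err
	}
	timeoutSeconds, found, err := unstructured.NestedInt64(uRevision.Object, "spec", "timeoutSeconds")
	if err != nil {
		return -1, err
	}
	if !found {
		return -1, fmt.Errorf("revision %s/%s has no spec.timeoutSeconds", namespace, name)
	}
	return timeoutSeconds, nil
}

The same idea folds back into the scaler method unchanged from the caller's point of view: swapping NestedFieldNoCopy plus the assertion for unstructured.NestedInt64 keeps the err == nil && revTimeout > 0 check in handleScaleToZero working as before.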
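
For the autoscaler change, the new branch in Scale only continues without data when three things hold at once: the metrics error is specifically metrics.ErrNoData, a panic is in progress (panicTime is set), and a full stable window has elapsed since the panic started. The self-contained predicate below expresses the same condition; the name shouldUnpanicWithoutData and the errNoData stand-in are illustrative, not part of the patch.

package scaling

import (
	"errors"
	"time"
)

// errNoData stands in for metrics.ErrNoData from the real metrics package; it
// only exists to keep this sketch self-contained.
var errNoData = errors.New("no data available")

// shouldUnpanicWithoutData reports whether the autoscaler may keep going (and
// eventually scale to zero) even though no metrics were collected: the error
// must be the "no data" case, a panic must be in effect (panicTime non-zero),
// and the stable window since the panic started must have fully elapsed.
func shouldUnpanicWithoutData(err error, panicTime time.Time, stableWindow time.Duration, now time.Time) bool {
	return errors.Is(err, errNoData) &&
		!panicTime.IsZero() &&
		panicTime.Add(stableWindow).Before(now)
}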