Skip to content

Commit

Permalink
fix: Use GetCG to retrieve InstanceView info (#476)
Browse files Browse the repository at this point in the history
Signed-off-by: Heba Elayoty <hebaelayoty@gmail.com>
  • Loading branch information
helayoty authored Mar 3, 2023
1 parent 8df3ef3 commit 095e7a7
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 73 deletions.
28 changes: 14 additions & 14 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ require (
github.com/golang/mock v1.6.0
github.com/google/uuid v1.1.2
github.com/gorilla/mux v1.7.3
github.com/gorilla/websocket v1.4.1
github.com/gorilla/websocket v1.4.2
github.com/onsi/gomega v1.16.0
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pkg/errors v0.9.1
Expand All @@ -30,9 +30,9 @@ require (
go.opencensus.io v0.22.3
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
gotest.tools v2.2.0+incompatible
k8s.io/api v0.20.0
k8s.io/apimachinery v0.20.0
k8s.io/client-go v0.20.0
k8s.io/api v0.21.0
k8s.io/apimachinery v0.21.0
k8s.io/client-go v0.21.0
)

require (
Expand All @@ -52,8 +52,7 @@ require (
github.com/census-instrumentation/opencensus-proto v0.2.1 // indirect
github.com/cespare/xxhash/v2 v2.1.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/docker/spdystream v0.0.0-20170912183627-bc6354cbbc29 // indirect
github.com/go-logr/logr v0.3.0 // indirect
github.com/go-logr/logr v0.4.0 // indirect
github.com/go-openapi/jsonpointer v0.19.3 // indirect
github.com/go-openapi/jsonreference v0.19.3 // indirect
github.com/go-openapi/spec v0.19.3 // indirect
Expand All @@ -65,7 +64,7 @@ require (
github.com/google/go-cmp v0.5.5 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/googleapis/gnostic v0.5.1 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.9.5 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.14.6 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/imdario/mergo v0.3.10 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
Expand All @@ -75,6 +74,7 @@ require (
github.com/mailru/easyjson v0.7.0 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/moby/spdystream v0.2.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4 // indirect
Expand All @@ -91,22 +91,22 @@ require (
golang.org/x/sys v0.5.0 // indirect
golang.org/x/term v0.5.0 // indirect
golang.org/x/text v0.7.0 // indirect
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e // indirect
google.golang.org/api v0.20.0 // indirect
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba // indirect
google.golang.org/api v0.25.0 // indirect
google.golang.org/appengine v1.6.6 // indirect
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect
google.golang.org/grpc v1.27.1 // indirect
google.golang.org/genproto v0.0.0-20200527145253-8367513e4ece // indirect
google.golang.org/grpc v1.29.1 // indirect
google.golang.org/protobuf v1.26.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20220521103104-8f96da9f5d5e // indirect
k8s.io/apiserver v0.19.10 // indirect
k8s.io/component-base v0.19.10 // indirect
k8s.io/klog v1.0.0 // indirect
k8s.io/klog/v2 v2.4.0 // indirect
k8s.io/kube-openapi v0.0.0-20201113171705-d219536bb9fd // indirect
k8s.io/klog/v2 v2.8.0 // indirect
k8s.io/kube-openapi v0.0.0-20210305001622-591a79e4bda7 // indirect
k8s.io/utils v0.0.0-20201110183641-67b214c5f920 // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.15 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.0.3 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.1.0 // indirect
sigs.k8s.io/yaml v1.2.0 // indirect
)
87 changes: 60 additions & 27 deletions go.sum

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion pkg/client/client_apis.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/pkg/errors"
"github.com/virtual-kubelet/azure-aci/pkg/auth"
"github.com/virtual-kubelet/azure-aci/pkg/validation"
"github.com/virtual-kubelet/virtual-kubelet/errdefs"
"github.com/virtual-kubelet/virtual-kubelet/log"
"github.com/virtual-kubelet/virtual-kubelet/node/api"
"github.com/virtual-kubelet/virtual-kubelet/trace"
Expand Down Expand Up @@ -105,8 +106,9 @@ func (a *AzClientsAPIs) GetContainerGroup(ctx context.Context, resourceGroup, co
if err != nil {
if rawResponse.StatusCode == http.StatusNotFound {
logger.Errorf("failed to query Container Group %s, not found", containerGroupName)
return nil, err
return nil, errdefs.NotFound("cg is not found")
}
logger.Errorf("an error has occurred while getting container group info %s, status code %d", containerGroupName, rawResponse.StatusCode)
return nil, err
}

Expand Down Expand Up @@ -154,6 +156,9 @@ func (a *AzClientsAPIs) GetContainerGroupInfo(ctx context.Context, resourceGroup

response, err := a.ContainerGroupClient.Get(ctxWithResp, resourceGroup, cgName, nil)
if err != nil {
if rawResponse != nil && rawResponse.StatusCode == http.StatusNotFound {
return nil, errdefs.NotFound("cg is not found")
}
logger.Errorf("an error has occurred while getting container group info %s, status code %d", cgName, rawResponse.StatusCode)
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/featureflag/feature_flag.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
)

const (
InitContainerFeature = "init-container"
InitContainerFeature = "init-container"
ConfidentialComputeFeature = "confidential-compute"
)

Expand Down
63 changes: 50 additions & 13 deletions pkg/provider/aci.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ const (
)

const (
confidentialComputeSkuLabel = "virtual-kubelet.io/container-sku"
confidentialComputeSkuLabel = "virtual-kubelet.io/container-sku"
confidentialComputeCcePolicyLabel = "virtual-kubelet.io/confidential-compute-cce-policy"
)

Expand Down Expand Up @@ -680,25 +680,62 @@ func (p *ACIProvider) GetPods(ctx context.Context) ([]*v1.Pod, error) {
pods := make([]*v1.Pod, 0, len(cgs))

for cgIndex := range cgs {
err = validation.ValidateContainerGroup(ctx, cgs[cgIndex])
cgName := cgs[cgIndex].Name
if cgName == nil {
continue
}
// The GetContainerGroupListResult API doesn't return InstanceView status which can cause nil.
// For that, we had to get the CG info one more time.
cg, err := p.azClientsAPIs.GetContainerGroup(ctx, p.resourceGroup, *cgName)
// CG might get deleted between the getlist and get calls
if errdefs.IsNotFound(err) || cg == nil {
continue
}
if err != nil {
return nil, err
log.G(ctx).WithFields(log.Fields{
"name": *cgName,
"id": *cg.ID,
}).WithError(err).Errorf("error getting container group %s", *cgName)
continue
}

if cgs[cgIndex].Tags["NodeName"] != &p.nodeName {
err2 := validation.ValidateContainerGroup(ctx, cg)
if err2 != nil {
log.G(ctx).WithFields(log.Fields{
"name": *cgName,
"id": *cg.ID,
}).WithError(err2).Errorf("error validating container group %s", *cgName)
continue
}

pod, err := p.containerGroupToPod(ctx, cgs[cgIndex])
if err != nil {
if cg.Tags != nil && cg.Tags["NodeName"] != nil {
if *cg.Tags["NodeName"] != p.nodeName {
log.G(ctx).WithFields(log.Fields{
"name": *cgName,
"id": *cg.ID,
}).Warnf("container group %s node name does not match %s", *cgName, p.nodeName)
continue
}
} else {
log.G(ctx).WithFields(log.Fields{
"name": cgs[cgIndex].Name,
"id": cgs[cgIndex].ID,
}).WithError(err).Errorf("error converting container group %s to pod", cgs[cgIndex].Name)
"name": *cgName,
"id": *cg.ID,
}).Warnf("container group %s node name should not be nil", *cgName)
continue
}

pod, err3 := p.containerGroupToPod(ctx, cg)
if err3 != nil {
log.G(ctx).WithFields(log.Fields{
"name": *cgName,
"id": *cg.ID,
}).WithError(err3).Errorf("error converting container group %s to pod", *cgName)
continue
}
pods = append(pods, pod)

if pod != nil {
pods = append(pods, pod)
}
}

return pods, nil
Expand Down Expand Up @@ -1135,14 +1172,14 @@ func (p *ACIProvider) setConfidentialComputeProperties(ctx context.Context, pod
if ccePolicy != "" {
cg.Properties.SKU = &confidentialSku
confidentialComputeProperties := azaciv2.ConfidentialComputeProperties{
CcePolicy : &ccePolicy,
CcePolicy: &ccePolicy,
}
cg.Properties.ConfidentialComputeProperties = &confidentialComputeProperties
l.Infof("setting confidential compute properties with CCE Policy")
l.Infof("setting confidential compute properties with CCE Policy")

} else if strings.ToLower(containerGroupSku) == "confidential" {
cg.Properties.SKU = &confidentialSku
l.Infof("setting confidential container group SKU")
l.Infof("setting confidential container group SKU")
}

l.Infof("no annotations for confidential SKU")
Expand Down
8 changes: 4 additions & 4 deletions pkg/provider/aci_confidential_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import (
func TestCreatePodWithConfidentialComputeProperties(t *testing.T) {

initContainerName1 := "init-container-1"
ccePolicyString := "eyJhbGxvd19hbGwiOiB0cnVlLCAiY29udGFpbmVycyI6IHsibGVuZ3RoIjogMCwgImVsZW1lbnRzIjogbnVsbH19"
mockCtrl := gomock.NewController(t)
ccePolicyString := "eyJhbGxvd19hbGwiOiB0cnVlLCAiY29udGFpbmVycyI6IHsibGVuZ3RoIjogMCwgImVsZW1lbnRzIjogbnVsbH19"
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
aciMocks := createNewACIMock()
aciMocks.MockCreateContainerGroup = func(ctx context.Context, resourceGroup, podNS, podName string, cg *azaciv2.ContainerGroup) error {
Expand All @@ -32,7 +32,7 @@ func TestCreatePodWithConfidentialComputeProperties(t *testing.T) {
assert.Check(t, cg != nil, "Container group is nil")
assert.Check(t, containers != nil, "Containers should not be nil")
assert.Check(t, initContainers != nil, "Container group is nil")
if (len(initContainers) > 0) {
if len(initContainers) > 0 {
assert.Check(t, is.Equal(len(containers), 2), "2 Containers are expected")
assert.Check(t, is.Equal(len(initContainers), 1), "2 init containers are expected")
assert.Check(t, initContainers[0].Properties.VolumeMounts != nil, "Volume mount should be present")
Expand All @@ -41,7 +41,7 @@ func TestCreatePodWithConfidentialComputeProperties(t *testing.T) {
assert.Check(t, initContainers[0].Properties.Image != nil, "Image should be present")
assert.Check(t, *initContainers[0].Name == initContainerName1, "Name should be correct")
}
if (confidentialComputeProperties != nil) {
if confidentialComputeProperties != nil {
assert.Check(t, confidentialComputeProperties.CcePolicy != nil, "CCE policy should not be nil")
assert.Check(t, *confidentialComputeProperties.CcePolicy == ccePolicyString, "CCE policy should match")
}
Expand Down
30 changes: 26 additions & 4 deletions pkg/provider/aci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,30 @@ func TestGetPodsWithoutResourceRequestsLimits(t *testing.T) {
result = append(result, cg)
return result, nil
}
aciMocks.MockGetContainerGroupInfo =
func(ctx context.Context, resourceGroup, namespace, name, nodeName string) (*azaciv2.ContainerGroup, error) {
node := fakeNodeName
provisioning := "Creating"
return &azaciv2.ContainerGroup{
ID: &cgName,
Name: &cgName,
Tags: map[string]*string{
"CreationTimestamp": &creationTime,
"PodName": &cgName,
"Namespace": &cgName,
"ClusterName": &node,
"NodeName": &node,
"UID": &cgName,
},
Properties: &azaciv2.ContainerGroupPropertiesProperties{
ProvisioningState: &provisioning,
Containers: testsutil.CreateACIContainersListObj(runningState, "Initializing", testsutil.CgCreationTime.Add(time.Second*2), testsutil.CgCreationTime.Add(time.Second*3), true, false, false),
InstanceView: &azaciv2.ContainerGroupPropertiesInstanceView{
State: &runningState,
},
},
}, nil
}

provider, err := createTestProvider(aciMocks, nil)
if err != nil {
Expand Down Expand Up @@ -448,7 +472,7 @@ func TestGetPodWithoutResourceRequestsLimits(t *testing.T) {
testsutil.CreateACIContainersListObj(runningState, "Initializing",
testsutil.CgCreationTime.Add(time.Second*2),
testsutil.CgCreationTime.Add(time.Second*3),
false, false, false), "Succeeded"), nil
true, true, true), "Succeeded"), nil
}

aciMocks.MockGetContainerGroupList = func(ctx context.Context, resourceGroup string) ([]*azaciv2.ContainerGroup, error) {
Expand Down Expand Up @@ -584,7 +608,6 @@ func createTestProvider(aciMocks *MockACIProvider, resourceManager *manager.Reso
if err != nil {
return nil, err
}

err = os.Setenv("ACI_RESOURCE_GROUP", fakeResourceGroup)
if err != nil {
return nil, err
Expand Down Expand Up @@ -958,7 +981,6 @@ func TestCreatedPodWithContainerPort(t *testing.T) {

err = provider.CreatePod(context.Background(), pod)
assert.Check(t, err == nil, "Not expected to return error")

})
}
}
Expand Down Expand Up @@ -986,7 +1008,7 @@ func TestGetPodWithContainerID(t *testing.T) {
aciMocks.MockGetContainerGroupInfo = func(ctx context.Context, resourceGroup, namespace, name, nodeName string) (*azaciv2.ContainerGroup, error) {

cg := testsutil.CreateContainerGroupObj(podName, podNamespace, "Succeeded",
testsutil.CreateACIContainersListObj(runningState, "Initializing", testsutil.CgCreationTime.Add(time.Second*2), testsutil.CgCreationTime.Add(time.Second*3), false, false, false), "Succeeded")
testsutil.CreateACIContainersListObj(runningState, "Initializing", testsutil.CgCreationTime.Add(time.Second*2), testsutil.CgCreationTime.Add(time.Second*3), true, true, true), "Succeeded")
cgID = *cg.ID
return cg, nil
}
Expand Down
19 changes: 14 additions & 5 deletions pkg/provider/containergroup_to_pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,23 @@ import (
"github.com/virtual-kubelet/azure-aci/pkg/tests"
"github.com/virtual-kubelet/azure-aci/pkg/util"
"github.com/virtual-kubelet/azure-aci/pkg/validation"
errdef "github.com/virtual-kubelet/virtual-kubelet/errdefs"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func (p *ACIProvider) containerGroupToPod(ctx context.Context, cg *azaciv2.ContainerGroup) (*v1.Pod, error) {
//cg is validated
pod, err := p.resourceManager.GetPod(*cg.Name, *cg.Tags["Namespace"])
pod, err := p.resourceManager.GetPod(*cg.Tags["PodName"], *cg.Tags["Namespace"])
// in case pod got deleted, we want to continue the workflow to kick off clean dangling pods
if errdef.IsNotFound(err) || pod == nil {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: *cg.Tags["PodName"],
Namespace: *cg.Tags["Namespace"],
},
}, nil
}
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -220,17 +230,16 @@ func getACIResourceMetaFromContainerGroup(cg *azaciv2.ContainerGroup) (*string,
// Use the Provisioning State if it's not Succeeded,
// otherwise use the state of the instance.
aciState := cg.Properties.ProvisioningState
if *aciState == "Succeeded" {
if aciState != nil && (*aciState == "Succeeded") {
aciState = cg.Properties.InstanceView.State
}

var creationTime time.Time

// cg tags is validated
ts := *cg.Tags["CreationTimestamp"]

if ts != "" {
t, err := time.Parse(tests.TimeLayout, ts)
if cg.Tags != nil && cg.Tags["CreationTimestamp"] != nil {
t, err := time.Parse(tests.TimeLayout, *cg.Tags["CreationTimestamp"])
if err != nil {
return nil, time.Now(), errors.Errorf("unable to parse the creation timestamp for container group %s", *cg.Name)
}
Expand Down
12 changes: 8 additions & 4 deletions pkg/validation/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ func ValidateContainer(ctx context.Context, container *azaciv2.Container) error
if container.Name == nil {
return errors.Errorf("container name cannot be nil")
}
if container.Properties == nil {
return errors.Errorf("container %s properties cannot be nil", *container.Name)
}
if container.Properties.Ports == nil {
return errors.Errorf("container %s Ports cannot be nil", *container.Name)
}
if container.Properties.Image == nil {
return errors.Errorf("container %s Image cannot be nil", *container.Name)
}
if container.Properties == nil {
return errors.Errorf("container %s properties cannot be nil", *container.Name)
}
if container.Properties.InstanceView == nil {
return errors.Errorf("container %s properties InstanceView cannot be nil", *container.Name)
}
Expand Down Expand Up @@ -73,7 +73,11 @@ func ValidateContainerGroup(ctx context.Context, cg *azaciv2.ContainerGroup) err
if cg.Properties.OSType != nil &&
*cg.Properties.OSType != azaciv2.OperatingSystemTypesWindows {
if cg.Properties.IPAddress == nil {
return errors.Errorf("IPAddress cannot be nil for container group %s", *cg.Name)
// In some use cases, ACI sets IPAddress as nil which can cause issues. We have to patch the struct to make the workflow continue.
emptyIP := ""
cg.Properties.IPAddress = &azaciv2.IPAddress{
IP: &emptyIP,
}
} else {
if cg.Properties.ProvisioningState == nil {
return errors.Errorf("ProvisioningState cannot be nil for container group %s", *cg.Name)
Expand Down

0 comments on commit 095e7a7

Please sign in to comment.