Commit

feat: exclude evicted pod log from support bundle (#292)

Elsie4ever authored Aug 5, 2024
1 parent 8cd8913 commit 64c9d20
Showing 16 changed files with 211 additions and 66 deletions.
3 changes: 3 additions & 0 deletions azext_edge/edge/_help.py
@@ -59,6 +59,9 @@ def load_iotops_help():
- {COMPAT_CLUSTER_CONFIG_APIS.as_str()}
- {COMPAT_DATAFLOW_APIS.as_str()}
Note: logs from evicted pods will not be captured, as they are inaccessible. For details
on why a pod was evicted, please refer to the related pod and node files.
examples:
- name: Basic usage with default options. This form of the command will auto detect IoT Operations APIs and build a suitable bundle
capturing the last 24 hours of container logs. The bundle will be produced in the current working directory.
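For readers following the help note above: eviction details stay available on the pod object itself, so the bundle's pod YAML (and the node files) still explain why a pod was removed. A minimal sketch of reading that status directly with the kubernetes Python client — assuming a configured kubeconfig; the pod and namespace names are placeholders, not values used by the extension:

from kubernetes import client, config

# Illustrative sketch only: inspect an evicted pod's status directly.
# "my-evicted-pod" and "azure-iot-operations" are placeholder values.
config.load_kube_config()
v1 = client.CoreV1Api()
pod = v1.read_namespaced_pod(name="my-evicted-pod", namespace="azure-iot-operations")
status = pod.status
if status and status.phase == "Failed" and (status.reason or "").lower() == "evicted":
    # status.message usually records the eviction cause, e.g. node memory pressure
    print(status.reason, status.message)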
29 changes: 18 additions & 11 deletions azext_edge/edge/providers/support/base.py
@@ -8,7 +8,7 @@
from typing import List, Dict, Optional, Iterable, Tuple, TypeVar, Union
from functools import partial

from azext_edge.edge.common import BundleResourceKind
from azext_edge.edge.common import BundleResourceKind, PodState
from knack.log import get_logger
from kubernetes.client.exceptions import ApiException
from kubernetes.client.models import (
@@ -34,6 +34,7 @@
generic = client.ApiClient()

DAY_IN_SECONDS: int = 60 * 60 * 24
POD_STATUS_FAILED_EVICTED: str = "evicted"

K8sRuntimeResources = TypeVar(
"K8sRuntimeResources",
@@ -137,17 +138,23 @@ def process_v1_pods(
init_pod_containers: List[V1Container] = pod_spec.init_containers
pod_containers.extend(init_pod_containers)

processed.extend(
_capture_pod_container_logs(
directory_path=directory_path,
pod_containers=pod_containers,
pod_name=pod_name,
pod_namespace=pod_namespace,
v1_api=v1_api,
since_seconds=since_seconds,
capture_previous_logs=capture_previous_logs,
# exclude evicted pods from log capture since they are not accessible
pod_status = pod.status
if pod_status and pod_status.phase == PodState.failed.value and\
str(pod_status.reason).lower() == POD_STATUS_FAILED_EVICTED:
logger.info(f"Pod {pod_name} in namespace {pod_namespace} is evicted. Skipping log capture.")
else:
processed.extend(
_capture_pod_container_logs(
directory_path=directory_path,
pod_containers=pod_containers,
pod_name=pod_name,
pod_namespace=pod_namespace,
v1_api=v1_api,
since_seconds=since_seconds,
capture_previous_logs=capture_previous_logs,
)
)
)

if include_metrics:
try:
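In short, the change above wraps the existing _capture_pod_container_logs call in a status guard. Restated as a standalone predicate with the literals inlined (mirroring PodState.failed.value and POD_STATUS_FAILED_EVICTED), the check amounts to the following sketch:

from kubernetes.client.models import V1Pod

def is_evicted(pod: V1Pod) -> bool:
    # "Failed"/"evicted" stand in for PodState.failed.value and POD_STATUS_FAILED_EVICTED.
    status = pod.status
    return bool(
        status
        and status.phase == "Failed"
        and str(status.reason).lower() == "evicted"
    )

# process_v1_pods only calls _capture_pod_container_logs when is_evicted(pod) is False,
# so read_namespaced_pod_log is never attempted for an evicted pod.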
4 changes: 3 additions & 1 deletion azext_edge/tests/edge/checks/int/helpers.py
@@ -74,7 +74,9 @@ def assert_eval_core_service_runtime(
find_extra_or_missing_names(
resource_type="pods",
result_names=results,
expected_names=kubectl_pods.keys()
expected_names=kubectl_pods.keys(),
ignore_extras=True,
ignore_missing=True
)

for pod in kubectl_pods:
22 changes: 18 additions & 4 deletions azext_edge/tests/edge/support/conftest.py
@@ -16,7 +16,7 @@
def add_pod_to_mocked_pods(
mocked_client, expected_pod_map, mock_names: List[str] = None, mock_init_containers: bool = False
):
from kubernetes.client.models import V1PodList, V1Pod, V1PodSpec, V1ObjectMeta, V1Container
from kubernetes.client.models import V1PodList, V1Pod, V1PodSpec, V1PodStatus, V1ObjectMeta, V1Container

current_pods = mocked_client.CoreV1Api().list_pod_for_all_namespaces.return_value
pod_list = current_pods.items
@@ -28,7 +28,8 @@ def add_pod_to_mocked_pods(
for pod_name in mock_names:
container_name = generate_random_string()
spec = V1PodSpec(containers=[V1Container(name=container_name)])
pod = V1Pod(metadata=V1ObjectMeta(namespace=namespace, name=pod_name), spec=spec)
status = V1PodStatus(phase="Running")
pod = V1Pod(metadata=V1ObjectMeta(namespace=namespace, name=pod_name), spec=spec, status=status)

if mock_init_containers:
pod.spec.init_containers = [V1Container(name="mock-init-container")]
@@ -163,7 +164,7 @@ def _handle_resource_call(*args, **kwargs):
# TODO - @digimaun make this more useful / flexible configuration.
@pytest.fixture
def mocked_list_pods(mocked_client):
from kubernetes.client.models import V1PodList, V1Pod, V1PodSpec, V1ObjectMeta, V1Container
from kubernetes.client.models import V1PodList, V1Pod, V1PodSpec, V1PodStatus, V1ObjectMeta, V1Container

expected_pod_map = {}
namespaces = [generate_random_string()]
@@ -175,10 +176,23 @@ def mocked_list_pods(mocked_client):
for pod_name in pod_names:
container_name = generate_random_string()
spec = V1PodSpec(containers=[V1Container(name=container_name)])
pod = V1Pod(metadata=V1ObjectMeta(namespace=namespace, name=pod_name), spec=spec)
status = V1PodStatus(phase="Running")
pod = V1Pod(metadata=V1ObjectMeta(namespace=namespace, name=pod_name), spec=spec, status=status)
pods.append(pod)
expected_pod_map[namespace][pod_name] = {container_name: mock_log}

# add evicted pod for testing
evicted_pod_name = "evicted_pod"
evicted_pod_spec = V1PodSpec(containers=[V1Container(name=generate_random_string())])
evicted_pod_status = V1PodStatus(phase="Failed", reason="Evicted")
evicted_pod = V1Pod(
metadata=V1ObjectMeta(namespace=namespace, name=evicted_pod_name),
spec=evicted_pod_spec,
status=evicted_pod_status
)
pods.append(evicted_pod)
expected_pod_map[namespace][evicted_pod_name] = {evicted_pod.spec.containers[0].name: mock_log}

pods_list = V1PodList(items=pods)
mocked_client.CoreV1Api().list_pod_for_all_namespaces.return_value = pods_list
mocked_client.CoreV1Api().read_namespaced_pod_log.return_value = mock_log
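With the additions above, the mocked_list_pods fixture now seeds one Failed/Evicted pod alongside the running ones. A hypothetical check (not part of this commit) of what the fixture registers on the mocked client:

def test_fixture_seeds_one_evicted_pod(mocked_client, mocked_list_pods):
    # Hypothetical sketch, not part of this commit: confirm the fixture registers
    # exactly one evicted pod on the mocked CoreV1Api pod list.
    pods = mocked_client.CoreV1Api().list_pod_for_all_namespaces.return_value.items
    evicted = [
        p for p in pods
        if p.status and p.status.phase == "Failed" and p.status.reason == "Evicted"
    ]
    assert [p.metadata.name for p in evicted] == ["evicted_pod"]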
33 changes: 29 additions & 4 deletions azext_edge/tests/edge/support/create_bundle_int/helpers.py
@@ -5,7 +5,7 @@
# ----------------------------------------------------------------------------------------------

from knack.log import get_logger
from typing import Dict, List, NamedTuple, Optional, Union
from typing import Dict, List, NamedTuple, Optional, Tuple, Union
from os import path
from zipfile import ZipFile
import pytest
@@ -137,6 +137,7 @@ def check_workload_resource_files(
file_objs: Dict[str, List[Dict[str, str]]],
expected_workload_types: List[str],
prefixes: Union[str, List[str]],
bundle_path: str,
optional_workload_types: Optional[List[str]] = None,
):
if "pod" in expected_workload_types:
@@ -169,7 +170,14 @@
converted_file[file["descriptor"]] = False

expected_pods = get_kubectl_workload_items(prefixes, service_type="pod")
find_extra_or_missing_names("pod", file_pods.keys(), expected_pods.keys())
check_log_for_evicted_pods(bundle_path, file_objs.get("pod", []))
find_extra_or_missing_names(
resource_type="pod",
result_names=file_pods.keys(),
expected_names=expected_pods.keys(),
ignore_extras=True,
ignore_missing=True
)

for name, files in file_pods.items():
for extension, value in files.items():
@@ -193,6 +201,23 @@ def _check_non_pod_files(workload_types: List[str], required: bool = False):
_check_non_pod_files(optional_workload_types, required=False)


def check_log_for_evicted_pods(bundle_dir: str, file_pods: List[Dict[str, str]]):
# open the file using bundle_dir and check for evicted pods
name_extension_pair = list(set([(file["name"], file["extension"]) for file in file_pods]))
# TODO: upcoming fix will get file content earlier
with ZipFile(bundle_dir, 'r') as zip:
file_names = zip.namelist()
for name, extension in name_extension_pair:
if extension == "log":
# find file path in file_names that has name and extension
file_path = next((file for file in file_names if file.endswith(name + ".yaml")), None)
if not file_path:
continue
with zip.open(file_path) as pod_content:
log_content = pod_content.read().decode("utf-8")
assert "Evicted" not in log_content, f"Evicted pod {name} log found in bundle."


def get_file_map(
walk_result: Dict[str, Dict[str, List[str]]],
ops_service: str,
@@ -286,7 +311,7 @@ def _get_namespace_determinating_files(
def run_bundle_command(
command: str,
tracked_files: List[str],
) -> Dict[str, Dict[str, List[str]]]:
) -> Tuple[Dict[str, Dict[str, List[str]]], str]:
result = run(command)
if not result:
pytest.skip("No bundle was created.")
@@ -324,7 +349,7 @@ def run_bundle_command(
# lastly add in the file (with the correct seperators)
walk_result[built_path]["files"].append(file_name)

return walk_result
return walk_result, result["bundlePath"]


def split_name(name: str) -> List[str]:
16 changes: 13 additions & 3 deletions azext_edge/tests/edge/support/create_bundle_int/test_akri_int.py
@@ -7,7 +7,12 @@
from knack.log import get_logger
from azext_edge.edge.common import OpsServiceType
from azext_edge.edge.providers.edge_api import AKRI_API_V0
from .helpers import check_custom_resource_files, check_workload_resource_files, get_file_map, run_bundle_command
from .helpers import (
check_custom_resource_files,
check_workload_resource_files,
get_file_map,
run_bundle_command
)

logger = get_logger(__name__)

@@ -16,7 +21,7 @@ def test_create_bundle_akri(init_setup, tracked_files):
"""Test for ensuring file names and content. ONLY CHECKS AKRI."""
ops_service = OpsServiceType.akri.value
command = f"az iot ops support create-bundle --ops-service {ops_service}"
walk_result = run_bundle_command(command=command, tracked_files=tracked_files)
walk_result, bundle_path = run_bundle_command(command=command, tracked_files=tracked_files)
file_map = get_file_map(walk_result, ops_service)["aio"]

check_custom_resource_files(
@@ -28,4 +33,9 @@
expected_types = set(expected_workload_types).union(AKRI_API_V0.kinds)
assert set(file_map.keys()).issubset(expected_types)

check_workload_resource_files(file_map, expected_workload_types, "aio-akri")
check_workload_resource_files(
file_objs=file_map,
expected_workload_types=expected_workload_types,
prefixes="aio-akri",
bundle_path=bundle_path
)
19 changes: 14 additions & 5 deletions azext_edge/tests/edge/support/create_bundle_int/test_auto_int.py
@@ -41,10 +41,10 @@ def test_create_bundle(init_setup, bundle_dir, mq_traces, ops_service, tracked_f
tracked_files.append(bundle_dir)
except FileExistsError:
pass
walk_result = run_bundle_command(command=command.format(ops_service), tracked_files=tracked_files)
walk_result, _ = run_bundle_command(command=command.format(ops_service), tracked_files=tracked_files)
# generate second bundle as close as possible
if ops_service != OpsServiceType.auto.value:
auto_walk_result = run_bundle_command(
auto_walk_result, _ = run_bundle_command(
command=command.format(OpsServiceType.auto.value),
tracked_files=tracked_files
)
@@ -87,7 +87,11 @@ def test_create_bundle(init_setup, bundle_dir, mq_traces, ops_service, tracked_f
auto_files = sorted(auto_walk_result[directory]["files"])
ser_files = sorted(walk_result[directory]["files"])
find_extra_or_missing_names(
f"auto bundle files not found in {ops_service} bundle", auto_files, ser_files, ignore_extras=True
resource_type=f"auto bundle files not found in {ops_service} bundle",
result_names=auto_files,
expected_names=ser_files,
ignore_extras=True,
ignore_missing=True
)


@@ -96,12 +100,17 @@ def test_create_bundle_otel(init_setup, tracked_files):
# dir for unpacked files
ops_service = OpsServiceType.auto.value
command = f"az iot ops support create-bundle --ops-service {ops_service}"
walk_result = run_bundle_command(command=command, tracked_files=tracked_files)
walk_result, bundle_path = run_bundle_command(command=command, tracked_files=tracked_files)
file_map = get_file_map(walk_result, "otel")["aio"]

expected_workload_types = ["deployment", "pod", "replicaset", "service"]
assert set(file_map.keys()).issubset(set(expected_workload_types))
check_workload_resource_files(file_map, expected_workload_types, "aio-otel")
check_workload_resource_files(
file_objs=file_map,
expected_workload_types=expected_workload_types,
prefixes="aio-otel",
bundle_path=bundle_path
)


def _get_expected_services(
@@ -7,7 +7,12 @@
from knack.log import get_logger
from azext_edge.edge.common import OpsServiceType
from azext_edge.edge.providers.edge_api import CLUSTER_CONFIG_API_V1
from .helpers import check_custom_resource_files, check_workload_resource_files, get_file_map, run_bundle_command
from .helpers import (
check_custom_resource_files,
check_workload_resource_files,
get_file_map,
run_bundle_command
)

logger = get_logger(__name__)

@@ -17,7 +22,7 @@ def test_create_bundle_billing(init_setup, tracked_files):
ops_service = OpsServiceType.billing.value
ops_service = "billing"
command = f"az iot ops support create-bundle --ops-service {ops_service}"
walk_result = run_bundle_command(command=command, tracked_files=tracked_files)
walk_result, bundle_path = run_bundle_command(command=command, tracked_files=tracked_files)
file_map = get_file_map(walk_result, ops_service)

# AIO
@@ -29,7 +34,12 @@
expected_workload_types = ["cronjob", "job", "pod"]
expected_types = set(expected_workload_types).union(CLUSTER_CONFIG_API_V1.kinds)
assert set(file_map["aio"].keys()).issubset(set(expected_types))
check_workload_resource_files(file_map["aio"], expected_workload_types, ["aio-usage"])
check_workload_resource_files(
file_objs=file_map["aio"],
expected_workload_types=expected_workload_types,
prefixes=["aio-usage"],
bundle_path=bundle_path
)

# USAGE
check_custom_resource_files(
@@ -40,4 +50,9 @@
expected_workload_types = ["deployment", "pod", "replicaset", "service"]
expected_types = set(expected_workload_types).union(CLUSTER_CONFIG_API_V1.kinds)
assert set(file_map["usage"].keys()).issubset(expected_types)
check_workload_resource_files(file_map["usage"], expected_workload_types, ["billing-operator"])
check_workload_resource_files(
file_objs=file_map["usage"],
expected_workload_types=expected_workload_types,
prefixes=["billing-operator"],
bundle_path=bundle_path
)
@@ -7,7 +7,12 @@
from knack.log import get_logger
from azext_edge.edge.common import OpsServiceType
from azext_edge.edge.providers.edge_api import DATAFLOW_API_V1B1
from .helpers import check_custom_resource_files, check_workload_resource_files, get_file_map, run_bundle_command
from .helpers import (
check_custom_resource_files,
check_workload_resource_files,
get_file_map,
run_bundle_command
)

logger = get_logger(__name__)

@@ -16,7 +21,7 @@ def test_create_bundle_dataflow(init_setup, tracked_files):
"""Test for ensuring file names and content. ONLY CHECKS dataflow."""
ops_service = OpsServiceType.dataflow.value
command = f"az iot ops support create-bundle --ops-service {ops_service}"
walk_result = run_bundle_command(command=command, tracked_files=tracked_files)
walk_result, bundle_path = run_bundle_command(command=command, tracked_files=tracked_files)
file_map = get_file_map(walk_result, ops_service)["aio"]

check_custom_resource_files(
Expand All @@ -31,4 +36,5 @@ def test_create_bundle_dataflow(init_setup, tracked_files):
file_objs=file_map,
expected_workload_types=expected_workload_types,
prefixes=["aio-dataflow"],
bundle_path=bundle_path
)
@@ -8,7 +8,12 @@
from knack.log import get_logger
from azext_edge.edge.common import OpsServiceType
from azext_edge.edge.providers.edge_api import DEVICEREGISTRY_API_V1
from .helpers import check_custom_resource_files, BASE_ZIP_PATH, get_file_map, run_bundle_command
from .helpers import (
check_custom_resource_files,
BASE_ZIP_PATH,
get_file_map,
run_bundle_command
)

logger = get_logger(__name__)

@@ -17,7 +22,7 @@ def test_create_bundle_deviceregistry(init_setup, tracked_files):
"""Test for ensuring file names and content. ONLY CHECKS deviceregistry."""
ops_service = OpsServiceType.deviceregistry.value
command = f"az iot ops support create-bundle --ops-service {ops_service}"
walk_result = run_bundle_command(command=command, tracked_files=tracked_files)
walk_result, _ = run_bundle_command(command=command, tracked_files=tracked_files)
if not walk_result[BASE_ZIP_PATH]["folders"]:
pytest.skip(f"No bundles created for {ops_service}.")
file_map = get_file_map(walk_result, ops_service)["aio"]