Skip to content

Commit

Permalink
feat: Improve error check while deploying Eclipse Che (#973)
Browse files Browse the repository at this point in the history
* Improve error check
* Fix test

Signed-off-by: Anatolii Bazko <abazko@redhat.com>
  • Loading branch information
tolusha authored Nov 10, 2020
1 parent 6d21950 commit 42965c4
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 167 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/e2e-minikube-helm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#
# Copyright (c) 2012-2020 Red Hat, Inc.
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
#
# Contributors:
# Red Hat, Inc. - initial API and implementation
# Runs the chectl e2e test suite on every pull request against a local
# minikube cluster, deploying with the Helm installer.
name: chectl e2e tests
on: pull_request
jobs:
  minikube-e2e-helm:
    name: Minikube Helm
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v1
      - name: Install minikube kubernetes cluster
        run: minikube start --memory=6000
      - name: Install chectl dependencies
        run: yarn
      - name: Run e2e tests minikube
        # PLATFORM and INSTALLER are exported for the e2e test run;
        # the ingress addon is enabled before the tests start.
        run: |
          export PLATFORM=minikube
          export INSTALLER=helm
          minikube addons enable ingress
          yarn test --coverage=false --forceExit --testRegex=test/e2e/e2e.test.ts
28 changes: 28 additions & 0 deletions .github/workflows/e2e-minikube-operator.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#
# Copyright (c) 2012-2020 Red Hat, Inc.
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
#
# Contributors:
# Red Hat, Inc. - initial API and implementation
# Runs the chectl e2e test suite on every pull request against a local
# minikube cluster, deploying with the operator installer.
name: chectl e2e tests
on: pull_request
jobs:
  minikube-e2e:
    name: Minikube Operator
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v1
      - name: Install minikube kubernetes cluster
        run: minikube start --memory=6000
      - name: Install chectl dependencies
        run: yarn
      - name: Run e2e tests minikube
        # PLATFORM and INSTALLER are exported for the e2e test run;
        # the ingress addon is enabled before the tests start.
        run: |
          export PLATFORM=minikube
          export INSTALLER=operator
          minikube addons enable ingress
          yarn test --coverage=false --forceExit --testRegex=test/e2e/e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,6 @@
name: chectl e2e tests
on: pull_request
jobs:
minikube-e2e:
name: Minikube Operator
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Install minikube kubernetes cluster
run: minikube start --memory=6000
- name: Install chectl dependencies
run: yarn
- name: Run e2e tests minikube
run: |
export PLATFORM=minikube
export INSTALLER=operator
minikube addons enable ingress
yarn test --coverage=false --forceExit --testRegex=test/e2e/e2e.test.ts
minikube-e2e-helm:
name: Minikube Helm
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Install minikube kubernetes cluster
run: minikube start --memory=6000
- name: Install chectl dependencies
run: yarn
- name: Run e2e tests minikube
run: |
export PLATFORM=minikube
export INSTALLER=helm
minikube addons enable ingress
yarn test --coverage=false --forceExit --testRegex=test/e2e/e2e.test.ts
minishift-e2e:
name: Minishift Operator
runs-on: macos-latest
Expand Down Expand Up @@ -98,7 +68,8 @@ jobs:
oc delete secret router-certs
cat domain.crt domain.key > minishift.crt
oc create secret tls router-certs --key=domain.key --cert=minishift.crt
oc rollout latest router
sleep 5s
# oc rollout latest router
oc create secret generic self-signed-certificate --from-file=ca.crt -n=che
# RUN THE TESTS
export PLATFORM=minishift
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -516,11 +516,17 @@ OPTIONS
--helm-patch-yaml=helm-patch-yaml
Path to yaml file with Helm Chart values patch. The file format is identical to values.yaml from the chart.
--k8spoddownloadimagetimeout=k8spoddownloadimagetimeout
[default: 600000] Waiting time for Pod downloading image (in milliseconds)
--k8spoderrorrechecktimeout=k8spoderrorrechecktimeout
[default: 15000] Waiting time for Pod rechecking error (in milliseconds)
--k8spodreadytimeout=k8spodreadytimeout
[default: 130000] Waiting time for Pod Ready Kubernetes (in milliseconds)
[default: 600000] Waiting time for Pod Ready condition (in milliseconds)
--k8spodwaittimeout=k8spodwaittimeout
[default: 300000] Waiting time for Pod Wait Timeout Kubernetes (in milliseconds)
[default: 600000] Waiting time for Pod scheduled condition (in milliseconds)
--olm-channel=olm-channel
Olm channel to install Eclipse Che, f.e. stable.
Expand Down
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@
"test": "jest",
"posttest": "tslint -p test -t stylish",
"test-watch": "jest --watchAll",
"e2e-minikube": "jest ./test/e2e/minikube.test.ts --testRegex='/test/(e2e)/.*.test.ts'",
"e2e-minishift": "jest ./test/e2e/minishift.test.ts --testRegex='/test/(e2e)/.*.test.ts'",
"e2e-openshift": "jest ./test/e2e/openshift.test.ts --testRegex='/test/(e2e)/.*.test.ts'",
"prepack": "rm -rf lib && rm -rf tsconfig.tsbuildinfo && tsc -b && oclif-dev manifest && oclif-dev readme",
"e2e-minikube-helm": "export PLATFORM=minikube && export INSTALLER=helm && yarn jest ./test/e2e/e2e.test.ts --testRegex='/test/(e2e)/.*.test.ts'",
"e2e-minikube-operator": "export PLATFORM=minikube && export INSTALLER=operator && yarn jest ./test/e2e/e2e.test.ts --testRegex='/test/(e2e)/.*.test.ts'",
"e2e-minishift": "export PLATFORM=minishift && export INSTALLER=operator && yarn jest ./test/e2e/e2e.test.ts --testRegex='/test/(e2e)/.*.test.ts'",
"e2e-openshift": "export PLATFORM=openshift && export INSTALLER=operator && yarn jest ./test/e2e/e2e.test.ts --testRegex='/test/(e2e)/.*.test.ts'", "prepack": "rm -rf lib && rm -rf tsconfig.tsbuildinfo && tsc -b && oclif-dev manifest && oclif-dev readme",
"pack-binaries": "oclif-dev pack",
"postpack": "rm -f oclif.manifest.json",
"format": "tsfmt -r --useTsfmt tsfmt.json",
Expand Down
18 changes: 8 additions & 10 deletions src/api/kube.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import { CatalogSource, ClusterServiceVersion, ClusterServiceVersionList, Instal
import { IdentityProvider, OAuth } from './typings/openshift'

// Await timeout; the `_S` suffix suggests the unit is seconds — TODO confirm against usages.
const AWAIT_TIMEOUT_S = 30
// Default pod timeout in milliseconds; KubeHelper falls back to it for the
// pod wait, pod ready and image download timeouts when no flag is given.
export const DEFAULT_K8S_POD_TIMEOUT = 600000
// Default delay in milliseconds before a detected pod error is rechecked.
export const DEFAULT_K8S_POD_ERROR_RECHECK_TIMEOUT = 15000

export class KubeHelper {
public static readonly KUBE_CONFIG = KubeHelper.initializeKubeConfig()
Expand All @@ -42,19 +44,15 @@ export class KubeHelper {
logHelper = new Log(KubeHelper.KUBE_CONFIG)

podWaitTimeout: number
podDownloadImageTimeout: number
podReadyTimeout: number
podErrorRecheckTimeout: number

/**
 * Initializes the pod timeouts from the command flags.
 *
 * Every timeout is taken from its own flag and parsed as a base-10 integer
 * (the flags are expected to carry milliseconds). When `flags` itself or an
 * individual flag is missing, the module-level default is used instead.
 *
 * @param flags parsed command flags; may be omitted
 */
constructor(flags?: any) {
  this.podWaitTimeout = (flags && flags.k8spodwaittimeout) ? parseInt(flags.k8spodwaittimeout, 10) : DEFAULT_K8S_POD_TIMEOUT
  this.podReadyTimeout = (flags && flags.k8spodreadytimeout) ? parseInt(flags.k8spodreadytimeout, 10) : DEFAULT_K8S_POD_TIMEOUT
  this.podDownloadImageTimeout = (flags && flags.k8spoddownloadimagetimeout) ? parseInt(flags.k8spoddownloadimagetimeout, 10) : DEFAULT_K8S_POD_TIMEOUT
  // The flag is declared as `k8spoderrorrechecktimeout`; reading any other key
  // would silently ignore the user's value and always apply the default.
  this.podErrorRecheckTimeout = (flags && flags.k8spoderrorrechecktimeout) ? parseInt(flags.k8spoderrorrechecktimeout, 10) : DEFAULT_K8S_POD_ERROR_RECHECK_TIMEOUT
}

async createNamespace(namespaceName: string, labels: any): Promise<void> {
Expand Down
2 changes: 1 addition & 1 deletion src/commands/auth/login.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ export default class Login extends Command {

try {
const username = await loginManager.setLoginContext(cheApiEndpoint, loginData)
cli.info(`Succesfully logged into ${cheApiEndpoint} as ${username}`)
cli.info(`Successfully logged into ${cheApiEndpoint} as ${username}`)
} catch (error) {
cli.error(error)
}
Expand Down
18 changes: 13 additions & 5 deletions src/commands/server/deploy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import * as notifier from 'node-notifier'
import * as os from 'os'
import * as path from 'path'

import { KubeHelper } from '../../api/kube'
import { DEFAULT_K8S_POD_ERROR_RECHECK_TIMEOUT, DEFAULT_K8S_POD_TIMEOUT, KubeHelper } from '../../api/kube'
import { cheDeployment, cheNamespace, cheOperatorCRPatchYaml, cheOperatorCRYaml, CHE_OPERATOR_CR_PATCH_YAML_KEY, CHE_OPERATOR_CR_YAML_KEY, devWorkspaceControllerNamespace, listrRenderer, skipKubeHealthzCheck as skipK8sHealthCheck } from '../../common-flags'
import { DEFAULT_CHE_OPERATOR_IMAGE, DEFAULT_DEV_WORKSPACE_CONTROLLER_IMAGE, DEFAULT_OLM_SUGGESTED_NAMESPACE, DOCS_LINK_INSTALL_RUNNING_CHE_LOCALLY } from '../../constants'
import { CheTasks } from '../../tasks/che'
Expand Down Expand Up @@ -64,12 +64,20 @@ export default class Deploy extends Command {
env: 'CHE_SERVER_BOOT_TIMEOUT'
}),
k8spodwaittimeout: string({
description: 'Waiting time for Pod Wait Timeout Kubernetes (in milliseconds)',
default: '300000'
description: 'Waiting time for Pod scheduled condition (in milliseconds)',
default: `${DEFAULT_K8S_POD_TIMEOUT}`
}),
k8spoddownloadimagetimeout: string({
description: 'Waiting time for Pod downloading image (in milliseconds)',
default: `${DEFAULT_K8S_POD_TIMEOUT}`
}),
k8spodreadytimeout: string({
description: 'Waiting time for Pod Ready Kubernetes (in milliseconds)',
default: '130000'
description: 'Waiting time for Pod Ready condition (in milliseconds)',
default: `${DEFAULT_K8S_POD_TIMEOUT}`
}),
k8spoderrorrechecktimeout: string({
description: 'Waiting time for Pod rechecking error (in milliseconds)',
default: `${DEFAULT_K8S_POD_ERROR_RECHECK_TIMEOUT}`
}),
multiuser: flags.boolean({
char: 'm',
Expand Down
92 changes: 54 additions & 38 deletions src/tasks/kube.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ import * as Listr from 'listr'
import { KubeHelper } from '../api/kube'

export class KubeTasks {
kubeHelper: KubeHelper
constructor(flags?: any) {
private readonly interval = 500
private readonly kubeHelper: KubeHelper
constructor(flags: any) {
this.kubeHelper = new KubeHelper(flags)
}

Expand All @@ -24,21 +25,31 @@ export class KubeTasks {
{
title: 'Scheduling',
task: async (_ctx: any, task: any) => {
// any way use 5 minutes (600*500=5*60*1000 ms) timeout
for (let i = 1; i <= 600; i++) {
const taskTitle = task.title
const iterations = this.kubeHelper.podWaitTimeout / this.interval
for (let i = 1; i <= iterations; i++) {
// check 'PodScheduled' condition
const failedCondition = await this.getFailedPodCondition(namespace, selector, 'PodScheduled')
if (failedCondition) {
task.title = `${task.title}...failed`
throw new Error(`Failed to schedule a pod, reason: ${failedCondition.reason}, message: ${failedCondition.message}`)
task.title = `${taskTitle}...failed, rechecking...`

// for instance we need some time for pvc provisioning...
await cli.wait(this.kubeHelper.podErrorRecheckTimeout)

const failedCondition = await this.getFailedPodCondition(namespace, selector, 'PodScheduled')
if (failedCondition) {
task.title = `${taskTitle}...failed`
throw new Error(`Failed to schedule a pod, reason: ${failedCondition.reason}, message: ${failedCondition.message}. Consider increasing error recheck timeout with --k8spoderrorrechecktimeout flag.`)
}
}

const allScheduled = await this.isPodConditionStatusPassed(namespace, selector, 'PodScheduled')
if (allScheduled) {
task.title = `${task.title}...done.`
task.title = `${taskTitle}...done`
return
}

await cli.wait(500)
await cli.wait(this.interval)
}

throw new Error(`Failed to schedule a pod: ${await this.getTimeOutErrorMessage(namespace, selector)}`)
Expand All @@ -47,22 +58,29 @@ export class KubeTasks {
{
title: 'Downloading images',
task: async (_ctx: any, task: any) => {
// any way use 5 minutes (600*500=5*60*1000 ms) timeout
for (let i = 1; i <= 600; i++) {
const taskTitle = task.title
const iterations = this.kubeHelper.podDownloadImageTimeout / this.interval
for (let i = 1; i <= iterations; i++) {
const failedState = await this.getFailedWaitingState(namespace, selector, 'Pending')
if (failedState) {
task.title = `${task.title}...failed`
throw new Error(`Failed to download image, reason: ${failedState.reason}, message: ${failedState.message}`)
task.title = `${taskTitle}...failed, rechecking...`
await cli.wait(this.kubeHelper.podErrorRecheckTimeout)

const failedState = await this.getFailedWaitingState(namespace, selector, 'Pending')
if (failedState) {
task.title = `${taskTitle}...failed`
throw new Error(`Failed to download image, reason: ${failedState.reason}, message: ${failedState.message}.`)
}
}

const pods = await this.kubeHelper.getPodListByLabel(namespace, selector)
const allRunning = !pods.some(value => !value.status || value.status.phase !== 'Running')
if (pods.length && allRunning) {
task.title = `${task.title}...done.`
task.title = `${taskTitle}...done`
return
}

await cli.wait(500)
await cli.wait(this.interval)
}

throw new Error(`Failed to download image: ${await this.getTimeOutErrorMessage(namespace, selector)}`)
Expand All @@ -71,27 +89,38 @@ export class KubeTasks {
{
title: 'Starting',
task: async (_ctx: any, task: any) => {
// any way use 5 minutes (600*500=5*60*1000 ms) timeout
for (let i = 1; i <= 600; i++) {
const taskTitle = task.title
const iterations = this.kubeHelper.podReadyTimeout / this.interval
for (let i = 1; i <= iterations; i++) {
const failedState = await this.getFailedWaitingState(namespace, selector, 'Running')
if (failedState) {
task.title = `${task.title}...failed`
throw new Error(`Failed to start a pod, reason: ${failedState.reason}, message: ${failedState.message}`)
task.title = `${taskTitle}...failed, rechecking...`
await cli.wait(this.kubeHelper.podErrorRecheckTimeout)

const failedState = await this.getFailedWaitingState(namespace, selector, 'Running')
if (failedState) {
task.title = `${taskTitle}...failed`
throw new Error(`Failed to start a pod, reason: ${failedState.reason}, message: ${failedState.message}`)
}
}

const terminatedState = await this.kubeHelper.getPodLastTerminatedState(namespace, selector)
if (terminatedState) {
task.title = `${task.title}...failed`
throw new Error(`Failed to start a pod, reason: ${terminatedState.reason}, message: ${terminatedState.message}`)
task.title = `${taskTitle}...failed`
let errorMsg = `Failed to start a pod, reason: ${terminatedState.reason}`
terminatedState.message && (errorMsg += `, message: ${terminatedState.message}`)
terminatedState.exitCode && (errorMsg += `, exitCode: ${terminatedState.exitCode}`)
terminatedState.signal && (errorMsg += `, signal: ${terminatedState.signal}`)
throw new Error(errorMsg)
}

const allStarted = await this.isPodConditionStatusPassed(namespace, selector, 'Ready')
if (allStarted) {
task.title = `${task.title}...done.`
task.title = `${taskTitle}...done`
return
}

await cli.wait(500)
await cli.wait(this.interval)
}

throw new Error(`Failed to start a pod: ${await this.getTimeOutErrorMessage(namespace, selector)}`)
Expand All @@ -102,14 +131,7 @@ export class KubeTasks {

/**
 * Performs a single check for a failed pod condition of the given type.
 *
 * A condition counts as failed when its status is 'False' and it carries both
 * a message and a reason. The method does not wait or retry on its own:
 * the scheduling task already pauses for `podErrorRecheckTimeout` and calls
 * it a second time before reporting a failure, so an extra built-in delay
 * here would only add a hidden, non-configurable wait.
 *
 * @param namespace namespace of the pods to inspect
 * @param selector label selector identifying the pods
 * @param conditionType pod condition type to look at, e.g. 'PodScheduled'
 * @returns the first failed condition, or undefined when there is none
 */
private async getFailedPodCondition(namespace: string, selector: string, conditionType: string): Promise<V1PodCondition | undefined> {
  const conditions = await this.kubeHelper.getPodCondition(namespace, selector, conditionType)
  return conditions.find(condition => condition.status === 'False' && condition.message && condition.reason)
}

private async isPodConditionStatusPassed(namespace: string, selector: string, conditionType: string): Promise<boolean> {
Expand All @@ -124,13 +146,7 @@ export class KubeTasks {
/**
 * Performs a single check for a failed container waiting state.
 *
 * A waiting state counts as failed when it carries both a reason and a
 * message. The method does not wait or retry on its own: the image download
 * and start tasks already pause for `podErrorRecheckTimeout` and call it a
 * second time before reporting a failure.
 *
 * @param namespace namespace of the pods to inspect
 * @param selector label selector identifying the pods
 * @param state state passed through to `getPodWaitingState`, e.g. 'Pending' or 'Running'
 * @returns the failed waiting state, or undefined when there is none
 */
private async getFailedWaitingState(namespace: string, selector: string, state: string): Promise<V1ContainerStateWaiting | undefined> {
  const waitingState = await this.kubeHelper.getPodWaitingState(namespace, selector, state)
  const hasFailed = waitingState && waitingState.reason && waitingState.message
  return hasFailed ? waitingState : undefined
}

Expand All @@ -140,7 +156,7 @@ export class KubeTasks {
private async getTimeOutErrorMessage(namespace: string, selector: string): Promise<string> {
const pods = await this.kubeHelper.getPodListByLabel(namespace, selector)
if (!pods.length) {
return 'Timeout: there no pods.'
throw new Error(`Timeout: there are no pods in the namespace: ${namespace}, selector: ${selector}. Check Eclipse Che logs for details. Consider increasing error recheck timeout with --k8spoderrorrechecktimeout flag.`)
}

let errorMessage = 'Timeout:'
Expand Down
Loading

0 comments on commit 42965c4

Please sign in to comment.