[ML] Show warning when the model memory limit is higher than the memory available in the ML node (elastic#65652)

jgowdyelastic · jgowdyelastic · commit b6a29ccdf1d7 · 2020-05-08T13:14:54.000+01:00
* [ML] Show warning when the model memory limit is higher than the memory available in the ML node

* reverting UI check

* removing from UI job validator

* adding cap to estimated mml

* adding mml value to message

* fixing translations

* updating translations

* fixing translation ids
diff --git a/x-pack/plugins/ml/common/types/ml_server_info.ts b/x-pack/plugins/ml/common/types/ml_server_info.ts
@@ -18,6 +18,7 @@ export interface MlServerDefaults {
 
 export interface MlServerLimits {
   max_model_memory_limit?: string;
+  effective_max_model_memory_limit?: string;
 }
 
 export interface MlInfoResponse {
diff --git a/x-pack/plugins/ml/server/models/calculate_model_memory_limit/calculate_model_memory_limit.ts b/x-pack/plugins/ml/server/models/calculate_model_memory_limit/calculate_model_memory_limit.ts
@@ -9,6 +9,7 @@ import { APICaller } from 'kibana/server';
 import { MLCATEGORY } from '../../../common/constants/field_types';
 import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
 import { fieldsServiceProvider } from '../fields_service';
+import { MlInfoResponse } from '../../../common/types/ml_server_info';
 
 interface ModelMemoryEstimationResult {
   /**
@@ -139,15 +140,9 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
     latestMs: number,
     allowMMLGreaterThanMax = false
   ): Promise<ModelMemoryEstimationResult> {
-    let maxModelMemoryLimit;
-    try {
-      const resp = await callAsCurrentUser('ml.info');
-      if (resp?.limits?.max_model_memory_limit !== undefined) {
-        maxModelMemoryLimit = resp.limits.max_model_memory_limit.toUpperCase();
-      }
-    } catch (e) {
-      throw new Error('Unable to retrieve max model memory limit');
-    }
+    const info = await callAsCurrentUser<MlInfoResponse>('ml.info');
+    const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
+    const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();
 
     const { overallCardinality, maxBucketCardinality } = await getCardinalities(
       analysisConfig,
@@ -168,24 +163,40 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
       })
     ).model_memory_estimate.toUpperCase();
 
-    let modelMemoryLimit: string = estimatedModelMemoryLimit;
+    let modelMemoryLimit = estimatedModelMemoryLimit;
+    let mmlCappedAtMax = false;
     // if max_model_memory_limit has been set,
     // make sure the estimated value is not greater than it.
-    if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
-      // @ts-ignore
-      const maxBytes = numeral(maxModelMemoryLimit).value();
+    if (allowMMLGreaterThanMax === false) {
       // @ts-ignore
       const mmlBytes = numeral(estimatedModelMemoryLimit).value();
-      if (mmlBytes > maxBytes) {
+      if (maxModelMemoryLimit !== undefined) {
+        // @ts-ignore
+        const maxBytes = numeral(maxModelMemoryLimit).value();
+        if (mmlBytes > maxBytes) {
+          // @ts-ignore
+          modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
+          mmlCappedAtMax = true;
+        }
+      }
+
+      // if we've not already capped the estimated mml at the hard max server setting
+      // ensure that the estimated mml isn't greater than the effective max mml
+      if (mmlCappedAtMax === false && effectiveMaxModelMemoryLimit !== undefined) {
         // @ts-ignore
-        modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
+        const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
+        if (mmlBytes > effectiveMaxMmlBytes) {
+          // @ts-ignore
+          modelMemoryLimit = `${Math.floor(effectiveMaxMmlBytes / numeral('1MB').value())}MB`;
+        }
       }
     }
 
     return {
       estimatedModelMemoryLimit,
       modelMemoryLimit,
       ...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
+      ...(effectiveMaxModelMemoryLimit ? { effectiveMaxModelMemoryLimit } : {}),
     };
   };
 }
diff --git a/x-pack/plugins/ml/server/models/job_validation/messages.js b/x-pack/plugins/ml/server/models/job_validation/messages.js
@@ -433,6 +433,17 @@ export const getMessages = () => {
         }
       ),
     },
+    mml_greater_than_effective_max_mml: {
+      status: 'WARNING',
+      text: i18n.translate(
+        'xpack.ml.models.jobValidation.messages.mmlGreaterThanEffectiveMaxMmlMessage',
+        {
+          defaultMessage:
+            'Job will not be able to run in the current cluster because model memory limit is higher than {effectiveMaxModelMemoryLimit}.',
+          values: { effectiveMaxModelMemoryLimit: '{{effectiveMaxModelMemoryLimit}}' },
+        }
+      ),
+    },
     mml_greater_than_max_mml: {
       status: 'ERROR',
       text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage', {
diff --git a/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.test.ts b/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.test.ts
@@ -24,6 +24,7 @@ describe('ML - validateModelMemoryLimit', () => {
     },
     limits: {
       max_model_memory_limit: '30mb',
+      effective_max_model_memory_limit: '40mb',
     },
   };
 
@@ -211,6 +212,30 @@ describe('ML - validateModelMemoryLimit', () => {
     });
   });
 
+  it('Called with no duration or split and mml above limit, no max setting', () => {
+    const job = getJobConfig();
+    const duration = undefined;
+    // @ts-ignore
+    job.analysis_limits.model_memory_limit = '31mb';
+
+    return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
+      const ids = messages.map(m => m.id);
+      expect(ids).toEqual([]);
+    });
+  });
+
+  it('Called with no duration or split and mml above limit, no max setting, above effective max mml', () => {
+    const job = getJobConfig();
+    const duration = undefined;
+    // @ts-ignore
+    job.analysis_limits.model_memory_limit = '41mb';
+
+    return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
+      const ids = messages.map(m => m.id);
+      expect(ids).toEqual(['mml_greater_than_effective_max_mml']);
+    });
+  });
+
   it('Called with small number of detectors, so estimated mml is under specified mml, no max setting', () => {
     const dtrs = createDetectors(1);
     const job = getJobConfig(['instance'], dtrs);
diff --git a/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.ts b/x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.ts
@@ -10,6 +10,7 @@ import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
 import { validateJobObject } from './validate_job_object';
 import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
 import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
+import { MlInfoResponse } from '../../../common/types/ml_server_info';
 
 // The minimum value the backend expects is 1MByte
 const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
@@ -50,9 +51,9 @@ export async function validateModelMemoryLimit(
 
   // retrieve the max_model_memory_limit value from the server
   // this will be unset unless the user has set this on their cluster
-  const maxModelMemoryLimit: string | undefined = (
-    await callWithRequest('ml.info')
-  )?.limits?.max_model_memory_limit?.toUpperCase();
+  const info = await callWithRequest<MlInfoResponse>('ml.info');
+  const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
+  const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();
 
   if (runCalcModelMemoryTest) {
     const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
@@ -113,17 +114,35 @@ export async function validateModelMemoryLimit(
 
   // if max_model_memory_limit has been set,
   // make sure the user defined MML is not greater than it
-  if (maxModelMemoryLimit !== undefined && mml !== null) {
-    // @ts-ignore
-    const maxMmlBytes = numeral(maxModelMemoryLimit).value();
+  if (mml !== null) {
+    let maxMmlExceeded = false;
     // @ts-ignore
     const mmlBytes = numeral(mml).value();
-    if (mmlBytes > maxMmlBytes) {
-      messages.push({
-        id: 'mml_greater_than_max_mml',
-        maxModelMemoryLimit,
-        mml,
-      });
+
+    if (maxModelMemoryLimit !== undefined) {
+      // @ts-ignore
+      const maxMmlBytes = numeral(maxModelMemoryLimit).value();
+      if (mmlBytes > maxMmlBytes) {
+        maxMmlExceeded = true;
+        messages.push({
+          id: 'mml_greater_than_max_mml',
+          maxModelMemoryLimit,
+          mml,
+        });
+      }
+    }
+
+    if (effectiveMaxModelMemoryLimit !== undefined && maxMmlExceeded === false) {
+      // @ts-ignore
+      const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
+      if (mmlBytes > effectiveMaxMmlBytes) {
+        messages.push({
+          id: 'mml_greater_than_effective_max_mml',
+          maxModelMemoryLimit,
+          mml,
+          effectiveMaxModelMemoryLimit,
+        });
+      }
     }
   }
 
diff --git a/x-pack/plugins/translations/translations/ja-JP.json b/x-pack/plugins/translations/translations/ja-JP.json
@@ -10066,7 +10066,6 @@
     "xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "ジョブ ID が無効です。アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります。",
     "xpack.ml.models.jobValidation.messages.jobIdValidHeading": "ジョブ ID のフォーマットは有効です。",
     "xpack.ml.models.jobValidation.messages.jobIdValidMessage": "アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーライン、最初と最後を英数字にし、{maxLength, plural, one {# 文字} other {# 文字}}以内にする必要があります。",
-    "xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "モデルメモリー制限が、このクラスターに構成された最大モデルメモリー制限を超えています。",
     "xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} はモデルメモリー制限の有効な値ではありません。この値は最低 1MB で、バイト (例: 10MB) で指定する必要があります。",
     "xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "ジョブの構成の基本要件が満たされていないため、他のチェックをスキップしました。",
     "xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "バケットスパン",
diff --git a/x-pack/plugins/translations/translations/zh-CN.json b/x-pack/plugins/translations/translations/zh-CN.json
@@ -10072,7 +10072,6 @@
     "xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "作业 ID 无效.其可以包含小写字母数字（a-z 和 0-9）字符、连字符或下划线，且必须以字母数字字符开头和结尾。",
     "xpack.ml.models.jobValidation.messages.jobIdValidHeading": "作业 ID 格式有效",
     "xpack.ml.models.jobValidation.messages.jobIdValidMessage": "小写字母数字（a-z 和 0-9）字符、连字符或下划线，以字母数字字符开头和结尾，且长度不超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。",
-    "xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "模型内存限制大于为此集群配置的最大模型内存限制。",
     "xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} 不是有效的模型内存限制值。该值需要至少 1MB，且应以字节为单位（例如 10MB）指定。",
     "xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "已跳过其他检查，因为未满足作业配置的基本要求。",
     "xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "存储桶跨度",

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -18,6 +18,7 @@ export interface MlServerDefaults {` |
| `18` | `18` | |
| `19` | `19` | `export interface MlServerLimits {` |
| `20` | `20` | `max_model_memory_limit?: string;` |
| | `21` | `+ effective_max_model_memory_limit?: string;` |
| `21` | `22` | `}` |
| `22` | `23` | |
| `23` | `24` | `export interface MlInfoResponse {` |