Skip to content

Commit b6a29cc

Browse files
committed
[ML] Show warning when the model memory limit is higher than the memory available in the ML node (elastic#65652)
* [ML] Show warning when the model memory limit is higher than the memory available in the ML node
* reverting UI check
* removing from UI job validator
* adding cap to estimate mml
* adding mml value to message
* fixing translations
* updating translations
* fixing translation ids
1 parent e9c0a16 commit b6a29cc

File tree

7 files changed

+94
-29
lines changed

7 files changed

+94
-29
lines changed

x-pack/plugins/ml/common/types/ml_server_info.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export interface MlServerDefaults {
1818

1919
export interface MlServerLimits {
2020
max_model_memory_limit?: string;
21+
effective_max_model_memory_limit?: string;
2122
}
2223

2324
export interface MlInfoResponse {

x-pack/plugins/ml/server/models/calculate_model_memory_limit/calculate_model_memory_limit.ts

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { APICaller } from 'kibana/server';
99
import { MLCATEGORY } from '../../../common/constants/field_types';
1010
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
1111
import { fieldsServiceProvider } from '../fields_service';
12+
import { MlInfoResponse } from '../../../common/types/ml_server_info';
1213

1314
interface ModelMemoryEstimationResult {
1415
/**
@@ -139,15 +140,9 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
139140
latestMs: number,
140141
allowMMLGreaterThanMax = false
141142
): Promise<ModelMemoryEstimationResult> {
142-
let maxModelMemoryLimit;
143-
try {
144-
const resp = await callAsCurrentUser('ml.info');
145-
if (resp?.limits?.max_model_memory_limit !== undefined) {
146-
maxModelMemoryLimit = resp.limits.max_model_memory_limit.toUpperCase();
147-
}
148-
} catch (e) {
149-
throw new Error('Unable to retrieve max model memory limit');
150-
}
143+
const info = await callAsCurrentUser<MlInfoResponse>('ml.info');
144+
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
145+
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();
151146

152147
const { overallCardinality, maxBucketCardinality } = await getCardinalities(
153148
analysisConfig,
@@ -168,24 +163,40 @@ export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller)
168163
})
169164
).model_memory_estimate.toUpperCase();
170165

171-
let modelMemoryLimit: string = estimatedModelMemoryLimit;
166+
let modelMemoryLimit = estimatedModelMemoryLimit;
167+
let mmlCappedAtMax = false;
172168
// if max_model_memory_limit has been set,
173169
// make sure the estimated value is not greater than it.
174-
if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
175-
// @ts-ignore
176-
const maxBytes = numeral(maxModelMemoryLimit).value();
170+
if (allowMMLGreaterThanMax === false) {
177171
// @ts-ignore
178172
const mmlBytes = numeral(estimatedModelMemoryLimit).value();
179-
if (mmlBytes > maxBytes) {
173+
if (maxModelMemoryLimit !== undefined) {
174+
// @ts-ignore
175+
const maxBytes = numeral(maxModelMemoryLimit).value();
176+
if (mmlBytes > maxBytes) {
177+
// @ts-ignore
178+
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
179+
mmlCappedAtMax = true;
180+
}
181+
}
182+
183+
// if we've not already capped the estimated mml at the hard max server setting
184+
// ensure that the estimated mml isn't greater than the effective max mml
185+
if (mmlCappedAtMax === false && effectiveMaxModelMemoryLimit !== undefined) {
180186
// @ts-ignore
181-
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
187+
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
188+
if (mmlBytes > effectiveMaxMmlBytes) {
189+
// @ts-ignore
190+
modelMemoryLimit = `${Math.floor(effectiveMaxMmlBytes / numeral('1MB').value())}MB`;
191+
}
182192
}
183193
}
184194

185195
return {
186196
estimatedModelMemoryLimit,
187197
modelMemoryLimit,
188198
...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
199+
...(effectiveMaxModelMemoryLimit ? { effectiveMaxModelMemoryLimit } : {}),
189200
};
190201
};
191202
}

x-pack/plugins/ml/server/models/job_validation/messages.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,17 @@ export const getMessages = () => {
433433
}
434434
),
435435
},
436+
mml_greater_than_effective_max_mml: {
437+
status: 'WARNING',
438+
text: i18n.translate(
439+
'xpack.ml.models.jobValidation.messages.mmlGreaterThanEffectiveMaxMmlMessage',
440+
{
441+
defaultMessage:
442+
'Job will not be able to run in the current cluster because model memory limit is higher than {effectiveMaxModelMemoryLimit}.',
443+
values: { effectiveMaxModelMemoryLimit: '{{effectiveMaxModelMemoryLimit}}' },
444+
}
445+
),
446+
},
436447
mml_greater_than_max_mml: {
437448
status: 'ERROR',
438449
text: i18n.translate('xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage', {

x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.test.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ describe('ML - validateModelMemoryLimit', () => {
2424
},
2525
limits: {
2626
max_model_memory_limit: '30mb',
27+
effective_max_model_memory_limit: '40mb',
2728
},
2829
};
2930

@@ -211,6 +212,30 @@ describe('ML - validateModelMemoryLimit', () => {
211212
});
212213
});
213214

215+
it('Called with no duration or split and mml above limit, no max setting', () => {
216+
const job = getJobConfig();
217+
const duration = undefined;
218+
// @ts-ignore
219+
job.analysis_limits.model_memory_limit = '31mb';
220+
221+
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
222+
const ids = messages.map(m => m.id);
223+
expect(ids).toEqual([]);
224+
});
225+
});
226+
227+
it('Called with no duration or split and mml above limit, no max setting, above effective max mml', () => {
228+
const job = getJobConfig();
229+
const duration = undefined;
230+
// @ts-ignore
231+
job.analysis_limits.model_memory_limit = '41mb';
232+
233+
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
234+
const ids = messages.map(m => m.id);
235+
expect(ids).toEqual(['mml_greater_than_effective_max_mml']);
236+
});
237+
});
238+
214239
it('Called with small number of detectors, so estimated mml is under specified mml, no max setting', () => {
215240
const dtrs = createDetectors(1);
216241
const job = getJobConfig(['instance'], dtrs);

x-pack/plugins/ml/server/models/job_validation/validate_model_memory_limit.ts

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
1010
import { validateJobObject } from './validate_job_object';
1111
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
1212
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
13+
import { MlInfoResponse } from '../../../common/types/ml_server_info';
1314

1415
// The minimum value the backend expects is 1MByte
1516
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
@@ -50,9 +51,9 @@ export async function validateModelMemoryLimit(
5051

5152
// retrieve the max_model_memory_limit value from the server
5253
// this will be unset unless the user has set this on their cluster
53-
const maxModelMemoryLimit: string | undefined = (
54-
await callWithRequest('ml.info')
55-
)?.limits?.max_model_memory_limit?.toUpperCase();
54+
const info = await callWithRequest<MlInfoResponse>('ml.info');
55+
const maxModelMemoryLimit = info.limits.max_model_memory_limit?.toUpperCase();
56+
const effectiveMaxModelMemoryLimit = info.limits.effective_max_model_memory_limit?.toUpperCase();
5657

5758
if (runCalcModelMemoryTest) {
5859
const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
@@ -113,17 +114,35 @@ export async function validateModelMemoryLimit(
113114

114115
// if max_model_memory_limit has been set,
115116
// make sure the user defined MML is not greater than it
116-
if (maxModelMemoryLimit !== undefined && mml !== null) {
117-
// @ts-ignore
118-
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
117+
if (mml !== null) {
118+
let maxMmlExceeded = false;
119119
// @ts-ignore
120120
const mmlBytes = numeral(mml).value();
121-
if (mmlBytes > maxMmlBytes) {
122-
messages.push({
123-
id: 'mml_greater_than_max_mml',
124-
maxModelMemoryLimit,
125-
mml,
126-
});
121+
122+
if (maxModelMemoryLimit !== undefined) {
123+
// @ts-ignore
124+
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
125+
if (mmlBytes > maxMmlBytes) {
126+
maxMmlExceeded = true;
127+
messages.push({
128+
id: 'mml_greater_than_max_mml',
129+
maxModelMemoryLimit,
130+
mml,
131+
});
132+
}
133+
}
134+
135+
if (effectiveMaxModelMemoryLimit !== undefined && maxMmlExceeded === false) {
136+
// @ts-ignore
137+
const effectiveMaxMmlBytes = numeral(effectiveMaxModelMemoryLimit).value();
138+
if (mmlBytes > effectiveMaxMmlBytes) {
139+
messages.push({
140+
id: 'mml_greater_than_effective_max_mml',
141+
maxModelMemoryLimit,
142+
mml,
143+
effectiveMaxModelMemoryLimit,
144+
});
145+
}
127146
}
128147
}
129148

x-pack/plugins/translations/translations/ja-JP.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10066,7 +10066,6 @@
1006610066
"xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "ジョブ ID が無効です。アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります。",
1006710067
"xpack.ml.models.jobValidation.messages.jobIdValidHeading": "ジョブ ID のフォーマットは有効です。",
1006810068
"xpack.ml.models.jobValidation.messages.jobIdValidMessage": "アルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーライン、最初と最後を英数字にし、{maxLength, plural, one {# 文字} other {# 文字}}以内にする必要があります。",
10069-
"xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "モデルメモリー制限が、このクラスターに構成された最大モデルメモリー制限を超えています。",
1007010069
"xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} はモデルメモリー制限の有効な値ではありません。この値は最低 1MB で、バイト (例: 10MB) で指定する必要があります。",
1007110070
"xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "ジョブの構成の基本要件が満たされていないため、他のチェックをスキップしました。",
1007210071
"xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "バケットスパン",

x-pack/plugins/translations/translations/zh-CN.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10072,7 +10072,6 @@
1007210072
"xpack.ml.models.jobValidation.messages.jobIdInvalidMessage": "作业 ID 无效.其可以包含小写字母数字(a-z 和 0-9)字符、连字符或下划线,且必须以字母数字字符开头和结尾。",
1007310073
"xpack.ml.models.jobValidation.messages.jobIdValidHeading": "作业 ID 格式有效",
1007410074
"xpack.ml.models.jobValidation.messages.jobIdValidMessage": "小写字母数字(a-z 和 0-9)字符、连字符或下划线,以字母数字字符开头和结尾,且长度不超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。",
10075-
"xpack.ml.models.jobValidation.messages.mmlGreaterThanMaxMmlMessage": "模型内存限制大于为此集群配置的最大模型内存限制。",
1007610075
"xpack.ml.models.jobValidation.messages.mmlValueInvalidMessage": "{mml} 不是有效的模型内存限制值。该值需要至少 1MB,且应以字节为单位(例如 10MB)指定。",
1007710076
"xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage": "已跳过其他检查,因为未满足作业配置的基本要求。",
1007810077
"xpack.ml.models.jobValidation.messages.successBucketSpanHeading": "存储桶跨度",

0 commit comments

Comments
 (0)