Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 114 additions & 1 deletion packages/core/src/services/toolOutputMaskingService.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ import {
ToolOutputMaskingService,
MASKING_INDICATOR_TAG,
} from './toolOutputMaskingService.js';
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
import {
SHELL_TOOL_NAME,
ACTIVATE_SKILL_TOOL_NAME,
MEMORY_TOOL_NAME,
} from '../tools/tool-names.js';
import { estimateTokenCountSync } from '../utils/tokenCalculation.js';
import type { Config } from '../config/config.js';
import type { Content, Part } from '@google/genai';
Expand Down Expand Up @@ -511,4 +515,113 @@ describe('ToolOutputMaskingService', () => {
const result = await service.mask(history, mockConfig);
expect(result.maskedCount).toBe(0); // padding is protected, tiny_tool would increase size
});

it('should never mask exempt tools (like activate_skill) even if they are deep in history', async () => {
const history: Content[] = [
{
role: 'user',
parts: [
{
functionResponse: {
name: ACTIVATE_SKILL_TOOL_NAME,
response: { output: 'High value instructions for skill' },
},
},
],
},
{
role: 'user',
parts: [
{
functionResponse: {
name: MEMORY_TOOL_NAME,
response: { output: 'Important user preference' },
},
},
],
},
{
role: 'user',
parts: [
{
functionResponse: {
name: 'bulky_tool',
response: { output: 'A'.repeat(60000) },
},
},
],
},
// Protection buffer
{
role: 'user',
parts: [
{
functionResponse: {
name: 'padding',
response: { output: 'B'.repeat(60000) },
},
},
],
},
{ role: 'user', parts: [{ text: 'latest' }] },
];

mockedEstimateTokenCountSync.mockImplementation((parts: Part[]) => {
const resp = parts[0].functionResponse?.response as Record<
string,
unknown
>;
const content = (resp?.['output'] as string) ?? JSON.stringify(resp);
if (content.includes(`<${MASKING_INDICATOR_TAG}`)) return 100;

const name = parts[0].functionResponse?.name;
if (name === ACTIVATE_SKILL_TOOL_NAME) return 1000;
if (name === MEMORY_TOOL_NAME) return 500;
if (name === 'bulky_tool') return 60000;
if (name === 'padding') return 60000;
return 10;
});

const result = await service.mask(history, mockConfig);

// Both 'bulky_tool' and 'padding' should be masked.
// 'padding' (Index 3) crosses the 50k protection boundary immediately.
// ACTIVATE_SKILL and MEMORY are exempt.
expect(result.maskedCount).toBe(2);
expect(result.newHistory[0].parts?.[0].functionResponse?.name).toBe(
ACTIVATE_SKILL_TOOL_NAME,
);
expect(
(
result.newHistory[0].parts?.[0].functionResponse?.response as Record<
string,
unknown
>
)['output'],
).toBe('High value instructions for skill');

expect(result.newHistory[1].parts?.[0].functionResponse?.name).toBe(
MEMORY_TOOL_NAME,
);
expect(
(
result.newHistory[1].parts?.[0].functionResponse?.response as Record<
string,
unknown
>
)['output'],
).toBe('Important user preference');

expect(result.newHistory[2].parts?.[0].functionResponse?.name).toBe(
'bulky_tool',
);
expect(
(
result.newHistory[2].parts?.[0].functionResponse?.response as Record<
string,
unknown
>
)['output'],
).toContain(MASKING_INDICATOR_TAG);
});
});
26 changes: 25 additions & 1 deletion packages/core/src/services/toolOutputMaskingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@ import { debugLogger } from '../utils/debugLogger.js';
import { sanitizeFilenamePart } from '../utils/fileUtils.js';
import type { Config } from '../config/config.js';
import { logToolOutputMasking } from '../telemetry/loggers.js';
import { SHELL_TOOL_NAME } from '../tools/tool-names.js';
import {
SHELL_TOOL_NAME,
ACTIVATE_SKILL_TOOL_NAME,
MEMORY_TOOL_NAME,
ASK_USER_TOOL_NAME,
ENTER_PLAN_MODE_TOOL_NAME,
EXIT_PLAN_MODE_TOOL_NAME,
} from '../tools/tool-names.js';
import { ToolOutputMaskingEvent } from '../telemetry/types.js';

// Tool output masking defaults
Expand All @@ -23,6 +30,18 @@ export const MASKING_INDICATOR_TAG = 'tool_output_masked';

export const TOOL_OUTPUTS_DIR = 'tool-outputs';

/**
* Tools whose outputs are always high-signal and should never be masked,
* regardless of their position in the conversation history.
*/
const EXEMPT_TOOLS = new Set([
ACTIVATE_SKILL_TOOL_NAME,
MEMORY_TOOL_NAME,
ASK_USER_TOOL_NAME,
ENTER_PLAN_MODE_TOOL_NAME,
EXIT_PLAN_MODE_TOOL_NAME,
]);

export interface MaskingResult {
newHistory: Content[];
maskedCount: number;
Expand Down Expand Up @@ -89,6 +108,11 @@ export class ToolOutputMaskingService {
// core intent and logic, which are harder for the model to recover if lost.
if (!part.functionResponse) continue;

const toolName = part.functionResponse.name;
if (toolName && EXEMPT_TOOLS.has(toolName)) {
continue;
}

const toolOutputContent = this.getToolOutputContent(part);
if (!toolOutputContent || this.isAlreadyMasked(toolOutputContent)) {
continue;
Expand Down
Loading