Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deduplication of entries and items before sending to endpoint #71297

Merged
merged 3 commits into from
Jul 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,139 @@ describe('buildEventTypeSignal', () => {
});
});

test('it should deduplicate exception entries', async () => {
const testEntries: EntriesArray = [
{ field: 'server.domain.text', operator: 'included', type: 'match', value: 'DOMAIN' },
{ field: 'server.domain.text', operator: 'included', type: 'match', value: 'DOMAIN' },
{ field: 'server.domain.text', operator: 'included', type: 'match', value: 'DOMAIN' },
{ field: 'server.ip', operator: 'included', type: 'match', value: '192.168.1.1' },
{
field: 'host.hostname.text',
operator: 'included',
type: 'match_any',
value: ['estc', 'kibana'],
},
];

const expectedEndpointExceptions = {
type: 'simple',
entries: [
{
field: 'server.domain',
operator: 'included',
type: 'exact_caseless',
value: 'DOMAIN',
},
{
field: 'server.ip',
operator: 'included',
type: 'exact_cased',
value: '192.168.1.1',
},
{
field: 'host.hostname',
operator: 'included',
type: 'exact_caseless_any',
value: ['estc', 'kibana'],
},
],
};

const first = getFoundExceptionListItemSchemaMock();
first.data[0].entries = testEntries;
mockExceptionClient.findExceptionListItem = jest.fn().mockReturnValueOnce(first);

const resp = await getFullEndpointExceptionList(mockExceptionClient, 'linux', 'v1');
expect(resp).toEqual({
entries: [expectedEndpointExceptions],
});
});

test('it should not deduplicate exception entries across nested boundaries', async () => {
const testEntries: EntriesArray = [
{
entries: [
{ field: 'nested.field', operator: 'included', type: 'match', value: 'some value' },
],
field: 'some.parentField',
type: 'nested',
},
// Same as above but not inside the nest
{ field: 'nested.field', operator: 'included', type: 'match', value: 'some value' },
];

const expectedEndpointExceptions = {
type: 'simple',
entries: [
{
entries: [
{
field: 'nested.field',
operator: 'included',
type: 'exact_cased',
value: 'some value',
},
],
field: 'some.parentField',
type: 'nested',
},
{
field: 'nested.field',
operator: 'included',
type: 'exact_cased',
value: 'some value',
},
],
};

const first = getFoundExceptionListItemSchemaMock();
first.data[0].entries = testEntries;
mockExceptionClient.findExceptionListItem = jest.fn().mockReturnValueOnce(first);

const resp = await getFullEndpointExceptionList(mockExceptionClient, 'linux', 'v1');
expect(resp).toEqual({
entries: [expectedEndpointExceptions],
});
});

test('it should deduplicate exception items', async () => {
const testEntries: EntriesArray = [
{ field: 'server.domain.text', operator: 'included', type: 'match', value: 'DOMAIN' },
{ field: 'server.ip', operator: 'included', type: 'match', value: '192.168.1.1' },
];

const expectedEndpointExceptions = {
type: 'simple',
entries: [
{
field: 'server.domain',
operator: 'included',
type: 'exact_caseless',
value: 'DOMAIN',
},
{
field: 'server.ip',
operator: 'included',
type: 'exact_cased',
value: '192.168.1.1',
},
],
};

const first = getFoundExceptionListItemSchemaMock();
first.data[0].entries = testEntries;

// Create a second exception item with the same entries
first.data[1] = getExceptionListItemSchemaMock();
first.data[1].entries = testEntries;
mockExceptionClient.findExceptionListItem = jest.fn().mockReturnValueOnce(first);

const resp = await getFullEndpointExceptionList(mockExceptionClient, 'linux', 'v1');
expect(resp).toEqual({
entries: [expectedEndpointExceptions],
});
});

test('it should ignore unsupported entries', async () => {
// Lists and exists are not supported by the Endpoint
const testEntries: EntriesArray = [
Expand Down Expand Up @@ -178,8 +311,9 @@ describe('buildEventTypeSignal', () => {
});

test('it should convert the exception lists response to the proper endpoint format while paging', async () => {
// The first call returns one exception
// The first call returns two exceptions
const first = getFoundExceptionListItemSchemaMock();
first.data.push(getExceptionListItemSchemaMock());

// The second call returns two exceptions
const second = getFoundExceptionListItemSchemaMock();
Expand All @@ -194,7 +328,8 @@ describe('buildEventTypeSignal', () => {
.mockReturnValueOnce(second)
.mockReturnValueOnce(third);
const resp = await getFullEndpointExceptionList(mockExceptionClient, 'linux', 'v1');
expect(resp.entries.length).toEqual(3);
// Expect 2 exceptions, the first two calls returned the same exception list items
expect(resp.entries.length).toEqual(2);
});

test('it should handle no exceptions', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,18 @@ export function translateToEndpointExceptions(
exc: FoundExceptionListItemSchema,
schemaVersion: string
): TranslatedExceptionListItem[] {
const entrySet = new Set();
Copy link
Contributor

@madirey madirey Jul 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would be cleaner using reduce with an accumulator that contains both the set and the filtered list... and then just return the list at the end. Or you could use filter, but using reduce could avoid the external state.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to use filter but avoided it because I thought it would just make things worse since I have to add to the set as well after checking if it exists.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think filter or reduce would be preferred over forEach here. My guess is that reduce will be the best fit, but again, I'm still learning.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @madirey here. Using reduce as she describes would be cleaner. However, I think it would be better if we pulled out the super duper deduping logic out of this function and into a separate function. That way we could test that logic independently. translateToEndpointExceptions and translateItem should do only that, translation. Deduping might be better handled elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if I agree, pulling this out would still force us to keep some sort of state within the translation functions, and just move the Set methods elsewhere. Not much use in testing that separately I think.

I'll look into filter or reduce

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We talked and now we like this. 💯

const entriesFiltered: TranslatedExceptionListItem[] = [];
if (schemaVersion === 'v1') {
return exc.data.map((item) => {
return translateItem(schemaVersion, item);
exc.data.forEach((entry) => {
const translatedItem = translateItem(schemaVersion, entry);
const entryHash = createHash('sha256').update(JSON.stringify(translatedItem)).digest('hex');
if (!entrySet.has(entryHash)) {
entriesFiltered.push(translatedItem);
entrySet.add(entryHash);
}
});
return entriesFiltered;
} else {
throw new Error('unsupported schemaVersion');
}
Expand All @@ -124,12 +132,17 @@ function translateItem(
schemaVersion: string,
item: ExceptionListItemSchema
): TranslatedExceptionListItem {
const itemSet = new Set();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again you could use a custom accumulator here to avoid the global state... not sure how much of a problem it is, maybe get another opinion on it. Looks like it will work though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not too well versed on this. Does this provide us with benefits other than syntactic sugar/style?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alexk307 I'm not the best authority on javascript, but my understanding is that it's best practice to avoid mutable state and mixed scope. This might not be so bad because itemSet is a const, but it's introducing a block scope variable that's (probably unnecessarily?) external to the scope of the reduce function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this too.

return {
type: item.type,
entries: item.entries.reduce((translatedEntries: TranslatedEntry[], entry) => {
const translatedEntry = translateEntry(schemaVersion, entry);
if (translatedEntry !== undefined && translatedEntryType.is(translatedEntry)) {
translatedEntries.push(translatedEntry);
const itemHash = createHash('sha256').update(JSON.stringify(translatedEntry)).digest('hex');
if (!itemSet.has(itemHash)) {
translatedEntries.push(translatedEntry);
itemSet.add(itemHash);
}
}
return translatedEntries;
}, []),
Expand Down