Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion modules/server/.env.schema
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS=false
CONFIG_PATH=./configs
DATA_MASK_THRESHOLD=
DEBUG=false
DOCUMENT_TYPE=''
DOWNLOAD_STREAM_BUFFER_SIZE=2000
ENABLE_DOCUMENT_HITS=true
ENABLE_LOGS=false
ES_HOST=http://localhost:9200
ES_INDEX=''
Expand All @@ -15,4 +17,3 @@ MAX_RESULTS_WINDOW=10000
PING_MS=2200
PING_PATH=/ping
PORT=5050
ENABLE_DOCUMENT_HITS=true
1 change: 1 addition & 0 deletions modules/server/src/config/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export const ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS = stringToBool(
process.env.ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS,
);
export const CONFIG_FILES_PATH = process.env.CONFIG_PATH || './configs';
export const DATA_MASK_THRESHOLD = process.env.DATA_MASK_THRESHOLD || Number.MAX_SAFE_INTEGER;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean that if thresholding is enabled but no limit is set we default to everything being masked since we are using max int?

export const DEBUG_MODE = stringToBool(process.env.DEBUG);
export const DOCUMENT_TYPE = process.env.DOCUMENT_TYPE || '';
export const DOWNLOAD_STREAM_BUFFER_SIZE =
Expand Down
63 changes: 30 additions & 33 deletions modules/server/src/mapping/createConnectionResolvers.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import { IResolvers } from '@graphql-tools/utils';

import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types';
import { GetServerSideFilterFn } from '@/utils/getDefaultServerSideFilter';

import resolveAggregations from './resolveAggregations';
import resolveHits from './resolveHits';
import { createResolvers } from './resolvers';

// TODO: tighten these types
type CreateConnectionResolversArgs = {
export type CreateConnectionResolversArgs = {
createStateResolvers?: boolean;
enableAdmin: boolean;
enableDocumentHits: boolean;
dataMaskThreshold: number;
getServerSideFilter?: GetServerSideFilterFn;
Parallel: any;
type: Record<string, any>;
Expand All @@ -21,38 +20,36 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({
createStateResolvers = true,
enableAdmin,
enableDocumentHits,
dataMaskThreshold,
getServerSideFilter,
Parallel,
type,
}) => ({
[type.name]: {
aggregations: resolveAggregations({ type, getServerSideFilter }),
configs: async (parentObj, { fieldNames }: { fieldNames: string[] }) => {
return {
downloads: type.config?.[ConfigProperties.DOWNLOADS],
extended: fieldNames
? type.extendedFields.filter((extendedField: ExtendedConfigsInterface) =>
fieldNames.includes(extendedField.fieldName),
)
: type.extendedFields,
...(createStateResolvers && {
facets: type.config?.[ConfigProperties.FACETS],
matchbox: type.config?.[ConfigProperties.MATCHBOX],
table: type.config?.[ConfigProperties.TABLE],
}),
};
}) => {
const { aggregations, hits, configs } = createResolvers({
createStateResolvers,
type,
Parallel,
getServerSideFilter,
dataMaskThreshold,
enableDocumentHits,
});

return {
[type.name]: {
aggregations,
configs,
hits,
// keeping this available for backwards compatibility, but hoping to remove it
// TODO: investigate its current usage and need. remove otherwise
// Update 2023-02: ENABLE_ADMIN prevents error comes up on facets.
// `aggregation` vs numericAggregation` cannot be assessed, requires "mapping".
...(enableAdmin && {
mapping: async () => {
return type.mapping;
},
}),
},
...(enableDocumentHits && { hits: resolveHits({ type, Parallel, getServerSideFilter }) }),
// keeping this available for backwards compatibility, but hoping to remove it
// TODO: investigate its current usage and need. remove otherwise
// Update 2023-02: ENABLE_ADMIN prevents error comes up on facets.
// `aggregation` vs numericAggregation` cannot be assessed, requires "mapping".
...(enableAdmin && {
mapping: async () => {
return type.mapping;
},
}),
},
});
};
};

export default createConnectionResolvers;
43 changes: 19 additions & 24 deletions modules/server/src/mapping/createConnectionTypeDefs.js
Original file line number Diff line number Diff line change
@@ -1,51 +1,46 @@
import mappingToAggsType from './mappingToAggsType';

const generateHitsTypeString = (name, fieldsToExclude) => {
if (fieldsToExclude.includes('hits')) {
return '';
}
export default ({ type, fields = '', createStateTypeDefs = true, showRecords }) => {
const dataMaskingType = !showRecords ? 'type DataMasking { thresholdValue: Int }' : '';

return `
hits(
score: String
offset: Int
sort: [Sort]
filters: JSON
before: String
after: String
first: Int
last: Int
searchAfter: JSON
trackTotalHits: Boolean = true
): ${name}Connection`;
};

export default ({ type, fields = '', createStateTypeDefs = true, fieldsToExclude }) => {
return `
type ${type.name} {
aggregations(
filters: JSON

include_missing: Boolean
# Should term aggregations be affected by queries that contain filters on their field. For example if a query is filtering primary_site by Blood should the term aggregation on primary_site return all values or just Blood. Set to False for UIs that allow users to select multiple values of an aggregation.
aggregations_filter_themselves: Boolean
): ${type.name}Aggregations

${!showRecords ? 'dataMasking: DataMasking' : ''}

configs: ${createStateTypeDefs ? 'ConfigsWithState' : 'ConfigsWithoutState'}

${generateHitsTypeString(type.name, fieldsToExclude)}
hits(
score: String
offset: Int
sort: [Sort]
filters: JSON
before: String
after: String
first: Int
last: Int
searchAfter: JSON
trackTotalHits: Boolean = true
): ${type.name}Connection


mapping: JSON
}

type ${type.name}Aggregations {
${mappingToAggsType(type.mapping)}
}

${dataMaskingType}

type ${type.name}Connection {
total: Int!
edges: [${type.name}Edge]
${showRecords ? `edges: [${type.name}Edge]` : ''}
}

type ${type.name}Edge {
Expand Down
4 changes: 2 additions & 2 deletions modules/server/src/mapping/mappingToFields.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import mappingToObjectTypes from './mappingToObjectTypes';
import mappingToScalarFields from './mappingToScalarFields';

const mappingToFields = ({ enableDocumentHits, type, parent }) => {
const fieldsToExclude = enableDocumentHits ? [] : ['hits'];
const showRecords = enableDocumentHits;
return [
mappingToObjectTypes(type.name, type.mapping, parent, type.extendedFields),
Object.entries(type.mapping)
Expand All @@ -29,7 +29,7 @@ const mappingToFields = ({ enableDocumentHits, type, parent }) => {
type.customFields,
],
createStateTypeDefs: 'createState' in type ? type.createState : true,
fieldsToExclude,
showRecords,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

given the meaningful ternaries within createStateTypeDefs relying on negatives, it may be good to keep the same original name here (enableDocumentHits) to facilitate keeping the larger mental context. i.e. changing the name to this synonym doesn't add clarity while it does add complexity, now having to keep in mind "this means that".

...or better yet, make this hideHits if you seek to add clarity through simplicity, with a bonus of "not asking negatives" within that other scope.

}),
].join();
};
Expand Down
95 changes: 95 additions & 0 deletions modules/server/src/mapping/masking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { Aggregation } from './types';

/**
* This returns a total count that is less than or equal to the actual total hits in the query.
* It is calculated by adding +1 for values under threshold and bucket.doc_count
* for values greater than or equal to
*
* @param aggregation an aggregation with the most buckets which has data masking applied
* @returns hits total value
*/
const calculateHitsFromAggregation = ({
aggregation,
}: {
aggregation: Aggregation | undefined;
}) => {
if (!aggregation) {
console.error('No aggregation found for calculating hits.');
return 0;
}
return aggregation.buckets.reduce(
(totalAcc, bucket) => (bucket.belowThreshold ? totalAcc + 1 : totalAcc + bucket.doc_count),
0,
);
};

/**
*
* 1) Iterate through aggs applying data masking to buckets if applicable
* 2) Find the agg with the most bucket count and data masking applied to be used in calculating hits.total
*
* @param aggregations - aggregations from query
* @param thresholdMin - threshold value
* @returns aggregations with data masking applied and hits total
*/
export const applyAggregationMasking = ({
aggregations,
thresholdMin,
}: {
aggregations: Record<
string,
{
bucket_count: number;
buckets: Array<{
doc_count: number;
key: string;
}>;
}
>;
thresholdMin: number;
}) => {
Comment on lines +38 to +50
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very helpful to see the return type declaration. This one is quite large though and may benefit from using some type aliases. No immediate suggestions from me, but the content of aggregations is probably used in multiple places and a good candidate.

Somethign like (naming only very quickly considered):

type AggregationData = {
    bucket_count: number;
    buckets: Array<{
    doc_count: number;
    key: string;
  }>;
}

const applyAggregationMasking(...): { aggregations: Record<string, AggregationData>, thresholdMin: number } => {...}

// set data masked properties to one less than the configured threshold value (under threshold)
const THRESHOLD_REPLACEMENT_VALUE = thresholdMin - 1;

const { aggsTotal: dataMaskedAggregations, totalHitsAgg } = Object.entries(aggregations).reduce<{
aggsTotal: Record<string, Aggregation>;
totalHitsAgg: { key: string; bucketCount: number };
}>(
({ aggsTotal, totalHitsAgg }, [type, aggregation]) => {
// mask buckets if under threshold
const dataMaskedBuckets = aggregation.buckets.map((bucket) =>
bucket.doc_count < thresholdMin
? { ...bucket, doc_count: THRESHOLD_REPLACEMENT_VALUE, belowThreshold: true }
: { ...bucket, belowThreshold: false },
);

// update total hits selected agg if needed
const bucketIsMasked = dataMaskedBuckets.some((bucket) => bucket.belowThreshold);
const hitsAgg =
totalHitsAgg.bucketCount < aggregation.bucket_count && bucketIsMasked
? { key: type, bucketCount: aggregation.bucket_count }
: totalHitsAgg;

return {
aggsTotal: {
...aggsTotal,
[type]: {
...aggregation,
buckets: dataMaskedBuckets,
},
},
totalHitsAgg: hitsAgg,
};
},
{
aggsTotal: {},
totalHitsAgg: { key: '', bucketCount: 0 },
},
);

const hitsTotal = calculateHitsFromAggregation({
aggregation: dataMaskedAggregations[totalHitsAgg.key],
});

return { hitsTotal, dataMaskedAggregations };
};
20 changes: 12 additions & 8 deletions modules/server/src/mapping/resolveAggregations.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
import getFields from 'graphql-fields';

import { buildQuery, buildAggregations, flattenAggregations } from '../middleware';
import { buildAggregations, buildQuery, flattenAggregations } from '../middleware';

import { resolveSetsInSqon } from './hackyTemporaryEsSetResolution';
import esSearch from './utils/esSearch';
import compileFilter from './utils/compileFilter';
import esSearch from './utils/esSearch';

let toGraphqlField = (acc, [a, b]) => ({ ...acc, [a.replace(/\./g, '__')]: b });

export default ({ type, getServerSideFilter }) =>
async (
export default ({ type, getServerSideFilter }) => {
return async (
obj,
{ offset = 0, filters, aggregations_filter_themselves, include_missing = true },
{ filters, aggregations_filter_themselves, include_missing = true },
context,
info,
) => {
Expand Down Expand Up @@ -58,5 +56,11 @@ export default ({ type, getServerSideFilter }) =>
includeMissing: include_missing,
});

return Object.entries(aggregations).reduce(toGraphqlField, {});
return aggregations;
};
};

const toGraphqlField = (acc, [a, b]) => ({ ...acc, [a.replace(/\./g, '__')]: b });
export const aggregationsToGraphql = (aggregations) => {
return Object.entries(aggregations).reduce(toGraphqlField, {});
};
Loading