From 7061634baffba7b1b74957d534af4e57be67de0f Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Mon, 25 Nov 2024 07:28:00 -0500 Subject: [PATCH 01/21] add conditional prop to schema --- .../src/schema/{Aggregations.js => Aggregations.ts} | 8 +++++++- modules/server/src/schema/Root.js | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) rename modules/server/src/schema/{Aggregations.js => Aggregations.ts} (63%) diff --git a/modules/server/src/schema/Aggregations.js b/modules/server/src/schema/Aggregations.ts similarity index 63% rename from modules/server/src/schema/Aggregations.js rename to modules/server/src/schema/Aggregations.ts index dcb155ff8..10610f0ff 100644 --- a/modules/server/src/schema/Aggregations.js +++ b/modules/server/src/schema/Aggregations.ts @@ -1,4 +1,9 @@ -export let typeDefs = ` +/** + * + * @param enableDocumentHits if false, agg only mode is enabled, add to GQL schema definition + * @returns typedef string + */ +export const typeDefs = ({ enableDocumentHits }: { enableDocumentHits: boolean }) => ` type Stats { max: Float min: Float @@ -13,6 +18,7 @@ export let typeDefs = ` key_as_string: String top_hits(_source:[String], size:Int): JSON filter_by_term(filter: JSON): JSON + ${!enableDocumentHits ? 'belowThreshold: Boolean' : ''} } type NumericAggregations { diff --git a/modules/server/src/schema/Root.js b/modules/server/src/schema/Root.js index d5e3fed77..5e1d45918 100644 --- a/modules/server/src/schema/Root.js +++ b/modules/server/src/schema/Root.js @@ -65,7 +65,7 @@ let RootTypeDefs = ({ types, rootTypes, scalarTypes }) => ` export let typeDefs = ({ enableDocumentHits, types, rootTypes, scalarTypes }) => [ RootTypeDefs({ types, rootTypes, scalarTypes }), - AggregationsTypeDefs, + AggregationsTypeDefs({ enableDocumentHits }), SetTypeDefs, SortTypeDefs, ConfigsTypeDefs, From dab77afddacfba49e42665c61f7fdf406043131a Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Mon, 25 Nov 2024 11:29:23 -0500 Subject: [PATCH 02/21] conditional schema field --- .../src/mapping/createConnectionTypeDefs.js | 38 +++++++------------ modules/server/src/mapping/mappingToFields.js | 4 +- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/modules/server/src/mapping/createConnectionTypeDefs.js b/modules/server/src/mapping/createConnectionTypeDefs.js index 5b892ffce..7f8492fbe 100644 --- a/modules/server/src/mapping/createConnectionTypeDefs.js +++ b/modules/server/src/mapping/createConnectionTypeDefs.js @@ -1,26 +1,6 @@ import mappingToAggsType from './mappingToAggsType'; -const generateHitsTypeString = (name, fieldsToExclude) => { - if (fieldsToExclude.includes('hits')) { - return ''; - } - - return ` - hits( - score: String - offset: Int - sort: [Sort] - filters: JSON - before: String - after: String - first: Int - last: Int - searchAfter: JSON - trackTotalHits: Boolean = true - ): ${name}Connection`; -}; - -export default ({ type, fields = '', createStateTypeDefs = true, fieldsToExclude }) => { +export default ({ type, fields = '', createStateTypeDefs = true, showRecords }) => { return ` type ${type.name} { aggregations( @@ -33,9 +13,19 @@ export default ({ type, fields = '', createStateTypeDefs = true, fieldsToExclude configs: ${createStateTypeDefs ? 'ConfigsWithState' : 'ConfigsWithoutState'} - ${generateHitsTypeString(type.name, fieldsToExclude)} + hits( + score: String + offset: Int + sort: [Sort] + filters: JSON + before: String + after: String + first: Int + last: Int + searchAfter: JSON + trackTotalHits: Boolean = true + ): ${name}Connection - mapping: JSON } @@ -45,7 +35,7 @@ export default ({ type, fields = '', createStateTypeDefs = true, fieldsToExclude type ${type.name}Connection { total: Int! - edges: [${type.name}Edge] + ${showRecords ? `edges: [${type.name}Edge]` : ''} } type ${type.name}Edge { diff --git a/modules/server/src/mapping/mappingToFields.js b/modules/server/src/mapping/mappingToFields.js index eadd87022..d2899d6ed 100644 --- a/modules/server/src/mapping/mappingToFields.js +++ b/modules/server/src/mapping/mappingToFields.js @@ -6,7 +6,7 @@ import mappingToObjectTypes from './mappingToObjectTypes'; import mappingToScalarFields from './mappingToScalarFields'; const mappingToFields = ({ enableDocumentHits, type, parent }) => { - const fieldsToExclude = enableDocumentHits ? [] : ['hits']; + const showRecords = enableDocumentHits; return [ mappingToObjectTypes(type.name, type.mapping, parent, type.extendedFields), Object.entries(type.mapping) @@ -29,7 +29,7 @@ const mappingToFields = ({ enableDocumentHits, type, parent }) => { type.customFields, ], createStateTypeDefs: 'createState' in type ? type.createState : true, - fieldsToExclude, + showRecords, }), ].join(); }; From 8f7f1296c6cb0db1b8e938115ff01ae8d041ab81 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Mon, 25 Nov 2024 12:43:50 -0500 Subject: [PATCH 03/21] conditional add dataMasking schema fields --- modules/server/src/mapping/createConnectionTypeDefs.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/server/src/mapping/createConnectionTypeDefs.js b/modules/server/src/mapping/createConnectionTypeDefs.js index 7f8492fbe..b7c2f320c 100644 --- a/modules/server/src/mapping/createConnectionTypeDefs.js +++ b/modules/server/src/mapping/createConnectionTypeDefs.js @@ -1,16 +1,19 @@ import mappingToAggsType from './mappingToAggsType'; export default ({ type, fields = '', createStateTypeDefs = true, showRecords }) => { + const dataMaskingType = !showRecords ? 'type DataMasking { thresholdValue: Int }' : ''; + return ` type ${type.name} { aggregations( filters: JSON - include_missing: Boolean # Should term aggregations be affected by queries that contain filters on their field. For example if a query is filtering primary_site by Blood should the term aggregation on primary_site return all values or just Blood. Set to False for UIs that allow users to select multiple values of an aggregation. aggregations_filter_themselves: Boolean ): ${type.name}Aggregations + ${!showRecords ? 'dataMasking: DataMasking' : ''} + configs: ${createStateTypeDefs ? 'ConfigsWithState' : 'ConfigsWithoutState'} hits( @@ -24,7 +27,7 @@ export default ({ type, fields = '', createStateTypeDefs = true, showRecords }) last: Int searchAfter: JSON trackTotalHits: Boolean = true - ): ${name}Connection + ): ${type.name}Connection mapping: JSON } @@ -33,6 +36,8 @@ export default ({ type, fields = '', createStateTypeDefs = true, showRecords }) ${mappingToAggsType(type.mapping)} } + ${dataMaskingType} + type ${type.name}Connection { total: Int! ${showRecords ? `edges: [${type.name}Edge]` : ''} From 7af8cffd38d412164cbe681bb403ad647b0baf6e Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Mon, 25 Nov 2024 12:44:14 -0500 Subject: [PATCH 04/21] add data masking logic --- modules/server/src/mapping/masking.ts | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 modules/server/src/mapping/masking.ts diff --git a/modules/server/src/mapping/masking.ts b/modules/server/src/mapping/masking.ts new file mode 100644 index 000000000..927963500 --- /dev/null +++ b/modules/server/src/mapping/masking.ts @@ -0,0 +1,40 @@ +/** + * + * @param param0 + */ +export const applyAggregationMasking = ({ + aggregations, + thresholdMin, +}: { + aggregations: Record< + string, + { + bucket_count: number; + buckets: Array<{ + doc_count: number; + key: string; + }>; + } + >; + thresholdMin: number; +}) => { + const x = Object.entries(aggregations).reduce((acc, [aggName, aggValue]) => { + const buckets = aggValue.buckets; + const isApplyingThreshold = buckets.some((bucket) => bucket.doc_count < thresholdMin); + if (isApplyingThreshold) { + const modifiedAggValue = { + ...aggValue, + buckets: buckets.map((bucket) => ({ + ...bucket, + doc_count: thresholdMin - 1, + belowThreshold: true, + })), + }; + return { ...acc, [aggName]: modifiedAggValue }; + } + return { ...acc, [aggName]: aggValue }; + }, {}); + + console.log(JSON.stringify(x)); + return x; +}; From 5c00113ab6fc5f93b0fc06a638129befde7ac0f8 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Mon, 25 Nov 2024 15:11:39 -0500 Subject: [PATCH 05/21] add agg only conditional to resolvers --- .../src/mapping/createConnectionResolvers.ts | 87 ++++++++++++------- .../server/src/mapping/resolveAggregations.js | 30 +++++-- 2 files changed, 80 insertions(+), 37 deletions(-) diff --git a/modules/server/src/mapping/createConnectionResolvers.ts b/modules/server/src/mapping/createConnectionResolvers.ts index 8f8fbd482..5d0704e13 100644 --- a/modules/server/src/mapping/createConnectionResolvers.ts +++ b/modules/server/src/mapping/createConnectionResolvers.ts @@ -3,7 +3,8 @@ import { IResolvers } from '@graphql-tools/utils'; import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types'; import { GetServerSideFilterFn } from '@/utils/getDefaultServerSideFilter'; -import resolveAggregations from './resolveAggregations'; +import { parseResolveInfo } from 'graphql-parse-resolve-info'; +import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations'; import resolveHits from './resolveHits'; // TODO: tighten these types @@ -24,35 +25,61 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ getServerSideFilter, Parallel, type, -}) => ({ - [type.name]: { - aggregations: resolveAggregations({ type, getServerSideFilter }), - configs: async (parentObj, { fieldNames }: { fieldNames: string[] }) => { - return { - downloads: type.config?.[ConfigProperties.DOWNLOADS], - extended: fieldNames - ? type.extendedFields.filter((extendedField: ExtendedConfigsInterface) => - fieldNames.includes(extendedField.fieldName), - ) - : type.extendedFields, - ...(createStateResolvers && { - facets: type.config?.[ConfigProperties.FACETS], - matchbox: type.config?.[ConfigProperties.MATCHBOX], - table: type.config?.[ConfigProperties.TABLE], - }), - }; +}) => { + const configs = async (parentObj, { fieldNames }: { fieldNames: string[] }) => { + return { + downloads: type.config?.[ConfigProperties.DOWNLOADS], + extended: fieldNames + ? type.extendedFields.filter((extendedField: ExtendedConfigsInterface) => + fieldNames.includes(extendedField.fieldName), + ) + : type.extendedFields, + ...(createStateResolvers && { + facets: type.config?.[ConfigProperties.FACETS], + matchbox: type.config?.[ConfigProperties.MATCHBOX], + table: type.config?.[ConfigProperties.TABLE], + }), + }; + }; + + const aggregationsQuery = resolveAggregations({ type, getServerSideFilter }); + const aggregationsResolver = (obj, args, context, info) => { + const aggs = aggregationsQuery(obj, args, context, info); + console.log('queried', JSON.stringify(aggs)); + return aggregationsToGraphql(aggs); + }; + + const defaultHitsResolver = resolveHits({ type, Parallel, getServerSideFilter }); + const hitsResolver = enableDocumentHits + ? defaultHitsResolver + : async (obj, args, context, info) => { + console.log('alt hits'); + const parsedResolveInfo = parseResolveInfo(info); + console.log('parsed', JSON.stringify(parsedResolveInfo)); + + // IF query is querying aggregations + // calculate hits based on data masked values + // otherwise return 0 + + return { total: 0 }; + }; + + return { + [type.name]: { + aggregations: aggregationsResolver, + configs, + hits: hitsResolver, + // keeping this available for backwards compatibility, but hoping to remove it + // TODO: investigate its current usage and need. remove otherwise + // Update 2023-02: ENABLE_ADMIN prevents error comes up on facets. + // `aggregation` vs numericAggregation` cannot be assessed, requires "mapping". + ...(enableAdmin && { + mapping: async () => { + return type.mapping; + }, + }), }, - ...(enableDocumentHits && { hits: resolveHits({ type, Parallel, getServerSideFilter }) }), - // keeping this available for backwards compatibility, but hoping to remove it - // TODO: investigate its current usage and need. remove otherwise - // Update 2023-02: ENABLE_ADMIN prevents error comes up on facets. - // `aggregation` vs numericAggregation` cannot be assessed, requires "mapping". - ...(enableAdmin && { - mapping: async () => { - return type.mapping; - }, - }), - }, -}); + }; +}; export default createConnectionResolvers; diff --git a/modules/server/src/mapping/resolveAggregations.js b/modules/server/src/mapping/resolveAggregations.js index ecb058e32..618aa4e0f 100644 --- a/modules/server/src/mapping/resolveAggregations.js +++ b/modules/server/src/mapping/resolveAggregations.js @@ -1,15 +1,14 @@ import getFields from 'graphql-fields'; -import { buildQuery, buildAggregations, flattenAggregations } from '../middleware'; +import { buildAggregations, buildQuery, flattenAggregations } from '../middleware'; import { resolveSetsInSqon } from './hackyTemporaryEsSetResolution'; -import esSearch from './utils/esSearch'; +import { applyAggregationMasking } from './masking'; import compileFilter from './utils/compileFilter'; +import esSearch from './utils/esSearch'; -let toGraphqlField = (acc, [a, b]) => ({ ...acc, [a.replace(/\./g, '__')]: b }); - -export default ({ type, getServerSideFilter }) => - async ( +export default ({ type, getServerSideFilter }) => { + return async ( obj, { offset = 0, filters, aggregations_filter_themselves, include_missing = true }, context, @@ -58,5 +57,22 @@ export default ({ type, getServerSideFilter }) => includeMissing: include_missing, }); - return Object.entries(aggregations).reduce(toGraphqlField, {}); + /* + * Apply thresholding + */ + // TODO: check if buckets are even requested + console.log('aggregations', JSON.stringify(aggregations)); + // TODO: env var this value + const thresholdMin = 200; + + const result = applyAggregationMasking({ aggregations, thresholdMin }); + + console.log('thres result', result); + return result; }; +}; + +const toGraphqlField = (acc, [a, b]) => ({ ...acc, [a.replace(/\./g, '__')]: b }); +export const aggregationsToGraphql = (aggregations) => { + return Object.entries(aggregations).reduce(toGraphqlField, {}); +}; From a3ebbe49c991ba9b96ef3547b92231f8254e09ab Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Mon, 25 Nov 2024 15:16:18 -0500 Subject: [PATCH 06/21] clarify todo --- modules/server/src/mapping/createConnectionResolvers.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/server/src/mapping/createConnectionResolvers.ts b/modules/server/src/mapping/createConnectionResolvers.ts index 5d0704e13..c4a458292 100644 --- a/modules/server/src/mapping/createConnectionResolvers.ts +++ b/modules/server/src/mapping/createConnectionResolvers.ts @@ -57,6 +57,7 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ const parsedResolveInfo = parseResolveInfo(info); console.log('parsed', JSON.stringify(parsedResolveInfo)); + // TODO: // IF query is querying aggregations // calculate hits based on data masked values // otherwise return 0 From 7bbcb4b05944bc4fdf8fe6650089e4e6e35fa3c4 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Tue, 26 Nov 2024 13:35:33 -0500 Subject: [PATCH 07/21] get optional aggregations field and values for hits resolution --- .../src/mapping/createConnectionResolvers.ts | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/modules/server/src/mapping/createConnectionResolvers.ts b/modules/server/src/mapping/createConnectionResolvers.ts index c4a458292..44c4d56a1 100644 --- a/modules/server/src/mapping/createConnectionResolvers.ts +++ b/modules/server/src/mapping/createConnectionResolvers.ts @@ -4,6 +4,7 @@ import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types'; import { GetServerSideFilterFn } from '@/utils/getDefaultServerSideFilter'; import { parseResolveInfo } from 'graphql-parse-resolve-info'; +import { calculateHitsFromAggregations } from './masking'; import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations'; import resolveHits from './resolveHits'; @@ -42,10 +43,11 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ }; }; + // TODO: memoise instead of context + // just same request really - maybe JSON.stringify const aggregationsQuery = resolveAggregations({ type, getServerSideFilter }); - const aggregationsResolver = (obj, args, context, info) => { - const aggs = aggregationsQuery(obj, args, context, info); - console.log('queried', JSON.stringify(aggs)); + const aggregationsResolver = async (obj, args, context, info) => { + const aggs = await aggregationsQuery(obj, args, context, info); return aggregationsToGraphql(aggs); }; @@ -53,16 +55,26 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ const hitsResolver = enableDocumentHits ? defaultHitsResolver : async (obj, args, context, info) => { - console.log('alt hits'); - const parsedResolveInfo = parseResolveInfo(info); - console.log('parsed', JSON.stringify(parsedResolveInfo)); + /* + * Checks for aggregations field in full query and retrieves args + * Popular parsing `info` libs do not include these operations properties + */ + const typeNameConnectionProperty = info.operation.selectionSet.selections[0]; + const isAggregationsQueried = typeNameConnectionProperty.selectionSet.selections.some( + (selection) => selection.name.value === 'aggregations', + ); - // TODO: - // IF query is querying aggregations - // calculate hits based on data masked values - // otherwise return 0 - - return { total: 0 }; + /* + * Calculate "hits" based on aggregations otherwise return 0 + */ + if (isAggregationsQueried) { + // other args are ok to pass through as they share context and parent field + const aggregations = await aggregationsQuery(obj, ...info.variableValues, context, info); + const total = calculateHitsFromAggregations({ aggregations }); + return { total }; + } else { + return { total: 0 }; + } }; return { From 426c46e6e77b7a240f099ecf9628c328534a0404 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Tue, 26 Nov 2024 13:41:18 -0500 Subject: [PATCH 08/21] add mask func stub --- modules/server/src/mapping/masking.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/modules/server/src/mapping/masking.ts b/modules/server/src/mapping/masking.ts index 927963500..aaa663bf2 100644 --- a/modules/server/src/mapping/masking.ts +++ b/modules/server/src/mapping/masking.ts @@ -38,3 +38,10 @@ export const applyAggregationMasking = ({ console.log(JSON.stringify(x)); return x; }; + +export const calculateHitsFromAggregations = ({ aggregations }) => { + console.log('calc hits', aggregations); + // iterate over aggregations and buckets + // calc buckets based on value for properties over threshold and +1 for values under threshold + // nb: if aggregation has all buckets over threshold, that will be accurate total hits value +}; From ce4f9f6b5d0277ce1e006e694d3fd8d9d617b177 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Tue, 26 Nov 2024 13:49:32 -0500 Subject: [PATCH 09/21] fix missing prop from spread --- modules/server/src/mapping/createConnectionResolvers.ts | 2 +- modules/server/src/mapping/masking.ts | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/server/src/mapping/createConnectionResolvers.ts b/modules/server/src/mapping/createConnectionResolvers.ts index 44c4d56a1..7124528a7 100644 --- a/modules/server/src/mapping/createConnectionResolvers.ts +++ b/modules/server/src/mapping/createConnectionResolvers.ts @@ -69,7 +69,7 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ */ if (isAggregationsQueried) { // other args are ok to pass through as they share context and parent field - const aggregations = await aggregationsQuery(obj, ...info.variableValues, context, info); + const aggregations = await aggregationsQuery(obj, info.variableValues, context, info); const total = calculateHitsFromAggregations({ aggregations }); return { total }; } else { diff --git a/modules/server/src/mapping/masking.ts b/modules/server/src/mapping/masking.ts index aaa663bf2..b05d3cfed 100644 --- a/modules/server/src/mapping/masking.ts +++ b/modules/server/src/mapping/masking.ts @@ -44,4 +44,5 @@ export const calculateHitsFromAggregations = ({ aggregations }) => { // iterate over aggregations and buckets // calc buckets based on value for properties over threshold and +1 for values under threshold // nb: if aggregation has all buckets over threshold, that will be accurate total hits value + return -999; }; From 78bc6230db6fbe3b1d7002632d07155acfa47296 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Tue, 3 Dec 2024 16:39:57 -0500 Subject: [PATCH 10/21] masking logic --- modules/server/src/mapping/masking.ts | 97 ++++++++++++++++++++------- 1 file changed, 72 insertions(+), 25 deletions(-) diff --git a/modules/server/src/mapping/masking.ts b/modules/server/src/mapping/masking.ts index b05d3cfed..605d954aa 100644 --- a/modules/server/src/mapping/masking.ts +++ b/modules/server/src/mapping/masking.ts @@ -1,6 +1,37 @@ +type Bucket = { + doc_count: number; + key: string; + belowThreshold: boolean; +}; + +type Aggregation = { + bucket_count: number; + buckets: Bucket[]; +}; + /** + * This returns a total count that is less than or equal to the actual total hits in the query. + * It is calculated by adding +1 for values under threshold and bucket.doc_count + * for values greater than or equal to * - * @param param0 + * @param aggregation an aggregation with the most buckets which has data masking applied + * @returns hits total value + */ +const calculateHitsFromAggregation = ({ aggregation }: { aggregation: Aggregation }) => { + return aggregation.buckets.reduce( + (totalAcc, bucket) => (bucket.belowThreshold ? totalAcc + 1 : totalAcc + bucket.doc_count), + 0, + ); +}; + +/** + * + * 1) Iterate through aggs applying data masking to buckets if applicable + * 2) Find the agg with the most bucket count and data masking applied to be used in calculating hits.total + * + * @param aggregations - aggregations from query + * @param thresholdMin - threshold value + * @returns aggregations with data masking applied and hits total */ export const applyAggregationMasking = ({ aggregations, @@ -18,31 +49,47 @@ export const applyAggregationMasking = ({ >; thresholdMin: number; }) => { - const x = Object.entries(aggregations).reduce((acc, [aggName, aggValue]) => { - const buckets = aggValue.buckets; - const isApplyingThreshold = buckets.some((bucket) => bucket.doc_count < thresholdMin); - if (isApplyingThreshold) { - const modifiedAggValue = { - ...aggValue, - buckets: buckets.map((bucket) => ({ - ...bucket, - doc_count: thresholdMin - 1, - belowThreshold: true, - })), + const THRESHOLD_REPLACEMENT_VALUE = thresholdMin - 1; + + const { aggsTotal: dataMaskedAggregations, totalHitsAgg } = Object.entries(aggregations).reduce( + ({ aggsTotal, totalHitsAgg }, [type, aggregation]) => { + + // mask buckets if under threshold + const dataMaskedBuckets = aggregation.buckets.map((bucket) => + bucket.doc_count < thresholdMin + ? { ...bucket, doc_count: THRESHOLD_REPLACEMENT_VALUE, belowThreshold: true } + : bucket, + ); + + // update total hits agg if needed + const bucketIsMasked = dataMaskedBuckets.some((bucket) => + Object.hasOwn(bucket, 'belowThreshold'), + ); + const hitsAgg = + totalHitsAgg.bucketCount < aggregation.bucket_count && bucketIsMasked + ? { key: type, bucketCount: aggregation.bucket_count } + : totalHitsAgg; + + return { + aggsTotal: { + ...aggsTotal, + [type]: { + ...aggregation, + buckets: dataMaskedBuckets, + }, + }, + totalHitsAgg: hitsAgg, }; - return { ...acc, [aggName]: modifiedAggValue }; - } - return { ...acc, [aggName]: aggValue }; - }, {}); + }, + { + aggsTotal: {}, + totalHitsAgg: { key: '', bucketCount: 0 }, + }, + ); - console.log(JSON.stringify(x)); - return x; -}; + const hitsTotal = calculateHitsFromAggregation({ + aggregation: dataMaskedAggregations[totalHitsAgg.key], + }); -export const calculateHitsFromAggregations = ({ aggregations }) => { - console.log('calc hits', aggregations); - // iterate over aggregations and buckets - // calc buckets based on value for properties over threshold and +1 for values under threshold - // nb: if aggregation has all buckets over threshold, that will be accurate total hits value - return -999; + return { hitsTotal, dataMaskedAggregations }; }; From c0a04130ffbc3133e16568d270bbf4acc6270402 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Wed, 4 Dec 2024 15:50:38 -0500 Subject: [PATCH 11/21] conditional resolve hits --- .../src/mapping/createConnectionResolvers.ts | 59 +++++++++++-------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/modules/server/src/mapping/createConnectionResolvers.ts b/modules/server/src/mapping/createConnectionResolvers.ts index 7124528a7..fe3efdd73 100644 --- a/modules/server/src/mapping/createConnectionResolvers.ts +++ b/modules/server/src/mapping/createConnectionResolvers.ts @@ -3,8 +3,7 @@ import { IResolvers } from '@graphql-tools/utils'; import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types'; import { GetServerSideFilterFn } from '@/utils/getDefaultServerSideFilter'; -import { parseResolveInfo } from 'graphql-parse-resolve-info'; -import { calculateHitsFromAggregations } from './masking'; +import { applyAggregationMasking } from './masking'; import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations'; import resolveHits from './resolveHits'; @@ -43,39 +42,17 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ }; }; - // TODO: memoise instead of context - // just same request really - maybe JSON.stringify const aggregationsQuery = resolveAggregations({ type, getServerSideFilter }); const aggregationsResolver = async (obj, args, context, info) => { const aggs = await aggregationsQuery(obj, args, context, info); return aggregationsToGraphql(aggs); }; + // hits resolver doesnt have access to aggregations field const defaultHitsResolver = resolveHits({ type, Parallel, getServerSideFilter }); const hitsResolver = enableDocumentHits ? defaultHitsResolver - : async (obj, args, context, info) => { - /* - * Checks for aggregations field in full query and retrieves args - * Popular parsing `info` libs do not include these operations properties - */ - const typeNameConnectionProperty = info.operation.selectionSet.selections[0]; - const isAggregationsQueried = typeNameConnectionProperty.selectionSet.selections.some( - (selection) => selection.name.value === 'aggregations', - ); - - /* - * Calculate "hits" based on aggregations otherwise return 0 - */ - if (isAggregationsQueried) { - // other args are ok to pass through as they share context and parent field - const aggregations = await aggregationsQuery(obj, info.variableValues, context, info); - const total = calculateHitsFromAggregations({ aggregations }); - return { total }; - } else { - return { total: 0 }; - } - }; + : resolveHitsFromAggs(aggregationsQuery); return { [type.name]: { @@ -95,4 +72,34 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ }; }; +/** + * Resolve hits from aggregations + * If "aggregations" field is not in query, return 0 + * + * @param aggregationsQuery - resolver ES query code for aggregations + * @returns Returns a total count that is less than or equal to the actual total hits in the query. + */ +const resolveHitsFromAggs = (aggregationsQuery) => async (obj, args, context, info) => { + /* + * Get "aggregations" field from full query if found + * Popular gql parsing libs parse the "info" property which may not include full query based on schema + */ + const fileNameConnectionProperty = info.operation.selectionSet.selections[0]; + const aggregationsSelectionSet = fileNameConnectionProperty.selectionSet.selections.find( + (selection) => selection.name.value === 'aggregations', + ); + + if (aggregationsSelectionSet) { + const modifiedInfo = { ...info, fieldNodes: [aggregationsSelectionSet] }; + const aggregations = await aggregationsQuery(obj, info.variableValues, context, modifiedInfo); + const { hitsTotal: total } = applyAggregationMasking({ + aggregations, + thresholdMin: 200, + }); + return { total }; + } else { + return { total: 0 }; + } +}; + export default createConnectionResolvers; From 8790d77604dbd4f0ae5cf362738877c7448cd21b Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Wed, 4 Dec 2024 15:51:06 -0500 Subject: [PATCH 12/21] move thresholding higher on resolver chain --- modules/server/src/mapping/resolveAggregations.js | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/modules/server/src/mapping/resolveAggregations.js b/modules/server/src/mapping/resolveAggregations.js index 618aa4e0f..6c66cee69 100644 --- a/modules/server/src/mapping/resolveAggregations.js +++ b/modules/server/src/mapping/resolveAggregations.js @@ -57,18 +57,7 @@ export default ({ type, getServerSideFilter }) => { includeMissing: include_missing, }); - /* - * Apply thresholding - */ - // TODO: check if buckets are even requested - console.log('aggregations', JSON.stringify(aggregations)); - // TODO: env var this value - const thresholdMin = 200; - - const result = applyAggregationMasking({ aggregations, thresholdMin }); - - console.log('thres result', result); - return result; + return aggregations; }; }; From edec98ff0399e682d5ea002ca493b19b6d0d5966 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Wed, 4 Dec 2024 16:16:02 -0500 Subject: [PATCH 13/21] add falsey belowThreshold value instead of null --- modules/server/src/mapping/masking.ts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/server/src/mapping/masking.ts b/modules/server/src/mapping/masking.ts index 605d954aa..e1992d8f9 100644 --- a/modules/server/src/mapping/masking.ts +++ b/modules/server/src/mapping/masking.ts @@ -53,18 +53,15 @@ export const applyAggregationMasking = ({ const { aggsTotal: dataMaskedAggregations, totalHitsAgg } = Object.entries(aggregations).reduce( ({ aggsTotal, totalHitsAgg }, [type, aggregation]) => { - // mask buckets if under threshold const dataMaskedBuckets = aggregation.buckets.map((bucket) => bucket.doc_count < thresholdMin ? { ...bucket, doc_count: THRESHOLD_REPLACEMENT_VALUE, belowThreshold: true } - : bucket, + : { ...bucket, belowThreshold: false }, ); - // update total hits agg if needed - const bucketIsMasked = dataMaskedBuckets.some((bucket) => - Object.hasOwn(bucket, 'belowThreshold'), - ); + // update total hits selected agg if needed + const bucketIsMasked = dataMaskedBuckets.some((bucket) => bucket.belowThreshold); const hitsAgg = totalHitsAgg.bucketCount < aggregation.bucket_count && bucketIsMasked ? { key: type, bucketCount: aggregation.bucket_count } From b310cc0d3c367f3e3f9de2d70c287b7102f48f96 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Wed, 4 Dec 2024 16:58:49 -0500 Subject: [PATCH 14/21] data mask threshold env var --- modules/server/.env.schema | 3 +- modules/server/src/config/constants.ts | 1 + .../src/mapping/createConnectionResolvers.ts | 48 ++++++++++--------- .../server/src/mapping/resolveAggregations.js | 1 - modules/server/src/schema/Root.js | 2 + modules/server/src/server.js | 3 ++ 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/modules/server/.env.schema b/modules/server/.env.schema index 473ceec70..e9dcd741a 100644 --- a/modules/server/.env.schema +++ b/modules/server/.env.schema @@ -1,8 +1,10 @@ ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS=false CONFIG_PATH=./configs +DATA_MASK_THRESHOLD= DEBUG=false DOCUMENT_TYPE='' DOWNLOAD_STREAM_BUFFER_SIZE=2000 +ENABLE_DOCUMENT_HITS=true ENABLE_LOGS=false ES_HOST=http://localhost:9200 ES_INDEX='' @@ -15,4 +17,3 @@ MAX_RESULTS_WINDOW=10000 PING_MS=2200 PING_PATH=/ping PORT=5050 -ENABLE_DOCUMENT_HITS=true diff --git a/modules/server/src/config/constants.ts b/modules/server/src/config/constants.ts index fe7841c61..b5bd4c927 100644 --- a/modules/server/src/config/constants.ts +++ b/modules/server/src/config/constants.ts @@ -4,6 +4,7 @@ export const ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS = stringToBool( process.env.ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS, ); export const CONFIG_FILES_PATH = process.env.CONFIG_PATH || './configs'; +export const DATA_MASK_THRESHOLD = process.env.CONFIG_PATH || Number.MAX_SAFE_INTEGER; export const DEBUG_MODE = stringToBool(process.env.DEBUG); export const DOCUMENT_TYPE = process.env.DOCUMENT_TYPE || ''; export const DOWNLOAD_STREAM_BUFFER_SIZE = diff --git a/modules/server/src/mapping/createConnectionResolvers.ts b/modules/server/src/mapping/createConnectionResolvers.ts index fe3efdd73..eaf69c97f 100644 --- a/modules/server/src/mapping/createConnectionResolvers.ts +++ b/modules/server/src/mapping/createConnectionResolvers.ts @@ -12,6 +12,7 @@ type CreateConnectionResolversArgs = { createStateResolvers?: boolean; enableAdmin: boolean; enableDocumentHits: boolean; + dataMaskThreshold: number; getServerSideFilter?: GetServerSideFilterFn; Parallel: any; type: Record; @@ -22,6 +23,7 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ createStateResolvers = true, enableAdmin, enableDocumentHits, + dataMaskThreshold, getServerSideFilter, Parallel, type, @@ -48,11 +50,10 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ return aggregationsToGraphql(aggs); }; - // hits resolver doesnt have access to aggregations field const defaultHitsResolver = resolveHits({ type, Parallel, getServerSideFilter }); const hitsResolver = enableDocumentHits ? defaultHitsResolver - : resolveHitsFromAggs(aggregationsQuery); + : resolveHitsFromAggs(aggregationsQuery, dataMaskThreshold); return { [type.name]: { @@ -79,27 +80,28 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ * @param aggregationsQuery - resolver ES query code for aggregations * @returns Returns a total count that is less than or equal to the actual total hits in the query. */ -const resolveHitsFromAggs = (aggregationsQuery) => async (obj, args, context, info) => { - /* - * Get "aggregations" field from full query if found - * Popular gql parsing libs parse the "info" property which may not include full query based on schema - */ - const fileNameConnectionProperty = info.operation.selectionSet.selections[0]; - const aggregationsSelectionSet = fileNameConnectionProperty.selectionSet.selections.find( - (selection) => selection.name.value === 'aggregations', - ); +const resolveHitsFromAggs = + (aggregationsQuery, dataMaskThreshold) => async (obj, args, context, info) => { + /* + * Get "aggregations" field from full query if found + * Popular gql parsing libs parse the "info" property which may not include full query based on schema + */ + const fileNameConnectionProperty = info.operation.selectionSet.selections[0]; + const aggregationsSelectionSet = fileNameConnectionProperty.selectionSet.selections.find( + (selection) => selection.name.value === 'aggregations', + ); - if (aggregationsSelectionSet) { - const modifiedInfo = { ...info, fieldNodes: [aggregationsSelectionSet] }; - const aggregations = await aggregationsQuery(obj, info.variableValues, context, modifiedInfo); - const { hitsTotal: total } = applyAggregationMasking({ - aggregations, - thresholdMin: 200, - }); - return { total }; - } else { - return { total: 0 }; - } -}; + if (aggregationsSelectionSet) { + const modifiedInfo = { ...info, fieldNodes: [aggregationsSelectionSet] }; + const aggregations = await aggregationsQuery(obj, info.variableValues, context, modifiedInfo); + const { hitsTotal: total } = applyAggregationMasking({ + aggregations, + thresholdMin: dataMaskThreshold, + }); + return { total }; + } else { + return { total: 0 }; + } + }; export default createConnectionResolvers; diff --git a/modules/server/src/mapping/resolveAggregations.js b/modules/server/src/mapping/resolveAggregations.js index 6c66cee69..92dfdac51 100644 --- a/modules/server/src/mapping/resolveAggregations.js +++ b/modules/server/src/mapping/resolveAggregations.js @@ -3,7 +3,6 @@ import getFields from 'graphql-fields'; import { buildAggregations, buildQuery, flattenAggregations } from '../middleware'; import { resolveSetsInSqon } from './hackyTemporaryEsSetResolution'; -import { applyAggregationMasking } from './masking'; import compileFilter from './utils/compileFilter'; import esSearch from './utils/esSearch'; diff --git a/modules/server/src/schema/Root.js b/modules/server/src/schema/Root.js index 5e1d45918..1305e7b6a 100644 --- a/modules/server/src/schema/Root.js +++ b/modules/server/src/schema/Root.js @@ -77,6 +77,7 @@ let resolveObject = () => ({}); export let resolvers = ({ enableAdmin, enableDocumentHits, + dataMaskThreshold, types, rootTypes, scalarTypes, @@ -134,6 +135,7 @@ export let resolvers = ({ createStateResolvers: 'createState' in type ? type.createState : true, enableAdmin, enableDocumentHits, + dataMaskThreshold, getServerSideFilter, Parallel, type, diff --git a/modules/server/src/server.js b/modules/server/src/server.js index 60fb7b630..bfe03f31e 100644 --- a/modules/server/src/server.js +++ b/modules/server/src/server.js @@ -18,6 +18,7 @@ const { ES_PASS, ES_LOG, //TODO: ES doesn't include a logger anymore PING_PATH, + DATA_MASK_THRESHOLD, } = ENV_CONFIG; export const buildEsClient = (esHost = '', esUser = '', esPass = '') => { @@ -50,6 +51,7 @@ export default async ({ configsSource = CONFIG_FILES_PATH, enableAdmin = ENABLE_ADMIN, enableDocumentHits = ENABLE_DOCUMENT_HITS, + dataMaskThreshold = DATA_MASK_THRESHOLD, enableLogs = ENABLE_LOGS, esClient: customEsClient = undefined, esHost = ES_HOST, @@ -87,6 +89,7 @@ export default async ({ configsSource, enableAdmin, enableDocumentHits, + dataMaskThreshold, esClient, getServerSideFilter, graphqlOptions, From d8fcb2024642a228927ffbad48b26dafefd6b4f6 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Thu, 5 Dec 2024 10:19:18 -0500 Subject: [PATCH 15/21] update wrong process.env path --- modules/server/src/config/constants.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/server/src/config/constants.ts b/modules/server/src/config/constants.ts index b5bd4c927..441476de8 100644 --- a/modules/server/src/config/constants.ts +++ b/modules/server/src/config/constants.ts @@ -4,7 +4,7 @@ export const ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS = stringToBool( process.env.ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS, ); export const CONFIG_FILES_PATH = process.env.CONFIG_PATH || './configs'; -export const DATA_MASK_THRESHOLD = process.env.CONFIG_PATH || Number.MAX_SAFE_INTEGER; +export const DATA_MASK_THRESHOLD = process.env.DATA_MASK_THRESHOLD || Number.MAX_SAFE_INTEGER; export const DEBUG_MODE = stringToBool(process.env.DEBUG); export const DOCUMENT_TYPE = process.env.DOCUMENT_TYPE || ''; export const DOWNLOAD_STREAM_BUFFER_SIZE = From 46080c121c76e45c599cd9a7ecd97cd21e48ea8e Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Thu, 5 Dec 2024 16:46:29 -0500 Subject: [PATCH 16/21] seperate resolver creation from routing --- .../src/mapping/createConnectionResolvers.ts | 74 +++--------------- modules/server/src/mapping/resolvers.ts | 77 +++++++++++++++++++ 2 files changed, 88 insertions(+), 63 deletions(-) create mode 100644 modules/server/src/mapping/resolvers.ts diff --git a/modules/server/src/mapping/createConnectionResolvers.ts b/modules/server/src/mapping/createConnectionResolvers.ts index eaf69c97f..b1f3b9fca 100644 --- a/modules/server/src/mapping/createConnectionResolvers.ts +++ b/modules/server/src/mapping/createConnectionResolvers.ts @@ -1,11 +1,8 @@ import { IResolvers } from '@graphql-tools/utils'; -import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types'; import { GetServerSideFilterFn } from '@/utils/getDefaultServerSideFilter'; -import { applyAggregationMasking } from './masking'; -import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations'; -import resolveHits from './resolveHits'; +import { createResolvers } from './resolvers'; // TODO: tighten these types type CreateConnectionResolversArgs = { @@ -28,38 +25,20 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ Parallel, type, }) => { - const configs = async (parentObj, { fieldNames }: { fieldNames: string[] }) => { - return { - downloads: type.config?.[ConfigProperties.DOWNLOADS], - extended: fieldNames - ? type.extendedFields.filter((extendedField: ExtendedConfigsInterface) => - fieldNames.includes(extendedField.fieldName), - ) - : type.extendedFields, - ...(createStateResolvers && { - facets: type.config?.[ConfigProperties.FACETS], - matchbox: type.config?.[ConfigProperties.MATCHBOX], - table: type.config?.[ConfigProperties.TABLE], - }), - }; - }; - - const aggregationsQuery = resolveAggregations({ type, getServerSideFilter }); - const aggregationsResolver = async (obj, args, context, info) => { - const aggs = await aggregationsQuery(obj, args, context, info); - return aggregationsToGraphql(aggs); - }; - - const defaultHitsResolver = resolveHits({ type, Parallel, getServerSideFilter }); - const hitsResolver = enableDocumentHits - ? defaultHitsResolver - : resolveHitsFromAggs(aggregationsQuery, dataMaskThreshold); + const { aggregations, hits, configs } = createResolvers({ + createStateResolvers, + type, + Parallel, + getServerSideFilter, + dataMaskThreshold, + enableDocumentHits, + }); return { [type.name]: { - aggregations: aggregationsResolver, + aggregations, configs, - hits: hitsResolver, + hits, // keeping this available for backwards compatibility, but hoping to remove it // TODO: investigate its current usage and need. remove otherwise // Update 2023-02: ENABLE_ADMIN prevents error comes up on facets. @@ -73,35 +52,4 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({ }; }; -/** - * Resolve hits from aggregations - * If "aggregations" field is not in query, return 0 - * - * @param aggregationsQuery - resolver ES query code for aggregations - * @returns Returns a total count that is less than or equal to the actual total hits in the query. - */ -const resolveHitsFromAggs = - (aggregationsQuery, dataMaskThreshold) => async (obj, args, context, info) => { - /* - * Get "aggregations" field from full query if found - * Popular gql parsing libs parse the "info" property which may not include full query based on schema - */ - const fileNameConnectionProperty = info.operation.selectionSet.selections[0]; - const aggregationsSelectionSet = fileNameConnectionProperty.selectionSet.selections.find( - (selection) => selection.name.value === 'aggregations', - ); - - if (aggregationsSelectionSet) { - const modifiedInfo = { ...info, fieldNodes: [aggregationsSelectionSet] }; - const aggregations = await aggregationsQuery(obj, info.variableValues, context, modifiedInfo); - const { hitsTotal: total } = applyAggregationMasking({ - aggregations, - thresholdMin: dataMaskThreshold, - }); - return { total }; - } else { - return { total: 0 }; - } - }; - export default createConnectionResolvers; diff --git a/modules/server/src/mapping/resolvers.ts b/modules/server/src/mapping/resolvers.ts new file mode 100644 index 000000000..45f6d68c3 --- /dev/null +++ b/modules/server/src/mapping/resolvers.ts @@ -0,0 +1,77 @@ +import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types'; +import { applyAggregationMasking } from './masking'; +import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations'; +import resolveHits from './resolveHits'; + +/** + * Resolve hits from aggregations + * If "aggregations" field is not in query, return 0 + * + * @param aggregationsQuery - resolver ES query code for aggregations + * @returns Returns a total count that is less than or equal to the actual total hits in the query. + */ +const resolveHitsFromAggs = + (aggregationsQuery, dataMaskThreshold) => async (obj, args, context, info) => { + /* + * Get "aggregations" field from full query if found + * Popular gql parsing libs parse the "info" property which may not include full query based on schema + */ + const fileNameConnectionProperty = info.operation.selectionSet.selections[0]; + const aggregationsSelectionSet = fileNameConnectionProperty.selectionSet.selections.find( + (selection) => selection.name.value === 'aggregations', + ); + + if (aggregationsSelectionSet) { + const modifiedInfo = { ...info, fieldNodes: [aggregationsSelectionSet] }; + const aggregations = await aggregationsQuery(obj, info.variableValues, context, modifiedInfo); + const { hitsTotal: total } = applyAggregationMasking({ + aggregations, + thresholdMin: dataMaskThreshold, + }); + return { total }; + } else { + return { total: 0 }; + } + }; + +export const createResolvers = ({ + createStateResolvers, + type, + Parallel, + getServerSideFilter, + dataMaskThreshold, + enableDocumentHits, +}) => { + // configs + const configs = async (parentObj, { fieldNames }: { fieldNames: string[] }) => { + return { + downloads: type.config?.[ConfigProperties.DOWNLOADS], + extended: fieldNames + ? type.extendedFields.filter((extendedField: ExtendedConfigsInterface) => + fieldNames.includes(extendedField.fieldName), + ) + : type.extendedFields, + ...(createStateResolvers && { + facets: type.config?.[ConfigProperties.FACETS], + matchbox: type.config?.[ConfigProperties.MATCHBOX], + table: type.config?.[ConfigProperties.TABLE], + }), + }; + }; + + // aggregations + const aggregationsQuery = resolveAggregations({ type, getServerSideFilter }); + + const aggregations = async (obj, args, context, info) => { + const aggs = await aggregationsQuery(obj, args, context, info); + return aggregationsToGraphql(aggs); + }; + + // hits + const defaultHitsResolver = resolveHits({ type, Parallel, getServerSideFilter }); + const hits = enableDocumentHits + ? defaultHitsResolver + : resolveHitsFromAggs(aggregationsQuery, dataMaskThreshold); + + return { hits, aggregations, configs }; +}; From 75cb5ab2b25381f200a7798630b92f58713a5d2a Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Thu, 5 Dec 2024 16:58:09 -0500 Subject: [PATCH 17/21] check for undefiend value in lookup --- modules/server/src/mapping/resolvers.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/server/src/mapping/resolvers.ts b/modules/server/src/mapping/resolvers.ts index 45f6d68c3..8bd54875a 100644 --- a/modules/server/src/mapping/resolvers.ts +++ b/modules/server/src/mapping/resolvers.ts @@ -1,4 +1,5 @@ import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types'; +import { get } from 'lodash'; import { applyAggregationMasking } from './masking'; import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations'; import resolveHits from './resolveHits'; @@ -16,8 +17,8 @@ const resolveHitsFromAggs = * Get "aggregations" field from full query if found * Popular gql parsing libs parse the "info" property which may not include full query based on schema */ - const fileNameConnectionProperty = info.operation.selectionSet.selections[0]; - const aggregationsSelectionSet = fileNameConnectionProperty.selectionSet.selections.find( + const aggregationsPath = 'operation.selectionSet.selections[0].selectionSet.selections'; + const aggregationsSelectionSet = get(info, aggregationsPath, []).find( (selection) => selection.name.value === 'aggregations', ); From 13c7b5a282af669ed5a42ad2f8d54f16874cb061 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Thu, 5 Dec 2024 19:36:56 -0500 Subject: [PATCH 18/21] clarify comment --- modules/server/src/mapping/resolvers.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/server/src/mapping/resolvers.ts b/modules/server/src/mapping/resolvers.ts index 8bd54875a..6f2b6afa5 100644 --- a/modules/server/src/mapping/resolvers.ts +++ b/modules/server/src/mapping/resolvers.ts @@ -22,6 +22,10 @@ const resolveHitsFromAggs = (selection) => selection.name.value === 'aggregations', ); + /* + * This function is used for "aggregation only mode" of Arranger where "hits" is based on "aggregations" + * A user might request only the "hits" field in a GQL query, in which case return 0 + */ if (aggregationsSelectionSet) { const modifiedInfo = { ...info, fieldNodes: [aggregationsSelectionSet] }; const aggregations = await aggregationsQuery(obj, info.variableValues, context, modifiedInfo); From 4581e2bd69b8b8f6a4bfe7424d06bd2692ec3af0 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Thu, 5 Dec 2024 20:29:13 -0500 Subject: [PATCH 19/21] add threshold value explanation --- modules/server/src/mapping/masking.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/server/src/mapping/masking.ts b/modules/server/src/mapping/masking.ts index e1992d8f9..800334983 100644 --- a/modules/server/src/mapping/masking.ts +++ b/modules/server/src/mapping/masking.ts @@ -49,6 +49,7 @@ export const applyAggregationMasking = ({ >; thresholdMin: number; }) => { + // set data masked properties to one less than the configured threshold value (under threshold) const THRESHOLD_REPLACEMENT_VALUE = thresholdMin - 1; const { aggsTotal: dataMaskedAggregations, totalHitsAgg } = Object.entries(aggregations).reduce( From 104813c24b9f54c31e825cd3038d0fff65c41547 Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Mon, 9 Dec 2024 17:27:10 -0500 Subject: [PATCH 20/21] move types to common file --- modules/server/src/mapping/masking.ts | 26 ++++++------ modules/server/src/mapping/resolvers.ts | 38 +++++++++++++++-- modules/server/src/mapping/types.ts | 55 +++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 15 deletions(-) create mode 100644 modules/server/src/mapping/types.ts diff --git a/modules/server/src/mapping/masking.ts b/modules/server/src/mapping/masking.ts index 800334983..a0b72ec23 100644 --- a/modules/server/src/mapping/masking.ts +++ b/modules/server/src/mapping/masking.ts @@ -1,13 +1,4 @@ -type Bucket = { - doc_count: number; - key: string; - belowThreshold: boolean; -}; - -type Aggregation = { - bucket_count: number; - buckets: Bucket[]; -}; +import { Aggregation } from './types'; /** * This returns a total count that is less than or equal to the actual total hits in the query. @@ -17,7 +8,15 @@ type Aggregation = { * @param aggregation an aggregation with the most buckets which has data masking applied * @returns hits total value */ -const calculateHitsFromAggregation = ({ aggregation }: { aggregation: Aggregation }) => { +const calculateHitsFromAggregation = ({ + aggregation, +}: { + aggregation: Aggregation | undefined; +}) => { + if (!aggregation) { + console.error('No aggregation found for calculating hits.'); + return 0; + } return aggregation.buckets.reduce( (totalAcc, bucket) => (bucket.belowThreshold ? totalAcc + 1 : totalAcc + bucket.doc_count), 0, @@ -52,7 +51,10 @@ export const applyAggregationMasking = ({ // set data masked properties to one less than the configured threshold value (under threshold) const THRESHOLD_REPLACEMENT_VALUE = thresholdMin - 1; - const { aggsTotal: dataMaskedAggregations, totalHitsAgg } = Object.entries(aggregations).reduce( + const { aggsTotal: dataMaskedAggregations, totalHitsAgg } = Object.entries(aggregations).reduce<{ + aggsTotal: Record; + totalHitsAgg: { key: string; bucketCount: number }; + }>( ({ aggsTotal, totalHitsAgg }, [type, aggregation]) => { // mask buckets if under threshold const dataMaskedBuckets = aggregation.buckets.map((bucket) => diff --git a/modules/server/src/mapping/resolvers.ts b/modules/server/src/mapping/resolvers.ts index 6f2b6afa5..3ef751c98 100644 --- a/modules/server/src/mapping/resolvers.ts +++ b/modules/server/src/mapping/resolvers.ts @@ -1,8 +1,11 @@ import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types'; +import { GraphQLResolveInfo } from 'graphql'; import { get } from 'lodash'; +import { Context } from 'vm'; import { applyAggregationMasking } from './masking'; import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations'; import resolveHits from './resolveHits'; +import { Aggregation, Hits, Root } from './types'; /** * Resolve hits from aggregations @@ -12,7 +15,20 @@ import resolveHits from './resolveHits'; * @returns Returns a total count that is less than or equal to the actual total hits in the query. */ const resolveHitsFromAggs = - (aggregationsQuery, dataMaskThreshold) => async (obj, args, context, info) => { + ( + aggregationsQuery: ( + obj: Root, + args: { + filters?: object; + include_missing?: boolean; + aggregations_filter_themselves?: boolean; + }, + context: Context, + info: GraphQLResolveInfo, + ) => Record, + dataMaskThreshold: number, + ) => + async (obj: Root, args: Hits, context: Context, info: GraphQLResolveInfo) => { /* * Get "aggregations" field from full query if found * Popular gql parsing libs parse the "info" property which may not include full query based on schema @@ -46,9 +62,16 @@ export const createResolvers = ({ getServerSideFilter, dataMaskThreshold, enableDocumentHits, +}: { + createStateResolvers: boolean; + type; + Parallel; + getServerSideFilter; + dataMaskThreshold: number; + enableDocumentHits: boolean; }) => { // configs - const configs = async (parentObj, { fieldNames }: { fieldNames: string[] }) => { + const configs = async (parentObj: Root, { fieldNames }: { fieldNames: string[] }) => { return { downloads: type.config?.[ConfigProperties.DOWNLOADS], extended: fieldNames @@ -67,7 +90,16 @@ export const createResolvers = ({ // aggregations const aggregationsQuery = resolveAggregations({ type, getServerSideFilter }); - const aggregations = async (obj, args, context, info) => { + const aggregations = async ( + obj: Root, + args: { + filters?: object; + include_missing?: boolean; + aggregations_filter_themselves?: boolean; + }, + context: Context, + info: GraphQLResolveInfo, + ) => { const aggs = await aggregationsQuery(obj, args, context, info); return aggregationsToGraphql(aggs); }; diff --git a/modules/server/src/mapping/types.ts b/modules/server/src/mapping/types.ts new file mode 100644 index 000000000..3cf726f18 --- /dev/null +++ b/modules/server/src/mapping/types.ts @@ -0,0 +1,55 @@ +import { Client } from '@elastic/elasticsearch'; + +export type Bucket = { + doc_count: number; + key: string; + belowThreshold: boolean; +}; + +export type Aggregation = { + bucket_count: number; + buckets: Bucket[]; +}; + +export type Root = Record; + +enum Missing { + first, + last, +} + +enum Mode { + avg, + max, + min, + sum, +} + +enum Order { + asc, + desc, +} + +export type Sort = { + fieldName: string; + order: Order; + mode: Mode; + missing: Missing; +}; + +export type Hits = { + score: string; + offset: number; + sort: [Sort]; + filters: JSON; + before: string; + after: string; + first: number; + last: number; + searchAfter: JSON; + trackTotalHits: boolean; +}; + +export type Context = { + es: Client; +}; From b792460747e46fe053b5464aece86ba3f49732eb Mon Sep 17 00:00:00 2001 From: Ciaran Schutte Date: Mon, 9 Dec 2024 18:36:19 -0500 Subject: [PATCH 21/21] typing --- .../src/mapping/createConnectionResolvers.ts | 2 +- .../server/src/mapping/resolveAggregations.js | 2 +- modules/server/src/mapping/resolvers.ts | 26 +++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/server/src/mapping/createConnectionResolvers.ts b/modules/server/src/mapping/createConnectionResolvers.ts index b1f3b9fca..01318bf25 100644 --- a/modules/server/src/mapping/createConnectionResolvers.ts +++ b/modules/server/src/mapping/createConnectionResolvers.ts @@ -5,7 +5,7 @@ import { GetServerSideFilterFn } from '@/utils/getDefaultServerSideFilter'; import { createResolvers } from './resolvers'; // TODO: tighten these types -type CreateConnectionResolversArgs = { +export type CreateConnectionResolversArgs = { createStateResolvers?: boolean; enableAdmin: boolean; enableDocumentHits: boolean; diff --git a/modules/server/src/mapping/resolveAggregations.js b/modules/server/src/mapping/resolveAggregations.js index 92dfdac51..aeb4b2d0c 100644 --- a/modules/server/src/mapping/resolveAggregations.js +++ b/modules/server/src/mapping/resolveAggregations.js @@ -9,7 +9,7 @@ import esSearch from './utils/esSearch'; export default ({ type, getServerSideFilter }) => { return async ( obj, - { offset = 0, filters, aggregations_filter_themselves, include_missing = true }, + { filters, aggregations_filter_themselves, include_missing = true }, context, info, ) => { diff --git a/modules/server/src/mapping/resolvers.ts b/modules/server/src/mapping/resolvers.ts index 3ef751c98..e34f05ef4 100644 --- a/modules/server/src/mapping/resolvers.ts +++ b/modules/server/src/mapping/resolvers.ts @@ -2,6 +2,7 @@ import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types'; import { GraphQLResolveInfo } from 'graphql'; import { get } from 'lodash'; import { Context } from 'vm'; +import { CreateConnectionResolversArgs } from './createConnectionResolvers'; import { applyAggregationMasking } from './masking'; import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations'; import resolveHits from './resolveHits'; @@ -35,7 +36,7 @@ const resolveHitsFromAggs = */ const aggregationsPath = 'operation.selectionSet.selections[0].selectionSet.selections'; const aggregationsSelectionSet = get(info, aggregationsPath, []).find( - (selection) => selection.name.value === 'aggregations', + (selection) => selection.kind === 'Field' && selection.name.value === 'aggregations', ); /* @@ -44,6 +45,9 @@ const resolveHitsFromAggs = */ if (aggregationsSelectionSet) { const modifiedInfo = { ...info, fieldNodes: [aggregationsSelectionSet] }; + // @ts-ignore + // modifying the query info field inline so it can query aggregations correctly + // not idiomatic so doesn't line up with typings from graphql const aggregations = await aggregationsQuery(obj, info.variableValues, context, modifiedInfo); const { hitsTotal: total } = applyAggregationMasking({ aggregations, @@ -62,14 +66,7 @@ export const createResolvers = ({ getServerSideFilter, dataMaskThreshold, enableDocumentHits, -}: { - createStateResolvers: boolean; - type; - Parallel; - getServerSideFilter; - dataMaskThreshold: number; - enableDocumentHits: boolean; -}) => { +}: Omit) => { // configs const configs = async (parentObj: Root, { fieldNames }: { fieldNames: string[] }) => { return { @@ -93,9 +90,9 @@ export const createResolvers = ({ const aggregations = async ( obj: Root, args: { - filters?: object; - include_missing?: boolean; - aggregations_filter_themselves?: boolean; + filters: object; + include_missing: boolean; + aggregations_filter_themselves: boolean; }, context: Context, info: GraphQLResolveInfo, @@ -108,7 +105,10 @@ export const createResolvers = ({ const defaultHitsResolver = resolveHits({ type, Parallel, getServerSideFilter }); const hits = enableDocumentHits ? defaultHitsResolver - : resolveHitsFromAggs(aggregationsQuery, dataMaskThreshold); + : // @ts-ignore + // typing resolveAggregations requires typing a lot of code down the chain + // TODO: improve typing + resolveHitsFromAggs(aggregationsQuery, dataMaskThreshold); return { hits, aggregations, configs }; };