feat(#413): Expand image query pipeline to query fields with missing …

…content
Duke-MatSci · Jun 23, 2023 · 4fbf8d2 · 4fbf8d2
1 parent 8cc87f8
commit 4fbf8d2
Show file tree

Hide file tree

Showing 2 changed files with 253 additions and 24 deletions.
diff --git a/resfulservice/src/graphql/resolver/imageExplorer/query.js b/resfulservice/src/graphql/resolver/imageExplorer/query.js
@@ -1,21 +1,22 @@
 const { imageTransformer } = require('../../transformer');
 const errorFormater = require('../../../utils/errorFormater');
 const paginator = require('../../../utils/paginator');
-const { validateImageSearchOptions, imageQuery } = require('../../../pipelines/image-pipeline');
+const { validateImageSearchOptions, validateImageSearchOptionsForUndefinedContent, imageQuery } = require('../../../pipelines/image-pipeline');
 
 const imageExplorerQuery = {
   searchImages: async (_, { input }, { user, req, isAuthenticated }) => {
     req.logger?.info('[searchImages]: Function entry');
     try {
       const search = validateImageSearchOptions(input);
+      const searchArgs = { search, selectedImgWithoutContent: validateImageSearchOptionsForUndefinedContent(input), input };
+
       const pagination = input
-        ? paginator((await imageQuery({ search, input }))?.counts, input.pageNumber, input.pageSize)
-        : paginator((await imageQuery({ search, input }))?.counts);
+        ? paginator((await imageQuery({ ...searchArgs }))?.counts, input.pageNumber, input.pageSize)
+        : paginator((await imageQuery({ ...searchArgs }))?.counts);
       const images = imageTransformer((await imageQuery({
-        search,
+        ...searchArgs,
         skip: pagination.skip,
-        limit: pagination.limit,
-        input
+        limit: pagination.limit
       })).images);
       return Object.assign(pagination, { images });
     } catch (err) {
@@ -30,7 +31,8 @@ const imageExplorerQuery = {
       const pagination = input
         ? paginator((await imageQuery())?.counts, input.pageNumber, input.pageSize)
         : paginator((await imageQuery())?.counts);
-      const images = imageTransformer((await imageQuery({ skip: pagination.skip, limit: pagination.limit }))?.images);
+      const imgArray = await imageQuery({ skip: pagination.skip, limit: pagination.limit });
+      const images = imageTransformer(imgArray.images);
       return Object.assign(pagination, { images });
     } catch (err) {
       req.logger?.error(`[images]: ${err}`);

diff --git a/resfulservice/src/pipelines/image-pipeline.js b/resfulservice/src/pipelines/image-pipeline.js
@@ -5,13 +5,179 @@ const errorFormater = require('../utils/errorFormater');
 const targetField = 'content.PolymerNanocomposite.MICROSTRUCTURE.ImageFile';
 const commonFields = 'content.PolymerNanocomposite.DATA_SOURCE.Citation.CommonFields';
 const fillerFields = 'content.PolymerNanocomposite.MATERIALS.Filler.FillerComponent';
+const imgWithoutContentField = '$xml_str';
+const listCap = 1000; // Pull as many as a thousand from the generated list array
+
+const imageWithoutContentQuery = (search, selectedImg) => {
+  const stages = [];
+
+  // Stage this by default
+  stages.push({
+    $match: {
+      content: {
+        $eq: undefined
+      },
+      xml_str: {
+        $regex: '<MICROSTRUCTURE><ImageFile>' // Check if Image exist first
+      }
+    }
+  });
+
+  if (search.length) {
+    search.map(query => stages.push(query));
+  }
+
+  // This logic is used to select a single image. A Mongoose Object ID must be provided in the request.
+  if (selectedImg) {
+    stages.push({
+      $match: {
+        _id: mongoose.Types.ObjectId(selectedImg)
+      }
+    });
+  }
+
+  // Add metadata field
+  stages.push({
+    $addFields: {
+      'metaData.title': {
+        $arrayElemAt: [{ $split: [{ $arrayElemAt: [{ $split: [imgWithoutContentField, '<Control_ID>'] }, 1] }, '</Control_ID>'] }, 0]
+      },
+      'metaData.sampleId': {
+        $arrayElemAt: [{ $split: [{ $arrayElemAt: [{ $split: [imgWithoutContentField, '<ID>'] }, 1] }, '</ID>'] }, 0]
+      },
+      'metaData.id': '$_id',
+      'metaData.doi': {
+        $arrayElemAt: [{ $split: [{ $arrayElemAt: [{ $split: [imgWithoutContentField, '<DOI>'] }, 1] }, '</DOI>'] }, 0]
+      },
+      prepFile: { $split: [imgWithoutContentField, '<ImageFile><File>'] },
+      prepAuthors: { $split: [imgWithoutContentField, '<Author>'] },
+      prepKeywords: { $split: [imgWithoutContentField, '<Keyword>'] }
+    }
+  });
+
+  // Verify above prepped list exist
+  stages.push({
+    $addFields: {
+      keywordsList: {
+        $cond: [
+          {
+            $gt: [
+              {
+                $size: '$prepKeywords'
+              }, 1
+            ]
+          }, '$prepKeywords', []
+        ]
+      },
+      authorsList: {
+        $cond: [
+          {
+            $gt: [
+              {
+                $size: '$prepAuthors'
+              }, 1
+            ]
+          }, '$prepAuthors', []
+        ]
+      }
+    }
+  });
+
+  // Generate list based on verifed prepped list
+  stages.push({
+    $addFields: {
+      imageFile: {
+        $map: {
+          input: {
+            $slice: [
+              '$prepFile', 2, listCap
+            ]
+          },
+          as: 'currImage',
+          in: {
+            $arrayElemAt: [
+              {
+                $split: [
+                  '$$currImage', '</File>'
+                ]
+              }, 0
+            ]
+          }
+        }
+      },
+      'metaData.keywords': {
+        $map: {
+          input: {
+            $slice: [
+              '$keywordsList', 2, listCap
+            ]
+          },
+          as: 'currKeywords',
+          in: {
+            $arrayElemAt: [
+              {
+                $split: [
+                  '$$currKeywords', '</Keyword>'
+                ]
+              }, 0
+            ]
+          }
+        }
+      },
+      'metaData.authors': {
+        $map: {
+          input: {
+            $slice: [
+              '$authorsList', 1, listCap
+            ]
+          },
+          as: 'currAuthors',
+          in: {
+            $arrayElemAt: [
+              {
+                $split: [
+                  '$$currAuthors', '</Author>'
+                ]
+              }, 0
+            ]
+          }
+        }
+      }
+    }
+  });
+
+  stages.push({
+    $unwind: {
+      path: '$imageFile',
+      preserveNullAndEmptyArrays: false
+    }
+  });
+
+  stages.push({
+    $addFields: {
+      'image.metaData': '$metaData',
+      'image.File': '$imageFile',
+      'image.Description': '$title'
+    }
+  });
+
+  stages.push({
+    $project: {
+      _id: 0,
+      images: '$image'
+    }
+  });
+
+  return stages;
+};
 
 exports.imageQuery = async (args) => {
   const skip = args?.skip ?? 0;
   const limit = args?.limit ?? 20;
   const search = args?.search ?? false;
   const input = args?.input ?? false;
   const selectedImg = args?.selectedImg ?? false;
+  const selectImageWithContentQuery = args?.selectedImgWithoutContent ?? false;
 
   const stages = [];
 
@@ -91,29 +257,43 @@ exports.imageQuery = async (args) => {
     }
   });
 
-  // Regrouping after removing undefined files
-  stages.push({
-    $group: {
-      _id: 'null',
-      image: {
-        $push: '$image'
-      }
-    }
-  });
-
   stages.push({
     $project: {
       _id: 0,
-      counts: { $size: '$image' },
-      images: {
-        $slice: ['$image', skip, limit]
-      }
+      images: '$image'
     }
   });
 
-  const queryData = await Xmls.aggregate(stages);
-  if (queryData.length) {
-    const { counts, images } = queryData.pop();
+  const consolidateResponse = async () => {
+    return await Xmls.aggregate([
+      {
+        $facet: {
+          withContent: stages,
+          withoutContent: imageWithoutContentQuery(selectImageWithContentQuery, selectedImg)
+        }
+      },
+      {
+        $project: {
+          concantenatedQueryResponse: {
+            $concatArrays: ['$withContent', '$withoutContent']
+          }
+        }
+      },
+      {
+        $project: {
+          counts: { $size: '$concantenatedQueryResponse' },
+          images: {
+            $slice: ['$concantenatedQueryResponse', skip, limit]
+          }
+        }
+      }
+    ]);
+  };
+
+  const queryResponse = await consolidateResponse();
+
+  if (queryResponse.length) {
+    const { counts, images } = queryResponse.pop();
     return { counts, images };
   }
   return { counts: 0, images: [] };
@@ -199,6 +379,16 @@ exports.validateImageSearchOptions = (input) => {
     return searchQuery;
   }
 
+  // Search by Sample ID
+  if (search === 'filterByID') {
+    searchQuery.push({
+      $match: {
+        title: { $regex: searchValue, $options: 'gi' }
+      }
+    });
+    return searchQuery;
+  }
+
   // Search by sentence
   // const wholeSentence = new RegExp(searchValue, 'gi');
   searchQuery.push({
@@ -216,3 +406,40 @@ exports.validateImageSearchOptions = (input) => {
   });
   return searchQuery;
 };
+
+exports.validateImageSearchOptionsForUndefinedContent = (input) => {
+  const searchQuery = [];
+  const search = input?.search;
+  const searchValue = input?.searchValue || '';
+
+  if (!searchValue) throw errorFormater('Search value cannot be empty', 422);
+
+  // If "search" term exist else set searchQuery to empty array so its pipeline query can be ignored at runtime
+  if (search) {
+    // Search by Sample ID
+    switch (search) {
+      case 'filterByID':
+      case 'Keyword':
+        searchQuery.push({
+          $match: {
+            xml_str: {
+              $regex: `<ID>${searchValue}`,
+              $options: 'gi'
+            }
+          }
+        });
+        break;
+      default:
+        searchQuery.push({
+          $match: {
+            xml_str: {
+              $eq: undefined
+            }
+          }
+        });
+    }
+    return searchQuery;
+  }
+
+  return searchQuery;
+};