From ce503278ddf47adc9ce3c6f72825009939604317 Mon Sep 17 00:00:00 2001 From: Franco Borrelli Date: Wed, 4 Oct 2023 16:19:07 -0300 Subject: [PATCH] feat(KNO-4367): abstract validateExtractedFilePath --- src/lib/marshal/email-layout/reader.ts | 39 +++++++------------------ src/lib/marshal/email-layout/writer.ts | 4 +-- src/lib/marshal/shared/helpers.ts | 37 ++++++++++++++++++++++++ src/lib/marshal/workflow/reader.ts | 40 ++------------------------ 4 files changed, 52 insertions(+), 68 deletions(-) diff --git a/src/lib/marshal/email-layout/reader.ts b/src/lib/marshal/email-layout/reader.ts index 16b77e2..54338cd 100644 --- a/src/lib/marshal/email-layout/reader.ts +++ b/src/lib/marshal/email-layout/reader.ts @@ -1,5 +1,3 @@ -import path from "node:path"; - import * as fs from "fs-extra"; import { set } from "lodash"; @@ -7,9 +5,9 @@ import { JsonDataError } from "@/lib/helpers/error"; import { ParseJsonResult, readJson } from "@/lib/helpers/json"; import { AnyObj, mapValuesDeep, ObjPath, omitDeep } from "@/lib/helpers/object"; import { - checkIfValidExtractedFilePathFormat, FILEPATH_MARKED_RE, readExtractedFileSync, + validateExtractedFilePath, } from "@/lib/marshal/shared/helpers"; import { EmailLayoutDirContext } from "@/lib/run-context"; @@ -63,6 +61,11 @@ const joinExtractedFiles = async ( ): Promise => { const errors: JsonDataError[] = []; + // Tracks each new valid extracted file path seen (rebased to be relative to + // layout.json) in the layout json node. Mutated in place, and used + // to validate the uniqueness of an extracted path encountered. + const uniqueFilePaths = {}; + mapValuesDeep(layoutJson, (relpath: string, key: string, parts) => { // If not marked with the @suffix, there's nothing to do. if (!FILEPATH_MARKED_RE.test(key)) return; @@ -73,7 +76,10 @@ const joinExtractedFiles = async ( // Check if the extracted path found at the current field path is valid const invalidFilePathError = validateExtractedFilePath( relpath, - layoutDirCtx, + layoutDirCtx.abspath, + LAYOUT_JSON, + uniqueFilePaths, + objPathToFieldStr, ); if (invalidFilePathError) { errors.push(invalidFilePathError); @@ -110,28 +116,3 @@ const joinExtractedFiles = async ( return [layoutJson, errors]; }; - -/* - * Validate the extracted file path based on its format and uniqueness (but not - * the presence). - */ -const validateExtractedFilePath = ( - val: unknown, - emailLayoutDirCtx: EmailLayoutDirContext, -): JsonDataError | undefined => { - const layoutJsonPath = path.resolve(emailLayoutDirCtx.abspath, LAYOUT_JSON); - // Validate the file path format, and that it is unique per layout. - if ( - !checkIfValidExtractedFilePathFormat(val, layoutJsonPath) || - typeof val !== "string" - ) { - const error = new JsonDataError( - "must be a relative path string to a unique file within the directory", - String(val), - ); - - return error; - } - - return undefined; -}; diff --git a/src/lib/marshal/email-layout/writer.ts b/src/lib/marshal/email-layout/writer.ts index 8f709ee..c77e042 100644 --- a/src/lib/marshal/email-layout/writer.ts +++ b/src/lib/marshal/email-layout/writer.ts @@ -24,8 +24,8 @@ type CompiledExtractionSettings = Map; /* Traverse a given email layout data and compile extraction settings of every extractable * field into a sorted map. - * - * NOTE: Currently we do NOT support content extraction at nested levels for email layouts. + * + * NOTE: Currently we do NOT support content extraction at nested levels for email layouts. */ const compileExtractionSettings = ( emailLayout: EmailLayoutData, diff --git a/src/lib/marshal/shared/helpers.ts b/src/lib/marshal/shared/helpers.ts index 55c16d5..909d3c5 100644 --- a/src/lib/marshal/shared/helpers.ts +++ b/src/lib/marshal/shared/helpers.ts @@ -79,6 +79,43 @@ export const readExtractedFileSync = ( return [content!, undefined]; }; +/* + * Validate the extracted file path based on its format and uniqueness (but not + * the presence). + * + * Note, the uniqueness check is based on reading from and writing to + * uniqueFilePaths, which is MUTATED in place. + */ + +/* eslint-disable max-params */ +export const validateExtractedFilePath = ( + val: unknown, + sourceFileAbspath: string, + sourceJson: string, + uniqueFilePaths: Record, + objPathToFieldStr: string, +): JsonDataError | undefined => { + const jsonPath = path.resolve(sourceFileAbspath, sourceJson); + // Validate the file path format, and that it is unique per entity. + if ( + !checkIfValidExtractedFilePathFormat(val, jsonPath) || + typeof val !== "string" || + val in uniqueFilePaths + ) { + const error = new JsonDataError( + "must be a relative path string to a unique file within the directory", + objPathToFieldStr, + ); + + return error; + } + + // Keep track of all the valid extracted file paths that have been seen, so + // we can validate each file path's uniqueness as we traverse. + uniqueFilePaths[val] = true; + return undefined; +}; + /* * Validate the file path format of an extracted field. The file path must be: * diff --git a/src/lib/marshal/workflow/reader.ts b/src/lib/marshal/workflow/reader.ts index 7ee553b..d0c0786 100644 --- a/src/lib/marshal/workflow/reader.ts +++ b/src/lib/marshal/workflow/reader.ts @@ -14,9 +14,9 @@ import { omitDeep, } from "@/lib/helpers/object"; import { - checkIfValidExtractedFilePathFormat, FILEPATH_MARKED_RE, readExtractedFileSync, + validateExtractedFilePath, } from "@/lib/marshal/shared/helpers"; import { WorkflowDirContext } from "@/lib/run-context"; @@ -36,41 +36,6 @@ export type WorkflowDirData = WorkflowDirContext & { // (e.g. workflow.json, then visual_blocks.json) const MAX_EXTRACTION_LEVEL = 2; -/* - * Validate the extracted file path based on its format and uniqueness (but not - * the presence). - * - * Note, the uniqueness check is based on reading from and writing to - * uniqueFilePaths, which is MUTATED in place. - */ -const validateExtractedFilePath = ( - val: unknown, - workflowDirCtx: WorkflowDirContext, - uniqueFilePaths: Record, - objPathToFieldStr: string, -): JsonDataError | undefined => { - const workflowJsonPath = path.resolve(workflowDirCtx.abspath, WORKFLOW_JSON); - - // Validate the file path format, and that it is unique per workflow. - if ( - !checkIfValidExtractedFilePathFormat(val, workflowJsonPath) || - typeof val !== "string" || - val in uniqueFilePaths - ) { - const error = new JsonDataError( - "must be a relative path string to a unique file within the directory", - objPathToFieldStr, - ); - - return error; - } - - // Keep track of all the valid extracted file paths that have been seen, so - // we can validate each file path's uniqueness as we traverse. - uniqueFilePaths[val] = true; - return undefined; -}; - /* * Given a workflow json object, compiles all referenced extracted files from it * and returns the updated object with the extracted content joined and inlined. @@ -148,7 +113,8 @@ const joinExtractedFiles = async ( const invalidFilePathError = validateExtractedFilePath( rebasedFilePath, - workflowDirCtx, + workflowDirCtx.abspath, + WORKFLOW_JSON, uniqueFilePaths, objPathToFieldStr, );