Skip to content

Commit

Permalink
feat(KNO-4367): abstract validateExtractedFilePath
Browse files Browse the repository at this point in the history
  • Loading branch information
francoborr committed Oct 4, 2023
1 parent 77caaa0 commit ce50327
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 68 deletions.
39 changes: 10 additions & 29 deletions src/lib/marshal/email-layout/reader.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import path from "node:path";

import * as fs from "fs-extra";
import { set } from "lodash";

import { JsonDataError } from "@/lib/helpers/error";
import { ParseJsonResult, readJson } from "@/lib/helpers/json";
import { AnyObj, mapValuesDeep, ObjPath, omitDeep } from "@/lib/helpers/object";
import {
checkIfValidExtractedFilePathFormat,
FILEPATH_MARKED_RE,
readExtractedFileSync,
validateExtractedFilePath,
} from "@/lib/marshal/shared/helpers";
import { EmailLayoutDirContext } from "@/lib/run-context";

Expand Down Expand Up @@ -63,6 +61,11 @@ const joinExtractedFiles = async (
): Promise<JoinExtractedFilesResult> => {
const errors: JsonDataError[] = [];

// Tracks each new valid extracted file path seen (rebased to be relative to
// layout.json) in the layout json node. Mutated in place, and used
// to validate the uniqueness of an extracted path encountered.
const uniqueFilePaths = {};

mapValuesDeep(layoutJson, (relpath: string, key: string, parts) => {
// If not marked with the @suffix, there's nothing to do.
if (!FILEPATH_MARKED_RE.test(key)) return;
Expand All @@ -73,7 +76,10 @@ const joinExtractedFiles = async (
// Check if the extracted path found at the current field path is valid
const invalidFilePathError = validateExtractedFilePath(
relpath,
layoutDirCtx,
layoutDirCtx.abspath,
LAYOUT_JSON,
uniqueFilePaths,
objPathToFieldStr,
);
if (invalidFilePathError) {
errors.push(invalidFilePathError);
Expand Down Expand Up @@ -110,28 +116,3 @@ const joinExtractedFiles = async (

return [layoutJson, errors];
};

/*
 * Checks that a value found at an extracted-file field of layout.json is an
 * acceptable extracted file path.
 *
 * The value passes only when checkIfValidExtractedFilePathFormat accepts it
 * (evaluated against the resolved location of the layout json file) and it is
 * a string. Whether the file actually exists is not checked here, and no
 * record of previously seen paths is kept by this function, even though the
 * error message mentions uniqueness.
 *
 * Returns a JsonDataError describing the problem, or undefined when the value
 * is acceptable.
 *
 * NOTE(review): the second JsonDataError argument receives the stringified
 * value itself; confirm that is what the error is expected to carry.
 */
const validateExtractedFilePath = (
  val: unknown,
  emailLayoutDirCtx: EmailLayoutDirContext,
): JsonDataError | undefined => {
  const resolvedLayoutJson = path.resolve(
    emailLayoutDirCtx.abspath,
    LAYOUT_JSON,
  );

  // Format check runs first, then the string check, so the happy path can
  // bail out early.
  const hasValidFormat = checkIfValidExtractedFilePathFormat(
    val,
    resolvedLayoutJson,
  );
  if (hasValidFormat && typeof val === "string") {
    return undefined;
  }

  return new JsonDataError(
    "must be a relative path string to a unique file within the directory",
    String(val),
  );
};
4 changes: 2 additions & 2 deletions src/lib/marshal/email-layout/writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ type CompiledExtractionSettings = Map<ObjKeyOrArrayIdx[], ExtractionSettings>;

/* Traverse a given email layout data and compile extraction settings of every extractable
* field into a sorted map.
*
* NOTE: Currently we do NOT support content extraction at nested levels for email layouts.
*
* NOTE: Currently we do NOT support content extraction at nested levels for email layouts.
*/
const compileExtractionSettings = (
emailLayout: EmailLayoutData<WithAnnotation>,
Expand Down
37 changes: 37 additions & 0 deletions src/lib/marshal/shared/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,43 @@ export const readExtractedFileSync = (
return [content!, undefined];
};

/*
 * Validate the extracted file path based on its format and uniqueness (but not
 * the presence).
 *
 * Params:
 *   val               - the candidate value read from the json node
 *   sourceFileAbspath - absolute path that sourceJson is resolved against
 *   sourceJson        - file name of the json file the path was found in
 *   uniqueFilePaths   - tracker of valid paths already seen
 *   objPathToFieldStr - object path of the field, attached to the error
 *
 * Returns a JsonDataError when the value is not a validly formatted string
 * path or has already been seen; otherwise records it and returns undefined.
 *
 * Note, the uniqueness check is based on reading from and writing to
 * uniqueFilePaths, which is MUTATED in place. Only the tracker's OWN keys
 * count as "seen", so a path whose name matches an inherited Object.prototype
 * member (e.g. "constructor") is not mistaken for a duplicate.
 */

/* eslint-disable max-params */
export const validateExtractedFilePath = (
  val: unknown,
  sourceFileAbspath: string,
  sourceJson: string,
  uniqueFilePaths: Record<string, boolean>,
  objPathToFieldStr: string,
): JsonDataError | undefined => {
  const jsonPath = path.resolve(sourceFileAbspath, sourceJson);

  // Own-property lookup rather than the `in` operator: `in` also walks the
  // prototype chain, which would flag first-seen paths like "constructor" or
  // "valueOf" as duplicates when the tracker is a plain `{}`.
  const alreadySeen = (filePath: string): boolean =>
    Object.prototype.hasOwnProperty.call(uniqueFilePaths, filePath);

  // Validate the file path format, and that it is unique per entity.
  if (
    !checkIfValidExtractedFilePathFormat(val, jsonPath) ||
    typeof val !== "string" ||
    alreadySeen(val)
  ) {
    const error = new JsonDataError(
      "must be a relative path string to a unique file within the directory",
      objPathToFieldStr,
    );

    return error;
  }

  // Keep track of all the valid extracted file paths that have been seen, so
  // we can validate each file path's uniqueness as we traverse. Defined as an
  // ordinary own data property (same attributes plain assignment would give)
  // so that even a "__proto__" key is recorded instead of being swallowed by
  // the prototype setter.
  Object.defineProperty(uniqueFilePaths, val, {
    value: true,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return undefined;
};

/*
* Validate the file path format of an extracted field. The file path must be:
*
Expand Down
40 changes: 3 additions & 37 deletions src/lib/marshal/workflow/reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ import {
omitDeep,
} from "@/lib/helpers/object";
import {
checkIfValidExtractedFilePathFormat,
FILEPATH_MARKED_RE,
readExtractedFileSync,
validateExtractedFilePath,
} from "@/lib/marshal/shared/helpers";
import { WorkflowDirContext } from "@/lib/run-context";

Expand All @@ -36,41 +36,6 @@ export type WorkflowDirData = WorkflowDirContext & {
// (e.g. workflow.json, then visual_blocks.json)
const MAX_EXTRACTION_LEVEL = 2;

/*
 * Validate the extracted file path based on its format and uniqueness (but not
 * the presence).
 *
 * Params:
 *   val               - the candidate value read from the workflow json node
 *   workflowDirCtx    - workflow directory context; its abspath is used to
 *                       resolve the location of workflow.json
 *   uniqueFilePaths   - tracker of valid paths already seen
 *   objPathToFieldStr - object path of the field, attached to the error
 *
 * Returns a JsonDataError when the value is not a validly formatted string
 * path or has already been seen; otherwise records it and returns undefined.
 *
 * Note, the uniqueness check is based on reading from and writing to
 * uniqueFilePaths, which is MUTATED in place. Only the tracker's OWN keys
 * count as "seen", so a path whose name matches an inherited Object.prototype
 * member (e.g. "constructor") is not mistaken for a duplicate.
 */
const validateExtractedFilePath = (
  val: unknown,
  workflowDirCtx: WorkflowDirContext,
  uniqueFilePaths: Record<string, boolean>,
  objPathToFieldStr: string,
): JsonDataError | undefined => {
  const workflowJsonPath = path.resolve(workflowDirCtx.abspath, WORKFLOW_JSON);

  // Own-property lookup rather than the `in` operator: `in` also walks the
  // prototype chain, which would flag first-seen paths like "constructor" or
  // "valueOf" as duplicates when the tracker is a plain `{}`.
  const alreadySeen = (filePath: string): boolean =>
    Object.prototype.hasOwnProperty.call(uniqueFilePaths, filePath);

  // Validate the file path format, and that it is unique per workflow.
  if (
    !checkIfValidExtractedFilePathFormat(val, workflowJsonPath) ||
    typeof val !== "string" ||
    alreadySeen(val)
  ) {
    const error = new JsonDataError(
      "must be a relative path string to a unique file within the directory",
      objPathToFieldStr,
    );

    return error;
  }

  // Keep track of all the valid extracted file paths that have been seen, so
  // we can validate each file path's uniqueness as we traverse. Defined as an
  // ordinary own data property (same attributes plain assignment would give)
  // so that even a "__proto__" key is recorded instead of being swallowed by
  // the prototype setter.
  Object.defineProperty(uniqueFilePaths, val, {
    value: true,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return undefined;
};

/*
* Given a workflow json object, compiles all referenced extracted files from it
* and returns the updated object with the extracted content joined and inlined.
Expand Down Expand Up @@ -148,7 +113,8 @@ const joinExtractedFiles = async (

const invalidFilePathError = validateExtractedFilePath(
rebasedFilePath,
workflowDirCtx,
workflowDirCtx.abspath,
WORKFLOW_JSON,
uniqueFilePaths,
objPathToFieldStr,
);
Expand Down

0 comments on commit ce50327

Please sign in to comment.