diff --git a/schematic/manifest/generator.py b/schematic/manifest/generator.py
index 69b86e136..981e6d8e3 100644
--- a/schematic/manifest/generator.py
+++ b/schematic/manifest/generator.py
@@ -1904,6 +1904,8 @@ def get_manifest(
     # TODO: avoid explicitly exposing Synapse store functionality
     # just instantiate a Store class and let it decide at runtime/config
     # the store type
+    # TODO: determine which parts of the fileview are necessary for `get` operations
+    # and pass query parameters at object instantiation to avoid having to re-query
     if access_token:
         # for getting an existing manifest on AWS
         store = SynapseStorage(access_token=access_token)
diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py
index 9b13bebaf..56c9a3443 100644
--- a/schematic/models/validate_attribute.py
+++ b/schematic/models/validate_attribute.py
@@ -2119,7 +2119,9 @@ def filename_validation(
         where_clauses = []

-        dataset_clause = f"parentId='{dataset_scope}'"
+        dataset_clause = SynapseStorage.build_clause_from_dataset_id(
+            dataset_id=dataset_scope
+        )
         where_clauses.append(dataset_clause)

         self._login(
diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py
index 14c260cb4..e0e8017bd 100644
--- a/schematic/store/synapse.py
+++ b/schematic/store/synapse.py
@@ -19,12 +19,10 @@
 import numpy as np
 import pandas as pd
 import synapseclient
-import synapseutils
 from opentelemetry import trace
 from synapseclient import Annotations as OldAnnotations
 from synapseclient import (
     Column,
-    Entity,
     EntityViewSchema,
     EntityViewType,
     File,
@@ -416,6 +414,30 @@ def query_fileview(
         else:
             raise AccessCredentialsError(self.storageFileview)

+    @staticmethod
+    def build_clause_from_dataset_id(
+        dataset_id: Optional[str] = None, dataset_folder_list: Optional[list] = None
+    ) -> str:
+        """
+        Build a where clause for a Synapse fileview query based on a dataset ID.
+        Because this is a static method, it can be used before a SynapseStorage
+        object is initialized.
+        Args:
+            dataset_id: Synapse ID of a dataset that should be used to limit the query
+            dataset_folder_list: List of Synapse IDs of a dataset and all its subfolders that should be used to limit the query
+        Returns:
+            Clause for the query, or an empty string if no dataset ID is provided
+        """
+        # Calling this method without specifying synIDs will complete but will not scope the view
+        if (not dataset_id) and (not dataset_folder_list):
+            return ""
+
+        # This will be used to gather files under a dataset recursively with a fileview query instead of walking
+        if dataset_folder_list:
+            search_folders = ", ".join(f"'{synId}'" for synId in dataset_folder_list)
+            return f"parentId IN ({search_folders})"
+
+        # `dataset_id` should be provided when all files are stored directly under the dataset folder
+        return f"parentId='{dataset_id}'"
+
     def _build_query(
         self, columns: Optional[list] = None, where_clauses: Optional[list] = None
     ):
@@ -666,7 +688,7 @@ def getStorageDatasetsInProject(self, projectId: str) -> list[tuple[str, str]]:
     def getFilesInStorageDataset(
         self, datasetId: str, fileNames: List = None, fullpath: bool = True
     ) -> List[Tuple[str, str]]:
-        """Gets all files in a given dataset folder.
+        """Gets all files (excluding manifest files) in a given dataset folder.

         Args:
             datasetId: synapse ID of a storage dataset.
@@ -680,105 +702,58 @@ def getFilesInStorageDataset(
         Raises:
-            ValueError: Dataset ID not found.
+            ValueError: If the fileview is empty.
+            LookupError: If the dataset could not be found in the fileview.
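+
+        Example (illustrative only; the Synapse IDs and paths are hypothetical):
+            >>> store.getFilesInStorageDataset("syn12345678")
+            [('syn11111111', 'project/dataset/sample_A.txt'), ('syn22222222', 'project/dataset/sample_B.txt')]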
""" - # select all files within a given storage dataset folder (top level folder in - # a Synapse storage project or folder marked with contentType = 'dataset') - walked_path = synapseutils.walk( - self.syn, datasetId, includeTypes=["folder", "file"] - ) - - current_entity_location = self.synapse_entity_tracker.get( - synapse_id=datasetId, syn=self.syn, download_file=False - ) - - def walk_back_to_project( - current_location: Entity, location_prefix: str, skip_entry: bool - ) -> str: - """ - Recursively walk back up the project structure to get the paths of the - names of each of the directories where we started the walk function. - - Args: - current_location (Entity): The current entity location in the project structure. - location_prefix (str): The prefix to prepend to the path. - skip_entry (bool): Whether to skip the current entry in the path. When - this is True it means we are looking at our starting point. If our - starting point is the project itself we can go ahead and return - back the project as the prefix. - - Returns: - str: The path of the names of each of the directories up to the project root. - """ - if ( - skip_entry - and "concreteType" in current_location - and current_location["concreteType"] == PROJECT_ENTITY - ): - return f"{current_location.name}/{location_prefix}" + file_list = [] - updated_prefix = ( - location_prefix - if skip_entry - else f"{current_location.name}/{location_prefix}" - ) - if ( - "concreteType" in current_location - and current_location["concreteType"] == PROJECT_ENTITY - ): - return updated_prefix - current_location = self.synapse_entity_tracker.get( - synapse_id=current_location["parentId"], - syn=self.syn, - download_file=False, + # Get path to dataset folder by using childern to avoid cases where the dataset is the scope of the view + if self.storageFileviewTable.empty: + raise ValueError( + f"Fileview {self.storageFileview} is empty, please check the table and the provided synID and try again." ) - return walk_back_to_project( - current_location=current_location, - location_prefix=updated_prefix, - skip_entry=False, + + child_path = self.storageFileviewTable.loc[ + self.storageFileviewTable["parentId"] == datasetId, "path" + ] + if child_path.empty: + raise LookupError( + f"Dataset {datasetId} could not be found in fileview {self.storageFileview}." 
+        child_path = child_path.iloc[0]

-        prefix = walk_back_to_project(
-            current_location=current_entity_location,
-            location_prefix="",
-            skip_entry=True,
-        )
+        # Get the dataset path by eliminating the child's portion of the path to account for nested datasets
+        parent = child_path.split("/")[:-1]
+        parent = "/".join(parent)

-        project_id = self.getDatasetProject(datasetId)
-        project = self.synapse_entity_tracker.get(
-            synapse_id=project_id, syn=self.syn, download_file=False
-        )
-        project_name = project.name
-        file_list = []
+        # Format dataset path to be used in table query
+        dataset_path = f"'{parent}/%'"

-        # iterate over all results
-        for dirpath, _, path_filenames in walked_path:
-            # iterate over all files in a folder
-            for path_filename in path_filenames:
-                if ("manifest" not in path_filename[0] and not fileNames) or (
-                    fileNames and path_filename[0] in fileNames
-                ):
-                    # don't add manifest to list of files unless it is specified in the
-                    # list of specified fileNames; return all found files
-                    # except the manifest if no fileNames have been specified
-                    # TODO: refactor for clarity/maintainability
-
-                    if fullpath:
-                        # append directory path to filename
-                        if dirpath[0].startswith(f"{project_name}/"):
-                            path_without_project_prefix = (
-                                dirpath[0] + "/"
-                            ).removeprefix(f"{project_name}/")
-                            path_filename = (
-                                prefix + path_without_project_prefix + path_filename[0],
-                                path_filename[1],
-                            )
-                        else:
-                            path_filename = (
-                                prefix + dirpath[0] + "/" + path_filename[0],
-                                path_filename[1],
-                            )
+        # When querying, restrict results to files, excluding folders and other entity types
+        where_clauses = [f"path like {dataset_path}", "type='file'"]
+
+        # Requery the fileview to specifically get the files in the given dataset
+        self.query_fileview(columns=["id", "path"], where_clauses=where_clauses)
+
+        # Exclude manifest files
+        non_manifest_files = self.storageFileviewTable.loc[
+            ~self.storageFileviewTable["path"].str.contains("synapse_storage_manifest"),
+            :,
+        ]
+
+        # Remove all files that are not in the list of fileNames
+        if fileNames:
+            filename_regex = "|".join(fileNames)
+
+            matching_files = non_manifest_files["path"].str.contains(
+                filename_regex, case=False, regex=True
+            )
+
+            non_manifest_files = non_manifest_files.loc[matching_files, :]
+
+        # Truncate each path to the file's basename if the full path was not requested
+        if not fullpath:
+            non_manifest_files.path = non_manifest_files.path.apply(os.path.basename)

-                    # add file name file id tuple, rearranged so that id is first and name follows
-                    file_list.append(path_filename[::-1])
+        # Return the list of (id, path) tuples as expected by other methods
+        file_list = list(non_manifest_files.itertuples(index=False, name=None))

         return file_list
diff --git a/schematic/utils/cli_utils.py b/schematic/utils/cli_utils.py
index 07c97af90..c6412d349 100644
--- a/schematic/utils/cli_utils.py
+++ b/schematic/utils/cli_utils.py
@@ -4,10 +4,11 @@
 # pylint: disable=anomalous-backslash-in-string

 import logging
-
-from typing import Any, Mapping, Sequence, Union, Optional
-from functools import reduce
 import re
+from functools import reduce
+from typing import Any, Mapping, Optional, Sequence, Union
+
+from schematic.utils.general import SYN_ID_REGEX

 logger = logging.getLogger(__name__)

@@ -69,7 +70,7 @@ def parse_syn_ids(
     if not syn_ids:
         return None

-    project_regex = re.compile("(syn\d+\,?)+")
+    project_regex = re.compile(SYN_ID_REGEX)
     valid = project_regex.fullmatch(syn_ids)

     if not valid:
diff --git a/schematic/utils/general.py b/schematic/utils/general.py
index 997a193bf..cdc1be2f2 100644
--- a/schematic/utils/general.py
+++ b/schematic/utils/general.py
@@ -24,6 +24,8 @@

 T = TypeVar("T")

+SYN_ID_REGEX = r"(syn\d+\,?)+"
+
 def find_duplicates(_list: list[T]) -> set[T]:
     """Find duplicate items in a list"""
diff --git a/schematic_api/api/openapi/api.yaml b/schematic_api/api/openapi/api.yaml
index 8689a8ae2..7c32e8d90 100644
--- a/schematic_api/api/openapi/api.yaml
+++ b/schematic_api/api/openapi/api.yaml
@@ -692,8 +692,8 @@ paths:
         - Synapse Storage
   /storage/dataset/files:
     get:
-      summary: Get all files in a given dataset folder
-      description: Get all files in a given dataset folder
+      summary: Get all files (excluding manifest files) in a given dataset folder
+      description: Get all files (excluding manifest files) in a given dataset folder
       operationId: schematic_api.api.routes.get_files_storage_dataset
       security:
         - access_token: []
diff --git a/tests/integration/test_commands.py b/tests/integration/test_commands.py
index 8658f6e6b..4367d817b 100644
--- a/tests/integration/test_commands.py
+++ b/tests/integration/test_commands.py
@@ -4,14 +4,14 @@
 import uuid
 from io import BytesIO

+import numpy as np
+import pandas as pd
 import pytest
 import requests
-from openpyxl import load_workbook
 from click.testing import CliRunner
-import pandas as pd
-import numpy as np
+from openpyxl import load_workbook

-from schematic.configuration.configuration import Configuration, CONFIG
+from schematic.configuration.configuration import CONFIG, Configuration
 from schematic.manifest.commands import manifest
 from schematic.models.commands import model
 from tests.conftest import ConfigurationForTesting
@@ -95,14 +95,14 @@ def test_validate_valid_manifest(self, runner: CliRunner) -> None:
         # command has no (python) errors, has exit code 0
         assert result.exit_code == 0
         # command output has success message
-        assert result.output.split("\n")[4] == (
+        result_list = result.output.split("\n")
+        assert (
             "Your manifest has been validated successfully. "
             "There are no errors in your manifest, "
             "and it can be submitted without any modifications."
-        )
+        ) in result_list
         # command output has no validation errors
-        for line in result.output.split("\n")[4]:
-            assert not line.startswith("error")
+        errors = [line for line in result_list if line.startswith("error")]
+        assert not errors

     def test_validate_invalid_manifest(self, runner: CliRunner) -> None:
         """
@@ -504,9 +504,10 @@ def test_generate_empty_excel_manifest(
         os.remove("tests/data/example.Biospecimen.schema.json")

         # command output has excel file creation message
+        result_list = result.output.split("\n")
         assert (
-            result.output.split("\n")[7]
-            == "Find the manifest template using this Excel file path: ./CLI_empty_excel.xlsx"
+            "Find the manifest template using this Excel file path: ./CLI_empty_excel.xlsx"
+            in result_list
         )

         sheet1 = workbook["Sheet1"]
@@ -665,18 +666,19 @@ def test_generate_bulk_rna_google_sheet_manifest(
         # Reset config to its default values
         CONFIG.load_config("config_example.yml")

-        assert result.output.split("\n")[7] == (
-            "Find the manifest template using this Google Sheet URL:"
-        )
-        assert result.output.split("\n")[8].startswith(
-            "https://docs.google.com/spreadsheets/d/"
-        )
-        assert result.output.split("\n")[9] == (
+        result_list = result.output.split("\n")
+        assert "Find the manifest template using this Google Sheet URL:" in result_list
+        assert (
             "Find the manifest template using this CSV file path: "
             "./CLI_gs_bulk_rna.csv"
-        )
-
-        google_sheet_url = result.output.split("\n")[8]
+        ) in result_list
+        google_sheet_result = [
+            result
+            for result in result_list
+            if result.startswith("https://docs.google.com/spreadsheets/d/")
+        ]
+        assert len(google_sheet_result) == 1
+        google_sheet_url = google_sheet_result[0]

         # Download the Google Sheets content as an Excel file and load into openpyxl
         export_url = f"{google_sheet_url}/export?format=xlsx"
@@ -908,18 +910,19 @@ def test_generate_bulk_rna_google_sheet_manifest_with_annotations(
         os.remove("tests/data/example.BulkRNA-seqAssay.schema.json")
         os.remove("./CLI_gs_bulk_rna_annos.csv")

-        assert result.output.split("\n")[10] == (
-            "Find the manifest template using this Google Sheet URL:"
-        )
-        assert result.output.split("\n")[11].startswith(
-            "https://docs.google.com/spreadsheets/d/"
-        )
-        assert result.output.split("\n")[12] == (
+        result_list = result.output.split("\n")
+        assert "Find the manifest template using this Google Sheet URL:" in result_list
+        assert (
             "Find the manifest template using this CSV file path: "
             "./CLI_gs_bulk_rna_annos.csv"
-        )
-
-        google_sheet_url = result.output.split("\n")[11]
+        ) in result_list
+        google_sheet_result = [
+            result
+            for result in result_list
+            if result.startswith("https://docs.google.com/spreadsheets/d/")
+        ]
+        assert len(google_sheet_result) == 1
+        google_sheet_url = google_sheet_result[0]

         # Download the Google Sheets content as an Excel file and load into openpyxl
         export_url = f"{google_sheet_url}/export?format=xlsx"
@@ -1177,10 +1180,11 @@ def test_generate_mock_component_excel_manifest(self, runner: CliRunner) -> None
         # TODO: remove with https://sagebionetworks.jira.com/browse/SCHEMATIC-202
         # Reset config to its default values
         CONFIG.load_config("config_example.yml")
-
         # Command output has excel file message
-        assert result.output.split("\n")[8] == (
+        result_list = result.output.split("\n")
+        assert (
             "Find the manifest template using this Excel file path: ./CLI_mock_comp.xlsx"
+            in result_list
         )

         sheet1 = workbook["Sheet1"]
diff --git a/tests/integration/test_metadata_model.py b/tests/integration/test_metadata_model.py
index 2178a83b8..2f604ed5d 100644
--- a/tests/integration/test_metadata_model.py
+++ b/tests/integration/test_metadata_model.py
@@ -139,7 +139,6 @@ async def test_submit_filebased_manifest_file_and_entities_valid_manifest_submit
             dir=create_temp_folder(path=tempfile.gettempdir()),
         ) as tmp_file:
             df.to_csv(tmp_file.name, index=False)
-
             # WHEN the manifest is submitted (Assertions are handled in the helper method)
             self._submit_and_verify_manifest(
                 helpers=helpers,
@@ -229,7 +228,6 @@ async def test_submit_filebased_manifest_file_and_entities_mock_filename(
             dir=create_temp_folder(path=tempfile.gettempdir()),
         ) as tmp_file:
             df.to_csv(tmp_file.name, index=False)
-
             # WHEN the manifest is submitted (Assertions are handled in the helper method)
             self._submit_and_verify_manifest(
                 helpers=helpers,
@@ -450,6 +448,11 @@ def _submit_and_verify_manifest(
             raise ValueError(
                 "expected_manifest_id or expected_manifest_name must be provided"
             )
+        # HACK: must requery the fileview to pick up new files, since SynapseStorage
+        # queries the last cached state of the fileview, which may not contain newly
+        # created folders. This is a workaround for fileviews not always containing
+        # the latest information, since the tests don't always follow a consistent
+        # process as testing resources are created and destroyed.
+        synapse_store.query_fileview(force_requery=True)

         # Spies
         if already_spied:
diff --git a/tests/integration/test_store_synapse.py b/tests/integration/test_store_synapse.py
index 085a48fc3..5c39acc3c 100644
--- a/tests/integration/test_store_synapse.py
+++ b/tests/integration/test_store_synapse.py
@@ -1,10 +1,17 @@
-from unittest.mock import MagicMock
+from re import compile as re_compile
+from unittest.mock import MagicMock, create_autospec, patch

 import numpy as np
+import pandas as pd
 import pytest
+from synapseclient import Synapse
+from synapseclient.core.cache import Cache
+from synapseclient.table import build_table

+from schematic.configuration.configuration import Configuration
 from schematic.schemas.data_model_graph import DataModelGraphExplorer
 from schematic.store.synapse import SynapseStorage
+from schematic.utils.general import SYN_ID_REGEX
 from schematic.utils.validate_utils import comma_separated_list_regex

@@ -125,3 +132,327 @@ def test_process_row_annotations_get_validation(
         "value2",
         "value3",
     ]
+
+    @pytest.mark.parametrize(
+        "asset_view, dataset_id, expected_files",
+        [
+            (
+                "syn23643253",
+                "syn61374924",
+                [
+                    ("syn61374926", "schematic - main/BulkRNASeq and files/txt1.txt"),
+                    ("syn61374930", "schematic - main/BulkRNASeq and files/txt2.txt"),
+                    ("syn62282794", "schematic - main/BulkRNASeq and files/txt3.txt"),
+                    ("syn62282720", "schematic - main/BulkRNASeq and files/txt4.txt"),
+                ],
+            ),
+            (
+                "syn23643253",
+                "syn25614635",
+                [
+                    (
+                        "syn25614636",
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_A.txt",
+                    ),
+                    (
+                        "syn25614637",
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_B.txt",
+                    ),
+                    (
+                        "syn25614638",
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_C.txt",
+                    ),
+                ],
+            ),
+            (
+                "syn63917487",
+                "syn63917494",
+                [
+                    (
+                        "syn63917520",
+                        "schematic - main/Test files and dataset annotations/Test BulkRNAseq w annotation/txt1.txt",
+                    ),
+                    (
+                        "syn63917521",
+                        "schematic - main/Test files and dataset annotations/Test BulkRNAseq w annotation/txt2.txt",
+                    ),
+                    (
+                        "syn63917522",
+                        "schematic - main/Test files and dataset annotations/Test BulkRNAseq w annotation/txt3.txt",
+                    ),
+                    (
+                        "syn63917518",
+                        "schematic - main/Test files and dataset annotations/Test BulkRNAseq w annotation/txt4.txt",
+                    ),
+                ],
+            ),
+            (
+                "syn23643253",
+                "syn63927665",
+                [
+                    (
+                        "syn63927671",
+                        "schematic - main/BulkRNAseq nested files/data/txt1.txt",
+                    ),
+                    (
+                        "syn63927672",
+                        "schematic - main/BulkRNAseq nested files/data/txt2.txt",
+                    ),
+                    (
+                        "syn63927673",
+                        "schematic - main/BulkRNAseq nested files/data/txt3.txt",
+                    ),
+                    (
+                        "syn63927670",
+                        "schematic - main/BulkRNAseq nested files/data/txt4.txt",
+                    ),
+                ],
+            ),
+            (
+                "syn23643253",
+                "syn63987067",
+                [
+                    (
+                        "syn63987072",
+                        "schematic - main/BulkRNAseq and double nested files/dataset/folder 1/data/txt1.txt",
+                    ),
+                    (
+                        "syn63987073",
+                        "schematic - main/BulkRNAseq and double nested files/dataset/folder 1/data/txt2.txt",
+                    ),
+                    (
+                        "syn63987074",
+                        "schematic - main/BulkRNAseq and double nested files/dataset/folder 1/data/txt3.txt",
+                    ),
+                    (
+                        "syn63987071",
+                        "schematic - main/BulkRNAseq and double nested files/dataset/folder 1/data/txt4.txt",
+                    ),
+                ],
+            ),
+        ],
+        ids=[
+            "top level folder",
+            "nested dataset",
+            "contentType:dataset annotation",
+            "nested data files",
+            "doubly nested data files",
+        ],
+    )
+    @pytest.mark.parametrize(
+        "filenames",
+        [None, ["txt1.txt", "txt2.txt"]],
+        ids=["no file filtering", "file filtering"],
+    )
+    def test_get_files_in_storage_dataset(
+        self, filenames, asset_view, dataset_id, expected_files, synapse_store
+    ):
+        # GIVEN a SynapseStorage object with the appropriate asset view
+        synapse_store.storageFileview = asset_view
+        # WHEN getFilesInStorageDataset is called for the given dataset
+        dataset_files = synapse_store.getFilesInStorageDataset(
+            datasetId=dataset_id, fileNames=filenames
+        )
+        # AND the filenames are filtered as appropriate
+        if filenames:
+            files_to_remove = []
+            for f in expected_files:
+                retain = False
+                for name in filenames:
+                    if name in f[1]:
+                        retain = True
+                if not retain:
+                    files_to_remove.append(f)
+
+            for file in files_to_remove:
+                expected_files.remove(file)
+
+        # THEN the expected files are returned
+        # AND there are no unexpected files
+        assert dataset_files == expected_files
+        # AND the (synId, path) order is correct
+        synapse_id_regex = re_compile(SYN_ID_REGEX)
+        if dataset_files:
+            assert synapse_id_regex.fullmatch(dataset_files[0][0])
+
+    @pytest.mark.parametrize(
+        "asset_view, dataset_id, exception, exception_message",
+        [
+            (
+                "syn23643253",
+                "syn64319379",
+                LookupError,
+                "Dataset syn64319379 could not be found",
+            ),
+            ("syn64367119", "syn61374924", ValueError, "Fileview syn64367119 is empty"),
+        ],
+        ids=[
+            "empty dataset",
+            "empty view",
+        ],
+    )
+    def test_get_files_in_storage_dataset_exception(
+        self, asset_view, dataset_id, exception, exception_message, synapse_store
+    ):
+        # GIVEN a SynapseStorage object with the appropriate asset view
+        synapse_store.storageFileview = asset_view
+        # AND the correct and up-to-date fileview
+        synapse_store.query_fileview(force_requery=True)
+        # WHEN getFilesInStorageDataset is called
+        # THEN the appropriate exception should be raised, with the appropriate message
+        with pytest.raises(exception, match=exception_message):
+            synapse_store.getFilesInStorageDataset(datasetId=dataset_id, fileNames=None)
+
+    @pytest.mark.parametrize(
+        "asset_view, dataset_id, expected_files",
+        [
+            (
+                "syn23643253",
+                "syn61374924",
+                [
+                    ("syn61374926", "schematic - main/BulkRNASeq and files/txt1.txt"),
+                    ("syn61374930", "schematic - main/BulkRNASeq and files/txt2.txt"),
+                    ("syn62282794", "schematic - main/BulkRNASeq and files/txt3.txt"),
+                    ("syn62282720", "schematic - main/BulkRNASeq and files/txt4.txt"),
+                ],
+            ),
+            (
+                "syn23643253",
+                "syn25614635",
+                [
+                    (
+                        "syn25614636",
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_A.txt",
+                    ),
+                    (
+                        "syn25614637",
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_B.txt",
+                    ),
+                    (
+                        "syn25614638",
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_C.txt",
+                    ),
+                ],
+            ),
+            (
+                "syn63917487",
+                "syn63917494",
+                [
+                    (
+                        "syn63917520",
+                        "schematic - main/Test files and dataset annotations/Test BulkRNAseq w annotation/txt1.txt",
+                    ),
+                    (
+                        "syn63917521",
+                        "schematic - main/Test files and dataset annotations/Test BulkRNAseq w annotation/txt2.txt",
+                    ),
+                    (
+                        "syn63917522",
+                        "schematic - main/Test files and dataset annotations/Test BulkRNAseq w annotation/txt3.txt",
+                    ),
+                    (
+                        "syn63917518",
+                        "schematic - main/Test files and dataset annotations/Test BulkRNAseq w annotation/txt4.txt",
+                    ),
+                ],
+            ),
+            (
+                "syn23643253",
+                "syn63927665",
+                [
+                    (
+                        "syn63927671",
+                        "schematic - main/BulkRNAseq nested files/data/txt1.txt",
+                    ),
+                    (
+                        "syn63927672",
+                        "schematic - main/BulkRNAseq nested files/data/txt2.txt",
+                    ),
+                    (
+                        "syn63927673",
+                        "schematic - main/BulkRNAseq nested files/data/txt3.txt",
+                    ),
+                    (
+                        "syn63927670",
+                        "schematic - main/BulkRNAseq nested files/data/txt4.txt",
+                    ),
+                ],
+            ),
+            (
+                "syn23643253",
+                "syn63987067",
+                [
+                    (
+                        "syn63987072",
+                        "schematic - main/BulkRNAseq and double nested files/dataset/folder 1/data/txt1.txt",
+                    ),
+                    (
+                        "syn63987073",
+                        "schematic - main/BulkRNAseq and double nested files/dataset/folder 1/data/txt2.txt",
+                    ),
+                    (
+                        "syn63987074",
+                        "schematic - main/BulkRNAseq and double nested files/dataset/folder 1/data/txt3.txt",
+                    ),
+                    (
+                        "syn63987071",
+                        "schematic - main/BulkRNAseq and double nested files/dataset/folder 1/data/txt4.txt",
+                    ),
+                ],
+            ),
+        ],
+        ids=[
+            "top level folder",
+            "nested dataset",
+            "contentType:dataset annotation",
+            "nested data files",
+            "doubly nested data files",
+        ],
+    )
+    @pytest.mark.parametrize(
+        "filenames",
+        [None, ["txt1.txt", "txt2.txt"]],
+        ids=["no file filtering", "file filtering"],
+    )
+    def test_mock_get_files_in_storage_dataset(
+        self,
+        synapse_store,
+        filenames,
+        asset_view,
+        dataset_id,
+        expected_files,
+    ):
+        # GIVEN a test configuration
+        TEST_CONFIG = Configuration()
+
+        with patch(
+            "schematic.store.synapse.CONFIG", return_value=TEST_CONFIG
+        ) as mock_config:
+            # AND the appropriate asset view
+            mock_config.synapse_master_fileview_id = asset_view
+            # AND the appropriately filtered filenames
+            if filenames:
+                files_to_remove = []
+                for f in expected_files:
+                    retain = False
+                    for name in filenames:
+                        if name in f[1]:
+                            retain = True
+                    if not retain:
+                        files_to_remove.append(f)
+
+                for file in files_to_remove:
+                    expected_files.remove(file)
+
+            # WHEN getFilesInStorageDataset is called for the given dataset
+            dataset_files = synapse_store.getFilesInStorageDataset(
+                datasetId=dataset_id, fileNames=filenames
+            )
+
+            # THEN the expected files are returned
+            # AND there are no unexpected files
+            assert dataset_files == expected_files
+            # AND the (synId, path) order is correct
+            synapse_id_regex = re_compile(SYN_ID_REGEX)
+            if dataset_files:
+                assert synapse_id_regex.fullmatch(dataset_files[0][0])
diff --git a/tests/test_manifest.py b/tests/test_manifest.py
index ade80fbe9..2358019d9 100644
--- a/tests/test_manifest.py
+++ b/tests/test_manifest.py
@@ -5,6 +5,7 @@

 import pandas as pd
 import pytest
+from pandas.testing import assert_series_equal

 from schematic.configuration.configuration import Configuration
 from schematic.manifest.generator import ManifestGenerator
@@ -103,6 +104,9 @@ def mock_create_blank_google_sheet():

 @pytest.fixture(params=[True, False], ids=["sheet_url", "data_frame"])
 def manifest(dataset_id, manifest_generator, request):
+    """
+    Only seems to be used for TestManifestGenerator.test_get_manifest_first_time
+    """
     # Rename request param for readability
     sheet_url = request.param
@@ -763,13 +767,11 @@ def test_create_manifests(
         assert all_results == expected_result

     @pytest.mark.parametrize(
-        "component,datasetId,expected_file_based,expected_rows,expected_files",
+        "component, datasetId, expected_rows, expected_files, expected_annotations",
         [
-            ("Biospecimen", "syn61260107", False, 4, None),
             (
                 "BulkRNA-seqAssay",
                 "syn61374924",
-                True,
                 4,
                 pd.Series(
                     [
                         "schematic - main/BulkRNASeq and files/txt1.txt",
                         "schematic - main/BulkRNASeq and files/txt2.txt",
                         "schematic - main/BulkRNASeq and files/txt3.txt",
                         "schematic - main/BulkRNASeq and files/txt4.txt",
                     ],
                     name="Filename",
                 ),
+                {
+                    "File Format": pd.Series(
+                        ["BAM", "CRAM", "CSV/TSV", ""], name="File Format"
+                    ),
+                    "Genome Build": pd.Series(
+                        ["GRCh37", "GRCh38", "GRCm38", ""], name="Genome Build"
+                    ),
+                },
+            ),
+            (
+                "BulkRNA-seqAssay",
+                "syn25614635",
+                3,
+                pd.Series(
+                    [
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_A.txt",
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_B.txt",
+                        "schematic - main/TestDatasets/TestDataset-Annotations-v3/Sample_C.txt",
+                    ],
+                    name="Filename",
+                ),
+                {
+                    "File Format": pd.Series(
+                        ["txt", "csv", "fastq"], name="File Format"
+                    ),
+                    "Year of Birth": pd.Series(["1980", "", ""], name="Year of Birth"),
+                },
             ),
         ],
-        ids=["Record based", "File based"],
+        ids=[
+            "top level folder",
+            "nested dataset",
+        ],
     )
+    @pytest.mark.parametrize("use_annotations", [True, False])
     def test_get_manifest_with_files(
         self,
         helpers,
         component,
         datasetId,
-        expected_file_based,
         expected_rows,
         expected_files,
+        expected_annotations,
+        use_annotations,
     ):
         """
         Test to ensure that
@@ -813,27 +847,37 @@
             path_to_data_model=path_to_data_model,
             graph=graph_data_model,
             root=component,
-            use_annotations=True,
+            use_annotations=use_annotations,
         )

-        # WHEN a manifest is generated for the appropriate dataset as a dataframe
+        # WHEN a filebased manifest is generated for the appropriate dataset as a dataframe
         manifest = generator.get_manifest(
             dataset_id=datasetId, output_format="dataframe"
         )

-        # AND it is determined if the manifest is filebased
-        is_file_based = "Filename" in manifest.columns
-
         # AND the number of rows are checked
         n_rows = manifest.shape[0]

         # THEN the manifest should have the expected number of rows
         assert n_rows == expected_rows

-        # AND the manifest should be filebased or not as expected
-        assert is_file_based == expected_file_based
+        # AND the manifest should have the columns expected of filebased metadata
+        assert "Filename" in manifest.columns
+        assert "entityId" in manifest.columns
+
+        # AND the manifest should have the expected files from the dataset
+        assert_series_equal(
+            manifest["Filename"],
+            expected_files,
+            check_dtype=False,
+        )

-        # AND if the manifest is file based
-        if expected_file_based:
-            # THEN the manifest should have the expected files
-            assert manifest["Filename"].equals(expected_files)
+        # AND if annotations are used to generate the manifest
+        if use_annotations:
+            # THEN the annotations in the generated manifest should match the expected annotations
+            for attribute, annotations in expected_annotations.items():
+                assert_series_equal(
+                    manifest[attribute],
+                    annotations,
+                    check_dtype=False,
+                )
diff --git a/tests/test_store.py b/tests/test_store.py
index c9f32bec2..516b7805d 100644
--- a/tests/test_store.py
+++ b/tests/test_store.py
@@ -11,7 +11,7 @@
 import uuid
 from contextlib import nullcontext as does_not_raise
 from typing import Any, Callable, Generator
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock, MagicMock, patch

 import pandas as pd
 import pytest
@@ -22,6 +22,7 @@
 from synapseclient.entity import File, Project
 from synapseclient.models import Annotations
 from synapseclient.models import Folder as FolderModel
+from synapseclient.table import build_table

 from schematic.configuration.configuration import CONFIG, Configuration
 from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
@@ -463,46 +464,103 @@ def test_getDatasetProject(self, dataset_id, synapse_store):
             (
                 True,
                 [
-                    ("syn126", "schematic - main/parent_folder/test_file"),
+                    ("syn126", "syn_mock", "schematic - main/parent_folder/test_file"),
                     (
                         "syn125",
+                        "syn_mock",
                         "schematic - main/parent_folder/test_folder/test_file_2",
                     ),
                 ],
             ),
-            (False, [("syn126", "test_file"), ("syn125", "test_file_2")]),
+            (
+                False,
+                [
+                    ("syn126", "syn_mock", "test_file"),
+                    ("syn125", "syn_mock", "test_file_2"),
+                ],
+            ),
         ],
     )
     def test_getFilesInStorageDataset(self, synapse_store, full_path, expected):
-        mock_return = [
+        mock_table_dataframe_return = pd.DataFrame(
+            {
+                "id": ["syn126", "syn125"],
+                "parentId": ["syn_mock", "syn_mock"],
+                "path": [
+                    "schematic - main/parent_folder/test_file",
+                    "schematic - main/parent_folder/test_folder/test_file_2",
+                ],
+            }
+        )
+
+        with patch.object(
+            synapse_store, "storageFileviewTable", mock_table_dataframe_return
+        ), patch.object(synapse_store, "query_fileview") as mocked_query:
+            # query_fileview is patched so the requery inside getFilesInStorageDataset
+            # does not overwrite the mocked table
+            mocked_query.return_value = mock_table_dataframe_return
+
+            file_list = synapse_store.getFilesInStorageDataset(
+                datasetId="syn_mock", fileNames=None, fullpath=full_path
+            )
+            assert file_list == expected
+
+    @pytest.mark.parametrize(
+        "mock_table_dataframe, raised_exception, exception_message",
+        [
             (
-                ("parent_folder", "syn123"),
-                [("test_folder", "syn124")],
-                [("test_file", "syn126")],
+                pd.DataFrame(
+                    {
+                        "id": ["child_syn_mock"],
+                        "path": ["schematic - main/parent_folder/child_entity"],
+                        "parentId": ["wrong_syn_mock"],
+                    }
+                ),
+                LookupError,
+                "Dataset syn_mock could not be found",
             ),
             (
-                (
-                    os.path.join("schematic - main", "parent_folder", "test_folder"),
-                    "syn124",
+                pd.DataFrame(
+                    {
+                        "id": [],
+                        "path": [],
+                        "parentId": [],
+                    }
                 ),
-                [],
-                [("test_file_2", "syn125")],
+                ValueError,
+                "Fileview mock_table_id is empty",
             ),
-        ]
-        with patch(
-            "synapseutils.walk_functions._help_walk", return_value=mock_return
-        ) as mock_walk_patch, patch(
-            "schematic.store.synapse.SynapseStorage.getDatasetProject",
-            return_value="syn23643250",
-        ) as mock_project_id_patch, patch(
-            "synapseclient.entity.Entity.__getattr__", return_value="schematic - main"
-        ) as mock_project_name_patch, patch.object(
-            synapse_store.syn, "get", return_value=Project(name="schematic - main")
-        ):
-            file_list = synapse_store.getFilesInStorageDataset(
-                datasetId="syn_mock", fileNames=None, fullpath=full_path
-            )
-            assert file_list == expected
+        ],
+        ids=["missing_dataset", "empty_fileview"],
+    )
+    @pytest.mark.parametrize(
+        "full_path",
+        [
+            (True),
+            (False),
+        ],
+    )
+    def test_get_files_in_storage_dataset_exception(
+        self,
+        synapse_store,
+        mock_table_dataframe,
+        raised_exception,
+        exception_message,
+        full_path,
+    ):
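+        # Both the cached table and query_fileview are patched below so that no real
+        # Synapse call occurs and the mocked fileview state is exactly what
+        # getFilesInStorageDataset sees when it re-queries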
"storageFileviewTable", mock_table_dataframe + ), patch.object( + synapse_store, "storageFileview", "mock_table_id" + ) as mock_table_id, patch.object( + synapse_store, "query_fileview" + ) as mocked_query: + # query_fileview is the function called to get the fileview + mocked_query.return_value = mock_table_dataframe + + with pytest.raises(raised_exception, match=exception_message): + synapse_store.getFilesInStorageDataset( + datasetId="syn_mock", fileNames=None, fullpath=full_path + ) @pytest.mark.parametrize("downloadFile", [True, False]) def test_getDatasetManifest(self, synapse_store, downloadFile): @@ -926,6 +984,26 @@ async def mock_success_coro() -> None: await synapse_store._process_store_annos(new_tasks) mock_store_async.assert_not_called() + @pytest.mark.parametrize( + "dataset_id, dataset_folder_list, expected_clause", + [ + ("syn12345678", None, "parentId='syn12345678'"), + ( + "syn63927665", + ["syn63927665", "syn63927667"], + "parentId IN ('syn63927665', 'syn63927667')", + ), + (None, None, ""), + ], + ) + def test_build_clause_from_dataset_id( + self, dataset_id, dataset_folder_list, expected_clause + ): + dataset_clause = SynapseStorage.build_clause_from_dataset_id( + dataset_id=dataset_id, dataset_folder_list=dataset_folder_list + ) + assert dataset_clause == expected_clause + class TestDatasetFileView: def test_init(self, dataset_id, dataset_fileview, synapse_store):