From d2177a940af1e8834dc6a9aba8ebd7db28469d49 Mon Sep 17 00:00:00 2001
From: chenv3 <43543446+chenv3@users.noreply.github.com>
Date: Wed, 8 Jan 2025 16:56:00 -0500
Subject: [PATCH] Add ability to start pipeline from cell ranger output, edit
 documentation for this new input, change pipeline rules to keep condition
 cell ranger input on the same line as previous input

---
 cell-seek                   | 20 +++++---
 docs/usage/run.md           | 93 ++++++++++++++++++++++++++-----------
 src/run.py                  | 70 ++++++++++++++++++++++++----
 workflow/Snakefile          |  2 +
 workflow/rules/atac.smk     | 10 ++--
 workflow/rules/cite.smk     | 20 ++------
 workflow/rules/gex.smk      | 18 +++----
 workflow/rules/multi.smk    | 21 ++++-----
 workflow/rules/multiome.smk | 16 ++-----
 workflow/rules/vdj.smk      |  4 +-
 10 files changed, 177 insertions(+), 97 deletions(-)

diff --git a/cell-seek b/cell-seek
index 278178c..5f39be8 100755
--- a/cell-seek
+++ b/cell-seek
@@ -324,17 +324,25 @@ def parsed_arguments(name, description):
 
         {3}{4}Description:{5}
           To run the cell-seek pipeline with your data raw data, please
-        provide a space seperated list of FastQ (globbing is supported) and an output
+        provide a space separated list of FastQ (globbing is supported) and an output
         directory to store results.
 
         {3}{4}Required arguments:{5}
           --input INPUT [INPUT ...]
-                                Input FastQ file(s) to process. The pipeline does NOT
-                                support single-end data. FastQ files for one or more
-                                samples can be provided. Multiple input FastQ files
-                                should be seperated by a space. Globbing for multiple
-                                file is also supported.
+                                Input FastQ file(s) or Cell Ranger output folders to
+                                process. The pipeline does NOT support single-end data. 
+                                FastQ files for one or more samples can be provided. 
+                                Multiple input FastQ files per sample can be provided. 
+                                Multiple input FastQ files should be separated by a 
+                                space.
+                                Cell Ranger output folders can be provided. It is 
+                                expected that the outs folder is contained within the 
+                                Cell Ranger output folders.
+                                Globbing for multiple files/folders is also supported.
+                                FastQ Input:
                                   Example: --input .tests/*.R?.fastq.gz
+                                Cell Ranger Input:
+                                  Example: --input .tests/*/
           --output OUTPUT
                                 Path to an output directory. This location is where
                                 the pipeline will create all of its output files, also
diff --git a/docs/usage/run.md b/docs/usage/run.md
index 602c4bc..a665ccc 100644
--- a/docs/usage/run.md
+++ b/docs/usage/run.md
@@ -39,12 +39,18 @@ The following is a breakdown of the required and optional arguments for each of
 Each of the following arguments are required. Failure to provide a required argument will result in a non-zero exit-code.
 
   `--input INPUT [INPUT ...]`  
-> **Input FastQ file(s).**  
-> *type: file(s)*  
+> **Input FastQ file(s) or Cell Ranger folder(s).**  
+> *type: file(s) or folder(s)*  
 >
-> One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should seperated by a space. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipp-ed.
+> FastQ Input: One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should be separated by a space. Multiple input FastQ files per sample can be provided. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipped.
 >
 > ***Example:*** `--input .tests/*.R?.fastq.gz`
+>
+>
+> Cell Ranger Input: Cell Ranger output folders can be provided. Each Cell Ranger output folder is expected to contain its outs folder and to keep the normal output folder structure. Globbing is supported!
+>
+> ***Example:*** `--input .tests/*/`
+
 
 ---  
   `--output OUTPUT`
@@ -219,12 +225,17 @@ Each of the following arguments are optional, and do not need to be provided.
 Each of the following arguments are required. Failure to provide a required argument will result in a non-zero exit-code.
 
   `--input INPUT [INPUT ...]`  
-> **Input FastQ file(s).**  
-> *type: file(s)*  
+> **Input FastQ file(s) or Cell Ranger folder(s).**  
+> *type: file(s) or folder(s)*  
 >
-> One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should seperated by a space. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipp-ed.
+> FastQ Input: One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should be separated by a space. Multiple input FastQ files per sample can be provided. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipped.
 >
 > ***Example:*** `--input .tests/*.R?.fastq.gz`
+>
+>
+> Cell Ranger Input: Cell Ranger output folders can be provided. Each Cell Ranger output folder is expected to contain its outs folder and to keep the normal output folder structure. Globbing is supported!
+>
+> ***Example:*** `--input .tests/*/`
 
 ---  
   `--output OUTPUT`
@@ -300,12 +311,17 @@ Each of the following arguments are required. Failure to provide a required argu
 Each of the following arguments are required. Failure to provide a required argument will result in a non-zero exit-code.
 
   `--input INPUT [INPUT ...]`  
-> **Input FastQ file(s).**  
-> *type: file(s)*  
+> **Input FastQ file(s) or Cell Ranger folder(s).**  
+> *type: file(s) or folder(s)*  
 >
-> One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should seperated by a space. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipp-ed.
+> FastQ Input: One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should be separated by a space. Multiple input FastQ files per sample can be provided. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipped.
 >
 > ***Example:*** `--input .tests/*.R?.fastq.gz`
+>
+>
+> Cell Ranger Input: Cell Ranger output folders can be provided. Each Cell Ranger output folder is expected to contain its outs folder and to keep the normal output folder structure. Globbing is supported!
+>
+> ***Example:*** `--input .tests/*/`
 
 ---  
   `--output OUTPUT`
@@ -347,7 +363,11 @@ Each of the following arguments are required. Failure to provide a required argu
 >
 > ***Example:*** `--cellranger 7.1.0`
 
----
+
+#### 2.3.2 Conditionally Required Arguments
+
+The following arguments are only required when FastQ files are used as input. They are not required when Cell Ranger output folders are used as input.
+
 `--libraries LIBRARIES`
 > **Libraries file.**   
 > *type: file*
@@ -407,7 +427,7 @@ Each of the following arguments are required. Failure to provide a required argu
 >
 > ***Example:*** `--features features.csv`
 
-#### 2.3.2 Analysis Options
+#### 2.3.3 Analysis Options
 
 `--exclude-introns`
 > **Exclude introns from the count alignment.**   
@@ -458,13 +478,18 @@ There are multiple different combinations of library types that may result in th
 
 Each of the following arguments are required. Failure to provide a required argument will result in a non-zero exit-code.
 
-  `--input INPUT [INPUT ...]`  
-> **Input FastQ file(s).**  
-> *type: file(s)*  
+`--input INPUT [INPUT ...]`  
+> **Input FastQ file(s) or Cell Ranger folder(s).**  
+> *type: file(s) or folder(s)*  
 >
-> One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should seperated by a space. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipp-ed.
+> FastQ Input: One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should be separated by a space. Multiple input FastQ files per sample can be provided. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipped.
 >
 > ***Example:*** `--input .tests/*.R?.fastq.gz`
+>
+>
+> Cell Ranger Input: Cell Ranger output folders can be provided. Each Cell Ranger output folder is expected to contain its outs folder and to keep the normal output folder structure. Globbing is supported!
+>
+> ***Example:*** `--input .tests/*/`
 
 ---  
   `--output OUTPUT`
@@ -506,7 +531,10 @@ Each of the following arguments are required. Failure to provide a required argu
 >
 > ***Example:*** `--cellranger 7.1.0`
 
----
+#### 2.4.2 Conditionally Required Arguments
+
+The following arguments are only required when FastQ files are used as input. They are not required when Cell Ranger output folders are used as input.
+
 `--libraries LIBRARIES`
 > **Libraries file.**   
 > *type: file*
@@ -535,7 +563,7 @@ Each of the following arguments are required. Failure to provide a required argu
 >
 > ***Example:*** `--libraries libraries.csv`
 
-#### 2.4.2 Analysis Options
+#### 2.4.3 Analysis Options
 
 Each of the following arguments are optional, and do not need to be provided.
 
@@ -682,12 +710,17 @@ Each of the following arguments are optional, and do not need to be provided.
 Each of the following arguments are required. Failure to provide a required argument will result in a non-zero exit-code.
 
   `--input INPUT [INPUT ...]`  
-> **Input FastQ file(s).**  
-> *type: file(s)*  
+> **Input FastQ file(s) or Cell Ranger folder(s).**  
+> *type: file(s) or folder(s)*  
 >
-> One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should seperated by a space. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipp-ed.
+> FastQ Input: One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should be separated by a space. Multiple input FastQ files per sample can be provided. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipped.
 >
 > ***Example:*** `--input .tests/*.R?.fastq.gz`
+>
+>
+> Cell Ranger Input: Cell Ranger output folders can be provided. Each Cell Ranger output folder is expected to contain its outs folder and to keep the normal output folder structure. Globbing is supported!
+>
+> ***Example:*** `--input .tests/*/`
 
 ---  
   `--output OUTPUT`
@@ -776,13 +809,18 @@ Each of the following arguments are required. Failure to provide a required argu
 
 Each of the following arguments are required. Failure to provide a required argument will result in a non-zero exit-code.
 
-  `--input INPUT [INPUT ...]`  
-> **Input FastQ file(s).**  
-> *type: file(s)*  
+`--input INPUT [INPUT ...]`  
+> **Input FastQ file(s) or Cell Ranger folder(s).**  
+> *type: file(s) or folder(s)*  
 >
-> One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should seperated by a space. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipp-ed.
+> FastQ Input: One or more FastQ files can be provided. The pipeline does NOT support single-end data. From the command-line, each input file should be separated by a space. Multiple input FastQ files per sample can be provided. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should always be gzipped.
 >
 > ***Example:*** `--input .tests/*.R?.fastq.gz`
+>
+>
+> Cell Ranger Input: Cell Ranger output folders can be provided. Each Cell Ranger output folder is expected to contain its outs folder and to keep the normal output folder structure. Globbing is supported!
+>
+> ***Example:*** `--input .tests/*/`
 
 ---  
   `--output OUTPUT`
@@ -816,7 +854,10 @@ Each of the following arguments are required. Failure to provide a required argu
 > ***Example:*** `--genome hg38`
 
 
----
+#### 2.6.2 Conditionally Required Arguments
+
+The following arguments are only required when FastQ files are used as input. They are not required when Cell Ranger output folders are used as input.
+
 `--libraries LIBRARIES`
 > **Libraries file.**   
 > *type: file*
@@ -842,7 +883,7 @@ Each of the following arguments are required. Failure to provide a required argu
 > ***Example:*** `--libraries libraries.csv`
 
 
-#### 2.6.2 Analysis Options
+#### 2.6.3 Analysis Options
 
 The multiome pipeline currently does not have any applicable analysis flags.
 
diff --git a/src/run.py b/src/run.py
index f5e4eb1..f587092 100755
--- a/src/run.py
+++ b/src/run.py
@@ -79,7 +79,7 @@ def sym_safe(input_data, target, link):
     as input. If a symlink already exists, it will not try to create a new symlink.
     If relative source PATH is provided, it will be converted to an absolute PATH.
     It is currently forcing a link to be created for cellranger output folders, even if the provided
-    link parameter is False
+    link parameter is False.
     @param input_data <list[<str>]>:
         List of input files to symlink to target location
     @param target <str>:
@@ -90,14 +90,26 @@ def sym_safe(input_data, target, link):
     input_fastqs = [] # store renamed fastq file names
     for file in input_data:
         if os.path.isdir(file): #Checking if provided file is a directory. If so, assumes it is a cellranger outs folder
-            filename = os.path.join(os.path.basename(os.path.dirname(file)), os.path.basename(file))
-            file = os.path.dirname(file)
-            link = True
+            if os.path.exists(os.path.join(file, 'outs')):
+                #filename = os.path.join(os.path.basename(os.path.dirname(file)), os.path.basename(file))
+                filename = os.path.basename(file)
+                link = True
+            else:
+                raise NameError("""\n\tFatal: Provided input '{}' does not match expected format!
+                Cannot determine if existing folder is a cellranger output folder. 
+                Please check the folder name and structure before trying again.
+                Here is example of expected cellranger output folder structure:
+                  input: sampleName     structure: sampleName/outs
+                """.format(file, sys.argv[0])
+                ) 
         else:
             filename = os.path.basename(file)
         try:
-            renamed = rename(filename)
-            renamed = os.path.join(target, renamed)
+            if not link:
+                renamed = rename(filename)
+                renamed = os.path.join(target, renamed)
+            else:
+                renamed = os.path.join(target, filename)
         except NameError as e:
             if not link:
                 # Don't care about creating the symlinks
@@ -107,11 +119,12 @@ def sym_safe(input_data, target, link):
                 raise e
 
         input_fastqs.append(renamed)
+        print(filename, file, renamed)
 
         if not exists(renamed) and link:
             # Create a symlink if it does not already exist
             # Follow source symlinks to resolve any binding issues
-            os.symlink(os.path.abspath(os.path.realpath(file)), renamed)
+            os.symlink(os.path.abspath(os.path.realpath(file)), renamed, target_is_directory=True)
 
     return input_fastqs
 
@@ -188,6 +201,9 @@ def setup(sub_args, ifiles, repo_path, output_path):
     # of FastQ and BAM files
     mixed_inputs(ifiles)
 
+    # Check if inputs are folders
+    folder_inputs(ifiles)
+
     # Resolves PATH to reference file
     # template or a user generated
     # reference genome built via build
@@ -412,6 +428,39 @@ def mixed_inputs(ifiles):
             """.format(" ".join(fq_files), " ".join(bam_files), sys.argv[0])
         )
 
+def folder_inputs(ifiles):
+    """Check if directories were provided as input and reject a folder/file mixture.
+    @params ifiles list[<str>]:
+        List containing pipeline input files (renamed symlinks)
+    @return folders <bool>: True if any input is a directory; raises TypeError on a mixture"""
+    folder_files, file_files = [], []  # inputs split by kind, echoed in the error message
+    folders = False  # becomes True once any input is a directory
+    files = False  # becomes True once any input is not a directory
+    for file in ifiles:
+        if os.path.isdir(file):
+            folders = True
+            folder_files.append(file)
+        else:
+            files = True
+            file_files.append(file)
+
+    if folders and files:
+        # User provided a mix of folders and files
+        raise TypeError("""\n\tFatal: Detected a mixture of --input data types.
+            A mixture of folders and files were provided; however, the pipeline
+            does NOT support processing a mixture of input FastQ files and 
+            cellranger outputs.
+            Input Folders:
+                {}
+            Input Files:
+                {}
+            Please do not run the pipeline with a mixture of files and folders.
+            This feature is currently not supported within '{}'. If you feel like 
+            this functionality should exist, feel free to open an issue on Github.
+            """.format(" ".join(folder_files), " ".join(file_files), sys.argv[0])
+        )
+    return(folders)  # True when at least one folder was given
+
 def add_user_information(config):
     """Adds username and user's home directory to config.
     @params config <dict>:
@@ -823,18 +872,19 @@ def check_conditional_parameters(config):
         Config dictionary containing metadata to run pipeline
     """
     errorMessage = []
+    input_folders = folder_inputs(config['options']['input'])
     #Check if cellranger version is provided when required
     if config['options']['pipeline'] in ['gex', 'cite', 'multi'] and config['options']['cellranger'] == '':
         errorMessage += [
             "Error: Version of cellranger to use is required for {} pipeline\n \
             └── Please use the --cellranger flag to select one of the available versions: {}".format(
                 config['options']['pipeline'],
-                ', '.join(['7.1.0', '7.2.0', '8.0.0'])
+                ', '.join(['7.1.0', '7.2.0', '8.0.0', '9.0.0'])
             )
         ]
 
     #Check if libraries file is provided when required
-    if config['options']['pipeline'] in ['cite', 'multi', 'multiome'] and config['options']['libraries'] == 'None':
+    if config['options']['pipeline'] in ['cite', 'multi', 'multiome'] and config['options']['libraries'] == 'None' and not input_folders:
         errorMessage += [
             "Error: Libraries file is required for {} pipeline\n \
             └── Please use the --libraries flag to provide the CSV file with the columns: {}".format(
@@ -844,7 +894,7 @@ def check_conditional_parameters(config):
         ]
 
     #Check if features file is provided when required
-    if config['options']['pipeline'] in ['cite'] and config['options']['features'] == 'None':
+    if config['options']['pipeline'] in ['cite'] and config['options']['features'] == 'None' and not input_folders:
         errorMessage += [
             "Error: Features file is required for {} pipeline\n \
             └── Please use the --features flag to provide the CSV file with the columns: {}".format(
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 2c8f535..761d167 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -42,6 +42,8 @@ filter_file = config['options']['filter']	# Filter threshold file for QC analysi
 METADATA_FILE = config['options']['metadata']	# Metadata file for QC analysis (not used in all pipelines)
 if 'libraries' in config:
     lib_samples = list(config['libraries'].keys()) # Libraries file samples
+else:
+    lib_samples = samples # Handling the situation where cellranger outputs is used as input and no libraries file is provided
 pipeline_output = []
 
 
diff --git a/workflow/rules/atac.smk b/workflow/rules/atac.smk
index df171fb..c45ddd9 100644
--- a/workflow/rules/atac.smk
+++ b/workflow/rules/atac.smk
@@ -74,8 +74,7 @@ rule count:
                   --id {params.id} \\
                   --sample {params.sample} \\
                   --reference {params.reference} \\
-                  --fastqs {params.fastqs} \\
-                  {params.forcecells} \\
+                  --fastqs {params.fastqs} {params.forcecells} \\
               2>{log.err} 1>{log.log}
             fi
         else
@@ -83,8 +82,7 @@ rule count:
                 --id {params.id} \\
                 --sample {params.sample} \\
                 --reference {params.reference} \\
-                --fastqs {params.fastqs} \\
-                {params.forcecells} \\
+                --fastqs {params.fastqs} {params.forcecells} \\
             2>{log.err} 1>{log.log}
         fi
         """
@@ -115,5 +113,7 @@ rule sampleCleanup:
         cr_temp = join(workpath, "{sample}", "SC_ATAC_COUNTER_CS")
     shell:
         """
-        rm -r {params.cr_temp}
+        if [ -d '{params.cr_temp}' ]; then
+            rm -r {params.cr_temp}
+        fi
         """
diff --git a/workflow/rules/cite.smk b/workflow/rules/cite.smk
index 5f81daf..ee39443 100755
--- a/workflow/rules/cite.smk
+++ b/workflow/rules/cite.smk
@@ -1,11 +1,5 @@
 # Pipeline output definition
 
-# Single sample libraries files for cellranger count
-pipeline_output += expand(
-            join(workpath, "{sample}_libraries.csv"),
-            sample=lib_samples
-        )
-
 # CellRanger counts, summary report
 pipeline_output += expand(
             join(workpath, "{sample}", "outs", "web_summary.html"),
@@ -174,10 +168,7 @@ rule count:
                     --id={params.prefix} \\
                     --transcriptome={params.transcriptome} \\
                     --libraries={input.lib} \\
-                    --feature-ref={input.features} \\
-                    {params.introns} \\
-                    {params.createbam} \\
-                    {params.forcecells} \\
+                    --feature-ref={input.features} {params.introns} {params.createbam} {params.forcecells} \\
                 2>{log.err} 1>{log.log}
             fi
         else
@@ -185,10 +176,7 @@ rule count:
                 --id={params.prefix} \\
                 --transcriptome={params.transcriptome} \\
                 --libraries={input.lib} \\
-                --feature-ref={input.features} \\
-                {params.introns} \\
-                {params.createbam} \\
-                {params.forcecells} \\
+                --feature-ref={input.features} {params.introns} {params.createbam} {params.forcecells} \\
             2>{log.err} 1>{log.log}
         fi
         """
@@ -316,7 +304,9 @@ rule sampleCleanup:
         cr_temp = join(workpath, "{sample}", "SC_RNA_COUNTER_CS")
     shell:
         """
-        rm -r {params.cr_temp}
+        if [ -d '{params.cr_temp}' ]; then
+            rm -r {params.cr_temp}
+        fi
         """
 
 rule seuratQC:
diff --git a/workflow/rules/gex.smk b/workflow/rules/gex.smk
index b4f34bb..57055eb 100644
--- a/workflow/rules/gex.smk
+++ b/workflow/rules/gex.smk
@@ -194,10 +194,7 @@ rule count:
                     --id {params.id} \\
                     --sample {params.sample} \\
                     --transcriptome {params.transcriptome} \\
-                    --fastqs {params.fastqs} \\
-                    {params.excludeintrons} \\
-                    {params.createbam} \\
-                    {params.forcecells} \\
+                    --fastqs {params.fastqs} {params.excludeintrons} {params.createbam} {params.forcecells} \\
                 2>{log.err} 1>{log.log}
 	    fi
         else
@@ -205,10 +202,7 @@ rule count:
                 --id {params.id} \\
                 --sample {params.sample} \\
                 --transcriptome {params.transcriptome} \\
-                --fastqs {params.fastqs} \\
-                {params.excludeintrons} \\
-                {params.createbam} \\
-                {params.forcecells} \\
+                --fastqs {params.fastqs} {params.excludeintrons} {params.createbam} {params.forcecells} \\
             2>{log.err} 1>{log.log}
         fi
         """
@@ -393,7 +387,9 @@ rule sampleCleanup:
         cr_temp = join(workpath, "{sample}", "SC_RNA_COUNTER_CS")
     shell:
         """
-        rm -r {params.cr_temp}
+        if [ -d '{params.cr_temp}' ]; then
+            rm -r {params.cr_temp}
+        fi
         """
 
 rule aggregateCleanup:
@@ -432,7 +428,7 @@ rule seuratIntegrate:
 
 rule seuratIntegrateSummaryReport:
     input:
-        rds = rules.seuratIntegrate.output.rds
+        rds = join(workpath, "seurat", "integrate", "integrated_sct.rds")
     output:
         report = join(workpath, "seurat", "integrate", "IntegrateOverviewReport.html")
     params:
@@ -450,7 +446,7 @@ rule seuratIntegrateSummaryReport:
 
 rule copySeuratIntegrateSummaryReport:
   input:
-    report = rules.seuratIntegrateSummaryReport.output.report
+    report = join(workpath, "seurat", "integrate", "IntegrateOverviewReport.html")
   output:
     report = join(workpath, "finalreport", "seurat", "Integrate_Overview_Report.html")
   params:
diff --git a/workflow/rules/multi.smk b/workflow/rules/multi.smk
index 97abfae..e474cea 100644
--- a/workflow/rules/multi.smk
+++ b/workflow/rules/multi.smk
@@ -1,11 +1,5 @@
 # Pipeline output definition
 
-# Single sample libraries files for cellranger multi
-pipeline_output += expand(
-    join(workpath, "{sample}.csv"),
-    sample=lib_samples
-)
-
 # Cell Ranger multi output
 pipeline_output += expand(
     join(workpath, "{sample}", "outs", "config.csv"),
@@ -61,11 +55,12 @@ def conditional_flags(wildcards):
         if wildcards.sample in CELLCOUNT_DICT.keys():
             flags.append(f"--forcecells {CELLCOUNT_DICT[wildcards.sample]}")
 
-    f = open(libraries, 'r')
-    for line in f:
-        if all([i in line for i in [wildcards.sample, 'VDJ']]):
-            flags.append(f'--vdjref {config["references"][genome]["vdj_ref"]}')
-            break
+    if libraries != 'None':
+      f = open(libraries, 'r')
+      for line in f:
+          if all([i in line for i in [wildcards.sample, 'VDJ']]):
+              flags.append(f'--vdjref {config["references"][genome]["vdj_ref"]}')
+              break
 
     return(' '.join(flags))
 
@@ -167,5 +162,7 @@ rule sampleCleanup:
         cr_temp = join(workpath, "{sample}", "SC_MULTI_CS")
     shell:
         """
-        rm -r {params.cr_temp}
+        if [ -d '{params.cr_temp}' ]; then
+           rm -r {params.cr_temp}
+        fi
         """
diff --git a/workflow/rules/multiome.smk b/workflow/rules/multiome.smk
index c5f50a2..da33d79 100644
--- a/workflow/rules/multiome.smk
+++ b/workflow/rules/multiome.smk
@@ -1,11 +1,5 @@
 # Pipeline output definition
 
-# Single sample libraries files for cellranger count
-pipeline_output += expand(
-            join(workpath, "{sample}_libraries.csv"),
-            sample=lib_samples
-        )
-
 # CellRanger counts, summary report
 pipeline_output += expand(
             join(workpath, "{sample}", "outs", "web_summary.html"),
@@ -100,16 +94,14 @@ rule count:
                 cellranger-arc count \\
                     --id={params.prefix} \\
                     --reference={params.reference} \\
-                    --libraries={input.lib} \\
-                    {params.introns} \\
+                    --libraries={input.lib} {params.introns} \\
                 2>{log.err} 1>{log.log}
             fi
         else
             cellranger-arc count \\
                 --id={params.prefix} \\
                 --reference={params.reference} \\
-                --libraries={input.lib} \\
-                {params.introns} \\
+                --libraries={input.lib} {params.introns} \\
             2>{log.err} 1>{log.log}
         fi
         """
@@ -140,5 +132,7 @@ rule sampleCleanup:
         cr_temp = join(workpath, "{sample}", "SC_ATAC_GEX_COUNTER_CS")
     shell:
         """
-        rm -r {params.cr_temp}
+        if [ -d '{params.cr_temp}' ]; then
+            rm -r {params.cr_temp}
+        fi
         """
diff --git a/workflow/rules/vdj.smk b/workflow/rules/vdj.smk
index 6c1a8a7..9979650 100644
--- a/workflow/rules/vdj.smk
+++ b/workflow/rules/vdj.smk
@@ -105,5 +105,7 @@ rule sampleCleanup:
         cr_temp = join(workpath, "{sample}", "SC_VDJ_ASSEMBLER_CS")
     shell:
         """
-        rm -r {params.cr_temp}
+        if [ -d '{params.cr_temp}' ]; then
+            rm -r {params.cr_temp}
+        fi
         """