Skip to content

Commit

Permalink
set non-external samples' GENE_ANNOTATION to null if provided
Browse files Browse the repository at this point in the history
  • Loading branch information
AtaJadidAhari committed Oct 14, 2024
1 parent b1b623e commit 37b3c57
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion drop/config/SampleAnnotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def __init__(self, file, root, genome):
# external counts
self.extGeneCountIDs = self.createGroupIds(file_type="GENE_COUNTS_FILE", sep=',')
self.extSpliceCountIDs = self.createGroupIds(file_type="SPLICE_COUNTS_DIR", sep=',')

self.checkNonExternalGeneAnnotation()

def parse(self, sep='\t'):
"""
read and check sample annotation for missing columns
Expand Down Expand Up @@ -329,3 +330,10 @@ def getIDsByGroup(self, group, assay="RNA"):
def getSampleIDs(self, file_type):
    """
    List the IDs registered for one file type.

    :param file_type: value passed through to `subsetFileMapping`
    :return: the "ID" entry of `subsetFileMapping(file_type)` as a new list
    """
    return list(self.subsetFileMapping(file_type)["ID"])

def checkNonExternalGeneAnnotation(self):
    """
    Blank out `GENE_ANNOTATION` for samples that are not in an external group.

    A group is considered external when `self.extGeneCountIDs` holds at least
    one entry for it. Rows of `self.annotationTable` whose `DROP_GROUP` is not
    one of those groups are treated as non-external. If any of these rows has
    a non-missing `GENE_ANNOTATION`, a warning is logged and `GENE_ANNOTATION`
    is set to "" for the non-external rows. Rows belonging to external groups
    are left untouched.

    :return: None; `self.annotationTable` is modified in place
    """
    external_groups = {
        g for g in self.extGeneCountIDs if len(self.extGeneCountIDs[g]) > 0
    }

    # One boolean row mask serves both the check and the assignment. The
    # previous code passed the filtered DataFrame to `isin`, which compares
    # against its column names and therefore selected (and blanked) rows of
    # external groups as well.
    is_non_external = ~self.annotationTable['DROP_GROUP'].isin(external_groups)
    has_annotation = self.annotationTable['GENE_ANNOTATION'].notna()
    n_provided = int((is_non_external & has_annotation).sum())

    if n_provided > 0:
        logger.info(
            "WARNING: Found %d samples that had `GENE_ANNOTATION` provided in "
            "sample annotation table but are not external samples. "
            "The provided GENE_ANNOTATIONs are ignored.\n" % n_provided
        )
        self.annotationTable.loc[is_non_external, "GENE_ANNOTATION"] = ""

0 comments on commit 37b3c57

Please sign in to comment.