Skip to content

Commit

Permalink
Added a '--prefer-mane-transcripts' mode that enforces MANE_Select ta…
Browse files Browse the repository at this point in the history
…gged Gencode transcripts where possible
  • Loading branch information
jamesemery committed Oct 18, 2024
1 parent a070efc commit fadb5fe
Show file tree
Hide file tree
Showing 28 changed files with 204 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ public abstract class BaseFuncotatorArgumentCollection implements Serializable {
)
public TranscriptSelectionMode transcriptSelectionMode = FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE;

// Advanced, optional flag: when set, MANE-tagged Gencode transcripts are preferred over all others where any exist.
@Advanced
@Argument(
        fullName = FuncotatorArgumentDefinitions.PREFER_MANE_TRANSCRIPT_MODE,
        optional = true,
        doc = "If this flag is set, Funcotator will only consider 'MANE_Plus_Clinical' transcripts if any are present for a given variant; otherwise it will only consider 'MANE_Select' transcripts if any are present. If neither tag is present it uses the default behavior."
)
public boolean MANETranscriptMode = false;

@Argument(
fullName = FuncotatorArgumentDefinitions.TRANSCRIPT_LIST_LONG_NAME,
optional = true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ public void onTraversalStart() {
new FlankSettings(0,0),
true,
funcotatorArgs.minNumBasesForValidSegment,
funcotatorArgs.spliceSiteWindow
funcotatorArgs.spliceSiteWindow,
funcotatorArgs.MANETranscriptMode
).stream()
.filter(DataSourceFuncotationFactory::isSupportingSegmentFuncotation)
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,8 @@ public void onTraversalStart() {
new FlankSettings(funcotatorArgs.fivePrimeFlankSize, funcotatorArgs.threePrimeFlankSize),
false,
funcotatorArgs.minNumBasesForValidSegment,
funcotatorArgs.spliceSiteWindow
funcotatorArgs.spliceSiteWindow,
funcotatorArgs.MANETranscriptMode
);

logger.info("Initializing Funcotator Engine...");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class FuncotatorArgumentDefinitions {
public static final String TRANSCRIPT_SELECTION_MODE_LONG_NAME = "transcript-selection-mode";
public static final TranscriptSelectionMode TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE = TranscriptSelectionMode.CANONICAL;

/** Full (long) name of the command-line argument that enables preferring MANE-tagged Gencode transcripts. */
public static final String PREFER_MANE_TRANSCRIPT_MODE = "prefer-mane-transcripts";

/**
* Do not give this a static default value or the integration tests will get hosed.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ private static boolean isValidDirectory(final Path p) {
* ignored for those that don't.
* @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid.
* @param spliceSiteWindowSize The number of bases on either side of a splice site for a variant to be a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE} variant.
* @param preferMANETranscriptsWhereApplicable If this is set, in GencodeFuncotationFactory, we will only emit MANE transcripts if any are available for a given variant, otherwise behaves as normal.
* @return A {@link List} of {@link DataSourceFuncotationFactory} given the data source metadata, overrides, and transcript reporting priority information.
*/
public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFactoriesForDataSources(final Map<Path, Properties> dataSourceMetaData,
Expand All @@ -340,7 +341,8 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
final FlankSettings flankSettings,
final boolean doAttemptSegmentFuncotationForTranscriptDatasources,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean preferMANETranscriptsWhereApplicable) {
Utils.nonNull(dataSourceMetaData);
Utils.nonNull(annotationOverridesMap);
Utils.nonNull(transcriptSelectionMode);
Expand Down Expand Up @@ -379,7 +381,7 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
case GENCODE:
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class, false);
funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode,
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize);
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize, preferMANETranscriptsWhereApplicable);
break;
case VCF:
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class, false);
Expand Down Expand Up @@ -596,7 +598,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
final FlankSettings flankSettings,
final boolean isSegmentFuncotationEnabled,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean onlyUseMANETranscriptsWhenApplicable) {
Utils.nonNull(dataSourceFile);
Utils.nonNull(dataSourceProperties);
Utils.nonNull(annotationOverridesMap);
Expand Down Expand Up @@ -626,7 +629,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
ncbiBuildVersion,
isSegmentFuncotationEnabled,
minBasesForValidSegment,
spliceSiteWindowSize
spliceSiteWindowSize,
onlyUseMANETranscriptsWhenApplicable
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {
*/
private boolean isSegmentFuncotationEnabled;

/**
* If this is true, only MANE transcripts will be used for funcotation creation when at least one is present.
*/
private boolean onlyUseMANETranscripts;

//==================================================================================================================
// Constructors:

Expand Down Expand Up @@ -354,7 +359,7 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,

this(gencodeTranscriptFastaFilePath, version, name, transcriptSelectionMode, userRequestedTranscripts,
annotationOverrides, mainFeatureInput, flankSettings, isDataSourceB37, ncbiBuildVersion,
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE);
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE, false);
}

/**
Expand Down Expand Up @@ -385,7 +390,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,
final String ncbiBuildVersion,
final boolean isSegmentFuncotationEnabled,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean preferMANETranscriptsWhereApplicable) {

super(mainFeatureInput, minBasesForValidSegment);

Expand Down Expand Up @@ -429,6 +435,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,

// Initialize overrides / defaults:
initializeAnnotationOverrides( annotationOverrides );

this.onlyUseMANETranscripts = preferMANETranscriptsWhereApplicable; //todo preferMANETranscriptsWhereApplicable;
}

private Path localizeGencodeTranscriptFastaFile( final Path gencodeTranscriptFastaFilePath ) {
Expand Down Expand Up @@ -622,6 +630,27 @@ private static List<GencodeGtfGeneFeature> convertFeaturesToGencodeGtfGeneFeatur
.collect(Collectors.toList());
}

/**
 * Restricts a list of transcripts to its MANE-tagged subset when such a subset exists.
 *
 * Precedence: transcripts tagged {@code MANE_PLUS_CLINICAL} if there are any, otherwise transcripts tagged
 * {@code MANE_SELECT} if there are any, otherwise the input list unchanged.
 *
 * @param transcripts {@link List} of Gencode transcripts to possibly filter.
 * @return The transcripts carrying the highest-precedence MANE tag found in {@code transcripts}, or
 *         {@code transcripts} itself when no transcript carries either tag.
 */
private List<GencodeGtfTranscriptFeature> filterToMANETranscripts(final List<GencodeGtfTranscriptFeature> transcripts) {
    // MANE_Plus_Clinical takes precedence: if any transcript has it, nothing else is returned.
    final List<GencodeGtfTranscriptFeature> plusClinicalTranscripts = transcripts.stream()
            .filter(g -> hasTag(g, MANE_PLUS_CLINICAL)).toList();
    if (!plusClinicalTranscripts.isEmpty()) {
        return plusClinicalTranscripts;
    }

    // Next preference: MANE_Select transcripts.
    final List<GencodeGtfTranscriptFeature> maneSelectTranscripts = transcripts.stream()
            .filter(g -> hasTag(g, MANE_SELECT)).toList();
    if (!maneSelectTranscripts.isEmpty()) {
        return maneSelectTranscripts;
    }

    // No MANE-tagged transcripts at all: fall back to the full, unfiltered list.
    return transcripts;
}

/**
* {@inheritDoc}
Expand Down Expand Up @@ -853,7 +882,7 @@ static boolean isVariantInCodingRegion(final GencodeFuncotation.VariantClassific
*/
private List<GencodeFuncotation> createFuncotationsHelper(final VariantContext variant, final Allele altAllele, final GencodeGtfGeneFeature gtfFeature, final ReferenceContext reference) {

final List<GencodeGtfTranscriptFeature> transcriptList;
List<GencodeGtfTranscriptFeature> transcriptList;

// Only get basic transcripts if we're using data from Gencode:
if ( gtfFeature.getGtfSourceFileType().equals(GencodeGtfCodec.GTF_FILE_TYPE_STRING) ) {
Expand All @@ -863,6 +892,11 @@ private List<GencodeFuncotation> createFuncotationsHelper(final VariantContext v
transcriptList = gtfFeature.getTranscripts();
}

// Filter out the non-MANE_Select/MANE_Plus_Clinical transcripts if we're only using MANE transcripts:
if (onlyUseMANETranscripts) {
transcriptList = filterToMANETranscripts(transcriptList);
}

return createFuncotationsHelper(variant, altAllele, reference, transcriptList);
}

Expand Down Expand Up @@ -979,9 +1013,14 @@ static final GencodeFuncotation createDefaultFuncotationsOnProblemVariant( final

private static boolean isBasic(final GencodeGtfTranscriptFeature transcript) {
    // A transcript counts as "basic" when it carries the `basic` feature tag:
    final GencodeGTFFieldConstants.FeatureTag basicTag = GencodeGTFFieldConstants.FeatureTag.BASIC;
    return hasTag(transcript, basicTag);
}

/**
 * Checks whether the given transcript carries the given feature tag.
 *
 * @param transcript The transcript whose optional fields are inspected.
 * @param tag The {@link GencodeGTFFieldConstants.FeatureTag} to look for.
 * @return {@code true} if at least one optional field named "tag" has a value equal to {@code tag.toString()};
 *         {@code false} otherwise.
 */
private static boolean hasTag(final GencodeGtfTranscriptFeature transcript, final GencodeGTFFieldConstants.FeatureTag tag) {
    // Match only on the requested tag (no hard-coded BASIC check), stopping at the first hit:
    return transcript.getOptionalFields().stream()
            .anyMatch( f -> f.getName().equals("tag") && f.getValue().equals(tag.toString()) );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ public void testGetFuncotationFactoriesAndCreateFuncotationMapForVariant(final F
new FlankSettings(0, 0),
false,
FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT,
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE)
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE,
false)
);

for (int i = 0; i < entireVcf.getRight().size(); i++) {
Expand Down
Loading

0 comments on commit fadb5fe

Please sign in to comment.