Skip to content

Commit

Permalink
- User defined transcripts were being used as a filter rather than a …
Browse files Browse the repository at this point in the history
…priority order. The filtering step has been eliminated. Fixes #4918 (#4931)

- User-defined transcripts were being used as a filter rather than as a priority order. The filtering step has been eliminated. Closes #4918
- Fixed a previously unidentified issue where locus-level ranking was reversed. Updated tests. This was identified thanks to the thousands of tests in Funcotator (only one failed, but that was all it took).
  • Loading branch information
LeeTL1220 authored Jun 22, 2018
1 parent 4723490 commit ee7c717
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1971,7 +1971,8 @@ private static synchronized final SAMSequenceDictionary initializeB37SequenceDic
public static boolean isFuncotationInTranscriptList( final GencodeFuncotation funcotation,
final Set<String> acceptableTranscripts ) {
if ( funcotation.getAnnotationTranscript() != null ) {
return acceptableTranscripts.contains( getTranscriptIdWithoutVersionNumber(funcotation.getAnnotationTranscript()) );
final List<String> acceptableTranscriptsWithoutVersionNumbers = acceptableTranscripts.stream().map(tx -> getTranscriptIdWithoutVersionNumber(tx)).collect(Collectors.toList());
return acceptableTranscriptsWithoutVersionNumbers.contains( getTranscriptIdWithoutVersionNumber(funcotation.getAnnotationTranscript()) );
}
else {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public Comparator<GencodeFuncotation> getComparator(final Set<String> userReques
*/
CANONICAL {
public Comparator<GencodeFuncotation> getComparator(final Set<String> userRequestedTranscripts) {
return new CannonicalGencodeFuncotationComparator(userRequestedTranscripts);
return new CanonicalGencodeFuncotationComparator(userRequestedTranscripts);
}
},

Expand All @@ -169,7 +169,7 @@ public Comparator<GencodeFuncotation> getComparator(final Set<String> userReques
*/
ALL {
public Comparator<GencodeFuncotation> getComparator(final Set<String> userRequestedTranscripts) {
return new CannonicalGencodeFuncotationComparator(userRequestedTranscripts);
return new CanonicalGencodeFuncotationComparator(userRequestedTranscripts);
}
};

Expand Down Expand Up @@ -269,18 +269,17 @@ public ComparatorByLocusLevel(){}
@Override
public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
// Check locus/curation levels:
// NOTE: For this field you want LOW, not high. Therefore the final comparison starts with `b`, not `a`:
if ( (a.getLocusLevel() != null) && (b.getLocusLevel() == null) ) {
return -1;
}
else if ( (a.getLocusLevel() == null ) && (b.getLocusLevel() != null) ) {
return 1;
}
else if ( (a.getLocusLevel() != null) && (b.getLocusLevel() != null) && (!a.getLocusLevel().equals(b.getLocusLevel())) ) {
return b.getLocusLevel().compareTo( a.getLocusLevel() );
else if ( (a.getLocusLevel() == null ) && (b.getLocusLevel() == null) ) {
return 0;
}
else {
return 0;
return a.getLocusLevel().compareTo( b.getLocusLevel() );
}
}
}
Expand Down Expand Up @@ -314,6 +313,7 @@ public ComparatorByTranscriptSequenceLength(){}
@Override
public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
// Check transcript sequence length:
// Note that since we want longer transcripts to be sorted earlier in the list, we reverse b and a in the last check.
if ( (a.getTranscriptLength() != null) && (b.getTranscriptLength() == null) ) {
return -1;
}
Expand Down Expand Up @@ -404,10 +404,10 @@ public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
}

/**
* Comparator class for Cannonical order.
* Comparator class for Canonical order.
* Complex enough that a Lambda would be utter madness.
*/
static class CannonicalGencodeFuncotationComparator implements Comparator<GencodeFuncotation> {
static class CanonicalGencodeFuncotationComparator implements Comparator<GencodeFuncotation> {

private final Comparator<GencodeFuncotation> byUserTranscript;
private final Comparator<GencodeFuncotation> byIgrStatus;
Expand All @@ -420,7 +420,7 @@ static class CannonicalGencodeFuncotationComparator implements Comparator<Gencod

private final Comparator<GencodeFuncotation> chainedComparator;

public CannonicalGencodeFuncotationComparator( final Set<String> userRequestedTranscripts ) {
public CanonicalGencodeFuncotationComparator(final Set<String> userRequestedTranscripts ) {
byUserTranscript = new ComparatorByUserTranscript(userRequestedTranscripts);
byIgrStatus = new ComparatorByIgrStatus();
byVariantClassification = new ComparatorByVariantClassification();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {

/**
* Comparator to be used when sorting {@link Funcotation}s created by this {@link GencodeFuncotationFactory}.
* Will be either {@link TranscriptSelectionMode.BestEffectGencodeFuncotationComparator} or {@link TranscriptSelectionMode.CannonicalGencodeFuncotationComparator}.
* Will be either {@link TranscriptSelectionMode.BestEffectGencodeFuncotationComparator} or {@link TranscriptSelectionMode.CanonicalGencodeFuncotationComparator}.
*/
private final Comparator<GencodeFuncotation> gencodeFuncotationComparator;

Expand Down Expand Up @@ -292,10 +292,6 @@ private void sortAndFilterInPlace(final List<GencodeFuncotation> gencodeFuncotat
// Get our "Best Transcript" from our list.
sortFuncotationsByTranscriptForOutput(gencodeFuncotationList);

// Now we have to filter out the output gencodeFuncotations if they are not on the list the user provided:
// TODO: Is this correct behavior? The sorting takes care of ordering the transcripts. See https://github.com/broadinstitute/gatk/issues/4918
filterAnnotationsByUserTranscripts(gencodeFuncotationList, userRequestedTranscripts);

// Since the initial query was done on the entire gene footprint, we need to get rid of every transcript that does not overlap the variant at all (not even in flank)
// i.e. IGR.
filterAnnotationsByIGR(gencodeFuncotationList);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,7 @@ public enum FeatureTag {
/** shares an identical CDS but has alternative 3' UTR with respect to a reference variant. */
ALTERNATIVE_5_UTR("alternative_5_UTR"),

// NOTE: The ordering of the APPRIS_* tags below is also used for sorting. Do not re-order them.
/** Transcript expected to code for the main functional isoform based on a range of protein features (APPRIS pipeline). */
APPRIS_PRINCIPAL("appris_principal"),

Expand Down
Loading

0 comments on commit ee7c717

Please sign in to comment.